Professional Documents
Culture Documents
Tejas Sawant
BE-CE-D
Experiment-4
Aim: To implement the N-gram (Bigram) language model.
Program:
# nltk.download('punkt')  # only required if NLTK sentence tokenization is used
def readData():
    """Return the training corpus as a list of whitespace-separated tokens.

    Prints the raw paragraph, then tokenizes it.  The original listing
    sentence-tokenized with ``nltk.tokenize.sent_tokenize`` and split each
    sentence on whitespace — but ``import nltk`` was missing, so the code
    raised NameError.  Because the sentences partition the paragraph, the
    concatenated per-sentence splits equal ``text.split()``; using the
    latter produces the identical token list without the nltk dependency.

    Returns:
        list[str]: tokens in order, punctuation still attached
        (e.g. 'understand.').
    """
    text = ('Great course easy to understand. Great course good textbook. '
            'Great course good teacher. Hard assignment great content. '
            'Easy assignment great course.')
    print("The given paragraph is:\n", text)
    return text.split()
def bigram(data):
    """Collect bigrams and frequency counts from a token list.

    A pair (data[i], data[i+1]) is recorded as a bigram only when the
    second token is lowercase — a crude filter that avoids pairing across
    sentence boundaries, since sentence-initial words are capitalized.
    Unigram counts cover every token except the last (every token that can
    start a bigram), matching the denominators used by the probability step.

    The original body re-tested ``i < len(data) - 1`` inside
    ``range(len(data) - 1)`` (always true — removed) and hand-rolled the
    count updates (replaced with ``dict.get(..., 0) + 1``).

    Args:
        data: list of word tokens.
    Returns:
        tuple: (Bigrams, unigramCounts, bigramCounts) where Bigrams is a
        list of (w1, w2) pairs and the counts are plain dicts.
    """
    Bigrams = []
    bigramCounts = {}
    unigramCounts = {}
    for first, second in zip(data, data[1:]):
        if second.islower():
            pair = (first, second)
            Bigrams.append(pair)
            bigramCounts[pair] = bigramCounts.get(pair, 0) + 1
        unigramCounts[first] = unigramCounts.get(first, 0) + 1
    return Bigrams, unigramCounts, bigramCounts
def calculateBigramProb(Bigrams, unigramCounts, bigramCounts):
    """Estimate P(w2 | w1) = count(w1, w2) / count(w1) for each bigram.

    Args:
        Bigrams: list of observed (w1, w2) pairs.
        unigramCounts: dict mapping w1 -> frequency.
        bigramCounts: dict mapping (w1, w2) -> frequency.
    Returns:
        dict mapping each bigram to its maximum-likelihood probability.
    """
    return {
        pair: bigramCounts.get(pair) / unigramCounts.get(pair[0])
        for pair in Bigrams
    }
def getProbableNextWord(word1, listOfProbability):
    """Return (next_word, probability) for the likeliest successor of word1.

    Scans every observed bigram beginning with ``word1`` and keeps the one
    with the highest estimated probability; returns (None, 0.0) when the
    word never starts a bigram.  NOTE: this helper was called by the
    original script but never defined anywhere in the listing, so the
    program crashed with NameError — supplied here.
    """
    mpw, best = None, 0.0
    for (first, second), prob in listOfProbability.items():
        if first == word1 and prob > best:
            mpw, best = second, prob
    return mpw, best


# --- driver script: build the model, then predict interactively ---
data = readData()
print('Tokenized form:')
print(data)
Bigrams, unigramCounts, bigramCounts = bigram(data)
print("Word Frequency:\n", unigramCounts)
listOfProbability = calculateBigramProb(Bigrams, unigramCounts, bigramCounts)
word1 = input("Prediction Word:")
mpw, probability = getProbableNextWord(word1, listOfProbability)
print("Most probable next word:", mpw)
Output:
Conclusion:
Thus, we have successfully implemented the N-gram (Bigram) model.