
118A1075

Tejas Sawant
BE-CE-D

Experiment-4
Aim: To implement an N-gram (Bigram) model.
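A bigram model predicts each word from the single word that precedes it, using the maximum-likelihood estimate P(w2 | w1) = count(w1 w2) / count(w1), i.e. the number of times w2 follows w1 divided by the number of times w1 occurs. The program below builds these counts from a small paragraph and uses them to predict the most probable next word for a user-supplied word.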
Program:

import operator
import nltk

# nltk.download('punkt')  # uncomment on the first run to download the sentence tokenizer

def readData():
    # Read the sample paragraph, split it into sentences, and then into words.
    text = ('Great course easy to understand. Great course good textbook. Great '
            'course good teacher. Hard assignment great content. Easy assignment '
            'great course.')
    print("The given paragraph is:\n", text)
    data = nltk.tokenize.sent_tokenize(text)
    words = []
    for sentence in data:
        for word in sentence.split():
            words.append(word)
    return words

def bigram(data):
    # Collect bigrams and maintain unigram and bigram frequency counts.
    Bigrams = []
    bigramCounts = {}
    unigramCounts = {}
    for i in range(len(data) - 1):
        # Skip bigrams whose second word is capitalised, i.e. starts a new sentence.
        if data[i + 1].islower():
            Bigrams.append((data[i], data[i + 1]))
            if (data[i], data[i + 1]) in bigramCounts:
                bigramCounts[(data[i], data[i + 1])] += 1
            else:
                bigramCounts[(data[i], data[i + 1])] = 1
        if data[i] in unigramCounts:
            unigramCounts[data[i]] += 1
        else:
            unigramCounts[data[i]] = 1
    return Bigrams, unigramCounts, bigramCounts

def calculateBigramProb(Bigrams, unigramCounts, bigramCounts):
    # Maximum-likelihood estimate: P(w2 | w1) = count(w1, w2) / count(w1).
    listOfProbability = {}
    for bigram in Bigrams:
        word1 = bigram[0]
        listOfProbability[bigram] = bigramCounts.get(bigram) / unigramCounts.get(word1)
    return listOfProbability

def getProbableNextWord(word1, BigramsKeyPair):
    # Among all bigrams that start with word1, pick the one with the highest probability.
    Bigrams = BigramsKeyPair.keys()
    shortlistedBigrams = []
    keyPair = {}
    for bigram in Bigrams:
        if word1 == bigram[0]:
            shortlistedBigrams.append(bigram)
    for bigram in shortlistedBigrams:
        keyPair[bigram] = BigramsKeyPair.get(bigram)
    mostProbableBigram = max(keyPair.items(), key=operator.itemgetter(1))
    # mostProbableBigram is a (bigram, probability) pair; return the predicted word and its probability.
    return mostProbableBigram[0][1], mostProbableBigram[1]

data = readData()
print('Tokenized form:')
print(data)
Bigrams, unigramCounts, bigramCounts = bigram(data)
print("Word Frequency:\n", unigramCounts)
listOfProbability = calculateBigramProb(Bigrams, unigramCounts, bigramCounts)
word1 = input("Prediction Word:")
mpw, probability = getProbableNextWord(word1, listOfProbability)
print("Most probable next word:", mpw)

Output:

Conclusion:
Thus, we have successfully implemented the N-gram (Bigram) model.
