Professional Documents
Culture Documents
ipynb - Colaboratory
NLP Assignment 1
import nltk

# Called with no arguments, nltk.download() starts NLTK's interactive
# downloader instead of fetching one specific package.
nltk.download()
[ ] conll2007........... Dependency Treebanks from CoNLL 2007 (Catalan
Corpus
[ ] extended_omw........ Extended Open Multilingual WordNet
ChaSen format)
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 1/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
[ ] lin_thesaurus....... Lin's Dependency Thesaurus
part-of-speech tags
# Download the individual NLTK data packages by identifier, one call each,
# in the same order as before.
for package_id in (
    'gutenberg',
    'genesis',
    'inaugural',
    'nps_chat',
    'webtext',
    'treebank',
):
    nltk.download(package_id)
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 2/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
True
# Star import kept on purpose: the cells below refer to text2, text4, text5,
# text6 and text7 unqualified (presumably all provided by nltk.book).
from nltk.book import *
Question 1
# Token count versus vocabulary size of text2.
total_tokens = len(text2)
distinct_tokens = len(set(text2))

print("No. of words in text2: ", total_tokens)
print("No. of distinct words in text2: ", distinct_tokens)
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 3/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
Question 2
# Ten highest-PMI trigrams of text6.
colloc = nltk.collocations
finder_t6 = colloc.TrigramCollocationFinder.from_words(text6)
trigram_measures_t6 = colloc.TrigramAssocMeasures()
# Left as a bare expression so a notebook cell echoes the result.
finder_t6.nbest(trigram_measures_t6.pmi, 10)
# Ten highest-PMI trigrams of text7.
colloc = nltk.collocations
finder_t7 = colloc.TrigramCollocationFinder.from_words(text7)
trigram_measures_t7 = colloc.TrigramAssocMeasures()
# Left as a bare expression so a notebook cell echoes the result.
finder_t7.nbest(trigram_measures_t7.pmi, 10)
Question 3
# Create a string, show it, then concatenate it with itself with and
# without a separating space.
my_string = "Hello, I am using Google Colab"
print(my_string)
my_string  # bare expression: echoed by a notebook cell, no effect in a script

# Direct concatenation: the two copies run together with no separator.
res1 = my_string + my_string
print(res1)

# Same two copies, separated by a single space.
final_res = " ".join([my_string, my_string])
print(final_res)
Question 4
# Round trip: list of words -> single sentence -> list of words again.
separator = ' '
my_sent = ["I", "am", "Harshad"]

res1 = separator.join(my_sent)
print(res1)

res2 = res1.split(separator)
print(res2)
I am Harshad
['I', 'am', 'Harshad']
Question 5
# Distinct tokens of text5 whose first character is a lowercase 'b',
# listed in sorted order.
res = sorted(word for word in set(text5) if word.startswith('b'))
print(res)
['b', 'b-day', 'b/c', 'b4', 'babay', 'babble', 'babblein', 'babe', 'babes', 'babi', 'babies', 'babiess', 'baby', 'babycakeses', 'bac
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 5/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
Question 6
# Frequency of every four-character token in text7, most frequent first.
# The tokens are deliberately NOT de-duplicated with set() before counting:
# counting over a set makes every frequency equal to 1, which hides which
# words are actually common.
res = [token for token in text7 if len(token) == 4]
freq = nltk.FreqDist(res)
# most_common() with no argument returns every (token, count) pair.
# Left as a bare expression so a notebook cell echoes the result.
freq.most_common()
[('bell', 1),
('5.70', 1),
('50.1', 1),
('gilt', 1),
('damn', 1),
('feel', 1),
('Tire', 1),
('10th', 1),
('down', 1),
('Dogs', 1),
('Marc', 1),
('Lyle', 1),
('1925', 1),
('term', 1),
('neat', 1),
('*-16', 1),
('*-81', 1),
('0.82', 1),
('bomb', 1),
('year', 1),
('6.53', 1),
('Part', 1),
('kind', 1),
('bids', 1),
('28.5', 1),
('Rev.', 1),
('Life', 1),
('Hong', 1),
('dean', 1),
('used', 1),
('N.Y.', 1),
('Rate', 1),
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 6/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
('Died', 1),
('3.23', 1),
('well', 1),
('turf', 1),
('item', 1),
('59.9', 1),
('87.5', 1),
('Leon', 1),
('16.7', 1),
('week', 1),
('9.45', 1),
('fees', 1),
('News', 1),
('boom', 1),
('Farm', 1),
('Tiny', 1),
('keep', 1),
('CTBS', 1),
('peal', 1),
('7.63', 1),
('Hahn', 1),
('Only', 1),
('till', 1),
('RATE', 1),
('Nev.', 1),
('body', 1),
Question 7
# Print every all-uppercase token of text6, one per line, in text order
# (repeated tokens are printed each time they occur).
for token in text6:
    if token.isupper():
        print(token)
ARTHUR
FRENCH
GUARD
ARTHUR
FRENCH
GUARDS
ARTHUR
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 7/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
ARTHUR
FRENCH
GUARD
ARTHUR
FRENCH
GUARD
FRENCH
GUARDS
ARTHUR
BEDEVERE
ARTHUR
FRENCH
GUARDS
ARTHUR
FRENCH
GUARDS
ARTHUR
FRENCH
GUARDS
ARTHUR
ARMY
OF
KNIGHTS
HISTORIAN
WIFE
INSPECTOR
OFFICER
HISTORIAN
WIFE
OFFICER
INSPECTOR
OFFICER
BEDEVERE
INSPECTOR
OFFICER
INSPECTOR
OFFICER
OFFICER
RANDOM
RANDOM
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 8/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
RANDOM
OFFICER
OFFICER
OFFICER
OFFICER
INSPECTOR
OFFICER
CAMERAMAN
Question 8
# All four filters run over the distinct tokens of text4, so the
# vocabulary set is built once and shared.
vocabulary = set(text4)

# Tokens ending in "ize".
res_a = [word for word in vocabulary if word.endswith('ize')]
print(res_a)

# Tokens containing the letter "z".
res_b = [word for word in vocabulary if 'z' in word]
print(res_b)

# Tokens containing the sequence "iz".
res_c = [word for word in vocabulary if 'iz' in word]
print(res_c)

# Title-case tokens.
res_d = [word for word in vocabulary if word.istitle()]
print(res_d)
['revitalize', 'prize', 'size', 'disorganize', 'realize', 'militarize', 'characterize', 'minimize', 'sympathize', 'emphasize', 'patr
['patronized', 'blaze', 'civilized', 'realization', 'revitalize', 'zealous', 'colonization', 'prize', 'civilization', 'hazard', 'sub
['patronized', 'civilized', 'realization', 'revitalize', 'colonization', 'prize', 'civilization', 'subsidized', 'organized', 'patron
['Abraham', 'Isaiah', 'Product', 'Brown', 'Decii', 'Greatness', 'Moral', 'Others', 'Believing', 'Upon', 'Founding', 'Afghanistan',
Question 9
# Sample paragraph used for the word-filtering exercises below.
string = '''
Lorem ipsum dolor sit amet consectetur adipisicing elit. Maxime mollitia,
molestiae quas vel sint commodi repudiandae consequuntur voluptatum laborum
numquam blanditiis harum quisquam eius sed odit fugiat iusto fuga praesentium
optio, eaque rerum! Provident similique accusantium nemo autem. Veritatis
obcaecati tenetur iure eius earum ut molestias architecto voluptate aliquam
nihil, eveniet aliquid culpa officia aut! Impedit sit sunt quaerat, odit,
tenetur error, harum nesciunt ipsum debitis quas aliquid. Reprehenderit,
quia. Quo neque error repudiandae fuga? Ipsa laudantium molestias eos
sapiente officiis modi at sunt excepturi expedita sint? Sed quibusdam
recusandae alias error harum maxime adipisci amet laborum. Perspiciatis
minima nesciunt dolorem! Officiis iure rerum voluptates a cumque velit
quibusdam sed amet tempora. Sit laborum ab, eius fugit doloribus tenetur
fugiat, temporibus enim commodi iusto libero magni deleniti quod quam
consequuntur! Commodi minima excepturi repudiandae velit hic maxime
doloremque. Quaerat provident commodi consectetur veniam similique ad
earum omnis ipsum saepe, voluptas, hic voluptates pariatur est explicabo
fugiat, dolorum eligendi quam cupiditate excepturi mollitia maiores labore
suscipit quas? Nulla, placeat. Voluptatem quaerat non architecto ab laudantium
modi minima sunt esse temporibus sint culpa, recusandae aliquam numquam
totam ratione voluptas quod exercitationem fuga. Possimus quis earum veniam
quasi aliquam eligendi, placeat qui corporis!
'''

# Distinct whitespace-separated tokens, built once and reused by every
# filter. Punctuation stays attached to a token (e.g. 'fuga?', 'fugiat,').
words = set(string.split())
print(words)

res1 = [word for word in words if word.startswith('Th')]  # begin with "Th"
res2 = [word for word in words if len(word) > 4]          # longer than 4 chars
res3_list = [len(word) for word in words]
res3 = sum(res3_list) / len(res3_list)                    # mean token length
res4 = [word for word in words if word.endswith('ed')]    # end with "ed"
res5 = [word for word in words if 'z' in word]            # contain "z"
res6 = [word for word in words if 'er' in word]           # contain "er"
res7 = [word for word in words if word.istitle()]         # title-case tokens

print(res1)
print(res2)
print(res3)
print(res4)
print(res5)
print(res6)
print(res7)
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 10/11
25/08/2022, 23:15 I041_Devesh_Pawar_NLP_Assignment1.ipynb - Colaboratory
{'Quo', 'ratione', 'placeat', 'eaque', 'totam', 'fuga?', 'Perspiciatis', 'fugiat,', 'sint', 'voluptatum', 'culpa', 'enim', 'omnis',
[]
['ratione', 'placeat', 'eaque', 'totam', 'fuga?', 'Perspiciatis', 'fugiat,', 'voluptatum', 'culpa', 'omnis', 'doloribus', 'cupiditat
6.610738255033557
['sed', 'Sed']
[]
['Perspiciatis', 'Quaerat', 'error,', 'exercitationem', 'rerum', 'quaerat,', 'Reprehenderit,', 'quaerat', 'error', 'Veritatis', 'rer
['Quo', 'Perspiciatis', 'Ipsa', 'Sit', 'Quaerat', 'Lorem', 'Voluptatem', 'Impedit', 'Possimus', 'Reprehenderit,', 'Officiis', 'Commo
https://colab.research.google.com/drive/1MlKE7WNXzXbr27dEYbHOCxYEMQqVoYhb#scrollTo=uoIEGkcoC9HZ&printMode=true 11/11