You are on page 1 of 1

SAHIL RAJPUT 2002868

NLP LAB 1.ipynb - Colaboratory

# Tokenization

import nltk
import nltk as n
from nltk.tokenize import word_tokenize

# Fetch the Punkt models used by the sentence and word tokenizers below.
nltk.download('punkt')

# Sample input containing two sentences.
text = "This is sample text. How are you ?"

# Split the text into sentences, then split only the FIRST sentence into
# word-level tokens (punctuation becomes its own token).
sentences = n.sent_tokenize(text)
words = n.word_tokenize(sentences[0])
print(sentences)
print(words)

['This is sample text.', 'How are you ?']
['This', 'is', 'sample', 'text', '.']
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!

# Part of speech tagging

import nltk
import nltk as n
from nltk.corpus import stopwords

# Fetch the stop-word lists and the model used by pos_tag.
# NOTE: word/sentence tokenization below also needs the 'punkt' data,
# which this cell does not download itself.
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

text = "This is sample text. How are you ?"
stop_words = set(stopwords.words("english"))
tokenize = n.sent_tokenize(text)

# Tag each sentence separately: tokenize, drop stop words, then POS-tag.
for i in tokenize:
    wordlist = n.word_tokenize(i)
    # Membership test against the stop-word set. The earlier form
    # `not w is stop_words` compared each token's identity with the set
    # object itself, which is never true, so nothing was filtered -- the
    # output recorded under this cell was produced by that version and
    # therefore still lists stop words such as 'is' and 'are'.
    # Tokens are lower-cased for the lookup because the NLTK English
    # stop-word list is lower-case.
    wordlist = [w for w in wordlist if w.lower() not in stop_words]
    tagged = n.pos_tag(wordlist)
    print(tagged)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[('This', 'DT'), ('is', 'VBZ'), ('sample', 'JJ'), ('text', 'NN'), ('.', '.')]
[('How', 'WRB'), ('are', 'VBP'), ('you', 'PRP'), ('?', '.')]

Colab paid products - Cancel contracts here 0s


comple

You might also like