Professional Documents
Culture Documents
NLP Lab 1
NLP Lab 1
# Tokenization demo: split raw text into sentences, then split the first
# sentence into word tokens using NLTK.
import nltk
import nltk as n
from nltk.tokenize import word_tokenize

# 'punkt' is the pretrained sentence-boundary model required by
# sent_tokenize/word_tokenize; download is a no-op if already cached.
nltk.download('punkt')

text = "This is sample text. How are you ?"

# Sentence segmentation: produces a list of sentence strings.
sentences = n.sent_tokenize(text)

# Word tokenization of the first sentence only.
words = word_tokenize(sentences[0])

print(sentences)
print(words)
# Stop-word removal + part-of-speech tagging demo: for each sentence in the
# text, drop English stop words and POS-tag the remaining tokens.
import nltk
import nltk as n
from nltk.corpus import stopwords

# Required NLTK resources: stop-word lists and the default POS tagger model.
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

text = "This is sample text. How are you ?"

# Set gives O(1) membership tests during filtering.
stop_words = set(stopwords.words("english"))

sentences = n.sent_tokenize(text)
for sentence in sentences:
    wordlist = n.word_tokenize(sentence)
    # Keep only non-stop-word tokens (original used a broken identity
    # comparison `w is stop_words`; membership is what was intended).
    wordlist = [w for w in wordlist if w not in stop_words]
    tagged = n.pos_tag(wordlist)
    print(tagged)