Professional Documents
Culture Documents
DA-1
NAME: V.J.KARTHIK
REG. NO. 18BCE0413
import string
#TEXT IS TAKEN FROM THE TXT FILE AS ABOVE
# Translation table that deletes every character in string.punctuation.
t = str.maketrans("", "", string.punctuation)
# Remove the punctuation from `text` (defined outside this snippet) and
# fold the result to lowercase in a single expression.
new_t = text.translate(t).lower()
print(new_t)
# Strip punctuation, lowercase the text, and drop English stop words.
import string

import nltk
# These two names are used below but were never imported, which raised
# NameError as soon as the script reached them.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources used below.
# NOTE(review): recent NLTK releases may need 'punkt_tab' rather than
# 'punkt' for word_tokenize -- confirm against the installed version.
nltk.download('stopwords')
nltk.download('punkt')

# `text` is not defined in this snippet; it must already hold the paragraph.
t = str.maketrans(dict.fromkeys(string.punctuation))
new_t = text.translate(t)
new_t = new_t.lower()
example_sent = new_t

# A set makes the membership test in the filter below cheap.
stop_words = set(stopwords.words('english'))
print("\n STOP WORDS \n", stop_words)

word_tokens = word_tokenize(example_sent)
filtered_sentence = [w for w in word_tokens if w not in stop_words]
print("\n FILTERED PARAGRAPH:\n", " ".join(filtered_sentence))
c) Collect those words that occur only once in the
paragraph and print them.
CODE:
#18BCE0413-V.J.KARTHIK
# Print the words that occur exactly once in the stop-word-filtered paragraph.
import string
from collections import Counter

import nltk
# These two names are used below but were never imported (NameError).
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')

# `text` is not defined in this snippet; it must already hold the paragraph.
t = str.maketrans(dict.fromkeys(string.punctuation))
new_t = text.translate(t)
new_t = new_t.lower()
example_sent = new_t

stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(example_sent)
filtered_sentence = [w for w in word_tokens if w not in stop_words]

S = " ".join(filtered_sentence)
I = S.split(" ")
print("\n FILTERED PARAGRAPH \n", S)
# The literal was split across two lines, which is a syntax error; it is
# rejoined here into a single string.
print("\n LIST OF WORDS APPEARING ONLY ONCE IN FILTERED PARAGRAPH-")

# Count whole words. The previous S.count(j) counted substrings of the
# joined paragraph, so a word such as "art" was also counted inside "party"
# and wrongly excluded.
word_counts = Counter(I)
for j, occurrences in word_counts.items():
    if occurrences == 1 and j.isalpha():
        print(j, end=",")
d) Collect those words that occur only twice in the
paragraph and print them.
CODE:
#18BCE0413-V.J.KARTHIK
# Print the words that occur exactly twice in the stop-word-filtered paragraph.
import string
from collections import Counter

import nltk
# These two names are used below but were never imported (NameError).
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download('stopwords')
nltk.download('punkt')

# `text` is not defined in this snippet; it must already hold the paragraph.
t = str.maketrans(dict.fromkeys(string.punctuation))
new_t = text.translate(t)
new_t = new_t.lower()
example_sent = new_t

stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(example_sent)
filtered_sentence = [w for w in word_tokens if w not in stop_words]

S = " ".join(filtered_sentence)
I = S.split(" ")
print("\n FILTERED PARAGRAPH \n", S)
# The literal was split across two lines, which is a syntax error; it is
# rejoined here into a single string.
print("\n LIST OF WORDS APPEARING ONLY TWICE IN FILTERED PARAGRAPH-")

# Count whole words rather than substrings of the joined paragraph, and
# walk the distinct words so each match is printed once instead of once
# per occurrence.
word_counts = Counter(I)
for j, occurrences in word_counts.items():
    if occurrences == 2 and j.isalpha():
        print(j, end=",")
e) Make a dictionary of these 4 types of words and apply
stemming to reduce inflected words to their word stem,
base or root form.
CODE:
#18BCE0413-V.J.KARTHIK
import nltk