You are on page 1 of 3

STEMMING AND REMOVAL OF STOP WORDS

KEDAR SANJAY DAMKONDWAR BECOC316

# CODE:
# Stop-word removal and stemming with NLTK.
#   1. Read the input document, drop English stop words, and append the
#      remaining words (each preceded by a space) to FILTERED_PATH.
#   2. Tokenize the same text and print every token next to its Porter stem.
import io

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Input document and output file used by the script.
INPUT_PATH = "C:\\Users\\Sagar Patil\\Desktop\\20_newsgroups\\alt.atheism\\abc.txt"
FILTERED_PATH = 'filteredtext.txt'

stop_words = set(stopwords.words('english'))

# Read the input once; both stages below work on the same text, and the
# context manager guarantees the handle is closed.
with open(INPUT_PATH) as source:
    text = source.read()

# NLTK's English stop-word list is lowercase, so compare case-insensitively;
# otherwise capitalised stop words such as "The" would slip through.
# The word itself is written with its original casing.
kept_words = [r for r in text.split() if r.lower() not in stop_words]

# Open the output a single time instead of once per word. Append mode is
# kept on purpose so existing content of the file is preserved.
with open(FILTERED_PATH, 'a') as appendFile:
    appendFile.write("".join(" " + r for r in kept_words))

ps = PorterStemmer()

# Stemming runs on the full tokenized text (stop words included).
for w in word_tokenize(text):
    print(w, " : ", ps.stem(w))

INPUT FILE:
OUTPUT:

You might also like