# You are on page 1 of 1

from urllib import request

def processRawText(textURL):
    """Download a text document and report basic vocabulary statistics.

    Relies on the module-level names ``request`` (``urllib.request``) and
    ``nltk``; the NLTK tokenizer data must already be installed.

    Args:
        textURL: URL of the text document to fetch.

    Returns:
        A 4-tuple ``(noofwords, noofunqwords, wordcov, maxfreq)``:
        the number of lower-cased tokens, the number of distinct tokens,
        the integer ratio ``noofwords // noofunqwords``, and the most
        frequent token.  For a document that yields no tokens the result
        is ``(0, 0, 0, None)``.
    """
    # Close the connection deterministically and turn the raw bytes into
    # text; the tokenizer works on ``str``, not ``bytes``.
    with request.urlopen(textURL) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        textcontent = response.read().decode(charset)

    # Tokenize the original text first, then normalise case per token.
    tokenizedlcwords = [word.lower() for word in nltk.word_tokenize(textcontent)]

    noofwords = len(tokenizedlcwords)
    if noofwords == 0:
        # Nothing to count: avoid dividing by zero and asking an empty
        # frequency distribution for its maximum.
        return 0, 0, 0, None

    noofunqwords = len(set(tokenizedlcwords))
    # Floor division matches int(a / b) for positive ints without a float.
    wordcov = noofwords // noofunqwords
    # NOTE(review): the caller unpacks a fourth value named ``maxfreq``;
    # it is taken here to be the most frequent token — confirm.
    maxfreq = nltk.FreqDist(tokenizedlcwords).max()
    return noofwords, noofunqwords, wordcov, maxfreq

if __name__ == '__main__':
    # The URL of the document to analyse is read from standard input.
    textURL = input()

    # Unpack the bundled NLTK data archive into the working directory,
    # but only when an nltk_data directory is not already there.
    if not os.path.exists(os.getcwd() + "/nltk_data"):
        with zipfile.ZipFile("nltk_data.zip", 'r') as zip_ref:
            zip_ref.extractall(os.getcwd())

    noofwords, noofunqwords, wordcov, maxfreq = processRawText(textURL)

    # One result per line, in the order they are returned.
    print(noofwords)
    print(noofunqwords)
    print(wordcov)
    print(maxfreq)

# You might also like