# You are on page 1 of 1

# Read the speech line by line. The file name has to be one unbroken string
# literal: a line break inside the quotes becomes part of the path, and the
# file is then not found.
text_file <- readLines("Speech.txt")
# Join the lines into a single string. A space is used as the separator so
# the last word of one line is not fused with the first word of the next;
# surplus whitespace is collapsed during cleanup.
text_file1 <- paste(text_file, collapse = " ")
# View() opens a viewer window, so only call it from an interactive session.
if (interactive()) {
  View(text_file1)
}
# Let's get to cleanup now.
# Input:  text_file1  - the whole speech as a single string.
# Output: clean_text5 - lower-cased text without punctuation, digits,
#                       stop words or one-letter tokens, with runs of
#                       whitespace collapsed to a single blank.
# removeWords(), stopwords() and stripWhitespace() are provided by the tm
# package, so attach it here instead of relying on the session state.
library(tm)

# convert to lower case
clean_text <- tolower(text_file1)
# remove punctuation: gsub() replaces every match of a pattern in a string,
# and \\W matches any non-word character. The argument is spelled out as
# `replacement` rather than relying on partial matching of `replace`.
clean_text1 <- gsub(pattern = "\\W", replacement = " ", x = clean_text)
# remove digits
clean_text2 <- gsub(pattern = "\\d", replacement = " ", x = clean_text1)
head(clean_text2)
stopwords()
# Drop the stop words plus two extra tokens: "usa" and the stray "â"
# (NOTE(review): "â" looks like encoding residue from the input file -
# confirm against the actual text).
clean_text3 <- removeWords(clean_text2, words = c(stopwords(), "usa", "â"))
# remove single letters
# \\b is a word boundary, so \\b[a-z_]\\b matches a token that is exactly one
# character long. The text is already lower case, so a-z is enough; the
# underscore is listed because \\W treats it as a word character and leaves
# it in the text. The former range [A-z] also spans the ASCII symbols that
# sit between "Z" and "a", and the {1} after \\b had no effect.
clean_text4 <- gsub(
  pattern = "\\b[a-z_]\\b",
  replacement = " ",
  x = clean_text3
)
clean_text5 <- stripWhitespace(clean_text4)
# Word frequencies and word cloud.
# Input:  clean_text5 - cleaned text with words separated by single blanks.
# Output: word_freq   - a table of how often each word occurs.
# wordcloud2() is provided by the wordcloud2 package.
library(wordcloud2)

# Let's get individual words using strsplit. Leading/trailing blanks are
# trimmed first, otherwise the split produces an empty string that would be
# counted (and drawn) as a word.
clean_text6 <- strsplit(trimws(clean_text5), " ", fixed = TRUE)
# strsplit() returns a list; flatten it and drop any remaining empty tokens.
words <- unlist(clean_text6)
words <- words[nzchar(words)]
# frequency of words
word_freq <- table(words)
wordcloud2(
  word_freq,
  color = "random-dark",
  backgroundColor = "cyan",
  size = 0.5,
  shape = "triangle"
)
# Sentiment scoring.
# Input:  clean_text5 - the cleaned speech text.
# Output: new_result  - data frame with one row per sentiment category and
#                       the columns `sentiment` (category name) and `count`.
# get_nrc_sentiment() is provided by the syuzhet package.
library(syuzhet)

# Score the text once and reuse the result; the scoring call was previously
# run twice, with the first result only printed and then thrown away.
sen_result <- get_nrc_sentiment(as.character(clean_text5))
print(sen_result)
# Transpose so that each sentiment category becomes a row, then total each
# row to get one count per category.
sen_result1 <- data.frame(t(sen_result))
new_sen_result <- data.frame(rowSums(sen_result1))
# name rows and columns of the dataframe
names(new_sen_result)[1] <- "count"
new_result <- cbind("sentiment" = rownames(new_sen_result), new_sen_result)
library(ggplot2)

# Bar charts of the sentiment counts held in new_result (columns `sentiment`
# and `count`).
# A `+` that continues a plot has to END a line. When it starts a new line,
# R treats the previous line as a finished statement and evaluates
# `+ggtitle(...)` on its own, so the title is never attached.
# ggplot() + geom_col() replaces the deprecated qplot() and draws the same
# bars: one per sentiment, with height equal to `count`.

# plot the first 8 rows for emotions
emotion_plot <- ggplot(
  new_result[1:8, ],
  aes(x = sentiment, y = count, fill = sentiment)
) +
  geom_col() +
  ggtitle("Martin Luther King ")
# print() makes the plot appear when the script is run with source() as well.
print(emotion_plot)

# plot the last 2 rows for positive and negative
polarity_plot <- ggplot(
  new_result[9:10, ],
  aes(x = sentiment, y = count, fill = sentiment)
) +
  geom_col() +
  ggtitle("Martin Luther King")
print(polarity_plot)

# You might also like