Professional Documents
Culture Documents
txt")
# Merge the elements of text_file (defined earlier in the script; presumably
# one element per line of the input file -- confirm) into a single string.
# A space is used as the separator: with collapse = "" the last word of one
# element is glued onto the first word of the next ("end" + "start" becomes
# "endstart"), which corrupts the word counts computed later. Any extra
# spaces are harmless because whitespace is squeezed further down.
text_file1 <- paste(text_file, collapse = " ")
# View() opens a viewer window, so only call it in an interactive session.
if (interactive()) {
  View(text_file1)
}
# ---- Basic clean-up ----
# Lower-case everything so that "Word" and "word" are counted together.
clean_text <- tolower(text_file1)
# gsub() replaces every match of a pattern in a string. "\\W" matches any
# non-word character (punctuation, symbols, whitespace); each match becomes a
# space so that neighbouring words stay separated.
# The argument is spelled out as `replacement` rather than relying on partial
# matching of `replace`.
clean_text1 <- gsub(pattern = "\\W", replacement = " ", x = clean_text)
# Replace every digit with a space as well.
clean_text2 <- gsub(pattern = "\\d", replacement = " ", x = clean_text1)
# Quick look at the result.
head(clean_text2)
# Display the default stop-word list for reference.
stopwords()
# Drop the stop words plus two extra tokens: "usa" and the stray "â"
# (looks like a mis-decoded character left over from the input -- confirm).
# NOTE(review): non-word characters were already turned into spaces above, so
# any stop-word entries that contain an apostrophe can no longer match here --
# confirm whether that matters for this corpus.
clean_text3 <- removeWords(clean_text2, words = c(stopwords(), "usa", "â"))
# Remove one-character leftovers: "\\b" is a word boundary, so the pattern
# matches a single letter (or a lone underscore) standing on its own.
# The previous class [A-z] is an ASCII range that also covers the characters
# [ \ ] ^ _ ` between "Z" and "a"; on this lower-cased, punctuation-free text
# only a-z and "_" can actually occur, so they are listed explicitly. The
# trailing {1} quantifier on the boundary assertion was redundant and is gone.
clean_text4 <- gsub(
  pattern = "\\b[a-z_]\\b",
  replacement = " ",
  x = clean_text3
)
# Collapse the runs of spaces created by the replacements above.
clean_text5 <- stripWhitespace(clean_text4)
# Split the cleaned text into individual words. strsplit() returns a list
# with one character vector per input string.
# The text is trimmed first: a leading space would otherwise produce an
# empty-string token that gets counted (and drawn) as if it were a word.
clean_text6 <- strsplit(trimws(clean_text5), split = " ", fixed = TRUE)
# Frequency of each word; table() accepts the one-element list directly.
word_freq <- table(clean_text6)
# Draw the word cloud from the frequency table.
wordcloud2(
  word_freq,
  color = "random-dark",
  backgroundColor = "cyan",
  size = 0.5,
  shape = "triangle"
)
# Score the cleaned text with get_nrc_sentiment() once and reuse the result.
# The script used to call it twice on the same text, the first time only to
# print the scores; the bare `sen_result` below keeps that printed output.
sen_result <- get_nrc_sentiment(as.character(clean_text5))
sen_result
# Transpose so that every column of sen_result becomes a row.
sen_result1 <- data.frame(t(sen_result))
# Total each row into a single-column data frame.
new_sen_result <- data.frame(rowSums(sen_result1))
# Name the count column and expose the row names as a "sentiment" column.
names(new_sen_result)[1] <- "count"
new_result <- cbind("sentiment" = rownames(new_sen_result), new_sen_result)
library(ggplot2)