# Professional Documents
# Culture Documents
# Package setup ----
# Install rtweet only when it is missing, so re-running the script does not
# re-download the package every time.
if (!requireNamespace("rtweet", quietly = TRUE)) {
  install.packages("rtweet")
}
library(rtweet)
# tidyverse supplies the pipe (`%>%`) and the stringr `str_*()` helpers
# used by `clean_tweets()`.
library(tidyverse)
#' Clean raw tweet text for text analysis
#'
#' Strips URLs, @mentions, hashtags, punctuation and the leading retweet
#' marker, turns newlines into spaces, then lower-cases and trims the text.
#' Every step is a vectorised stringr call, so the function works on a whole
#' character vector at once.
#'
#' @param x A character vector of tweet text.
#' @return A character vector the same length as `x` with the cleaned text.
clean_tweets <- function(x) {
  x %>%
    # Remove URLs. Match only up to the next whitespace so that the words
    # following a link are kept (a greedy `.*` swallows the rest of the line).
    str_remove_all(" ?(f|ht)tps?://\\S+") %>%
    # Remove mentions e.g. "@my_account"
    str_remove_all("@[[:alnum:]_]{4,}") %>%
    # Remove hashtags
    str_remove_all("#[[:alnum:]_]+") %>%
    # Replace "&" (bare, or as the "&amp;" character reference) with "and"
    str_replace_all("&(amp;)?", "and") %>%
    # Remove punctuation, using a standard character class
    str_remove_all("[[:punct:]]") %>%
    # Remove "RT" / "RT:" from the beginning of retweets; the word boundary
    # keeps words that merely start with "RT" intact.
    str_remove_all("^RT\\b:?") %>%
    # Replace any newline characters with a space
    str_replace_all("\\n", " ") %>%
    # Make everything lowercase
    str_to_lower() %>%
    # Remove any leading/trailing whitespace around the text
    str_trim("both")
}
# Run the cleaning pipeline on `tweets`; at top level the result is printed.
# NOTE(review): `tweets` is not defined in this section — presumably a
# character vector of tweet text; confirm against the code that creates it.
clean_tweets(tweets)