You are on page 1of 25

과제 3

201721501 조예슬

2020 10 15

관심있는 해쉬태그의 최근 일주일 간 트윗을 수집

해쉬태그는 ‘#savagelove’

0. 패키지 불러오기
#install.packages("tm")
library(tm) #텍스트마이닝 용 패키지

#install.packages("ggplot2")
library(ggplot2)

library(rtweet)
#install.packages("wordcloud")
library(wordcloud)

## Loading required package: RColorBrewer

library(data.table)
#install.packages("igraph")
library(igraph)

library(RColorBrewer)
library(dplyr)

library(stringr)

library(syuzhet)

1.트위터 연결
# set the NEW credentials
APP_NAME <- "DA_R"
API_KEY <- "HICUeqGrTscHkO61ksfEB3KUJ"
API_KEY_SECRET <- "svN1MqmMEYBjfOvnl9tKvw82jhAsheEGGsKMqFbPsxVRDQVr4K"
ACCESS_TOKEN <- "1307869013381267456-wEy86seKVRa0D1ukqmet2bxzYrqfJ0"
ACCESS_TOKEN_SECRET <- "Bhbo6klXu2tcnIApFbxvOCvUwfI6kdFKyvzNliLHnfyO3"

# connect to twitter app


twitter_token <- create_token(app = APP_NAME, consumer_key = API_KEY, consume
r_secret = API_KEY_SECRET, access_token = ACCESS_TOKEN, access_secret = ACCES
S_TOKEN_SECRET)

2. 어떤 기기로 접속했는지 파악하는 함수 생성


# encode tweet source as iPhone, iPad, Android or Web
# gsub (global substitute): Replace 1st arg with 2nd arg in 3rd arg string
encodeSource <- function(x) {
if(grepl("Twitter for iPhone", x)){ #grep 은 특정 문자열을 찾는 함수
"iphone"
}else if(grepl("Twitter for iPad", x)){
"ipad"
}else if(grepl("Twitter for Android", x)){
"android"
} else if(grepl("Twitter Web App", x)){
"Web"
}else if(grepl("Hootsuite Inc.", x)){
"hootsuite"
}else if(grepl("TweetDeck", x)){
"tweetdeck"
}else {
"others"
}
}

3. 관심있는 해쉬태그의 최근 일주일 간 트윗을 수집


searchTerm <- "#savagelove"
trendingTweets = search_tweets(searchTerm, n=18000, lang = "en", since="2020-
10-17", until="2020-10-24")
class(trendingTweets)

## [1] "tbl_df" "tbl" "data.frame"

head(trendingTweets)

## # A tibble: 6 x 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 994954~ 13197546~ 2020-10-23 21:36:37 AprilDJones "Let~ Twitt~
## 2 126984~ 13197366~ 2020-10-23 20:25:10 KittyCatSo~ "Yes~ Twitt~
## 3 126984~ 13183069~ 2020-10-19 21:44:00 KittyCatSo~ "@Th~ Twitt~
## 4 126984~ 13197131~ 2020-10-23 18:51:42 KittyCatSo~ "@ji~ Twitt~
## 5 126984~ 13183047~ 2020-10-19 21:35:11 KittyCatSo~ "Who~ Twitt~
## 6 126984~ 13183057~ 2020-10-19 21:39:30 KittyCatSo~ "Sti~ Twitt~
## # ... with 84 more variables: display_text_width <dbl>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>,
## # ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <l
ist>,
## # ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <l
ist>,
## # lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## # quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <
int>,
## # quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <
chr>,
## # quoted_name <chr>, quoted_followers_count <int>,
## # quoted_friends_count <int>, quoted_statuses_count <int>,
## # quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## # retweet_source <chr>, retweet_favorite_count <int>,
## # retweet_retweet_count <int>, retweet_user_id <chr>,
## # retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <int>, retweet_friends_count <int>,
## # retweet_statuses_count <int>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>, country <ch
r>,
## # country_code <chr>, geo_coords <list>, coords_coords <list>,
## # bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## # description <chr>, url <chr>, protected <lgl>, followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>

dim(trendingTweets)

## [1] 748 90

# check current rate limit


rateLimit <- rate_limit()
rateLimit[rateLimit$limit > rateLimit$remaining, ]

## # A tibble: 2 x 7
## query limit remaining reset reset_at timestamp
app
## <chr> <int> <int> <drtn> <dttm> <dttm>
<chr>
## 1 applicat~ 180 179 15.02~ 2020-10-25 05:07:16 2020-10-25 04:52:15
DA_R
## 2 search/t~ 180 171 14.85~ 2020-10-25 05:07:06 2020-10-25 04:52:15
DA_R
# perform a quick cleanup/transformation
#View(trendingTweets)

head(trendingTweets$text)

## [1] "Let’s be real. I found #SavageLove to be irritating until the @BTS_tw


t remix made it tolerable. Thought it was called Selfish Love when I first he
ard it. Tho the way Jason Derulo apparently acting my original thought might
have been more aptly named. #SavageLoveBTS #SavageBTSArmy"
## [2] "Yesterday I dreamt that @SUBWAY had an Instagram ad in which @jasonde
rulo sings Sandwich Love instead of Savage Love. And that was the best thing
happened in 2020 https://t.co/JUchG2nHKr #SavageLove #SavageLoveRemix #Subway
#JasonDerulo @instagram"
## [3] "@TheEllenShow please make @jasonderulo sing Sandwich Love in your sho
w https://t.co/iA4ptJqGMW #TheEllenShow #JasonDerulo #SavageLove #SavageLoveR
emix"

## [4] "@jimmyfallon can you show this to @jasonderulo #FallonTonight https:


//t.co/iA4ptJqGMW #SavageLove"

## [5] "Who‘s a friend of Jason Derulo? I wanna see him eating a sandwich and
dance to this song so badly <U+0001F605>@jasonderulo #SandwichLove #SavageLo
ve #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW"

## [6] "Still my favorite Savage Love Remix #SavageLoveRemix #SavageLove #Sa


ndwichlove https://t.co/iA4ptJqGMW @jasonderulo @BTS_twt #BTS @bts_bighit"

trendingTweets$text <- sapply(trendingTweets$text,function(x) iconv(enc2utf8


(x), sub="byte"))
head(trendingTweets$text)

## Let’s be real. I found #SavageLove to be irritating until the @


BTS_twt remix made it tolerable. Thought it was called Selfish Love when I fi
rst heard it. Tho the way Jason Derulo apparently acting my original thought
might have been more aptly named. #SavageLoveBTS #SavageBTSArmy
## "Let<e2>\u0080 셲 be real. I found #SavageLove to be irritating until the @
BTS_twt remix made it tolerable. Thought it was called Selfish Love when I fi
rst heard it. Tho the way Jason Derulo apparently acting my original thought
might have been more aptly named. #SavageLoveBTS #SavageBTSArmy"
## Yesterday I dreamt that @SUBW
AY had an Instagram ad in which @jasonderulo sings Sandwich Love instead of S
avage Love. And that was the best thing happened in 2020 https://t.co/JUchG2n
HKr #SavageLove #SavageLoveRemix #Subway #JasonDerulo @instagram
## "Yesterday I dreamt that @SUBWA
Y had an Instagram ad in which @jasonderulo sings Sandwich Love instead of Sa
vage Love. And that was the best thing happened in 2020 https://t.co/JUchG2nH
Kr #SavageLove #SavageLoveRemix #Subway #JasonDerulo @instagram"
##
@TheEllen
Show please make @jasonderulo sing Sandwich Love in your show https://t.co/iA
4ptJqGMW #TheEllenShow #JasonDerulo #SavageLove #SavageLoveRemix
##
"@TheEllenS
how please make @jasonderulo sing Sandwich Love in your show https://t.co/iA4
ptJqGMW #TheEllenShow #JasonDerulo #SavageLove #SavageLoveRemix"
##

@jimmyfallon can you show this to


@jasonderulo #FallonTonight https://t.co/iA4ptJqGMW #SavageLove
##

"@jimmyfallon can you show this to @


jasonderulo #FallonTonight https://t.co/iA4ptJqGMW #SavageLove"
##
Who‘s a friend of Jason Derulo? I wanna see him eating a sandwich a
nd dance to this song so badly <U+0001F605>@jasonderulo #SandwichLove #Savage
Love #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW
##
"Who<e2>\u0080 쁲 a friend of Jason Derulo? I wanna see him eating a sandwich
and dance to this song so badly <f0>윑<85>@jasonderulo #SandwichLove #Savage
Love #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW"
##
S
till my favorite Savage Love Remix #SavageLoveRemix #SavageLove #Sandwichlov
e https://t.co/iA4ptJqGMW @jasonderulo @BTS_twt #BTS @bts_bighit
##
"St
ill my favorite Savage Love Remix #SavageLoveRemix #SavageLove #Sandwichlove
https://t.co/iA4ptJqGMW @jasonderulo @BTS_twt #BTS @bts_bighit"

#tail(trendingTweets$text)
head(trendingTweets$created_at)

## [1] "2020-10-23 21:36:37 UTC" "2020-10-23 20:25:10 UTC"


## [3] "2020-10-19 21:44:00 UTC" "2020-10-23 18:51:42 UTC"
## [5] "2020-10-19 21:35:11 UTC" "2020-10-19 21:39:30 UTC"

class(trendingTweets$created_at)

## [1] "POSIXct" "POSIXt"

save(trendingTweets, file = "trendingTweets20200917.Rda")

# see how many missing values are there on a per column basis
sapply(trendingTweets, function(x) sum(is.na(x)))
4. 수집된 트윗의 일별 빈도를 비교

trendingTweets$Date <- as.Date(trendingTweets$created_at)


x1 <- unique(trendingTweets$Date)
x1 <- sort(x1, decreasing = T)
str(x1)

## Date[1:7], format: "2020-10-23" "2020-10-22" "2020-10-21" "2020-10-20" "2


020-10-19" ...

ggplot(data = trendingTweets, aes(x = created_at)) + scale_x_datetime(breaks


= "1 day") +
geom_histogram(aes(fill = ..count..)) +
theme(legend.position = "none") +
xlab("Time") + ylab("Number of tweets") +
scale_fill_gradient(low = "midnightblue", high = "aquamarine4")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

5. 수집된 트윗의 감정분포를 비교


#extract timeline tweets
extractTimelineTweets <- function(username,tweetCount){
# timeline tweets
twitterUser <- lookup_users(username)[[1]] #get user id
tweets <- get_timeline(twitterUser, n=tweetCount)
# tweets$text <- sapply(tweets['text'], function(x) iconv(enc2utf8(x), sub=
"byte"))

return(tweets)
}

encodeSentiment <- function(x) {


if(x <= -1){
"1) very negative"
}else if(x > -1 & x < 0){
"2) negative"
}else if(x > 0 & x < 1){
"4) positive"
}else if(x >= 1){
"5) very positive"
}else {
"3) neutral"
}
}

trendingTweets$text <- sapply(trendingTweets['text'], function(x) iconv(enc2u


tf8(x), sub="byte"))
head(trendingTweets$text, 5)

#text

## Let’s be real. I found #SavageLove to be irritating until the @BTS_twt rem


ix made it tolerable. Thought it was called Selfish Love when I first heard i
t. Tho the way Jason Derulo apparently acting my original thought might have
been more aptly named. #SavageLoveBTS #SavageBTSArmy "Let<e2><c2>\u0080<ec>뀼
be real. I found #SavageLove to be irritating until the @BTS_twt remix made
it tolerable. Thought it was called Selfish Love when I first heard it. Tho t
he way Jason Derulo apparently acting my original thought might have been mor
e aptly named. #SavageLoveBTS #SavageBTSArmy"

## Yesterday I dreamt that @SUBWAY had an Instagram ad in which @jasonderulo


sings Sandwich Love instead of Savage Love. And that was the best thing happe
ned in 2020 https://t.co/JUchG2nHKr #SavageLove #SavageLoveRemix #Subway #Jas
onDerulo @instagram "Yesterday I dreamt tha
t @SUBWAY had an Instagram ad in which @jasonderulo sings Sandwich Love inste
ad of Savage Love. And that was the best thing happened in 2020 https://t.co/
JUchG2nHKr #SavageLove #SavageLoveRemix #Subway #JasonDerulo @instagram"
## @TheEllenShow please make @jasonderulo sing Sandwich Love in your show htt
ps://t.co/iA4ptJqGMW #TheEllenShow #JasonDerulo #SavageLove #SavageLoveRemix

"@TheEllenShow please m
ake @jasonderulo sing Sandwich Love in your show https://t.co/iA4ptJqGMW #The
EllenShow #JasonDerulo #SavageLove #SavageLoveRemix"

## @jimmyfallon can you show this to @jasonderulo #FallonTonight https://t.c


o/iA4ptJqGMW #SavageLove

"@jimmyfallon can you s


how this to @jasonderulo #FallonTonight https://t.co/iA4ptJqGMW #SavageLove"

## Who‘s a friend of Jason Derulo? I wanna see him eating a sandwich and danc
e to this song so badly <U+0001F605>@jasonderulo #SandwichLove #SavageLove #J
asonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW
"Who<e2><c2>
\u0080<ec>겛 a friend of Jason Derulo? I wanna see him eating a sandwich and
dance to this song so badly <f0><ec>쐭<85>@jasonderulo #SandwichLove #SavageL
ove #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW"

i <- 18
trendingTweets$text[i]

## [1] "#NowPlaying #Playlist #Reels #reelsinstagram https://t.co/Vd7xUUsQbf


with #AvioncitoDePapel #ChicaIdeal #lagrosera #VidaDeRico #SATISFACCI<ed>슇 N
#DemasiadasMujeres #LaNota #Desesperado #PaTi #Nosedacuenta #parce #favorito
#porfaremix #SavageLove #WAPChallenge #tattooremix #Caramelo"

nohandles <- str_replace_all(trendingTweets$text, "@\\w+", "")


nohandles <- str_replace_all(trendingTweets$text, "http\\w+", "")
nohandles[i]

## [1] "#NowPlaying #Playlist #Reels #reelsinstagram ://t.co/Vd7xUUsQbf with


#AvioncitoDePapel #ChicaIdeal #lagrosera #VidaDeRico #SATISFACCI<ed>슇 N #Dema
siadasMujeres #LaNota #Desesperado #PaTi #Nosedacuenta #parce #favorito #por
faremix #SavageLove #WAPChallenge #tattooremix #Caramelo"

wordCorpus <- Corpus(VectorSource(nohandles))


class(wordCorpus)

## [1] "SimpleCorpus" "Corpus"

wordCorpus[[i]]$content
## [1] "#NowPlaying #Playlist #Reels #reelsinstagram ://t.co/Vd7xUUsQbf with
#AvioncitoDePapel #ChicaIdeal #lagrosera #VidaDeRico #SATISFACCI<ed>슇 N #Dema
siadasMujeres #LaNota #Desesperado #PaTi #Nosedacuenta #parce #favorito #por
faremix #SavageLove #WAPChallenge #tattooremix #Caramelo"

wordCorpus <- tm_map(wordCorpus, removePunctuation) #쉼표,온점 없애기

## Warning in tm_map.SimpleCorpus(wordCorpus, removePunctuation): transformat


ion
## drops documents

wordCorpus[[i]]$content

## [1] "NowPlaying Playlist Reels reelsinstagram tcoVd7xUUsQbf with Avioncito


DePapel ChicaIdeal lagrosera VidaDeRico SATISFACCIed 슇 N DemasiadasMujeres La
Nota Desesperado PaTi Nosedacuenta parce favorito porfaremix SavageLove WAPC
hallenge tattooremix Caramelo"

wordCorpus <- tm_map(wordCorpus, content_transformer(tolower)) #소문자로 바꿈

## Warning in tm_map.SimpleCorpus(wordCorpus, content_transformer(tolower)):


## transformation drops documents

wordCorpus[[i]]$content

## [1] "nowplaying playlist reels reelsinstagram tcovd7xuusqbf with avioncito


depapel chicaideal lagrosera vidaderico satisfaccied 슇 n demasiadasmujeres la
nota desesperado pati nosedacuenta parce favorito porfaremix savagelove wapc
hallenge tattooremix caramelo"

wordCorpus <- tm_map(wordCorpus, removeWords, stopwords("english")) #자주 쓰이


지만 의미 없는 단어들 지우기

## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, stopwords("english


")):
## transformation drops documents

wordCorpus[[i]]$content

## [1] "nowplaying playlist reels reelsinstagram tcovd7xuusqbf avioncitodepa


pel chicaideal lagrosera vidaderico satisfaccied 슇 n demasiadasmujeres lanota
desesperado pati nosedacuenta parce favorito porfaremix savagelove wapchall
enge tattooremix caramelo"

wordCorpus <- tm_map(wordCorpus, removeWords, c("amp"))#manual assignment # n%


표시 지움
## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("amp")):
## transformation drops documents

wordCorpus <- tm_map(wordCorpus, removeWords, c("http\\w+"))

## Warning in tm_map.SimpleCorpus(wordCorpus, removeWords, c("http\\w+")):


## transformation drops documents

wordCorpus[[i]]$content

## [1] "nowplaying playlist reels reelsinstagram tcovd7xuusqbf avioncitodepa


pel chicaideal lagrosera vidaderico satisfaccied 슇 n demasiadasmujeres lanota
desesperado pati nosedacuenta parce favorito porfaremix savagelove wapchall
enge tattooremix caramelo"

wordCorpus <- tm_map(wordCorpus, stripWhitespace) #공란 많은 것은 하나짜리로 줄


여줌

## Warning in tm_map.SimpleCorpus(wordCorpus, stripWhitespace): transformatio


n
## drops documents

wordCorpus[[i]]$content

## [1] "nowplaying playlist reels reelsinstagram tcovd7xuusqbf avioncitodepap


el chicaideal lagrosera vidaderico satisfaccied 슇 n demasiadasmujeres lanota
desesperado pati nosedacuenta parce favorito porfaremix savagelove wapchallen
ge tattooremix caramelo"

#str(wordCorpus)

pal <- brewer.pal(9,"YlGnBu")


pal <- pal[-(1:4)]
set.seed(123)
par(mar = c(0,0,0,0), mfrow = c(1, 1)) #마진 최소화, 멀티프레임 row 옵션=1 개만
보여줌
wordcloud(words = wordCorpus, scale=c(3,0.1), max.words=1000, random.order=FA
LSE,
rot.per=0.35, use.r.layout=FALSE, colors=pal)
## Sentiment analysis using syuzhet
# tweetSentiments <- get_sentiment(tweetsDF$text, method = "syuzhet")
tweetSentiments <- get_sentiment(content(wordCorpus), method = "syuzhet")
get_sentiment_dictionary(dictionary = 'syuzhet')

tweets <- cbind(trendingTweets, tweetSentiments)


# names(trendingTweets)

tweets$sentiment <- sapply(tweets$tweetSentiments,encodeSentiment)


head(tweets, n = 1)

## user_id status_id created_at screen_name


## 1 994954678994694146 1319754626028216323 2020-10-23 21:36:37 AprilDJones
##

text
## 1 Let<e2><c2>\u0080<ec>뀼 be real. I found #SavageLove to be irritating un
til the @BTS_twt remix made it tolerable. Thought it was called Selfish Love
when I first heard it. Tho the way Jason Derulo apparently acting my original
thought might have been more aptly named. #SavageLoveBTS #SavageBTSArmy
## source display_text_width reply_to_status_id reply_to_user_id
## 1 Twitter Web App 280 <NA> <NA>
## reply_to_screen_name is_quote is_retweet favorite_count retweet_count
## 1 <NA> FALSE FALSE 0 0
## quote_count reply_count hashtags symbols
## 1 NA NA SavageLove, SavageLoveBTS, SavageBTSArmy NA
## urls_url urls_t.co urls_expanded_url media_url media_t.co media_expanded
_url
## 1 NA NA NA NA NA
NA
## media_type ext_media_url ext_media_t.co ext_media_expanded_url ext_media
_type
## 1 NA NA NA NA
<NA>
## mentions_user_id mentions_screen_name lang quoted_status_id quoted_text
## 1 335141638 BTS_twt en <NA> <NA>
## quoted_created_at quoted_source quoted_favorite_count quoted_retweet_cou
nt
## 1 <NA> <NA> NA
NA
## quoted_user_id quoted_screen_name quoted_name quoted_followers_count
## 1 <NA> <NA> <NA> NA
## quoted_friends_count quoted_statuses_count quoted_location quoted_descri
ption
## 1 NA NA <NA>
<NA>
## quoted_verified retweet_status_id retweet_text retweet_created_at
## 1 NA <NA> <NA> <NA>
## retweet_source retweet_favorite_count retweet_retweet_count retweet_user
_id
## 1 <NA> NA NA <
NA>
## retweet_screen_name retweet_name retweet_followers_count
## 1 <NA> <NA> NA
## retweet_friends_count retweet_statuses_count retweet_location
## 1 NA NA <NA>
## retweet_description retweet_verified place_url place_name place_full_nam
e
## 1 <NA> NA <NA> <NA> <NA
>
## place_type country country_code geo_coords coords_coords
## 1 <NA> <NA> <NA> NA, NA NA, NA
## bbox_coords
## 1 NA, NA, NA, NA, NA, NA, NA, NA
## status_url name
## 1 https://twitter.com/AprilDJones/status/1319754626028216323 April<U+2077>
## location
## 1 USA
## desc
ription
## 1 The only way to make a change is TOGETHER! #blacklivesmatter #equality #
justice
## url protected followers_count friends_count listed_count statuses_count
## 1 <NA> FALSE 23 250 0 804
## favourites_count account_created_at verified profile_url
## 1 662 2018-05-11 14:57:35 FALSE <NA>
## profile_expanded_url account_lang
## 1 <NA> NA
## profile_banner_url
## 1 https://pbs.twimg.com/profile_banners/994954678994694146/1528223546
## profile_background_url
## 1 http://abs.twimg.com/images/themes/theme1/bg.png
## profile_image_
url
## 1 http://pbs.twimg.com/profile_images/1155201712392671233/qMnqUWiJ_normal.
jpg
## Date tweetSentiments sentiment
## 1 2020-10-23 0.65 4) positive

#plot by sentiment score


qplot(tweets$tweetSentiments) + theme(legend.position="none")+
xlab("Sentiment Score") +
ylab("Number of tweets") +
ggtitle("Tweets by Sentiment Score")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


#plot by sentiment category
ggplot(tweets, aes(sentiment)) +
geom_bar(fill = "aquamarine4") +
theme(legend.position="none", axis.title.x = element_blank()) +
ylab("Number of tweets") +
ggtitle("Tweets by Sentiment")

# NRC Sample (various emotions such as anger, fear, joy, ...)


library(stringi) #분노, 기대,,.
# tweetsDF$text <- stri_trans_general(tweetsDF$text, "latin-ascii") #to remov
e non-English text
trendingTweets$text <- stri_trans_general(content(wordCorpus), "latin-ascii")
#to remove non-English text
tweetSentiments <- get_nrc_sentiment(trendingTweets$text) #카테고리에 맞게 뽑음

## Warning: `filter_()` is deprecated as of dplyr 0.7.0.


## Please use `filter()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.


## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Warning: `data_frame()` is deprecated as of tibble 1.1.0.


## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

head(tweetSentiments)

## anger anticipation disgust fear joy sadness surprise trust negative posi
tive
## 1 2 1 2 0 2 0 0 2 2
3
## 2 1 0 0 1 1 0 0 0 1
1
## 3 0 1 0 0 2 1 0 2 0
2
## 4 0 0 0 0 0 0 0 1 0
0
## 5 0 0 0 0 2 1 0 2 1
2
## 6 1 0 0 1 2 0 0 1 1
2

head(trendingTweets$text)

## [1] "lete2c2\u0080ec 뀼 real found savagelove irritating btstwt remix made


tolerable thought called selfish love first heard tho way jason derulo appare
ntly acting original thought might aptly named savagelovebts savagebtsarmy"
## [2] "yesterday dreamt subway instagram ad jasonderulo sings sandwich love
instead savage love best thing happened 2020 tcojuchg2nhkr savagelove savagel
overemix subway jasonderulo instagram"
## [3] "theellenshow please make jasonderulo sing sandwich love show tcoia4pt
jqgmw theellenshow jasonderulo savagelove savageloveremix"

## [4] "jimmyfallon can show jasonderulo fallontonight tcoia4ptjqgmw savagelo


ve"

## [5] "whoe2c2\u0080ec 겛 friend jason derulo wanna see eating sandwich dance
song badly f0ec 쐭 85jasonderulo sandwichlove savagelove jasonderulo jimmyfal
lon pleeease tcoia4ptjqgmw"
## [6] "still favorite savage love remix savageloveremix savagelove sandwichl
ove tcoia4ptjqgmw jasonderulo btstwt bts btsbighit"

trendingTweets$text[1]
## [1] "lete2c2\u0080ec 뀼 real found savagelove irritating btstwt remix made
tolerable thought called selfish love first heard tho way jason derulo appare
ntly acting original thought might aptly named savagelovebts savagebtsarmy"

tweets <- cbind(trendingTweets, tweetSentiments)


# names(tweets)

#View(tweets)
#tweets[1, c(5, 91:100)]
str(tweets[,c(92:100)])

## 'data.frame': 748 obs. of 9 variables:


## $ anger : num 2 1 0 0 0 1 1 0 0 0 ...
## $ anticipation: num 1 0 1 0 0 0 0 0 0 0 ...
## $ disgust : num 2 0 0 0 0 0 0 0 0 0 ...
## $ fear : num 0 1 0 0 0 1 1 0 0 0 ...
## $ joy : num 2 1 2 0 2 2 1 1 0 0 ...
## $ sadness : num 0 0 1 0 1 0 0 0 0 0 ...
## $ surprise : num 0 0 0 0 0 0 0 1 0 0 ...
## $ trust : num 2 0 2 1 2 1 1 2 0 0 ...
## $ negative : num 2 1 0 0 1 1 1 0 0 0 ...

sentimentTotals <- data.frame(colSums(tweets[,c(92:100)])) #3200 개 트윗에 10


개의 감정을 영역별로 합산
names(sentimentTotals) <- "count"
sentimentTotals <- cbind("sentiment" = rownames(sentimentTotals), sentimentTo
tals)
rownames(sentimentTotals) <- NULL
sentimentTotals

## sentiment count
## 1 anger 179
## 2 anticipation 117
## 3 disgust 48
## 4 fear 102
## 5 joy 205
## 6 sadness 52
## 7 surprise 92
## 8 trust 924
## 9 negative 173

ggplot(data = sentimentTotals, aes(x = sentiment, y = count)) +


geom_bar(aes(fill = sentiment), stat = "identity") +
theme(legend.position = "none") +
xlab("Sentiment") + ylab("Total Count") + ggtitle("Total Sentiment Score fo
r All Tweets")
6. 해당 해쉬태그로 가장 많은 트윗을 올린 계정을 대상으로 Follower network 구성하여
시각화
# accounts which tweet about quakes
#Corpus = 말뭉치, 문서들의 묶음. 여기서는 사용자 이름들의 묶음
namesCorpus <- Corpus(VectorSource(trendingTweets$screen_name)) #using Scree
nName
class(trendingTweets$screen_name)

## [1] "character"

class(VectorSource(trendingTweets$screen_name))

## [1] "VectorSource" "SimpleSource" "Source"

#str(namesCorpus)
class(namesCorpus)

## [1] "SimpleCorpus" "Corpus"

pal <- brewer.pal(9,"YlGnBu")


pal <- pal[-(1:4)]

set.seed(42) #결과값 고정
par(mar = c(0,0,0,0), mfrow = c(1,1)) #마진 상하좌우=0, 그림 한장 가운데
wordcloud(words = namesCorpus, scale=c(2,0.5), min.freq=2, max.words=500, #최
소빈도 2 개이상 나온것들만, 최대 단어 개수 500 개 한정
random.order=F, rot.per=0.3, use.r.layout=TRUE, colors=pal)

warnings()

# append rows to dataframe


append_to_df<-function(dt, elems)
{
return(rbindlist(list(dt, elems),use.names = TRUE))
}

# Begin with a certain usernames


coreUserName <- "venusfm"

twitterUser <- lookup_users(coreUserName)


# names(twitterUser)

# Extract Followers for the core user


twitterUser_follower_IDs <- get_followers(twitterUser$user_id, retryonratelim
it = 10)

## 75000 followers!

str(twitterUser_follower_IDs) #1303 명의 팔로워


## tibble [1,303 x 1] (S3: tbl_df/tbl/data.frame)
## $ user_id: chr [1:1303] "1314936140890345473" "101437210" "370433551" "10
43557539240194056" ...
## - attr(*, "next_cursor")= chr "0"

head(twitterUser_follower_IDs) #팔로워 아이디 보여줌

## # A tibble: 6 x 1
## user_id
## <chr>
## 1 1314936140890345473
## 2 101437210
## 3 370433551
## 4 1043557539240194056
## 5 1304061846291390464
## 6 1296155868040486913

twitterUser_followers_df <- lookup_users(twitterUser_follower_IDs$user_id) #


팔로워들로 구성된 데이터프레임
head(twitterUser_followers_df)

## # A tibble: 6 x 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 131493~ <NA> NA KwstasSpan~ <NA> <NA>
## 2 101437~ 13200903~ 2020-10-24 19:50:49 cacoteo "#No~ Cacot~
## 3 370433~ 13200352~ 2020-10-24 16:11:35 cncoanalyt~ "@Va~ Twitt~
## 4 104355~ 13199127~ 2020-10-24 08:05:03 RapTaLife "Ecr~ RapTa~
## 5 130406~ 13040714~ 2020-09-10 14:57:10 djmike2714~ "Ο..~ Twitt~
## 6 129615~ 13197397~ 2020-10-23 20:37:34 FaydeeAywa "Now~ Twitt~
## # ... with 84 more variables: display_text_width <int>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>,
## # ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <l
ist>,
## # ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <l
ist>,
## # lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## # quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <
int>,
## # quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <
chr>,
## # quoted_name <chr>, quoted_followers_count <int>,
## # quoted_friends_count <int>, quoted_statuses_count <int>,
## # quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## # retweet_source <chr>, retweet_favorite_count <int>,
## # retweet_retweet_count <int>, retweet_user_id <chr>,
## # retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <int>, retweet_friends_count <int>,
## # retweet_statuses_count <int>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>, country <ch
r>,
## # country_code <chr>, geo_coords <list>, coords_coords <list>,
## # bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## # description <chr>, url <chr>, protected <lgl>, followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>

# names(twitterUser_followers_df)

# filter dummy accounts (and reduce the number of followers for performance)
filtered_df <- filter(twitterUser_followers_df,
followers_count < 200 &
followers_count > 50 &
#statuses_count > 10000 & #to reduce number of follow
ers
# statuses_count > 100 &
# statuses_count < 5000 & #too many tweets from bots?
protected==FALSE)

filtered_follower_IDs <- filtered_df$screen_name


length(filtered_follower_IDs)

## [1] 218

# prepare edge data frame (edges to coreUserName)


edge_df<-data.frame(from=filtered_follower_IDs,
to=rep(coreUserName,
length(filtered_follower_IDs)),
stringsAsFactors=FALSE)
head(edge_df)

## from to
## 1 2jkeg18On2y4aqw venusfm
## 2 IoannisMarkan16 venusfm
## 3 tsiou06 venusfm
## 4 labosojunior venusfm
## 5 KapareliChara venusfm
## 6 InLavHandMade venusfm
tail(edge_df)

## from to
## 213 Kpapoutsis venusfm
## 214 Thanasis_varva venusfm
## 215 lostavenue4 venusfm
## 216 Commandexe venusfm
## 217 Lefharalabo venusfm
## 218 spirosaslanis venusfm

edge_df <- append_to_df(edge_df,list(from=filtered_follower_IDs,


to=rep(coreUserName,
length(filtered_follower_IDs))))
# above lines were used to add edges to coreUserName later

# Iterate and extract list of followers of followers


counter = 1
# for(follower in filtered_follower_IDs){ #filtered_follower_IDs * 60x seco
nds -> stop it and use saved# file!
# # fetch follower list for current user
# followerScreenNameList <- lookup_users(get_followers(follower)$user_id)$s
creen_name #2 차 팔로워 찾기
# Sys.sleep(61) #twitter API limit is 15 for 15 mins
# print(paste("Processing completed for:",
# follower,
# "(",counter,"/",
# length(filtered_follower_IDs),")"
# ))
# # append to edge list
# edge_df <- append_to_df(edge_df,list(from=followerScreenNameList,
# to=rep(follower,
# length(followerScreenNameLis
t))))
# counter <- counter + 1
# }
#
save(edge_df, file = "과제 3_20201025.Rda")
load("과제 3_20201025.Rda")

# prepare network object

#net <- graph.data.frame(edge_df, directed=T) #same with graph_from_data_fra


me()
net <- graph_from_data_frame(edge_df, directed=T) #from ->to 화살표 표시
class(net)
## [1] "igraph"

table(edge_df$to) #1 차 팔로워

##
## venusfm
## 436

table(edge_df$from) #1,2 차 팔로워

##
## 2jkeg18On2y4aqw AAMOXIL abcd90101 abdeeholic07 ABU
KIPZ1
## 2 2 2 2
2
## Acheron_Radio adabako adelalgarhy1 aggelika_knl AKordon
ouris
## 2 2 2 2
2
## alexoulas AllGd66 AmaliadaCityGR AmaliasHotel Amoamigem
eliya
## 2 2 2 2
2
2
--- end [218]

edge_df[to=="venusfm"]$from #'venusfm'를 팔로워하는 계정

## [1] "2jkeg18On2y4aqw" "IoannisMarkan16" "tsiou06" "labosojunior"

## [5] "KapareliChara" "InLavHandMade" "NancySinop" "Dhika_Prasety


aa"
## [9] "AKordonouris" "VicVicEntertain" "Arivndn121" "konstadopoulo
sD"
## [13] "abdeeholic07" "MarifayMichalas" "PerronitosU" "mariapap36"

## [17] "MariaBarberi7" "thyellakamariou" "korina_krvnr21" "adelalgarhy1"

--- end [436]

edge_df[from=="venusfm"]$to #맞팔계정

## character(0)

# simplify network
net <- simplify(net, remove.multiple = F, remove.loops = T)
# temp -> temp is deleted later using remove.loops option

# adjust the size of nodes based on in and out degrees


deg <- degree(net, mode="all") #들어오고 나가는 화살표 개수
V(net)$size <- deg*0.05 + 1 #중요한 사람인지 판단하여 사이즈 조절(중요할수록 사이
즈 커짐)
V(net)[name == coreUserName]$size <- 15 #'venusfm'는 가장 크게
V(net)[size >= 5]$name #1 차 팔로워 중에서 2 차 팔로워 있는 계정

## [1] "venusfm"

V(net)[name == coreUserName]$size #2 차 팔로워

## [1] 15

# node coloring
pal3 <- brewer.pal(10, "Set3")

# overall follower graph


op <- par(mar = c(0,0,0,0))
plot(net, edge.arrow.size=0.1, vertex.label.cex=1,
#vertex.label = ifelse(V(net)$size >= 15, V(net)$name, NA),
vertex.label = ifelse(V(net)$size >= 5, V(net)$name, NA),
vertex.color = pal3)

par(op)
############################################################################
# Friends Among Follwers (optional)
############################################################################

# Plot to highlight Followers with large number of followers


deg <- degree(net, mode="out")
V(net)$size <- deg*0.05+2
V(net)[size==max(V(net)$size)] #the most ties

## + 218/219 vertices, named, from 85024ca:


## [1] 2jkeg18On2y4aqw IoannisMarkan16 tsiou06 labosojunior
## [5] KapareliChara InLavHandMade NancySinop Dhika_Prasetyaa
## [9] AKordonouris VicVicEntertain Arivndn121 konstadopoulosD
## [13] abdeeholic07 MarifayMichalas PerronitosU mariapap36
## [17] MariaBarberi7 thyellakamariou korina_krvnr21 adelalgarhy1
## [21] gewrgiaavram96 elenh1970 nt_fw ChrisValasellis
## [25] DimitisPapanik JohnnySo92 LenaBrb emmodesign
## [29] sBiLLySss maraki1002 georgp23_george nikoskara99
## [33] tragousth KlonarisTasos ThanasisTsoukla nairobeez
## [37] denelpack ilida911 tzivonas mary_ktn
## + ... omitted several vertices

# Highlight the coreUser


V(net)[coreUserName]$size <- 15

# identify friend vertices (the vertices coreUserName is also following)


friendVertices <- ends(net, es=E(net)[from(coreUserName)])[,2] #ends finds
vertices at the ends of edges
ends(net, es=E(net)[from(coreUserName)])[,2]

## character(0)

# Generate edge color variable: (normal: grey80, friend: red)


ecol <- rep("grey80", ecount(net))
ecol[which (V(net)$name %in% friendVertices)] <- 'red'

# Generate edge width variable: (normal: 2, friend: 4)


ew <- rep(2, ecount(net))
ew[which (V(net)$name %in% friendVertices)] <- 4

# add core_user for vertex coloring


friendVertices <- append(friendVertices,coreUserName)

# Generate node color variable: (normal: grey80, friend & coreUser: gold)
vcol <- rep("grey80", vcount(net))
vcol[which (V(net)$name %in% friendVertices)] <- "gold"

# vertex label size


V(net)$label.cex <- 1.2

plot(net,
vertex.color=vcol,
edge.color=ecol,
edge.width=ew,
edge.arrow.mode=0,
vertex.label = ifelse(V(net)$name %in% friendVertices, V(net)$name, NA),

vertex.label.color="black",
vertex.label.font=2,
edge.curved=0.1
)

You might also like