Professional Documents
Culture Documents
201721501 조예슬
2020 10 15
해시태그는 ‘#savagelove’
0. 패키지 불러오기
#install.packages("tm")
library(tm) #텍스트마이닝 용 패키지
#install.packages("ggplot2")
library(ggplot2)
library(rtweet)
#install.packages("wordcloud")
library(wordcloud)
library(data.table)
#install.packages("igraph")
library(igraph)
library(RColorBrewer)
library(dplyr)
library(stringr)
library(syuzhet)
1. 트위터 연결
# Twitter API credentials.
# Secrets must never be hard-coded in a script or published in a report:
# read them from environment variables instead (e.g. set in ~/.Renviron).
# The keys that were previously embedded here should be treated as
# compromised and regenerated in the Twitter developer portal.
APP_NAME <- "DA_R"
API_KEY <- Sys.getenv("TWITTER_API_KEY")
API_KEY_SECRET <- Sys.getenv("TWITTER_API_KEY_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")
ACCESS_TOKEN_SECRET <- Sys.getenv("TWITTER_ACCESS_TOKEN_SECRET")

# Sys.getenv() returns "" for an unset variable; report which ones are
# missing now rather than failing later with an opaque authentication error.
credential_vars <- c(
  "TWITTER_API_KEY", "TWITTER_API_KEY_SECRET",
  "TWITTER_ACCESS_TOKEN", "TWITTER_ACCESS_TOKEN_SECRET"
)
unset_vars <- credential_vars[!nzchar(Sys.getenv(credential_vars))]
if (length(unset_vars) > 0) {
  warning(
    "Missing Twitter credential environment variable(s): ",
    paste(unset_vars, collapse = ", "),
    call. = FALSE
  )
}
head(trendingTweets)
## # A tibble: 6 x 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 994954~ 13197546~ 2020-10-23 21:36:37 AprilDJones "Let~ Twitt~
## 2 126984~ 13197366~ 2020-10-23 20:25:10 KittyCatSo~ "Yes~ Twitt~
## 3 126984~ 13183069~ 2020-10-19 21:44:00 KittyCatSo~ "@Th~ Twitt~
## 4 126984~ 13197131~ 2020-10-23 18:51:42 KittyCatSo~ "@ji~ Twitt~
## 5 126984~ 13183047~ 2020-10-19 21:35:11 KittyCatSo~ "Who~ Twitt~
## 6 126984~ 13183057~ 2020-10-19 21:39:30 KittyCatSo~ "Sti~ Twitt~
## # ... with 84 more variables: display_text_width <dbl>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>,
## # ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <l
ist>,
## # ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <l
ist>,
## # lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## # quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <
int>,
## # quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <
chr>,
## # quoted_name <chr>, quoted_followers_count <int>,
## # quoted_friends_count <int>, quoted_statuses_count <int>,
## # quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## # retweet_source <chr>, retweet_favorite_count <int>,
## # retweet_retweet_count <int>, retweet_user_id <chr>,
## # retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <int>, retweet_friends_count <int>,
## # retweet_statuses_count <int>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>, country <ch
r>,
## # country_code <chr>, geo_coords <list>, coords_coords <list>,
## # bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## # description <chr>, url <chr>, protected <lgl>, followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
dim(trendingTweets)
## [1] 748 90
## # A tibble: 2 x 7
## query limit remaining reset reset_at timestamp
app
## <chr> <int> <int> <drtn> <dttm> <dttm>
<chr>
## 1 applicat~ 180 179 15.02~ 2020-10-25 05:07:16 2020-10-25 04:52:15
DA_R
## 2 search/t~ 180 171 14.85~ 2020-10-25 05:07:06 2020-10-25 04:52:15
DA_R
# perform a quick cleanup/transformation
#View(trendingTweets)
head(trendingTweets$text)
## [5] "Who‘s a friend of Jason Derulo? I wanna see him eating a sandwich and
dance to this song so badly <U+0001F605>@jasonderulo #SandwichLove #SavageLo
ve #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW"
#tail(trendingTweets$text)
head(trendingTweets$created_at)
class(trendingTweets$created_at)
# Count the missing values in each column of trendingTweets.
# vapply() with an integer(1) template guarantees a named integer vector
# even for a zero-column input, where sapply() would return an empty list.
vapply(trendingTweets, function(x) sum(is.na(x)), integer(1))
4. 수집된 트윗의 일별 빈도를 비교
return(tweets)
}
#text
"@TheEllenShow please m
ake @jasonderulo sing Sandwich Love in your show https://t.co/iA4ptJqGMW #The
EllenShow #JasonDerulo #SavageLove #SavageLoveRemix"
## Who‘s a friend of Jason Derulo? I wanna see him eating a sandwich and danc
e to this song so badly <U+0001F605>@jasonderulo #SandwichLove #SavageLove #J
asonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW
"Who<e2><c2>
\u0080<ec>겛 a friend of Jason Derulo? I wanna see him eating a sandwich and
dance to this song so badly <f0><ec>쐭<85>@jasonderulo #SandwichLove #SavageL
ove #JasonDerulo @jimmyfallon PLEEEASE https://t.co/iA4ptJqGMW"
i <- 18
trendingTweets$text[i]
wordCorpus[[i]]$content
## [1] "#NowPlaying #Playlist #Reels #reelsinstagram ://t.co/Vd7xUUsQbf with
#AvioncitoDePapel #ChicaIdeal #lagrosera #VidaDeRico #SATISFACCI<ed>슇 N #Dema
siadasMujeres #LaNota #Desesperado #PaTi #Nosedacuenta #parce #favorito #por
faremix #SavageLove #WAPChallenge #tattooremix #Caramelo"
wordCorpus[[i]]$content
wordCorpus[[i]]$content
wordCorpus[[i]]$content
wordCorpus[[i]]$content
wordCorpus[[i]]$content
#str(wordCorpus)
text
## 1 Let<e2><c2>\u0080<ec>뀼 be real. I found #SavageLove to be irritating un
til the @BTS_twt remix made it tolerable. Thought it was called Selfish Love
when I first heard it. Tho the way Jason Derulo apparently acting my original
thought might have been more aptly named. #SavageLoveBTS #SavageBTSArmy
## source display_text_width reply_to_status_id reply_to_user_id
## 1 Twitter Web App 280 <NA> <NA>
## reply_to_screen_name is_quote is_retweet favorite_count retweet_count
## 1 <NA> FALSE FALSE 0 0
## quote_count reply_count hashtags symbols
## 1 NA NA SavageLove, SavageLoveBTS, SavageBTSArmy NA
## urls_url urls_t.co urls_expanded_url media_url media_t.co media_expanded
_url
## 1 NA NA NA NA NA
NA
## media_type ext_media_url ext_media_t.co ext_media_expanded_url ext_media
_type
## 1 NA NA NA NA
<NA>
## mentions_user_id mentions_screen_name lang quoted_status_id quoted_text
## 1 335141638 BTS_twt en <NA> <NA>
## quoted_created_at quoted_source quoted_favorite_count quoted_retweet_cou
nt
## 1 <NA> <NA> NA
NA
## quoted_user_id quoted_screen_name quoted_name quoted_followers_count
## 1 <NA> <NA> <NA> NA
## quoted_friends_count quoted_statuses_count quoted_location quoted_descri
ption
## 1 NA NA <NA>
<NA>
## quoted_verified retweet_status_id retweet_text retweet_created_at
## 1 NA <NA> <NA> <NA>
## retweet_source retweet_favorite_count retweet_retweet_count retweet_user
_id
## 1 <NA> NA NA <
NA>
## retweet_screen_name retweet_name retweet_followers_count
## 1 <NA> <NA> NA
## retweet_friends_count retweet_statuses_count retweet_location
## 1 NA NA <NA>
## retweet_description retweet_verified place_url place_name place_full_nam
e
## 1 <NA> NA <NA> <NA> <NA
>
## place_type country country_code geo_coords coords_coords
## 1 <NA> <NA> <NA> NA, NA NA, NA
## bbox_coords
## 1 NA, NA, NA, NA, NA, NA, NA, NA
## status_url name
## 1 https://twitter.com/AprilDJones/status/1319754626028216323 April<U+2077>
## location
## 1 USA
## desc
ription
## 1 The only way to make a change is TOGETHER! #blacklivesmatter #equality #
justice
## url protected followers_count friends_count listed_count statuses_count
## 1 <NA> FALSE 23 250 0 804
## favourites_count account_created_at verified profile_url
## 1 662 2018-05-11 14:57:35 FALSE <NA>
## profile_expanded_url account_lang
## 1 <NA> NA
## profile_banner_url
## 1 https://pbs.twimg.com/profile_banners/994954678994694146/1528223546
## profile_background_url
## 1 http://abs.twimg.com/images/themes/theme1/bg.png
## profile_image_
url
## 1 http://pbs.twimg.com/profile_images/1155201712392671233/qMnqUWiJ_normal.
jpg
## Date tweetSentiments sentiment
## 1 2020-10-23 0.65 4) positive
head(tweetSentiments)
## anger anticipation disgust fear joy sadness surprise trust negative posi
tive
## 1 2 1 2 0 2 0 0 2 2
3
## 2 1 0 0 1 1 0 0 0 1
1
## 3 0 1 0 0 2 1 0 2 0
2
## 4 0 0 0 0 0 0 0 1 0
0
## 5 0 0 0 0 2 1 0 2 1
2
## 6 1 0 0 1 2 0 0 1 1
2
head(trendingTweets$text)
## [5] "whoe2c2\u0080ec 겛 friend jason derulo wanna see eating sandwich dance
song badly f0ec 쐭 85jasonderulo sandwichlove savagelove jasonderulo jimmyfal
lon pleeease tcoia4ptjqgmw"
## [6] "still favorite savage love remix savageloveremix savagelove sandwichl
ove tcoia4ptjqgmw jasonderulo btstwt bts btsbighit"
trendingTweets$text[1]
## [1] "lete2c2\u0080ec 뀼 real found savagelove irritating btstwt remix made
tolerable thought called selfish love first heard tho way jason derulo appare
ntly acting original thought might aptly named savagelovebts savagebtsarmy"
#View(tweets)
#tweets[1, c(5, 91:100)]
# Inspect the structure of columns 92 through 100 of tweets.
str(tweets[, 92:100])
## sentiment count
## 1 anger 179
## 2 anticipation 117
## 3 disgust 48
## 4 fear 102
## 5 joy 205
## 6 sadness 52
## 7 surprise 92
## 8 trust 924
## 9 negative 173
## [1] "character"
class(VectorSource(trendingTweets$screen_name))
#str(namesCorpus)
class(namesCorpus)
set.seed(42) # fix the RNG seed so the cloud layout is reproducible
# Zero margins on all four sides; a single plot centred on the device.
par(mar = c(0, 0, 0, 0), mfrow = c(1, 1))
# Word cloud built from namesCorpus: only terms that occur at least twice
# are drawn, capped at 500 terms. `pal` is the colour palette defined
# elsewhere in the script.
wordcloud(
  words = namesCorpus, scale = c(2, 0.5), min.freq = 2, max.words = 500,
  random.order = FALSE, rot.per = 0.3, use.r.layout = TRUE, colors = pal
)
warnings()
## 75000 followers!
## # A tibble: 6 x 1
## user_id
## <chr>
## 1 1314936140890345473
## 2 101437210
## 3 370433551
## 4 1043557539240194056
## 5 1304061846291390464
## 6 1296155868040486913
## # A tibble: 6 x 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 131493~ <NA> NA KwstasSpan~ <NA> <NA>
## 2 101437~ 13200903~ 2020-10-24 19:50:49 cacoteo "#No~ Cacot~
## 3 370433~ 13200352~ 2020-10-24 16:11:35 cncoanalyt~ "@Va~ Twitt~
## 4 104355~ 13199127~ 2020-10-24 08:05:03 RapTaLife "Ecr~ RapTa~
## 5 130406~ 13040714~ 2020-09-10 14:57:10 djmike2714~ "Ο..~ Twitt~
## 6 129615~ 13197397~ 2020-10-23 20:37:34 FaydeeAywa "Now~ Twitt~
## # ... with 84 more variables: display_text_width <int>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>,
## # ext_media_url <list>, ext_media_t.co <list>, ext_media_expanded_url <l
ist>,
## # ext_media_type <chr>, mentions_user_id <list>, mentions_screen_name <l
ist>,
## # lang <chr>, quoted_status_id <chr>, quoted_text <chr>,
## # quoted_created_at <dttm>, quoted_source <chr>, quoted_favorite_count <
int>,
## # quoted_retweet_count <int>, quoted_user_id <chr>, quoted_screen_name <
chr>,
## # quoted_name <chr>, quoted_followers_count <int>,
## # quoted_friends_count <int>, quoted_statuses_count <int>,
## # quoted_location <chr>, quoted_description <chr>, quoted_verified <lgl>,
## # retweet_status_id <chr>, retweet_text <chr>, retweet_created_at <dttm>,
## # retweet_source <chr>, retweet_favorite_count <int>,
## # retweet_retweet_count <int>, retweet_user_id <chr>,
## # retweet_screen_name <chr>, retweet_name <chr>,
## # retweet_followers_count <int>, retweet_friends_count <int>,
## # retweet_statuses_count <int>, retweet_location <chr>,
## # retweet_description <chr>, retweet_verified <lgl>, place_url <chr>,
## # place_name <chr>, place_full_name <chr>, place_type <chr>, country <ch
r>,
## # country_code <chr>, geo_coords <list>, coords_coords <list>,
## # bbox_coords <list>, status_url <chr>, name <chr>, location <chr>,
## # description <chr>, url <chr>, protected <lgl>, followers_count <int>,
## # friends_count <int>, listed_count <int>, statuses_count <int>,
## # favourites_count <int>, account_created_at <dttm>, verified <lgl>,
## # profile_url <chr>, profile_expanded_url <chr>, account_lang <lgl>,
## # profile_banner_url <chr>, profile_background_url <chr>,
## # profile_image_url <chr>
# names(twitterUser_followers_df)
# Filter dummy accounts (and reduce the number of followers for performance):
# keep only unprotected accounts whose follower count is strictly between
# 50 and 200. The statuses_count conditions are alternative thresholds that
# are currently disabled.
filtered_df <- filter(
  twitterUser_followers_df,
  followers_count < 200 &
    followers_count > 50 &
    # statuses_count > 10000 & # to reduce number of followers
    # statuses_count > 100 &
    # statuses_count < 5000 & # too many tweets from bots?
    protected == FALSE
)
## [1] 218
## from to
## 1 2jkeg18On2y4aqw venusfm
## 2 IoannisMarkan16 venusfm
## 3 tsiou06 venusfm
## 4 labosojunior venusfm
## 5 KapareliChara venusfm
## 6 InLavHandMade venusfm
tail(edge_df)
## from to
## 213 Kpapoutsis venusfm
## 214 Thanasis_varva venusfm
## 215 lostavenue4 venusfm
## 216 Commandexe venusfm
## 217 Lefharalabo venusfm
## 218 spirosaslanis venusfm
table(edge_df[["to"]]) # first-degree followers: edge count per target account
##
## venusfm
## 436
##
## 2jkeg18On2y4aqw AAMOXIL abcd90101 abdeeholic07 ABU
KIPZ1
## 2 2 2 2
2
## Acheron_Radio adabako adelalgarhy1 aggelika_knl AKordon
ouris
## 2 2 2 2
2
## alexoulas AllGd66 AmaliadaCityGR AmaliasHotel Amoamigem
eliya
## 2 2 2 2
2
2
--- end [218]
edge_df[from == "venusfm", to] # mutual-follow accounts (edges going out from venusfm)
## character(0)
# Simplify the network: keep multiple (parallel) edges, drop self-loops.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
net <- simplify(net, remove.multiple = FALSE, remove.loops = TRUE)
# temp -> temp is deleted later using remove.loops option
## [1] "venusfm"
## [1] 15
# Node colouring: ten colours from the RColorBrewer "Set3" palette.
pal3 <- brewer.pal(n = 10, name = "Set3")
par(op)
############################################################################
# Friends Among Followers (optional)
############################################################################
## character(0)
# Vertex colours: every node starts grey80; nodes whose name appears in
# friendVertices (friends and the core user) are painted gold.
vcol <- rep("grey80", vcount(net))
vcol[V(net)$name %in% friendVertices] <- "gold"

# Draw the network. Only the highlighted vertices get a label (bold, black);
# edges are slightly curved and drawn with edge.arrow.mode = 0.
# `ecol` and `ew` (edge colour and width) are defined elsewhere in the script.
plot(
  net,
  vertex.color = vcol,
  vertex.label = ifelse(V(net)$name %in% friendVertices, V(net)$name, NA),
  vertex.label.color = "black",
  vertex.label.font = 2,
  edge.color = ecol,
  edge.width = ew,
  edge.arrow.mode = 0,
  edge.curved = 0.1
)