NLP Project

10/23/22, 7:53 AM WION Twitter Account - Jupyter Notebook
Import Libraries
In [57]:
import tweepy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from textblob import TextBlob
from tweepy import API
from tweepy import Cursor
from tweepy import OAuthHandler
import json
import datetime
import re
import string
import seaborn as sns
import nltk
Store API Keys

In [58]:
df = pd.read_csv('API_SET1.csv')
In [59]:
# Read the four Twitter credentials from the row labelled 0 of the credentials frame.
TwitterApiKey = df.loc[0, "Twitter_Api_Key"]
TwitterApiSecret = df.loc[0, "Twitter_Api_Secret_Key"]
TwitterApiAccessToken = df.loc[0, "Twitter_Api_access_token"]
TwitterApiSecretToken = df.loc[0, "Twitter_Api_secret_access_token"]
In [60]:
# Build the OAuth handler from the consumer key pair, then attach the user access token pair.
auth = tweepy.OAuthHandler(
    consumer_key=TwitterApiKey,
    consumer_secret=TwitterApiSecret,
)
auth.set_access_token(TwitterApiAccessToken, TwitterApiSecretToken)

# wait_on_rate_limit=True is passed so the client waits when the rate limit is hit.
twitterApi = tweepy.API(auth, wait_on_rate_limit=True)
Selection of Twitter Account

In [61]:
twitterAccount = "@WIONews"
localhost:8888/notebooks/WION Twitter Account.ipynb 1/23

In [62]:
# Cursor over the account's timeline, limited to 50 items.
# `contributor_details` and `include_entities` are not recognised parameters of
# `API.user_timeline` in the installed tweepy: it logged "Unexpected parameter: ..."
# for both when the cursor was consumed, so they are dropped. Only `tweet.text` is
# read downstream, so the entity payload is irrelevant here.
# `count`, `since_id` and `max_id` were passed as None and are omitted.
tweets = tweepy.Cursor(
    twitterApi.user_timeline,
    screen_name=twitterAccount,
    trim_user=True,
    exclude_replies=True,
).items(50)
Converted Tweets into DataFrame

In [63]:
# Consume the cursor: one row per fetched status, keeping only its text.
df = pd.DataFrame(
    [status.text for status in tweets],
    columns=["Tweets"],
)
Unexpected parameter: contributor_details
Unexpected parameter: include_entities
In [64]:
df.head(10)
Out[64]:
Tweets
0 Several protests against a planned reform that...
1 #InPics | Did you know the #TajMahal is not th...
2 These satellites are owned by the London-based...
3 The bill is being promoted by President Gustav...
4 #InPics | Diwali is India’s biggest festival a...
5 As per reports, former prime minister Johnson ...
6 Mateschitz, whose estimated net worth is $27 b...
7 #InPics | Every shape and size is beautiful, a...
8 #InPics | The leg room has always been a major...
9 #InPics | From Hagrid to Severus Snape: 'Harry...
Text Cleaning

In [65]:
def cleanupTweet(txt: str) -> str:
    """Strip Twitter markup from a tweet and return the remaining text.

    Removes, in order: URLs, ``@mentions``, ``#`` characters (the hashtag word
    itself is kept), and the ``RT :`` residue left behind once the mention in a
    retweet prefix has been removed. Surrounding whitespace is not trimmed.

    Args:
        txt: Raw tweet text.

    Returns:
        The tweet text with the markup above removed.
    """
    # URLs go first and run to the next whitespace, so links containing '-', '_',
    # '?', '=', '&' or '%' are removed whole instead of leaving a fragment behind.
    txt = re.sub(r'https?://\S+', '', txt)
    txt = re.sub(r'@[A-Za-z0-9_]+', '', txt)
    txt = re.sub(r'#', '', txt)
    # "RT @user: text" has become "RT : text" after the mention removal above.
    txt = re.sub(r'RT :', '', txt)
    return txt
In [66]:
df['Tweet'] = df['Tweets'].apply(cleanupTweet)
In [67]:
df.head()
Out[67]:
Tweets Tweet
0 Several protests against a planned reform that... Several protests against a planned reform that...
1 #InPics | Did you know the #TajMahal is not th... InPics | Did you know the TajMahal is not the ...
2 These satellites are owned by the London-based... These satellites are owned by the London-based...
3 The bill is being promoted by President Gustav... The bill is being promoted by President Gustav...
4 #InPics | Diwali is India’s biggest festival a... InPics | Diwali is India’s biggest festival an...
Text Subjectivity
In [68]:
def getTextSubjectivity(txt):
    """Return the subjectivity component of TextBlob's sentiment for ``txt``."""
    sentiment = TextBlob(txt).sentiment
    return sentiment.subjectivity
Text Polarity
In [69]:
def getTextPolarity(txt):
    """Return the polarity component of TextBlob's sentiment for ``txt``."""
    sentiment = TextBlob(txt).sentiment
    return sentiment.polarity
In [70]:
# Score the cleaned text (mentions, '#' and URLs removed) rather than the raw tweet,
# so TextBlob sees the same input as the VADER scoring applied to df['Tweet'].
df['Subjectivity'] = df['Tweet'].apply(getTextSubjectivity)
In [71]:
# Score the cleaned text (mentions, '#' and URLs removed) rather than the raw tweet.
# NOTE(review): the VADER cell later assigns its score dicts to df['Polarity'], which
# replaces this TextBlob value; use a different column name if it should be kept.
df['Polarity'] = df['Tweet'].apply(getTextPolarity)

In [72]:
df.head()
Out[72]:
Tweets Tweet Subjectivity Polarity
Several protests against a planned Several protests against a planned

0 0.355556 0.177778
reform that... reform that...
#InPics | Did you know the #TajMahal InPics | Did you know the TajMahal is
1 0.500000 0.625000
is not th... not the ...
These satellites are owned by the These satellites are owned by the
2 0.000000 0.000000
London-based... London-based...
The bill is being promoted by The bill is being promoted by

3 0.166667 0.125000
President Gustav... President Gustav...
#InPics | Diwali is India’s biggest InPics | Diwali is India’s biggest

4 0.750000 0.575000
festival a... festival an...
Sentiment Intensity Analyzer

In [73]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# SentimentIntensityAnalyzer() raises LookupError when the VADER lexicon is not
# installed, so fetch it once on machines where it is missing.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

sid = SentimentIntensityAnalyzer()
In [74]:
# Score every cleaned tweet with VADER; each cell of 'Polarity' holds the dict
# returned by polarity_scores (keys 'neg', 'neu', 'pos', 'compound').
df['Polarity'] = df['Tweet'].apply(sid.polarity_scores)

df.head()
Out[74]:
Tweets Tweet Subjectivity Polarity
{'neg': 0.088, 'neu':

Several protests against a Several protests against a
0 0.355556 0.691, 'pos': 0.221,
planned reform that... planned reform that...
'co...
{'neg': 0.0, 'neu': 1.0,

#InPics | Did you know the InPics | Did you know the
1 0.500000 'pos': 0.0,
#TajMahal is not th... TajMahal is not the ...
'compound...
{'neg': 0.0, 'neu': 1.0,

These satellites are owned by These satellites are owned by
2 0.000000 'pos': 0.0,
the London-based... the London-based...
'compound...
{'neg': 0.0, 'neu':

The bill is being promoted by The bill is being promoted by
3 0.166667 0.865, 'pos': 0.135,
President Gustav... President Gustav...
'comp...
#InPics | Diwali is India’s InPics | Diwali is India’s biggest {'neg': 0.0, 'neu': 0.61,
4 0.750000
biggest festival a... festival an... 'pos': 0.39, 'compou...

In [75]:
# Pull the 'compound' entry out of each VADER score dict into its own column.
df['Compound'] = [scores['compound'] for scores in df['Polarity']]

df.head()
Out[75]:
Tweets Tweet Subjectivity Polarity Compound
{'neg': 0.088, 'neu':

Several protests against Several protests against
0 0.355556 0.691, 'pos': 0.221, 0.4404
a planned reform that... a planned reform that...
'co...
{'neg': 0.0, 'neu':

#InPics | Did you know InPics | Did you know the
1 0.500000 1.0, 'pos': 0.0, 0.0000
the #TajMahal is not th... TajMahal is not the ...
'compound...
These satellites are These satellites are {'neg': 0.0, 'neu':

2 owned by the London- owned by the London- 0.000000 1.0, 'pos': 0.0, 0.0000
based... based... 'compound...
{'neg': 0.0, 'neu':

The bill is being promoted The bill is being promoted
3 0.166667 0.865, 'pos': 0.135, 0.4215
by President Gustav... by President Gustav...
'comp...
{'neg': 0.0, 'neu':

#InPics | Diwali is India’s InPics | Diwali is India’s
4 0.750000 0.61, 'pos': 0.39, 0.7876
biggest festival a... biggest festival an...
'compou...
In [76]:
# Binary label from the compound score: non-negative (including exactly 0) -> 'pos', otherwise 'neg'.
df['comp_score'] = np.where(df['Compound'] >= 0, 'pos', 'neg')

df.head()
Out[76]:
Tweets Tweet Subjectivity Polarity Compound comp_score
{'neg': 0.088,
Several protests Several protests
'neu': 0.691,
0 against a planned against a planned 0.355556 0.4404 pos
'pos': 0.221,
'co...
#InPics | Did you

InPics | Did you {'neg': 0.0, 'neu':
know the
1 know the TajMahal 0.500000 1.0, 'pos': 0.0, 0.0000 pos
#TajMahal is not
is not the ... 'compound...
th...
These satellites are These satellites are {'neg': 0.0, 'neu':

2 owned by the owned by the 0.000000 1.0, 'pos': 0.0, 0.0000 pos
London-based... London-based... 'compound...
The bill is being The bill is being {'neg': 0.0, 'neu':

3 promoted by promoted by 0.166667 0.865, 'pos': 0.4215 pos
President Gustav... President Gustav... 0.135, 'comp...
#InPics | Diwali is InPics | Diwali is {'neg': 0.0, 'neu':

4 India’s biggest India’s biggest 0.750000 0.61, 'pos': 0.39, 0.7876 pos
festival a... festival an... 'compou...

In [77]:
# Pull the 'neg' entry out of each VADER score dict into its own column.
df['neg_score'] = [scores['neg'] for scores in df['Polarity']]

df.head()
Out[77]:
Tweets Tweet Subjectivity Polarity Compound comp_score neg_score
Several Several
{'neg': 0.088,
protests protests
'neu': 0.691,
0 against a against a 0.355556 0.4404 pos 0.088
'pos': 0.221,
planned planned
'co...
#InPics | Did InPics | Did {'neg': 0.0,

you know the you know the 'neu': 1.0,
1 0.500000 0.0000 pos 0.000
#TajMahal is TajMahal is 'pos': 0.0,
not th... not the ... 'compound...
These These
{'neg': 0.0,
satellites are satellites are
'neu': 1.0,
2 owned by the owned by the 0.000000 0.0000 pos 0.000
'pos': 0.0,
London- London-
'compound...
based... based...
The bill is The bill is

{'neg': 0.0,
being being
'neu': 0.865,
3 promoted by promoted by 0.166667 0.4215 pos 0.000
'pos': 0.135,
President President
'comp...
Gustav... Gustav...
#InPics | InPics |
{'neg': 0.0,
Diwali is Diwali is
'neu': 0.61,
4 India’s India’s 0.750000 0.7876 pos 0.000
'pos': 0.39,
biggest biggest
'compou...
festival a... festival an...

In [78]:
# Pull the 'pos' entry out of each VADER score dict into its own column.
df['pos_score'] = [scores['pos'] for scores in df['Polarity']]

df.head()
Out[78]:
Tweets Tweet Subjectivity Polarity Compound comp_score neg_score pos_sco
Several Several
protests protests {'neg': 0.088,
against a against a 'neu': 0.691,
0 0.355556 0.4404 pos 0.088 0.22
planned planned 'pos': 0.221,
reform reform 'co...
that... that...
InPics |
#InPics |
Did you {'neg': 0.0,
Did you
know the 'neu': 1.0,
1 know the 0.500000 0.0000 pos 0.000 0.00
TajMahal 'pos': 0.0,
#TajMahal
is not the 'compound...
is not th...
...
These These
satellites satellites
{'neg': 0.0,
are are
'neu': 1.0,
2 owned by owned 0.000000 0.0000 pos 0.000 0.00
'pos': 0.0,
the by the
'compound...
London- London-
based... based...

being being {'neg': 0.0,
promoted promoted 'neu': 0.865,
3 0.166667 0.4215 pos 0.000 0.13
by by 'pos': 0.135,
President President 'comp...
Gustav... Gustav...
#InPics | InPics |
Diwali is Diwali is {'neg': 0.0,
India’s India’s 'neu': 0.61,
4 0.750000 0.7876 pos 0.000 0.39
biggest biggest 'pos': 0.39,
festival festival 'compou...
a... an...

In [79]:
# Pull the 'neu' entry out of each VADER score dict into its own column.
df['neu_score'] = [scores['neu'] for scores in df['Polarity']]

df.head()
Out[79]:
Several Several
0 0.355556 0.4404 pos 0.088 0.22
that... that...
InPics |
#InPics |
Did you
1 know the 0.500000 0.0000 pos 0.000 0.00
#TajMahal
is not th...
...
These These
{'neg': 0.0,
are are
'neu': 1.0,
'pos': 0.0,
the by the
'compound...
London- London-
based... based...

3 0.166667 0.4215 pos 0.000 0.13
by by 'pos': 0.135,
Gustav... Gustav...
#InPics | InPics |
4 0.750000 0.7876 pos 0.000 0.39
a... an...
Final Data

In [80]:
df.head()
Out[80]:
Several Several
0 0.355556 0.4404 pos 0.088 0.22
that... that...
InPics |
#InPics |
Did you
1 know the 0.500000 0.0000 pos 0.000 0.00
#TajMahal
is not th...
...
These These
{'neg': 0.0,
are are
'neu': 1.0,
'pos': 0.0,
the by the
'compound...
London- London-
based... based...

3 0.166667 0.4215 pos 0.000 0.13
by by 'pos': 0.135,
Gustav... Gustav...
#InPics | InPics |
4 0.750000 0.7876 pos 0.000 0.39
a... an...
In [81]:
df.isnull().sum()
Out[81]:
Tweets 0
Tweet 0
Subjectivity 0
Polarity 0
Compound 0
comp_score 0
neg_score 0
pos_score 0
neu_score 0
dtype: int64

Correlation
In [82]:
sns.set(rc={'figure.figsize':(10,7)})
# Correlate the numeric columns only: the frame also holds text, label and dict columns,
# and DataFrame.corr() raises on non-numeric data in pandas >= 2.0 instead of
# silently dropping it. select_dtypes works the same way on older pandas versions.
sns.heatmap(df.select_dtypes(include='number').corr(),annot = True,cmap = 'binary_r')
Out[82]:
<AxesSubplot:>
In [83]:
# Numeric columns only: DataFrame.corr() raises on the text/label/dict columns in
# pandas >= 2.0, whereas older versions dropped them silently.
df.select_dtypes(include='number').corr()
Out[83]:
Subjectivity Compound neg_score pos_score neu_score
Subjectivity 1.000000 0.335752 -0.087934 0.431684 -0.301995
Compound 0.335752 1.000000 -0.761780 0.812773 -0.082637
neg_score -0.087934 -0.761780 1.000000 -0.292075 -0.561845
pos_score 0.431684 0.812773 -0.292075 1.000000 -0.627070
neu_score -0.301995 -0.082637 -0.561845 -0.627070 1.000000
Word Cloud

In [84]:
from wordcloud import WordCloud, STOPWORDS , ImageColorGenerator
In [85]:
# Concatenate every cleaned tweet into one space-separated string.
tweet_All = " ".join(df['Tweet'])

# plt.subplots returns a (figure, axes) pair; unpack it instead of binding the tuple to `fig`.
fig, ax = plt.subplots(1, figsize=(50,50))
# NOTE(review): the original line is cut off after ".genera" in this export; generating
# the cloud from `tweet_All` is the presumed intent - confirm against the notebook.
wordcloud_ALL = WordCloud(max_font_size=50, max_words=100, background_color="black").generate(tweet_All)
ax.imshow(wordcloud_ALL, interpolation='bilinear')
ax.set_title("Tweets",fontsize=120)
ax.axis('off')
Out[85]:
(-0.5, 399.5, 199.5, -0.5)
Visualization
Compound score

In [86]:
sns.set_style('whitegrid')
sns.countplot(x='comp_score',data=df,palette='BrBG')
Out[86]:
<AxesSubplot:xlabel='comp_score', ylabel='count'>
Compound value

In [87]:
sns.countplot(x='Compound',data=df,palette='BrBG')
Out[87]:
<AxesSubplot:xlabel='Compound', ylabel='count'>
Negative score

In [88]:
sns.countplot(x='neg_score',data=df,palette='tab10_r')
Out[88]:
<AxesSubplot:xlabel='neg_score', ylabel='count'>
Positive score

In [89]:
sns.countplot(x='pos_score',data=df,palette='tab10')
Out[89]:
<AxesSubplot:xlabel='pos_score', ylabel='count'>
Neutral score

In [90]:
sns.countplot(x='neu_score',data=df,palette='winter')
Out[90]:
<AxesSubplot:xlabel='neu_score', ylabel='count'>
Subjectivity score
In [91]:
sns.countplot(x='Subjectivity',data=df,palette='winter')
Out[91]:
<AxesSubplot:xlabel='Subjectivity', ylabel='count'>
Negative score and Compound score

In [92]:
sns.countplot(x='neg_score',hue='comp_score',data=df,palette='Paired_r')
Out[92]:
<AxesSubplot:xlabel='neg_score', ylabel='count'>
Positive score and Compound score

In [93]:
sns.countplot(x='pos_score',hue='comp_score',data=df,palette='Paired')
Out[93]:
<AxesSubplot:xlabel='pos_score', ylabel='count'>
Neutral score and Compound score

In [94]:
sns.countplot(x='neu_score',hue='comp_score',data=df,palette='binary')
Out[94]:
<AxesSubplot:xlabel='neu_score', ylabel='count'>

Subjectivity score and Compound score

In [95]:
sns.countplot(x='Subjectivity',hue='comp_score',data=df,palette='binary_r')
Out[95]:
<AxesSubplot:xlabel='Subjectivity', ylabel='count'>
Positive score and Negative score

In [96]:
sns.lineplot( x='pos_score',size=None,linewidth=3,
y='neg_score',
data=df,color='green')
Out[96]:
<AxesSubplot:xlabel='pos_score', ylabel='neg_score'>

Machine Learning
In [147]:
X = df['Tweet']
y = df['comp_score']
In [148]:
from sklearn.model_selection import train_test_split
In [149]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=100)
CountVectorizer
In [150]:
from sklearn.feature_extraction.text import CountVectorizer

count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
X_train_counts
Out[150]:
<40x403 sparse matrix of type '<class 'numpy.int64'>'
with 650 stored elements in Compressed Sparse Row format>
(Documents,Features)
In [151]:
X_train_counts.shape
Out[151]:
(40, 403)

In [152]:
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. Fall back to the old method on
# installations that predate it, and convert to a list so the display is unchanged.
list(
    count_vect.get_feature_names_out()
    if hasattr(count_vect, "get_feature_names_out")
    else count_vect.get_feature_names()
)
'provocative',
'putting',
'qualifies',
'quality',
'raab',
'race',
'rained',
'ready',
'recep',
'reform',
'rejects',
'restrictions',
'rishi',
'risk',
'ronaldo',
'room',
'roslyn',
'rounder',
'russia',
'sadly',
Transform Counts to Frequencies with Tf-IDF

In [153]:
from sklearn.feature_extraction.text import TfidfTransformer

tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_train_tfidf.shape
Out[153]:
(40, 403)
Tf-IDF Vectorizer
In [154]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_train_tfidf.shape
Out[154]:
(40, 403)
Training a Classifier

In [155]:
from sklearn.svm import LinearSVC

clf = LinearSVC()
clf.fit(X_train_tfidf,y_train)
Out[155]:
LinearSVC()
Pipeline
In [156]:
from sklearn.pipeline import Pipeline

text_clf = Pipeline([('tfidf',TfidfVectorizer()),('clf',LinearSVC()),])
text_clf.fit(X_train,y_train)
Out[156]:
Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', LinearSVC())])
Results
In [157]:
predictions = text_clf.predict(X_test)
In [158]:
from sklearn import metrics
In [159]:
print(metrics.confusion_matrix(y_test,predictions))
[[1 2]
[0 7]]
In [160]:
# Average over every class present in the data. Restricting `labels` to
# np.unique(predictions) drops any class the model never predicts from the
# weighted average, which overstates the score. zero_division=0 scores such a
# class as 0 without emitting a warning.
metrics.f1_score(y_test, predictions, average='weighted', zero_division=0)
Out[160]:
0.7625000000000001
Accuracy 80%

In [161]:
print(metrics.classification_report(y_test,predictions,zero_division=1))
precision recall f1-score support
neg 1.00 0.33 0.50 3
pos 0.78 1.00 0.88 7
accuracy 0.80 10
macro avg 0.89 0.67 0.69 10
weighted avg 0.84 0.80 0.76 10
In [ ]:

NLP Project

Uploaded by

Document Information

Original Description:

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

NLP Project

Uploaded by

Copyright:

Available Formats

10/23/22, 7:53 AM WION Twitter Account - Jupyter Notebook

Store API Keys

Selection of Twitter Account

localhost:8888/notebooks/WION Twitter Account.ipynb 1/23

Converted Tweets into DataFrame

df = pd.DataFrame(data=[tweet.text for tweet in tweets],columns=["Tweets"])

Unexpected parameter: contributor_details

Unexpected parameter: include_entities

Unexpected parameter: contributor_details

Unexpected parameter: include_entities

Unexpected parameter: contributor_details

Unexpected parameter: include_entities

0 Several protests against a planned reform that...

1 #InPics | Did you know the #TajMahal is not th...

2 These satellites are owned by the London-based...

3 The bill is being promoted by President Gustav...

4 #InPics | Diwali is India’s biggest festival a...

5 As per reports, former prime minister Johnson ...

6 Mateschitz, whose estimated net worth is $27 b...

7 #InPics | Every shape and size is beautiful, a...

8 #InPics | The leg room has always been a major...

9 #InPics | From Hagrid to Severus Snape: 'Harry...

localhost:8888/notebooks/WION Twitter Account.ipynb 2/23

localhost:8888/notebooks/WION Twitter Account.ipynb 3/23

Tweets Tweet Subjectivity Polarity

Several protests against a planned Several protests against a planned

The bill is being promoted by The bill is being promoted by

#InPics | Diwali is India’s biggest InPics | Diwali is India’s biggest

Sentiment Intensity Analyzer

from nltk.sentiment.vader import SentimentIntensityAnalyzer

df['Polarity'] = df['Tweet'].apply(lambda Tweet: sid.polarity_scores(Tweet))

Tweets Tweet Subjectivity Polarity

{'neg': 0.088, 'neu':

{'neg': 0.0, 'neu': 1.0,

{'neg': 0.0, 'neu': 1.0,

{'neg': 0.0, 'neu':

localhost:8888/notebooks/WION Twitter Account.ipynb 4/23

df['Compound'] = df['Polarity'].apply(lambda score_dict: score_dict['compound'])

Tweets Tweet Subjectivity Polarity Compound

{'neg': 0.088, 'neu':

{'neg': 0.0, 'neu':

These satellites are These satellites are {'neg': 0.0, 'neu':

{'neg': 0.0, 'neu':

{'neg': 0.0, 'neu':

df['comp_score'] = df['Compound'].apply(lambda c: 'pos' if c>=0 else 'neg')

Tweets Tweet Subjectivity Polarity Compound comp_score

#InPics | Did you

These satellites are These satellites are {'neg': 0.0, 'neu':

The bill is being The bill is being {'neg': 0.0, 'neu':

#InPics | Diwali is InPics | Diwali is {'neg': 0.0, 'neu':

localhost:8888/notebooks/WION Twitter Account.ipynb 5/23

df['neg_score'] = df['Polarity'].apply(lambda score_dict: score_dict['neg'])

Tweets Tweet Subjectivity Polarity Compound comp_score neg_score

#InPics | Did InPics | Did {'neg': 0.0,

The bill is The bill is

localhost:8888/notebooks/WION Twitter Account.ipynb 6/23

df['pos_score'] = df['Polarity'].apply(lambda score_dict: score_dict['pos'])

Tweets Tweet Subjectivity Polarity Compound comp_score neg_score pos_sco

The bill is The bill is

localhost:8888/notebooks/WION Twitter Account.ipynb 7/23

df['neu_score'] = df['Polarity'].apply(lambda score_dict: score_dict['neu'])

Tweets Tweet Subjectivity Polarity Compound comp_score neg_score pos_sco

The bill is The bill is