You are on page 1of 18

1/28/22, 6:06 PM Sentiment Analysis -Copy1

Author: Adegbenro Michael Olusola:

Note:
From the machine learning point of view, raw text is useless. Only if we manage to transform
it into meaningful numbers, can we feed it into our machine-learning algorithms such as clustering.
The same is true for more mundane operations on text,
such as similarity measurement

This project can pull data from Tweeter but to do that you need to request for your own API keys
specified below (I removed mine):

my_api_key = "xxxxxxxxx"
my_api_secret = "yyyyyyy"

If you don't have API keys already, you may use "Raw Data" which i pulled from tweeter using:

You can specifiy amount of tweets you want to pull. Here I pulled 100

Import Necessary Libraries


In [1]:
import pandas as pd

import numpy as np

import re

from nltk.corpus import stopwords

from wordcloud import WordCloud,STOPWORDS

from nltk.stem.porter import PorterStemmer

import nltk

from nltk.corpus import stopwords

import matplotlib

import matplotlib.pyplot as plt

from pandas.plotting import scatter_matrix

%matplotlib inline

from nltk.stem import WordNetLemmatizer

import seaborn as sns

sns.set(style="white",color_codes=True)

from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder,StandardScaler

sns.set(font_scale=1.5)

from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA

from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split

from textblob import TextBlob

from sklearn.metrics import confusion_matrix

from sklearn.metrics import classification_report

from sklearn.metrics import accuracy_score

from sklearn.metrics import precision_score

from sklearn.metrics import recall_score

from sklearn.metrics import f1_score

from sklearn.linear_model import LinearRegression

from sklearn import metrics

import tweepy as tw

import warnings

warnings.filterwarnings('ignore')

from matplotlib.axes._axes import _log as matplotlib_axes_logger

matplotlib_axes_logger.setLevel('ERROR')

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 1/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

Extract Data fromm Twitter


In [2]:
# you need to get your own keys to extratc data from twitter. I have removed mine. If y
# you need not to run this cell (delete) then run the next cell to read the data (Raw D
my_api_key = "xxxxxxxxxxxxxxxxxx"

my_api_secret = "xxxxxxxxxxxxxxx"

# authenticate

auth = tw.OAuthHandler(my_api_key, my_api_secret)

api = tw.API(auth, wait_on_rate_limit=True)

search_query = "#Farmer's Protest -filter:retweet"

# get tweets from the API

#tweets = tw.Cursor(api.search_tweets,q=search_query,lang="en",since="2015-09-16").item

tweets = tw.Cursor(api.search_tweets,q=search_query,lang="en").items(50)

# store the API responses in a list

tweets_copy = []

for tweet in tweets:

tweets_copy.append(tweet)

print("Total Tweets fetched:", len(tweets_copy))

# intialize the dataframe

data= pd.DataFrame()

# populate the dataframe

for tweet in tweets_copy:

hashtags = []

try:

for hashtag in tweet.entities["hashtags"]:

hashtags.append(hashtag["text"])

text = api.get_status(id=tweet.id, tweet_mode='extended').full_text

except:

pass

data = data.append(pd.DataFrame({'user_name': tweet.user.name,'ID': tweet.id_str,

'user_location': tweet.user.location,

'user_description': tweet.user.description,

'user_verified': tweet.user.verified,

'date': tweet.created_at,

'text': text,

'language': tweet.lang,

'favourites-count': tweet.favorite_count,

'author': tweet.user.screen_name,

'retweet-count': tweet.retweet_count,

'hashtags': [hashtags if hashtags else None],


'source': tweet.source}))

Total Tweets fetched: 50

In [6]:
# Run this cell if you have API key inputed in cell one above

data = pd.read_csv("Raw Data.csv")

In [5]:
#save raw data extrated in local drive

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 2/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

# if you didn't run first cell, you need to remove this cell.

data.to_csv("Raw Data.csv")

Clean text - Raw data will make your model predict


wrongly
My analysis is based on tweet "text" only. That is why i cleaned "text" only. You
may clean other features if you want.
In [4]:
#Remove twitter handlers

data.text = data.text.apply(lambda x:re.sub('@[^\s]+','',x))

#remove hashtags

data.text = data.text.apply(lambda x:re.sub(r'\B#\S+','',x))

# Remove URLS

data.text = data.text.apply(lambda x:re.sub(r"http\S+", "", x))

# Remove all the special characters

data.text = data.text.apply(lambda x:' '.join(re.findall(r'\w+', x)))

#remove all single characters

data.text = data.text.apply(lambda x:re.sub(r'\s+[a-zA-Z]\s+', '', x))

# Substituting multiple spaces with single space

data.text = data.text.apply(lambda x:re.sub(r'\s+', ' ', x, flags=re.I))

#Remove stop word

stop_words = stopwords.words('english')

data.text = data.text.apply(lambda x: ' '.join([word for word in x.split() if word not

Alternative Method for data cleaning - effective in


cleaning mutiple features
In [5]:
import string

def clean_text(text):

'''Make text lowercase, remove text in square brackets,remove links,remove punctuat


and remove words containing numbers.'''

text = str(text).lower()

text = re.sub('\[.*?\]', '', text)

text = re.sub('https?://\S+|www\.\S+', '', text)

text = re.sub('<.*?>+', '', text)

text = re.sub('[%s]' % re.escape(string.punctuation), '', text)

text = re.sub('\n', '', text)

text = re.sub('\w*\d\w*', '', text)

return text

data["user_description"] = data["user_description"].apply(lambda x:clean_text(x))

data["user_name"] = data["user_name"].apply(lambda x:clean_text(x))

In [6]:
wml = WordNetLemmatizer()

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 3/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1
lemma_words=[]

for word in data.text:

tokens = wml.lemmatize(word)

lemma_words.append(tokens)

In [7]:
# Now we have cleaned data for three features: user_description, text, and user_name

# Although, we don't need more than text to perform our analysis

pd.DataFrame(data).head()

Out[7]:
user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:53

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:36

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:55

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:23

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:53

In [8]:
data.to_csv("Clean Data.csv")

data

Out[8]:
user_name ID user_location user_description user_verified

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 4/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:5

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:3

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:5

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:2

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:5

... ... ... ... ... ...

202
0 rohit sharma 1486515814840766464 India manmauji False
01:46:0

mohammad Homnabad, 202


0 1486513715197333510 False
shanawaz India 01:37:4

🇮🇳🎠🎠⚔⚔
🇮🇳a lone kaffir bio doesnt speak but tweets 202
0 1486513298996482049 In Patriots ♥ False
army®🇮🇳⚔⚔ believe in 🕉 not l... 01:36:0
🎠🎠🇮🇳

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 5/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_name ID user_location user_description user_verified

202
0 javeed shariff 1486509959281926148 javeed shariff False
01:22:4

ivor eugene Mumbai, hindustani christianvande 202


0 1486500595603103750 False
britto India. mataramindia firsti ... 00:45:3

200 rows × 13 columns

Vader Sentiment Analysis


VADER sentimental analysis relies on a dictionary that maps lexical features to emotion intensities
known as sentiment scores. The sentiment score of a text can be obtained by summing up each
word's intensity in the text.

For example,- Words like 'love,' 'enjoy,' 'happy,' 'like' all convey a positive sentiment. Also, VADER is
intelligent enough to understand these words' basic context, such as "did not love" as a negative
statement. It also understands the emphasis of capitalization and punctuation, such as "ENJOY."

In [9]:
## Added "Sentiment" column and categorized in positive, negative and neutral

In [10]:
sid = SIA()

data['Sentiments'] = data['text'].apply(lambda x: sid.polarity_scores(' '.joi


data['Positive Sentiment'] = data['Sentiments'].apply(lambda x: x['pos']+1*(10**-6))

data['Neutral Sentiment'] = data['Sentiments'].apply(lambda x: x['neu']+1*(10**-6))

data['Negative Sentiment'] = data['Sentiments'].apply(lambda x: x['neg']+1*(10**-6))

In [11]:
# drop sentiments column... not needed

data.drop(columns=['Sentiments'],inplace=True)

data.head()

Out[11]:
user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:53

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 6/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_name ID user_location user_description user_verified

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:36

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:55

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:23

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:53

In [12]:
#Number of Words

data['Number of Words'] =data.text.apply(lambda x:len(x.split(' ')))

#Average Word Length

data['Mean Word Length'] = data.text.apply(lambda x:np.round(np.mean([len(w) for w in x


data.head()

Out[12]:
user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:53

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:36

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 7/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_name ID user_location user_description user_verified

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:55

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:23

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:53

In [13]:
# WordCloud using atual clean data

#allWords = ' '.join( [cmts for cmts in data.text])

#wordCloud = WordCloud(width = 500, height = 300, random_state = 21, max_font_size = 11


#plt.imshow(wordCloud, interpolation= 'bilinear')

#plt.axis('off')

#plt.show

Sentimental Analysis

Polarity and Subjectivity

In starting with the analysis we will create the new columns namely Polarity and Subjectivity and
acquire the very values of each comment. Polarity ranges from -1 to 1 and measures how positive or
negative a comment is. It simply means emotions expressed in a sentence. Subjectivity expresses
some personal feelings, views, or beliefs. A subjective sentence may not express any sentiment.

In [14]:
# get subjectivity

def getSubjectivity(txt):

return TextBlob(txt).sentiment.subjectivity

# get polarity

def getPolarity(txt):

return TextBlob(txt).sentiment.polarity

#Columns

data['Subjectivity'] = data['text'].apply(getSubjectivity)

data['Polarity'] = data['text'].apply(getPolarity)

data.head()

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 8/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

Out[14]:
user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:53

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:36

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:55

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:23

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:53

In [15]:
# function to compute analysis

def getAnalysis(score):

if score < 0 :

return 'Negative'

elif score == 0:

return 'Neutral'

else:

return 'Positive'

data['Analysis'] = data['Polarity'].apply(getAnalysis)

In [16]:
data.head()

Out[16]:
user_name ID user_location user_description user_verified

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentime… 9/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_name ID user_location user_description user_verified

dhakkan from deccan driving 202


0 dhakkandodger 1487183172240506882 False
a dodge dr sastey ... 21:57:53

ordinary girl in a messed up 202


0 jonna 🇸🇪 1487182849144864770 Sverige False
world never fell... 21:56:36

romanian mlm hehim 조선 202


0 vlad mao 1487177895378313219 False
하나 korea is one 21:36:55

☭ new afrikan preparing for student of mlm 📕🚩 • new 202


0 1487177760070119424 False
maoist ☭ class struggle afrikan revolutionary... 21:36:23

nicolas gabriel anos comunista☭ 202


0 1487177133592133636 🇵🇪🇹🇷🇵🇭🇮🇳 False
☭🇧🇷 antifascistaantirevisionistam... 21:33:53

5 rows × 21 columns

In [17]:
# % Percentages:

pcomments = data[data.Analysis == 'Positive']

pcomments = pcomments['text']

print('Positive: ' +str(round((pcomments.shape[0]/data.shape[0])*100, 1))+ '%')

ncomments = data[data.Analysis == 'Negative']

ncomments = ncomments['text']

print('Negative: ' +str(round((ncomments.shape[0]/data.shape[0])*100, 1))+ '%')

nucomments = data[data.Analysis == 'Neutral']

nucomments = nucomments['text']

print('Nuetral: ' +str(round((nucomments.shape[0]/data.shape[0])*100, 1))+ '%')

Positive: 25.5%

Negative: 11.0%

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 10/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

Nuetral: 63.5%

In [18]:
# the below function will create a word cloud

def wordcloud_draw(data, color = 'black'):

words = ' '.join(data)

cleaned_word = " ".join([word for word in words.split()

if 'http' not in word # double check for nay links

and not word.startswith('#') # removing hash tags

and word != 'rt'

])

wordcloud = WordCloud(stopwords=STOPWORDS, # using stopwords provided by Word cloud


background_color=color,

width=2500,

height=2000

).generate(cleaned_word)

# using matplotlib to display the images in notebook itself.

plt.figure(1,figsize=(5, 7))

plt.imshow(wordcloud)

plt.axis('off')

plt.show()

In [19]:
wordcloud_draw(data.text, 'black')

In [20]:
print("Positive words are", pcomments.count())

wordcloud_draw(pcomments, 'black')

Positive words are 51

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 11/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

In [21]:
print("Negative words are", ncomments.count())

wordcloud_draw(ncomments)

Negative words are 22

In [22]:
print("Neutral words are", nucomments.count())

wordcloud_draw(nucomments, 'black')

Neutral words are 127

In [23]:
# Value Count

data['Analysis'].value_counts

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 12/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1
# Plot

plt.title('Sentiment Analysis')

plt.xlabel('Sentiment')

plt.ylabel('Counts')

data['Analysis'].value_counts().plot(kind= 'bar')

plt.show()

More on sentiment analysis: https://www.projectpro.io/article/sentiment-analysis-project-ideas-


with-source-code/518

Check Analysis Accuracy


In [24]:
data.isnull().sum()

user_name 0

Out[24]:
ID 0

user_location 0

user_description 0

user_verified 0

date 0

text 0

language 0

favourites-count 0

author 0

retweet-count 0

hashtags 174

source 0

Positive Sentiment 0

Neutral Sentiment 0

Negative Sentiment 0

Number of Words 0

Mean Word Length 0

Subjectivity 0

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 13/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1
Polarity 0

Analysis 0

dtype: int64

In [25]:
data.shape

(200, 21)
Out[25]:

In [26]:
data.dropna(inplace=True)

data.isnull().sum()

user_name 0

Out[26]:
ID 0

user_location 0

user_description 0

user_verified 0

date 0

text 0

language 0

favourites-count 0

author 0

retweet-count 0

hashtags 0

source 0

Positive Sentiment 0

Neutral Sentiment 0

Negative Sentiment 0

Number of Words 0

Mean Word Length 0

Subjectivity 0

Polarity 0

Analysis 0

dtype: int64

In [27]:
data.shape

(26, 21)
Out[27]:

In [28]:
data.columns

Index(['user_name', 'ID', 'user_location', 'user_description', 'user_verified',

Out[28]:
'date', 'text', 'language', 'favourites-count', 'author',

'retweet-count', 'hashtags', 'source', 'Positive Sentiment',

'Neutral Sentiment', 'Negative Sentiment', 'Number of Words',

'Mean Word Length', 'Subjectivity', 'Polarity', 'Analysis'],

dtype='object')

In [29]:
# drop irrelevant data

data = data.drop(['user_name', 'ID','language', 'author','Positive Sentiment',

'Neutral Sentiment', 'Negative Sentiment', 'Number of Words',

'Mean Word Length','hashtags'], axis=1)

In [30]:
# check data types and encode object type

data.dtypes

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 14/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

user_location object

Out[30]:
user_description object

user_verified bool

date datetime64[ns, UTC]

text object

favourites-count int64

retweet-count int64

source object

Subjectivity float64

Polarity float64

Analysis object

dtype: object

In [31]:
enco = LabelEncoder()

data['user_location'] = enco.fit_transform(data['user_location'])

data['user_description'] = enco.fit_transform(data['user_description'])

data['user_verified'] = enco.fit_transform(data['user_verified'])

data['text'] = enco.fit_transform(data['text'])

data['date'] = enco.fit_transform(data['date'])

data['source'] = enco.fit_transform(data['source'])

data['Analysis'] = enco.fit_transform(data['Analysis'])

In [32]:
data.head()

Out[32]: favourites- retweet-


user_location user_description user_verified date text source Subjectivity
count count

0 3 7 0 25 10 0 0 3 0.0

0 8 3 1 24 8 0 3 3 0.0

0 0 19 0 23 8 0 3 1 0.0

0 2 16 1 22 11 20 3 0 0.0

0 0 2 0 21 4 0 0 2 0.0

In [33]:
X = data.drop(["Analysis"], axis=1)

y= data.Analysis

In [34]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.10,random_state=1)

In [35]:
#Feature Scaling/Standardize (not important step but it boost accuracy)

from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

x_train = sc.fit_transform(x_train)

x_test = sc.transform(x_test)

In [36]:
print (x_train.shape, y_train.shape)

print (x_test.shape, y_test.shape)

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 15/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

(23, 10) (23,)

(3, 10) (3,)

In [37]:
# use another model to confirm the accuracy

In [38]:
# Apply model and check error- eg linear reg

from sklearn.metrics import mean_squared_error,r2_score

from math import sqrt

linreg=LinearRegression()

linreg.fit(x_train,y_train)

y_predict = linreg.predict(x_test)

print("r2 score is: {:.2f}".format(r2_score(y_test,y_predict)))

print()

print("mse is:{:.2f}".format(sqrt(mean_squared_error(y_test,y_predict))))

print()

print("Intercept is: {:.2f}".format(linreg.intercept_))

print()

print("Coefficient:",linreg.coef_)

r2 score is: 1.00

mse is:0.03

Intercept is: 1.52

Coefficient: [-0.0313303 0.0405771 0.03586336 0.0344194 -0.08785787 -0.00592136

0.01844016 0.00673632 -0.82793525 1.42585067]

In [39]:
classifier = LogisticRegression(random_state = 0)

classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

accuracy = metrics.accuracy_score(y_test,y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))

Accuracy: 100.00%

In [40]:
from sklearn.metrics import confusion_matrix, accuracy_score

def plot_confusion_matrix(cm, classes,

normalize=False,

title='Confusion matrix',

cmap=plt.cm.Blues):

"""

This function prints and plots the confusion matrix.

Normalization can be applied by setting `normalize=True`.

"""

plt.imshow(cm, interpolation='nearest', cmap=cmap)


plt.title(title)

plt.colorbar()

tick_marks = np.arange(len(classes))

plt.xticks(tick_marks, classes, rotation=45)

plt.yticks(tick_marks, classes)

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 16/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

if normalize:

cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

print("Normalized confusion matrix")

else:

print('Confusion matrix, without normalization')

print(cm)

thresh = cm.max() / 2.

for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):

plt.text(j, i, cm[i, j],

horizontalalignment="center",

color="white" if cm[i, j] > thresh else "black")

plt.tight_layout()

plt.ylabel('True label')

plt.xlabel('Predicted label')

# Compute confusion matrix

cnf_matrix = confusion_matrix(y_test, y_pred)

In [41]:
import itertools

plt.figure(figsize=(7,5))

plot_confusion_matrix(cnf_matrix, classes=['1','2','3'],title='Confusion matrix, withou


accuracy_score(y_test, y_pred)

accuracy = classifier.score(x_test, y_test)

print()

print("Accuracy: {:.2f}%".format(accuracy*100))

Confusion matrix, without normalization

[[1 0]

[0 2]]

Accuracy: 100.00%

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 17/18


1/28/22, 6:06 PM Sentiment Analysis -Copy1

In [42]: plt.figure(figsize=(20,7))

sns.heatmap(data.corr(), annot = True)

<AxesSubplot:>
Out[42]:

In [43]:
print(classification_report(y_test,y_pred))

precision recall f1-score support

1 1.00 1.00 1.00 1

2 1.00 1.00 1.00 2

accuracy 1.00 3

macro avg 1.00 1.00 1.00 3

weighted avg 1.00 1.00 1.00 3

localhost:8888/nbconvert/html/Documents/IT Courses/Machine Learning/Refrence folders/Natural Langage Processing/Sentiment Analysis/ Sentim… 18/18

You might also like