Professional Documents
Culture Documents
Top Streamers On Twitch
Top Streamers On Twitch
Introduction
Fardosa Mohamed Salat 657967
In [1]:
import pandas as pd
In [2]:
import numpy as np
In [3]:
np.random.seed(7)
In [4]:
data=pd.read_csv('twitchdata-update.csv')
In [5]:
data.head()
In [6]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Channel 1000 non-null object
1 Watch time(Minutes) 1000 non-null int64
2 Stream time(minutes) 1000 non-null int64
3 Peak viewers 1000 non-null int64
4 Average viewers 1000 non-null int64
5 Followers 1000 non-null int64
6 Followers gained 1000 non-null int64
7 Views gained 1000 non-null int64
8 Partnered 1000 non-null bool
9 Mature 1000 non-null bool
10 Language 1000 non-null object
dtypes: bool(2), int64(7), object(2)
memory usage: 72.4+ KB
In [7]:
data.isnull().sum()
Out[7]: Channel 0
Watch time(Minutes) 0
localhost:8888/nbconvert/html/OneDrive/Desktop/APT3025/Top Streamers on Twitch.ipynb?download=false 1/10
10/9/21, 5:43 PM Top Streamers on Twitch
Stream time(minutes) 0
Peak viewers 0
Average viewers 0
Followers 0
Followers gained 0
Views gained 0
Partnered 0
Mature 0
Language 0
dtype: int64
In [8]:
data.dtypes
In [9]:
data.describe()
In [10]:
data[data["Followers gained"] == data["Followers gained"].min()]
In [11]:
data.columns
In [12]:
import matplotlib.pyplot as plt
import seaborn as sns
In [13]:
fig = plt.figure(figsize=(10,6))
sns.countplot(x="Mature", data=data)
plt.title("Mature (+18) streamers")
plt.show()
In [14]:
languages_values = data["Language"].value_counts()
languages_values
In [15]:
localhost:8888/nbconvert/html/OneDrive/Desktop/APT3025/Top Streamers on Twitch.ipynb?download=false 3/10
10/9/21, 5:43 PM Top Streamers on Twitch
languages = data["Language"].unique()
languages
In [16]:
fig = plt.figure(figsize=(10, 6))
languages_sns = sns.barplot(x=languages[:10], y=languages_values[:10])
languages_sns.set(xlabel="Languages", ylabel="Streamers languages")
plt.title("Twitch Top 10 Languages")
plt.xticks(rotation=45)
plt.show()
In [17]:
adult = pd.get_dummies(data['Mature'], drop_first=True)
In [18]:
data['Adult'] = adult
In [19]:
data.head()
In [20]:
data.drop(['Channel','Partnered','Mature','Language'], axis=1, inplace=True)
In [21]:
data.head()
In [22]:
X = data[['Watch time(Minutes)','Stream time(minutes)','Peak viewers','Followers','F
y = data['Adult']
In [23]:
y
Out[23]: 0 0
1 0
2 1
3 0
4 0
..
995 0
996 0
997 0
998 0
999 0
Name: Adult, Length: 1000, dtype: uint8
In [24]:
from sklearn.model_selection import train_test_split
In [25]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state
In [26]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classif
from sklearn.linear_model import LogisticRegression
import wandb
import time
In [27]:
def train_eval_pipeline(model, train_data, test_data, name):
#initialize wandb
wandb.init(project="Twitch Streamers", name=name)
#assign the data
(X_train, y_train) = train_data
(X_test, y_test) = test_data
end =time.time()-start
prediction = model.predict(X_test)
In [28]:
logreg=LogisticRegression()
In [30]:
train_eval_pipeline(logreg, (X_train, y_train), (X_test, y_test), "Logistic_Regressi
Run summary:
accuracy 78.0
precision 0.39
recall 0.5
training_time 0.07776
Run history:
accuracy ▁
precision ▁
recall ▁
training_time ▁
Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
Synced Logistic_Regression_Twitch_Streamers:
https://wandb.ai/fardosa1904/Twitch%20Streamers/runs/3g3mnlhm
...Successfully finished last run (ID:3g3mnlhm). Initializing new run:
Accuracy score of the logistic regression classifier with default hyperparameter val
ues 78.00%
C:\Users\fardo\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 i
n labels with no predicted samples. Use `zero_division` parameter to control this be
havior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\fardo\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 i
n labels with no predicted samples. Use `zero_division` parameter to control this be
havior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\fardo\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 i
n labels with no predicted samples. Use `zero_division` parameter to control this be
havior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\fardo\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 i
n labels with no predicted samples. Use `zero_division` parameter to control this be
havior.
_warn_prf(average, modifier, msg_start, len(result))
C:\Users\fardo\anaconda3\lib\site-packages\sklearn\metrics\_classification.py:1245:
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 i
n labels with no predicted samples. Use `zero_division` parameter to control this be
havior.
_warn_prf(average, modifier, msg_start, len(result))
In [40]:
testData = data
In [41]:
testData
In [42]:
testY= testData['Adult']
In [43]:
testY
Out[43]: 0 0
1 0
2 1
3 0
4 0
..
995 0
996 0
997 0
998 0
999 0
Name: Adult, Length: 1000, dtype: uint8
In [56]:
testX= testData.drop(['Watch time(Minutes)','Adult'], axis= 1)
In [57]:
testX
In [58]:
clf=tree.DecisionTreeClassifier(criterion='entropy', max_depth = 4)
In [59]:
clf=clf.fit(X,y)
In [60]:
predY=clf.predict(testX)
In [61]:
predY
Out[61]: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
localhost:8888/nbconvert/html/OneDrive/Desktop/APT3025/Top Streamers on Twitch.ipynb?download=false 9/10
10/9/21, 5:43 PM Top Streamers on Twitch
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
In [62]:
predictions= pd.concat([testData[testData.columns[0]], testData['Adult'],pd.Series(p
In [63]:
predictions
0 6196161750 0 0
1 6091677300 0 0
2 5644590915 1 0
3 3970318140 0 0
4 3671000070 0 0
995 122524635 0 0
996 122523705 0 0
997 122452320 0 0
998 122311065 0 0
999 122192850 0 0
In [64]:
from sklearn.metrics import accuracy_score
In [65]:
print('Accuracy on test data is %.2f'%(accuracy_score(testY,predY)*100.))
In [ ]: