
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
print(os.listdir("../input"))

['pulsar_stars.csv']

Import Data
df=pd.read_csv('../input/pulsar_stars.csv')

Exploratory Data Analysis


# look at the first five rows
df.head()

Mean of the integrated profile ... target_class


0 140.562500 ... 0
1 102.507812 ... 0
2 103.015625 ... 0
3 136.750000 ... 0
4 88.726562 ... 0

[5 rows x 9 columns]

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17898 entries, 0 to 17897
Data columns (total 9 columns):
Mean of the integrated profile                  17898 non-null float64
Standard deviation of the integrated profile    17898 non-null float64
Excess kurtosis of the integrated profile       17898 non-null float64
Skewness of the integrated profile              17898 non-null float64
Mean of the DM-SNR curve                        17898 non-null float64
Standard deviation of the DM-SNR curve          17898 non-null float64
Excess kurtosis of the DM-SNR curve             17898 non-null float64
Skewness of the DM-SNR curve                    17898 non-null float64
target_class                                    17898 non-null int64
dtypes: float64(8), int64(1)
memory usage: 1.2 MB

# summary statistics


df.describe()

Mean of the integrated profile ... target_class


count 17898.000000 ... 17898.000000
mean 111.079968 ... 0.091574
std 25.652935 ... 0.288432
min 5.812500 ... 0.000000
25% 100.929688 ... 0.000000
50% 115.078125 ... 0.000000
75% 127.085938 ... 0.000000
max 192.617188 ... 1.000000

[8 rows x 9 columns]

print(df.target_class.value_counts())  # counts per class
sns.countplot(df.target_class)
plt.show()
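Note the strong class imbalance: from the describe() output above, the mean of target_class is about 0.0916, so only ~9% of the samples are pulsars. A minimal sketch to print the class proportions directly:

# proportions of each class (normalize=True returns fractions instead of counts)
print(df.target_class.value_counts(normalize=True))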

Logistic Regression
# set x and y values
y = df.target_class.values
x_df = df.drop(['target_class'], axis=1)
# min-max normalization: scale every feature to [0, 1]
x = (x_df - np.min(x_df)) / (np.max(x_df) - np.min(x_df))
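The same min-max scaling is available in scikit-learn; a minimal sketch using MinMaxScaler (equivalent to the manual formula above with the default feature_range=(0, 1)):

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()  # rescales each column to [0, 1]
x_scaled = pd.DataFrame(scaler.fit_transform(x_df), columns=x_df.columns)

In a stricter setup the scaler would be fit on the training split only, to avoid leaking test-set statistics into the scaling.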

# train/test
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

from sklearn.linear_model import LogisticRegression


lr=LogisticRegression()
lr.fit(x_train,y_train)
print('lr accuracy :', lr.score(x_test,y_test))

# confusion matrix
y_pred = lr.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix

cm_lr = confusion_matrix(y_true,y_pred)

lr accuracy : 0.9743016759776536

/opt/conda/lib/python3.6/site-packages/sklearn/linear_model/
logistic.py:433: FutureWarning: Default solver will be changed to
'lbfgs' in 0.22. Specify a solver to silence this warning.
FutureWarning)
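As the warning says, passing a solver explicitly silences it. Also, with only ~9% positives, overall accuracy can hide poor recall on the pulsar class, so a per-class report is worth printing; a minimal sketch:

from sklearn.metrics import classification_report
lr = LogisticRegression(solver='lbfgs')  # explicit solver, per the FutureWarning
lr.fit(x_train, y_train)
print(classification_report(y_test, lr.predict(x_test)))  # precision/recall for each class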

KNN
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train,y_train)
print('knn accuracy :',knn.score(x_test,y_test))
# confusion matrix
y_pred = knn.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix
cm_knn = confusion_matrix(y_true, y_pred)

knn accuracy : 0.978584729981378

Find best k value


score_list = []
for each in range(1, 15):
    knn2 = KNeighborsClassifier(n_neighbors=each)
    knn2.fit(x_train, y_train)
    score_list.append(knn2.score(x_test, y_test))

plt.plot(range(1,15),score_list)
plt.xlabel('k values')
plt.ylabel('accuracy')
plt.show()
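The best k can also be read off programmatically instead of from the plot; a minimal sketch reusing score_list:

best_k = score_list.index(max(score_list)) + 1  # +1 because k starts at 1
print('best k :', best_k, 'accuracy :', max(score_list))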

SVM
from sklearn.svm import SVC
svm=SVC(random_state=1)
svm.fit(x_train,y_train)
print('svm accuracy :', svm.score(x_test,y_test))

# confusion matrix
y_pred = svm.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix

cm_svm = confusion_matrix(y_true,y_pred)

/opt/conda/lib/python3.6/site-packages/sklearn/svm/base.py:196:
FutureWarning: The default value of gamma will change from 'auto' to
'scale' in version 0.22 to account better for unscaled features. Set
gamma explicitly to 'auto' or 'scale' to avoid this warning.
"avoid this warning.", FutureWarning)

svm accuracy : 0.9731843575418995
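Per the warning, setting gamma explicitly keeps the result stable across scikit-learn versions; a minimal sketch:

svm = SVC(gamma='scale', random_state=1)  # explicit gamma, per the FutureWarning
svm.fit(x_train, y_train)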

Naive Bayes
from sklearn.naive_bayes import GaussianNB
nb=GaussianNB()
nb.fit(x_train,y_train)
print('nb accuracy : ', nb.score(x_test,y_test))

# confusion matrix
y_pred = nb.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix

cm_nb = confusion_matrix(y_true,y_pred)

nb accuracy : 0.9437616387337058

Decision Tree
from sklearn.tree import DecisionTreeClassifier
dt=DecisionTreeClassifier()
dt.fit(x_train,y_train)
print('dt accuracy : ', dt.score(x_test,y_test))

# confusion matrix
y_pred = dt.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix

cm_dt = confusion_matrix(y_true,y_pred)


Random Forest
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier()
rf.fit(x_train,y_train)
print('rf accuracy : ', rf.score(x_test,y_test))

# confusion matrix
y_pred = rf.predict(x_test)
y_true = y_test
from sklearn.metrics import confusion_matrix

cm_rf = confusion_matrix(y_true,y_pred)

/opt/conda/lib/python3.6/site-packages/sklearn/ensemble/forest.py:246:
FutureWarning: The default value of n_estimators will change from 10
in version 0.20 to 100 in 0.22.
"10 in version 0.20 to 100 in 0.22.", FutureWarning)

rf accuracy : 0.9774674115456239
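Likewise, fixing n_estimators avoids the default change (10 to 100 in version 0.22) mentioned in the warning; a minimal sketch:

rf = RandomForestClassifier(n_estimators=100, random_state=1)  # explicit values, per the warning
rf.fit(x_train, y_train)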
Visualizing the Confusion Matrices
plt.figure(figsize=(20,15))

plt.suptitle("Confusion Matrixes",fontsize=20)

plt.subplot(2,3,1)
plt.title("Logistic Regression Confusion Matrix")
sns.heatmap(cm_lr,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.subplot(2,3,2)
plt.title("K Nearest Neighbors Confusion Matrix")
sns.heatmap(cm_knn,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.subplot(2,3,3)
plt.title("Support Vector Machine Confusion Matrix")
sns.heatmap(cm_svm,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.subplot(2,3,4)
plt.title("Naive Bayes Confusion Matrix")
sns.heatmap(cm_nb,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.subplot(2,3,5)
plt.title("Decision Tree Classifier Confusion Matrix")
sns.heatmap(cm_dt,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.subplot(2,3,6)
plt.title("Random Forest Confusion Matrix")
sns.heatmap(cm_rf,cbar=False,annot=True,cmap="Greens",fmt="d")

plt.show()
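Before concluding, the six test accuracies can be collected in one place for a direct comparison; a minimal sketch using the models fitted above:

models = {'LR': lr, 'KNN': knn, 'SVM': svm, 'NB': nb, 'DT': dt, 'RF': rf}
for name, model in models.items():
    print(name, 'accuracy :', model.score(x_test, y_test))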
CONCLUSION: On this dataset, the KNN algorithm achieves the highest test accuracy (about 0.979), slightly ahead of Random Forest. This can also be seen in the confusion matrices above.
