
komal_knn1_minMaxScalar

In [36]: import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

import seaborn as sns

sns.set(font_scale=1.5)
sns.set(style='white', color_codes=True)

In [2]: location = r"D:\komal\SIMPLILEARN\MY COURSES\IN PROGRESS\DATA SCIENCE WITH PYTHON\Live class downloads\Aug 11 Sat - Sep 15 Sat - Attending\datasets\iris.csv"

In [3]: # load the iris data set
df_iris = pd.read_csv(location)
df_iris.head()

Out[3]:
   sepal_length  sepal_width  petal_length  petal_width        class
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [4]: # Check the available styles
plt.style.available

Out[4]: ['bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark-palette',
'seaborn-dark',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'seaborn',
'Solarize_Light2',
'tableau-colorblind10',
'_classic_test']

In [5]: plt.style.use('ggplot')

In [6]: # Means are in the same order of magnitude for all features, so scaling
# might not be beneficial here.
# If the means were of different orders of magnitude, scaling could
# significantly improve the accuracy of a distance-based classifier like KNN.

df_iris.describe()

Out[6]:
       sepal_length  sepal_width  petal_length  petal_width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000
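Why scaling matters for KNN: the classifier ranks neighbors by Euclidean distance, so a feature with a much larger numeric range dominates that distance. A minimal illustration (the two samples and the per-feature min/max below are hypothetical, not taken from the iris data):

# Two hypothetical samples where the second feature is on a much larger scale.
a = np.array([1.0, 500.0])
b = np.array([2.0, 510.0])
print(np.linalg.norm(a - b))   # ~10.05: the large-scale feature dominates

# After min-max scaling both features to [0, 1], they contribute comparably.
lo, hi = np.array([0.0, 400.0]), np.array([5.0, 600.0])  # hypothetical per-feature min/max
a_s, b_s = (a - lo) / (hi - lo), (b - lo) / (hi - lo)
print(np.linalg.norm(a_s - b_s))   # ~0.21: both features now matter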


In [7]: X = df_iris.drop('class', axis=1).values  # drop the target column; features as ndarray

y1 = df_iris['class'].values  # target as ndarray (not used below)
y = df_iris['class']          # target as Series (keeps the 'class' name for concat later)

In [8]: scaler = MinMaxScaler()
scaler

Out[8]: MinMaxScaler(copy=True, feature_range=(0, 1))


In [9]: X_scaled = scaler.fit_transform(X)

print('X_scaled type is', type(X_scaled))
X_scaled


X_scaled type is <class 'numpy.ndarray'>


Out[9]: array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
[0.16666667, 0.41666667, 0.06779661, 0.04166667],
[0.11111111, 0.5 , 0.05084746, 0.04166667],
[0.08333333, 0.45833333, 0.08474576, 0.04166667],
[0.19444444, 0.66666667, 0.06779661, 0.04166667],
[0.30555556, 0.79166667, 0.11864407, 0.125 ],
[0.08333333, 0.58333333, 0.06779661, 0.08333333],
[0.19444444, 0.58333333, 0.08474576, 0.04166667],
[0.02777778, 0.375 , 0.06779661, 0.04166667],
[0.16666667, 0.45833333, 0.08474576, 0. ],
[0.30555556, 0.70833333, 0.08474576, 0.04166667],
[0.13888889, 0.58333333, 0.10169492, 0.04166667],
[0.13888889, 0.41666667, 0.06779661, 0. ],
[0. , 0.41666667, 0.01694915, 0. ],
[0.41666667, 0.83333333, 0.03389831, 0.04166667],
[0.38888889, 1. , 0.08474576, 0.125 ],
[0.30555556, 0.79166667, 0.05084746, 0.125 ],
[0.22222222, 0.625 , 0.06779661, 0.08333333],
[0.38888889, 0.75 , 0.11864407, 0.08333333],
[0.22222222, 0.75 , 0.08474576, 0.08333333],
[0.30555556, 0.58333333, 0.11864407, 0.04166667],
[0.22222222, 0.70833333, 0.08474576, 0.125 ],
[0.08333333, 0.66666667, 0. , 0.04166667],
[0.22222222, 0.54166667, 0.11864407, 0.16666667],
[0.13888889, 0.58333333, 0.15254237, 0.04166667],
[0.19444444, 0.41666667, 0.10169492, 0.04166667],
[0.19444444, 0.58333333, 0.10169492, 0.125 ],
[0.25 , 0.625 , 0.08474576, 0.04166667],
[0.25 , 0.58333333, 0.06779661, 0.04166667],
[0.11111111, 0.5 , 0.10169492, 0.04166667],
[0.13888889, 0.45833333, 0.10169492, 0.04166667],
[0.30555556, 0.58333333, 0.08474576, 0.125 ],
[0.25 , 0.875 , 0.08474576, 0. ],
[0.33333333, 0.91666667, 0.06779661, 0.04166667],
[0.16666667, 0.45833333, 0.08474576, 0. ],
[0.19444444, 0.5 , 0.03389831, 0.04166667],
[0.33333333, 0.625 , 0.05084746, 0.04166667],
[0.16666667, 0.45833333, 0.08474576, 0. ],
[0.02777778, 0.41666667, 0.05084746, 0.04166667],
[0.22222222, 0.58333333, 0.08474576, 0.04166667],
[0.19444444, 0.625 , 0.05084746, 0.08333333],
[0.05555556, 0.125 , 0.05084746, 0.08333333],
[0.02777778, 0.5 , 0.05084746, 0.04166667],
[0.19444444, 0.625 , 0.10169492, 0.20833333],
[0.22222222, 0.75 , 0.15254237, 0.125 ],
[0.13888889, 0.41666667, 0.06779661, 0.08333333],
[0.22222222, 0.75 , 0.10169492, 0.04166667],
[0.08333333, 0.5 , 0.06779661, 0.04166667],
[0.27777778, 0.70833333, 0.08474576, 0.04166667],
[0.19444444, 0.54166667, 0.06779661, 0.04166667],
[0.75 , 0.5 , 0.62711864, 0.54166667],
[0.58333333, 0.5 , 0.59322034, 0.58333333],
[0.72222222, 0.45833333, 0.66101695, 0.58333333],
[0.33333333, 0.125 , 0.50847458, 0.5 ],
[0.61111111, 0.33333333, 0.61016949, 0.58333333],
[0.38888889, 0.33333333, 0.59322034, 0.5 ],
[0.55555556, 0.54166667, 0.62711864, 0.625 ],
[0.16666667, 0.16666667, 0.38983051, 0.375     ],
[0.63888889, 0.375 , 0.61016949, 0.5 ],
[0.25 , 0.29166667, 0.49152542, 0.54166667],
[0.19444444, 0. , 0.42372881, 0.375 ],
[0.44444444, 0.41666667, 0.54237288, 0.58333333],
[0.47222222, 0.08333333, 0.50847458, 0.375 ],
[0.5 , 0.375 , 0.62711864, 0.54166667],
[0.36111111, 0.375 , 0.44067797, 0.5 ],
[0.66666667, 0.45833333, 0.57627119, 0.54166667],
[0.36111111, 0.41666667, 0.59322034, 0.58333333],
[0.41666667, 0.29166667, 0.52542373, 0.375 ],
[0.52777778, 0.08333333, 0.59322034, 0.58333333],
[0.36111111, 0.20833333, 0.49152542, 0.41666667],
[0.44444444, 0.5 , 0.6440678 , 0.70833333],
[0.5 , 0.33333333, 0.50847458, 0.5 ],
[0.55555556, 0.20833333, 0.66101695, 0.58333333],
[0.5 , 0.33333333, 0.62711864, 0.45833333],
[0.58333333, 0.375 , 0.55932203, 0.5 ],
[0.63888889, 0.41666667, 0.57627119, 0.54166667],
[0.69444444, 0.33333333, 0.6440678 , 0.54166667],
[0.66666667, 0.41666667, 0.6779661 , 0.66666667],
[0.47222222, 0.375 , 0.59322034, 0.58333333],
[0.38888889, 0.25 , 0.42372881, 0.375 ],
[0.33333333, 0.16666667, 0.47457627, 0.41666667],
[0.33333333, 0.16666667, 0.45762712, 0.375 ],
[0.41666667, 0.29166667, 0.49152542, 0.45833333],
[0.47222222, 0.29166667, 0.69491525, 0.625 ],
[0.30555556, 0.41666667, 0.59322034, 0.58333333],
[0.47222222, 0.58333333, 0.59322034, 0.625 ],
[0.66666667, 0.45833333, 0.62711864, 0.58333333],
[0.55555556, 0.125 , 0.57627119, 0.5 ],
[0.36111111, 0.41666667, 0.52542373, 0.5 ],
[0.33333333, 0.20833333, 0.50847458, 0.5 ],
[0.33333333, 0.25 , 0.57627119, 0.45833333],
[0.5 , 0.41666667, 0.61016949, 0.54166667],
[0.41666667, 0.25 , 0.50847458, 0.45833333],
[0.19444444, 0.125 , 0.38983051, 0.375 ],
[0.36111111, 0.29166667, 0.54237288, 0.5 ],
[0.38888889, 0.41666667, 0.54237288, 0.45833333],
[0.38888889, 0.375 , 0.54237288, 0.5 ],
[0.52777778, 0.375 , 0.55932203, 0.5 ],
[0.22222222, 0.20833333, 0.33898305, 0.41666667],
[0.38888889, 0.33333333, 0.52542373, 0.5 ],
[0.55555556, 0.54166667, 0.84745763, 1. ],
[0.41666667, 0.29166667, 0.69491525, 0.75 ],
[0.77777778, 0.41666667, 0.83050847, 0.83333333],
[0.55555556, 0.375 , 0.77966102, 0.70833333],
[0.61111111, 0.41666667, 0.81355932, 0.875 ],
[0.91666667, 0.41666667, 0.94915254, 0.83333333],
[0.16666667, 0.20833333, 0.59322034, 0.66666667],
[0.83333333, 0.375 , 0.89830508, 0.70833333],
[0.66666667, 0.20833333, 0.81355932, 0.70833333],
[0.80555556, 0.66666667, 0.86440678, 1. ],
[0.61111111, 0.5 , 0.69491525, 0.79166667],
[0.58333333, 0.29166667, 0.72881356, 0.75 ],
[0.69444444, 0.41666667, 0.76271186, 0.83333333],
[0.38888889, 0.20833333, 0.6779661 , 0.79166667],

[0.41666667, 0.33333333, 0.69491525, 0.95833333],
[0.58333333, 0.5 , 0.72881356, 0.91666667],
[0.61111111, 0.41666667, 0.76271186, 0.70833333],
[0.94444444, 0.75 , 0.96610169, 0.875 ],
[0.94444444, 0.25 , 1. , 0.91666667],
[0.47222222, 0.08333333, 0.6779661 , 0.58333333],
[0.72222222, 0.5 , 0.79661017, 0.91666667],
[0.36111111, 0.33333333, 0.66101695, 0.79166667],
[0.94444444, 0.33333333, 0.96610169, 0.79166667],
[0.55555556, 0.29166667, 0.66101695, 0.70833333],
[0.66666667, 0.54166667, 0.79661017, 0.83333333],
[0.80555556, 0.5 , 0.84745763, 0.70833333],
[0.52777778, 0.33333333, 0.6440678 , 0.70833333],
[0.5 , 0.41666667, 0.66101695, 0.70833333],
[0.58333333, 0.33333333, 0.77966102, 0.83333333],
[0.80555556, 0.41666667, 0.81355932, 0.625 ],
[0.86111111, 0.33333333, 0.86440678, 0.75 ],
[1. , 0.75 , 0.91525424, 0.79166667],
[0.58333333, 0.33333333, 0.77966102, 0.875 ],
[0.55555556, 0.33333333, 0.69491525, 0.58333333],
[0.5 , 0.25 , 0.77966102, 0.54166667],
[0.94444444, 0.41666667, 0.86440678, 0.91666667],
[0.55555556, 0.58333333, 0.77966102, 0.95833333],
[0.58333333, 0.45833333, 0.76271186, 0.70833333],
[0.47222222, 0.41666667, 0.6440678 , 0.70833333],
[0.72222222, 0.45833333, 0.74576271, 0.83333333],
[0.66666667, 0.45833333, 0.77966102, 0.95833333],
[0.72222222, 0.45833333, 0.69491525, 0.91666667],
[0.41666667, 0.29166667, 0.69491525, 0.75 ],
[0.69444444, 0.5 , 0.83050847, 0.91666667],
[0.66666667, 0.54166667, 0.79661017, 1. ],
[0.66666667, 0.41666667, 0.71186441, 0.91666667],
[0.55555556, 0.20833333, 0.6779661 , 0.75 ],
[0.61111111, 0.41666667, 0.71186441, 0.79166667],
[0.52777778, 0.58333333, 0.74576271, 0.91666667],
[0.44444444, 0.41666667, 0.69491525, 0.70833333]])
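As a sanity check, MinMaxScaler applies x' = (x - min) / (max - min) column by column; the same result can be reproduced by hand (a minimal sketch, assuming X and X_scaled from the cells above):

# Manual column-wise min-max scaling; should match scaler.fit_transform(X).
X_manual = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
print(np.allclose(X_manual, X_scaled))   # True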

In [10]: # transform back to a DataFrame for easier exploration/plotting
X_scaled_df = pd.DataFrame(X_scaled, columns=['s_SepalLength','s_SepalWidth',
                                              's_PetalLength','s_PetalWidth'])
X_scaled_df.head()

Out[10]:
   s_SepalLength  s_SepalWidth  s_PetalLength  s_PetalWidth
0       0.222222      0.625000       0.067797      0.041667
1       0.166667      0.416667       0.067797      0.041667
2       0.111111      0.500000       0.050847      0.041667
3       0.083333      0.458333       0.084746      0.041667
4       0.194444      0.666667       0.067797      0.041667


In [11]: df_iris_scaled = pd.concat([X_scaled_df, y], axis=1)
df_iris_scaled.head()

Out[11]:
   s_SepalLength  s_SepalWidth  s_PetalLength  s_PetalWidth        class
0       0.222222      0.625000       0.067797      0.041667  Iris-setosa
1       0.166667      0.416667       0.067797      0.041667  Iris-setosa
2       0.111111      0.500000       0.050847      0.041667  Iris-setosa
3       0.083333      0.458333       0.084746      0.041667  Iris-setosa
4       0.194444      0.666667       0.067797      0.041667  Iris-setosa


In [12]: # Note that after scaling, the x-axis range is the same (0 to 1)
# on the subplots for all four features.
fig = plt.figure(figsize=(14,9))
fig.suptitle('Frequency Distribution of Features by Species ',fontsize=20)

ax1 = fig.add_subplot(221)
df_iris_scaled.groupby("class").s_PetalLength.plot(kind='hist',
alpha=0.8,
legend=True,
title='s_PetalLength')

ax2 = fig.add_subplot(222,sharey=ax1)
df_iris_scaled.groupby("class").s_PetalWidth.plot(kind='hist',
alpha=0.8,
legend=True,
title='s_PetalWidth')

ax3 = fig.add_subplot(223,sharey=ax1)
df_iris_scaled.groupby("class").s_SepalLength.plot(kind='hist',
alpha=0.8,
legend=True,
title='s_SepalLength')

ax4 = fig.add_subplot(224,sharey=ax1)
df_iris_scaled.groupby("class").s_SepalWidth.plot(kind='hist',
alpha=0.8,
legend=True,
title='s_SepalWidth');

[Figure: 'Frequency Distribution of Features by Species', a 2x2 grid of histograms of the four scaled features grouped by class]

In [13]: X_scaled_df.describe()

Out[13]:
       s_SepalLength  s_SepalWidth  s_PetalLength  s_PetalWidth
count     150.000000    150.000000     150.000000    150.000000
mean        0.428704      0.439167       0.467571      0.457778
std         0.230018      0.180664       0.299054      0.317984
min         0.000000      0.000000       0.000000      0.000000
25%         0.222222      0.333333       0.101695      0.083333
50%         0.416667      0.416667       0.567797      0.500000
75%         0.583333      0.541667       0.694915      0.708333
max         1.000000      1.000000       1.000000      1.000000

In [18]: # train and test split (on the unscaled X; In [6] noted scaling may not help here)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [19]: print("train sample size", X_train.shape, type(X_train))
print("test sample size", X_test.shape, type(X_test))

train sample size (105, 4) <class 'numpy.ndarray'>
test sample size (45, 4) <class 'numpy.ndarray'>
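A side note, not part of the original run: with only 45 test samples the class proportions can drift between the splits; passing stratify=y keeps each class's proportion the same in train and test (hypothetical variable names used here to avoid clobbering the ones above):

# Stratified variant of the split above.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,
                                          random_state=0, stratify=y)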

In [23]: clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)

Out[23]: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')
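n_neighbors=5 is just one choice of k; a quick way to compare others is a small loop scored on the test set (a minimal sketch reusing the variables above; proper tuning would cross-validate on the training set instead):

# Compare odd values of k by test-set accuracy.
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    print(k, metrics.accuracy_score(y_test, knn.predict(X_test)))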

In [24]: y_pred = clf.predict(X_test)

In [28]: # Creates a confusion matrix
cm = metrics.confusion_matrix(y_test, y_pred)

In [29]: cm

Out[29]: array([[16,  0,  0],
       [ 0, 17,  1],
       [ 0,  0, 11]], dtype=int64)


In [32]: CT = pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)
CT

Out[32]:
Predicted        Iris-setosa  Iris-versicolor  Iris-virginica  All
True
Iris-setosa               16                0               0   16
Iris-versicolor            0               17               1   18
Iris-virginica             0                0              11   11
All                       16               17              12   45

In [38]: from sklearn.metrics import accuracy_score

One insight from the matrix: every setosa (16/16) and virginica (11/11) sample was classified correctly, but one versicolor sample was misclassified as virginica (recall 17/18 ≈ 0.944). That stray prediction also lowers virginica's precision to 11/12 ≈ 0.917.
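These per-class figures can be read straight off the confusion matrix from In [28]: recall divides the diagonal by the row sums (actual counts), precision by the column sums (predicted counts). A minimal sketch:

# Rows of cm are true classes, columns are predicted classes.
recall = cm.diagonal() / cm.sum(axis=1)      # ≈ [1.0, 0.944, 1.0]
precision = cm.diagonal() / cm.sum(axis=0)   # ≈ [1.0, 1.0, 0.917]
print(recall, precision)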

In [39]: plt.figure(figsize=(6,4))
sns.heatmap(CT, annot=True)
plt.title('KNN classification model \nAccuracy:{0:.3f}'.format(accuracy_score(y_test, y_pred)))
plt.ylabel('True label')
plt.xlabel('Predicted label')

Out[39]: Text(0.5,16,'Predicted label')


In [42]: from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        16
Iris-versicolor       1.00      0.94      0.97        18
 Iris-virginica       0.92      1.00      0.96        11

    avg / total       0.98      0.98      0.98        45

In [43]: # Classification accuracy: overall, how often is the classifier correct?
print(metrics.accuracy_score(y_test, y_pred))

# Classification error: overall, how often is the classifier incorrect?
print(1 - metrics.accuracy_score(y_test, y_pred))

0.9777777777777777
0.022222222222222254
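Equivalently, accuracy is the diagonal of the confusion matrix summed and divided by the total count (using cm from In [28]):

print(cm.trace() / cm.sum())   # 44/45 ≈ 0.9778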

In [45]: # Sensitivity: when the actual value is +ve, how often is the prediction correct?
# Also known as "True Positive Rate" or "Recall"
# should be MAXIMIZED
# print(metrics.recall_score(y_test, y_pred, average=None))

# Specificity: when the actual value is -ve, how often is the prediction correct?
# Also known as "Selectivity" or "True Negative Rate"
# should be MAXIMIZED

# False Positive Rate: when the actual value is -ve, how often is the
# prediction incorrect?
# Equals 1 - Specificity

# Precision: when a +ve value is predicted, how often is the prediction correct?
# print(metrics.precision_score(y_test, y_pred, average=None))
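A runnable sketch of the commented-out calls above: average must be the Python None (not the string 'none') so one score per class is returned, and per-class specificity for the multiclass case can be derived from the confusion matrix cm:

# Per-class recall (sensitivity) and precision.
print(metrics.recall_score(y_test, y_pred, average=None))      # ≈ [1.0, 0.944, 1.0]
print(metrics.precision_score(y_test, y_pred, average=None))   # ≈ [1.0, 1.0, 0.917]

# Per-class specificity = TN / (TN + FP), derived from cm.
fp = cm.sum(axis=0) - cm.diagonal()    # false positives per class
tn = cm.sum() - cm.sum(axis=1) - fp    # true negatives per class
print(tn / (tn + fp))                  # ≈ [1.0, 1.0, 0.971]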
