Professional Documents
Culture Documents
In [3]: # load the iris data set from the CSV file at `location`
df_iris = pd.read_csv(location)
df_iris.head()
Out[3]:
sepal_length sepal_width petal_length petal_width class
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 1/13
9/7/2018 komal_knn1_minMaxScalar
Out[4]: ['bmh',
'classic',
'dark_background',
'fast',
'fivethirtyeight',
'ggplot',
'grayscale',
'seaborn-bright',
'seaborn-colorblind',
'seaborn-dark-palette',
'seaborn-dark',
'seaborn-darkgrid',
'seaborn-deep',
'seaborn-muted',
'seaborn-notebook',
'seaborn-paper',
'seaborn-pastel',
'seaborn-poster',
'seaborn-talk',
'seaborn-ticks',
'seaborn-white',
'seaborn-whitegrid',
'seaborn',
'Solarize_Light2',
'tableau-colorblind10',
'_classic_test']
In [5]: plt.style.use('ggplot')
In [6]: # Means are in the same order of magnitude for all features so scaling
# might not be beneficial.
# If mean values were of different orders of magnitude, scaling could
# significantly improve accuracy of a classifier.
df_iris.describe()
Out[6]:
sepal_length sepal_width petal_length petal_width
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 2/13
9/7/2018 komal_knn1_minMaxScalar
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 3/13
9/7/2018 komal_knn1_minMaxScalar
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 4/13
9/7/2018 komal_knn1_minMaxScalar
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 5/13
9/7/2018 komal_knn1_minMaxScalar
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 7/13
9/7/2018 komal_knn1_minMaxScalar
X_scaled_df.head()
Out[10]:
s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 8/13
9/7/2018 komal_knn1_minMaxScalar
Out[11]:
s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth class
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 9/13
9/7/2018 komal_knn1_minMaxScalar
In [12]: # Notice x-axis on subplots are all the same for all features (0 to 1)
# after scaling.
# Build a 2x2 grid of histograms, one panel per scaled feature. Within each
# panel, grouping by "class" draws one overlaid histogram per class value.
fig = plt.figure(figsize=(14,9))
fig.suptitle('Frequency Distribution of Features by Species ',fontsize=20)

# Panels 222-224 share the y-axis of the first panel so the frequency counts
# are directly comparable across features.
ax1 = fig.add_subplot(221)
ax2 = fig.add_subplot(222, sharey=ax1)
ax3 = fig.add_subplot(223, sharey=ax1)
ax4 = fig.add_subplot(224, sharey=ax1)

# Feature columns in the same order as the panels above; each column name
# doubles as its panel title.
panel_features = ['s_PetalLength', 's_PetalWidth', 's_SepalLength', 's_SepalWidth']
species_groups = df_iris_scaled.groupby("class")

for panel_ax, feature in zip((ax1, ax2, ax3, ax4), panel_features):
    # Pass the target axes explicitly instead of relying on whichever subplot
    # happens to be pyplot's "current" one when the plot call runs.
    species_groups[feature].plot(kind='hist',
                                 alpha=0.8,
                                 legend=True,
                                 title=feature,
                                 ax=panel_ax)
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 10/13
9/7/2018 komal_knn1_minMaxScalar
In [13]: X_scaled_df.describe()
Out[13]:
s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth
In [29]: cm
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 11/13
9/7/2018 komal_knn1_minMaxScalar
Out[32]:
Predicted Iris-setosa Iris-versicolor Iris-virginica All
True
Iris-setosa 16 0 0 16
Iris-versicolor 0 17 1 18
Iris-virginica 0 0 11 11
All 16 17 12 45
An insight we can get from the matrix is that every setosa and versicolor prediction was correct
(True Positive / All Predicted = 1.0, i.e. precision). However, precision for virginica was lower
(11/12 = 0.917), because one true versicolor sample was predicted as virginica.
In [39]: plt.figure(figsize=(6,4))
# Draw the annotated heatmap of CT and keep the Axes it was drawn on, so the
# title and axis labels are set on that Axes object directly.
heat_ax = sns.heatmap(CT, annot=True)
title_text = 'KNN classification model \nAccuracy:{0:.3f}'.format(
    accuracy_score(y_test, y_pred))
heat_ax.set_title(title_text)
heat_ax.set_ylabel('True label')
heat_ax.set_xlabel('Predicted label')
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 12/13
9/7/2018 komal_knn1_minMaxScalar
0.9777777777777777
0.022222222222222254
In [45]: # Sensitivity: when the actual value is +ve, how often is the prediction correct?
# Also known as "True Positive Rate" or "Recall"
# should be MAXIMIZED
# print(metrics.recall_score(y_test, y_pred, average=None))
# Specificity: when the actual value is -ve, how often is the prediction correct?
# Also known as "Selectivity" or "True Negative Rate"
# should be MAXIMIZED
# False Positive Rate : when the actual value is negative, how often is the
# prediction incorrect
# 1- Specificity
# Precision: when a +ve value is predicted, how often is the prediction correct?
# print(metrics.precision_score(y_test, y_pred, average=None))
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html 13/13