
# Import libraries

import numpy as np                # linear algebra
import pandas as pd               # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style='white', color_codes=True)
df = pd.read_csv('Iris.csv')
df.head()                      # first five rows
df.info()                      # column dtypes and non-null counts
df.describe()                  # summary statistics for the numeric columns
df.isnull().sum()              # missing values per column
df['Species'].value_counts()   # class balance
df.plot(kind='scatter',x="SepalLengthCm", y="SepalWidthCm")
df.plot(kind='scatter',x="PetalLengthCm", y="PetalWidthCm")
sns.FacetGrid(df, hue="Species").map(plt.scatter, "SepalLengthCm", "SepalWidthCm").add_legend()
sns.FacetGrid(df, hue="Species").map(plt.scatter, "PetalLengthCm", "PetalWidthCm").add_legend()
sns.boxplot(x="Species", y="PetalLengthCm", data=df)
sns.boxplot(x="Species", y="PetalWidthCm", data=df)
sns.stripplot(x="Species", y="PetalLengthCm", data=df, jitter=True, edgecolor="gray")
sns.FacetGrid(df,hue="Species").map(sns.kdeplot, "PetalLengthCm").add_legend()
sns.pairplot(df.drop("Id", axis=1), hue="Species", height=4)   # 'size' was renamed to 'height' in newer seaborn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
train, test = train_test_split(df, test_size = 0.25,random_state=20)
print(train.shape)
print(test.shape)
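The split above is random but not stratified; as an optional sketch (an addition, not part of the original notebook), passing stratify keeps the three species in the same proportions in train and test:

# Optional sketch: stratified variant of the same split (assumption, not in the original)
train_s, test_s = train_test_split(df, test_size=0.25, random_state=20, stratify=df['Species'])
print(train_s['Species'].value_counts())
print(test_s['Species'].value_counts())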
train_X=train[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
train_y=train.Species
test_X=test[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
test_y=test.Species
train_X.head()
KNN=KNeighborsClassifier(n_neighbors=3)
KNN.fit(train_X,train_y)
prediction=KNN.predict(test_X)
print('The accuracy of the KNN is', metrics.accuracy_score(test_y, prediction) * 100, 'percent')
KNN.score(test_X,test_y)
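n_neighbors=3 is an arbitrary choice; a small optional sketch (not part of the original notebook) loops over a few values of k and compares the test accuracy:

# Optional sketch: compare test accuracy for several k values (assumption, not in the original)
for k in range(1, 11):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(train_X, train_y)
    print('k =', k, 'accuracy =', model.score(test_X, test_y))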
from sklearn.metrics import confusion_matrix
cf_matrix = confusion_matrix(test_y, prediction)
sns.heatmap(cf_matrix, annot=True)
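As an optional readability tweak (an addition, not part of the original notebook), the heatmap can show integer counts and the class names; confusion_matrix orders rows and columns by the sorted class labels, so sorted(test_y.unique()) matches that order:

# Optional sketch: labelled confusion-matrix heatmap (assumption, not in the original)
class_names = sorted(test_y.unique())
sns.heatmap(cf_matrix, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()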
#import classification_report
from sklearn.metrics import classification_report
print(classification_report(test_y,prediction))
