#!/usr/bin/env python
# coding: utf-8

# In[15]:

##Loading Libraries

import pandas as pd

import numpy as np

import os

import matplotlib.pyplot as plt

import seaborn as sns

# In[28]:

# Load the Iris measurements from a CSV file in the current working directory.
# NOTE(review): later cells index the columns 'sepal length (cm)',
# 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)' and 'target' --
# confirm that Iris.csv actually uses these headers.
df = pd.read_csv('Iris.csv')

# In[27]:

# Bare expressions only render inside a notebook; print() keeps the output
# visible when this file is run as a plain script.
print(df.head())

# In[22]:

# `Iris` is used below but was never defined in this export (this cell was
# empty). Presumably it was scikit-learn's bundled dataset object, which
# exposes both `.data` and `.DESCR` -- TODO confirm against the notebook.
from sklearn.datasets import load_iris

Iris = load_iris()

# In[29]:

# Summary statistics for the dataset
print(df.describe())

# In[30]:

# Basic info about the dataset; info() writes to stdout itself
df.info()

# In[32]:

# Number of samples in each class
print(df['target'].value_counts())

# In[33]:

print(Iris.data.shape)

# In[34]:

print(Iris.DESCR)

# In[35]:

print(df.dtypes)

# Preprocessing the dataset

# In[37]:

# Count the null values in each column
print(df.isnull().sum())

# Data Visualization

# In[38]:

# Box-and-whisker plots: one subplot per column, arranged on a 3x2 grid.
df.plot.box(subplots=True, layout=(3, 2), figsize=(8, 12))

# In[39]:

# Histograms of the columns on a single 12x12 figure.
df.hist(figsize=(12, 12))

# Render everything drawn so far.
plt.show()

# In[45]:

# Scatter plots: one colour per class (three classes).

colors = ['blue', 'orange', 'red']

target = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']


def _scatter_by_class(x_col, y_col, x_label, y_label):
    """Draw one scatter plot of ``x_col`` against ``y_col``, coloured by class.

    Reads the module-level ``df``, ``colors`` and ``target``. Each call opens
    its own figure so that consecutive plots do not pile up on the same axes
    when the file is run as a script rather than cell by cell.

    Args:
        x_col: Name of the ``df`` column plotted on the x axis.
        y_col: Name of the ``df`` column plotted on the y axis.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
    """
    plt.figure()
    for color, class_name in zip(colors, target):
        # Rows belonging to the current class only.
        subset = df[df['target'] == class_name]
        plt.scatter(subset[x_col], subset[y_col], c=color, label=class_name)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


# In[46]:

_scatter_by_class('sepal length (cm)', 'sepal width (cm)',
                  "Sepal Length", "Sepal Width")

# In[47]:

_scatter_by_class('petal length (cm)', 'petal width (cm)',
                  "Petal Length", "Petal Width")

# In[48]:

_scatter_by_class('sepal length (cm)', 'petal length (cm)',
                  "Sepal Length", "Petal Length")

# In[49]:

_scatter_by_class('sepal width (cm)', 'petal width (cm)',
                  "Sepal Width", "Petal Width")

# Correlation Matrix

# A correlation matrix is a table showing the correlation coefficients between
# variables. Each cell in the table shows the correlation between two
# variables, and its value lies in the range -1 to 1. If two variables are
# highly correlated, one of the two can be neglected.

# In[50]:

# At this point 'target' still holds string labels, and newer pandas versions
# raise when corr() meets a non-numeric column, so restrict the computation to
# the numeric columns. Compute the matrix once and reuse it for the heatmap.
corr = df.select_dtypes(include='number').corr()

print(corr)

# In[53]:

fig, ax = plt.subplots(figsize=(5, 4))

# annot=True writes each coefficient inside its cell.
sns.heatmap(corr, annot=True, ax=ax, cmap='coolwarm')

plt.show()

# ## Label Encoder
# In machine learning we usually deal with datasets that contain multiple
# labels in one or more columns. These labels can be words or numbers. Label
# encoding converts the labels into numeric form so that they become
# machine-readable.

# In[55]:

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

# In[56]:

# Replace the string labels in the 'target' column with integer codes.
# `le` keeps the fitted mapping, so the codes can be translated back later.
df['target'] = le.fit_transform(df['target'])

# print() so the preview is visible when run as a script, not only in a notebook.
print(df.head())

# ## Model Training

# In[71]:

from sklearn.model_selection import train_test_split

# train - 70
# test - 30

# Features are every column except the encoded label.
# NOTE(review): if Iris.csv carries an id/index column it ends up in X as a
# feature -- confirm the CSV has only measurement columns plus 'target'.
X = df.drop(columns=['target'])

Y = df['target']

# A fixed random_state makes the split (and every accuracy figure printed
# below) reproducible between runs; stratify keeps the class proportions the
# same in the train and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=42, stratify=Y
)

# In[72]:

# Logistic Regression

from sklearn.linear_model import LogisticRegression

# The default iteration cap (100) can stop the solver before it converges on
# unscaled features; a higher cap lets it finish.
model = LogisticRegression(max_iter=1000)

# In[73]:

# Model training

model.fit(x_train, y_train)

# In[74]:

# Mean accuracy on the held-out 30 %, reported as a percentage.

print("Accuracy:", model.score(x_test, y_test) * 100)

# In[75]:

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Ask for one cluster per class instead of the library default of 8.
# n_init and random_state are set explicitly so the result does not depend on
# the installed scikit-learn version or on the random initialisation.
model = KMeans(n_clusters=Y.nunique(), n_init=10, random_state=42)

# In[76]:

# KMeans is unsupervised: fit() ignores labels, so none are passed.
model.fit(x_train)

# In[77]:

# KMeans.score() returns the negative clustering objective, not an accuracy,
# and cluster ids are arbitrary, so they cannot be compared to the class codes
# directly. The Adjusted Rand Index measures agreement between the cluster
# assignment and the true classes independent of how clusters are numbered
# (1.0 = identical partition, about 0 = random assignment).
print("Adjusted Rand Index:",
      adjusted_rand_score(y_test, model.predict(x_test)))

# In[78]:

from sklearn.neighbors import KNeighborsClassifier

# In[79]:

# Build the k-nearest-neighbours classifier with library defaults and train it
# on the training split; fit() returns the fitted estimator itself.
model = KNeighborsClassifier().fit(x_train, y_train)

# In[81]:

# Mean accuracy on the test split, reported as a percentage.
print("Accuracy:", model.score(x_test, y_test) * 100)

# In[ ]: