# Professional Documents
# Culture Documents
#!/usr/bin/env python
# coding: utf-8
# In[15]:
##Loading Libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
# In[28]:
# Read the dataset from 'Iris.csv' (path is relative to the working directory).
df = pd.read_csv('Iris.csv')
# In[27]:
# Preview the first rows (only rendered when run as a notebook cell).
df.head()
# In[22]:
# In[29]:
# Summary statistics of the columns.
df.describe()
# In[30]:
# Print the column listing with non-null counts and dtypes.
df.info()
# In[32]:
# Number of rows per distinct value of the 'target' column.
df['target'].value_counts()
# In[33]:
# `Iris` was used here without ever being defined (NameError). The attributes
# accessed below (`.data`, `.DESCR`) match the Bunch object returned by
# scikit-learn's `load_iris()`, so the bundled dataset is loaded here.
# NOTE(review): confirm this is the object the original notebook cell created.
Iris = load_iris()
# Shape of the feature matrix.
Iris.data.shape
# In[34]:
# Print the dataset's description text.
print(Iris.DESCR)
# In[35]:
# Data type of each column.
df.dtypes
# In[37]:
# Count of missing values per column.
df.isnull().sum()
# Data Visualization
# In[38]:
# In[39]:
# Histograms: call df.hist() with a 12x12 figure size, then display the result.
df.hist(figsize=(12,12))
plt.show()
# In[45]:
# One plot colour per class label; the two lists are index-aligned.
colors = ['blue','orange','red']
target = ['Iris-setosa','Iris-versicolor','Iris-virginica']
# In[46]:
# Sepal length vs. sepal width, one scatter series per class.
# The scatter call was missing from this cell (so the legend had nothing to
# show) and the loop body was not indented; both are restored here following
# the pattern of the sepal-width/petal-width cell further down.
# NOTE(review): the 'sepal length (cm)' column name is inferred from that
# cell's naming -- confirm against the CSV header.
for color, label in zip(colors, target):
    subset = df[df['target'] == label]
    plt.scatter(subset['sepal length (cm)'], subset['sepal width (cm)'], c=color, label=label)
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.legend()
# Show (and finish) this figure so the next plot does not draw on top of it
# when the file is run as a plain script.
plt.show()
# In[47]:
# Petal length vs. petal width, one scatter series per class.
# The scatter call was missing from this cell and the loop body was not
# indented; both are restored here.
# NOTE(review): the 'petal length (cm)' column name is inferred from the
# 'petal width (cm)' naming used elsewhere in this file -- confirm against
# the CSV header.
for color, label in zip(colors, target):
    subset = df[df['target'] == label]
    plt.scatter(subset['petal length (cm)'], subset['petal width (cm)'], c=color, label=label)
plt.xlabel("Petal Length")
plt.ylabel("Petal Width")
plt.legend()
# Show (and finish) this figure so the next plot does not draw on top of it
# when the file is run as a plain script.
plt.show()
# In[48]:
# Sepal length vs. petal length, one scatter series per class.
# The scatter call was missing from this cell and the loop body was not
# indented; both are restored here.
# NOTE(review): the 'sepal length (cm)' / 'petal length (cm)' column names are
# inferred from the '... width (cm)' naming used elsewhere in this file --
# confirm against the CSV header.
for color, label in zip(colors, target):
    subset = df[df['target'] == label]
    plt.scatter(subset['sepal length (cm)'], subset['petal length (cm)'], c=color, label=label)
plt.xlabel("Sepal Length")
plt.ylabel("Petal Length")
plt.legend()
# Show (and finish) this figure so the next plot does not draw on top of it
# when the file is run as a plain script.
plt.show()
# In[49]:
# Sepal width vs. petal width, one scatter series per class.
# The loop body was not indented (a syntax error); only the row filter and the
# scatter call belong inside the loop, the axis labels and legend are set once.
for color, label in zip(colors, target):
    subset = df[df['target'] == label]
    plt.scatter(subset['sepal width (cm)'], subset['petal width (cm)'], c=color, label=label)
plt.xlabel("Sepal Width")
plt.ylabel("Petal Width")
plt.legend()
# Show (and finish) this figure so later plots do not draw on top of it when
# the file is run as a plain script.
plt.show()
# Correlation Matrix
# A correlation matrix is a table showing the correlation coefficients between variables. Each cell in the
# table shows the correlation between two variables. The value is in the range of -1 to 1. If two variables
# are highly correlated, one of the two can be dropped.
# In[50]:
# 'target' still holds string labels at this point, and recent pandas versions
# raise when corr() meets a non-numeric column, so restrict to numeric columns.
numeric_df = df.select_dtypes(include='number')
numeric_df.corr()
# In[53]:
corr = numeric_df.corr()
fig, ax = plt.subplots(figsize=(5,4))
# The original cell created the figure but never drew on it; render the matrix
# as a heatmap with a fixed -1..1 colour scale.
# NOTE(review): the original plotting call is not visible -- confirm styling.
im = ax.imshow(corr.values, cmap='coolwarm', vmin=-1, vmax=1)
ax.set_xticks(range(len(corr.columns)))
ax.set_xticklabels(corr.columns, rotation=45, ha='right')
ax.set_yticks(range(len(corr.index)))
ax.set_yticklabels(corr.index)
fig.colorbar(im, ax=ax)
plt.show()
# ## Label Encoder
# In machine learning we usually deal with datasets that contain multiple labels in one or more
# columns. These labels can be words or numbers. Label encoding refers to converting
# the labels into numeric form so that they are machine-readable.
# In[55]:
# Encoder used to turn the class labels into numeric form.
le = LabelEncoder()
# In[56]:
# Overwrite the 'target' column in place with its encoded values.
df['target'] = le.fit_transform(df['target'])
# Preview the frame after encoding (only rendered when run as a notebook cell).
df.head()
# ## Model Training
# In[71]:
# Hold out 30% of the rows for testing and train on the remaining 70%.
# NOTE(review): every column except 'target' is used as a feature; if the CSV
# carries a row-id column it should be dropped here as well -- confirm.
X = df.drop(columns=['target'])
Y = df['target']
# A fixed random_state makes the split (and the accuracies printed afterwards)
# reproducible between runs; stratify keeps the class proportions the same in
# the training and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=42, stratify=Y
)
# In[72]:
# Logistic Regression: create the estimator with its default settings.
model = LogisticRegression()
# In[73]:
# Model training: fit on the training split.
model.fit(x_train, y_train)
# In[74]:
# Score on the held-out test split, printed as a percentage.
print("Accuracy:",model.score(x_test,y_test)*100)
# In[75]:
# In[76]:
# NOTE(review): the preceding cell is empty, so `model` is still the
# logistic-regression estimator created earlier and this refits it on the same
# data. A different classifier may have been intended here -- confirm.
model.fit(x_train, y_train)
# In[77]:
# Report as a percentage, consistent with the other accuracy printouts in
# this file (the original printed the raw 0-1 fraction here only).
print("Accuracy:",model.score(x_test,y_test)*100)
# In[78]:
# Replace `model` with a k-nearest-neighbours classifier (default settings).
model = KNeighborsClassifier()
# In[79]:
# Fit on the same training split used for the previous model.
model.fit(x_train, y_train)
# In[81]:
# Score on the held-out test split, printed as a percentage.
print("Accuracy:",model.score(x_test,y_test)*100)
# In[ ]: