Lab (CC3230): Write a program to implement the Backpropagation Algorithm.
Lab 1
Ananiya Sardana
199303079
CCE-B
Introduction to Python
Program 1: Python Program for factorial of a number
In [1]:
def factorial(n):
    if n == 1 or n == 0:
        return 1
    else:
        return n * factorial(n - 1)
In [2]:
n = int(input("Enter a number: "))
print("Factorial of", n, "is", factorial(n))
Enter a number: 10
Factorial of 10 is 3628800
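The cells defining the prime-printing function did not survive the export; a minimal sketch consistent with the call and output below (the name prime and the print-one-per-line behaviour are assumptions):

def prime(start, end):
    # print every prime in the range [start, end], one per line
    for num in range(max(start, 2), end + 1):
        if all(num % d != 0 for d in range(2, int(num ** 0.5) + 1)):
            print(num)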
In [6]:
start = int(input("Enter start range:"))
end = int(input("Enter end range:"))
prime(start, end)
11
13
In [7]:
def fibonacci(n):
    f1 = 0
    f2 = 1
    if n == 0:
        print(f1)
    elif n == 1:
        print(f2)
    else:
        print(f1, " ", f2, " ", end=" ")
        for i in range(3, n + 1):
            term = f1 + f2
            f1 = f2
            f2 = term
            print(term, " ", end=" ")
In [8]:
n = int(input("Enter nth term: "))
fibonacci(n)
Program 5: Python Program to check if a given number is a Fibonacci number
In [9]:
import math
In [10]:
Enter a number: 12
12 is not a Fibonacci Number
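The checking cell itself is missing from the export; a sketch using the classic test that n is a Fibonacci number iff 5n^2 + 4 or 5n^2 - 4 is a perfect square (the function names are assumptions):

def isPerfectSquare(x):
    s = int(math.isqrt(x))
    return s * s == x

def isFibonacci(n):
    # n is Fibonacci iff 5n^2 + 4 or 5n^2 - 4 is a perfect square
    return isPerfectSquare(5 * n * n + 4) or isPerfectSquare(5 * n * n - 4)

n = int(input("Enter a number: "))
if isFibonacci(n):
    print(n, "is a Fibonacci Number")
else:
    print(n, "is not a Fibonacci Number")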
In [11]:
def SumOfCube(n):
sum=0
for i in range(1,n+1):
sum += i*i*i
print("Cube Sum of first",n,"natural numbers:",sum)
In [12]:
In [34]:
def stairCase(num):
    for i in range(1, num + 1):
        for j in range(2):
            for k in range(i, num):
                print(" ", end="")
            for l in range(2 * i):
                print("*", end="")
            print()
In [36]:
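The invoking cell and its printed staircase did not survive the export; presumably a call such as:

stairCase(int(input("Enter number of steps: ")))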
Lab 2
Ananiya Sardana
199303079
CCE-B
Program 1: Python program to find the smallest and largest number in a list
In [3]:
def smallAndLarge():
    max = float('-inf')
    min = float('inf')
    list1 = []
    n = int(input("Enter size of list: "))
    for i in range(0, n):
        list1.append(int(input("Enter number: ")))
    for i in range(0, n):
        if list1[i] > max:
            max = list1[i]
        if list1[i] < min:
            min = list1[i]
    print("Largest number:", max)
    print("Smallest number:", min)
In [4]:
smallAndLarge()
Program 2: Python program to find and print all even and odd numbers in a list
In [13]:
def oddOrEven():
    list1 = []
    n = int(input("Enter size of list: "))
    for i in range(0, n):
        list1.append(int(input("Enter number: ")))
    print("\nOdd Numbers in the list:")
    for i in range(0, n):
        if list1[i] % 2 == 1:
            print(list1[i], end=' ')
    print("\nEven Numbers in the list:")
    for i in range(0, n):
        if list1[i] % 2 == 0:
            print(list1[i], end=' ')
In [14]:
oddOrEven()
Enter size of list: 4
Enter number: 1
Enter number: 2
Enter number: 3
Enter number: 4

Odd Numbers in the list:
1 3
Even Numbers in the list:
2 4
Program 3: Create a dictionary "Employee" and add data items name, age, salary and company name by taking input
from the user.
In [15]:
def EmpDict():
    Employee = dict({'Name': input("Enter Name of the Employee: "),
                     'Age': int(input("Enter Age of the Employee: ")),
                     'Salary': int(input("Enter Salary of the Employee: ")),
                     'Company': input("Enter Company Name of the Employee: ")})
    print(Employee)
In [16]:
EmpDict()
Program 4: Explain the usage of the cmp, len, max, min and tuple inbuilt Python functions for the tuple data structure
using some programming examples.
In [20]:
# cmp doesn't exist in Python 3 anymore, so we have to define it ourselves
def cmp(a, b):
return (a > b) - (a < b)
In [22]:
def cmpExample():
    print("Usage of cmp Example:\n")
    a = 5
    b = 10
    print(cmp(a, b))
    a = 5
    b = 5
    print(cmp(a, b))
    a = 10
    b = 5
    print(cmp(a, b))
In [23]:
cmpExample()
-1
0
1
In [24]:
def lenExample():
    print("Usage of len Example:\n")
    a = "Leo Messi"
    b = [1, 2, 3, 4, 5, 6, 7]
    print(len(a))
    print(len(b))
In [25]:
lenExample()
Usage of len Example:

9
7
In [26]:
def MaxAndMinExample():
    print("Usage of max and min Example:\n")
    a = [1, 2, 3, 4, 5, 6, 7]
    print(max(a))
    print(min(a))
In [27]:
MaxAndMinExample()
Usage of max and min Example:

7
1
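The tuple() demonstration promised in the Program 4 heading is missing from the export; a minimal sketch (the function name tupleExample is an assumption):

def tupleExample():
    print("Usage of tuple Example:\n")
    a = [1, 2, 3, 4, 5]
    t = tuple(a)  # build an immutable tuple from a list
    print(t)
    print(max(t), min(t), len(t))

tupleExample()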
Lab 3
Ananiya Sardana
199303079
CCE-B
import random
In [2]:
tsp = [
[0, 400, 500, 300],
[400, 0, 300, 500],
[500, 300, 0, 400],
[300, 500, 400, 0],
]
In [3]:
tsp
Out[3]:
In [4]:
len(tsp)
Out[4]:
4
In [5]:
def randomSolution(tsp):
    cities = list(range(len(tsp)))
    solution = []
    for i in range(len(tsp)):
        randomcity = cities[random.randint(0, len(cities) - 1)]
        solution.append(randomcity)
        cities.remove(randomcity)  # drop city that is already appended to solution
    return solution
In [6]:
solution = randomSolution(tsp)
In [7]:
print(solution)
[3, 2, 0, 1]
In [8]:
def routeLength(tsp, solution):
    routeLength = 0
    for i in range(len(solution)):
        routeLength = routeLength + tsp[solution[i - 1]][solution[i]]
    return routeLength
In [9]:
routelength = routeLength(tsp,solution)
In [10]:
print(routelength)
1800
In [11]:
def getNeigbours(solution):
    neighbours = []
    for i in range(len(solution)):
        for j in range(i + 1, len(solution)):
            neighbour = solution.copy()
            neighbour[i] = solution[j]
            neighbour[j] = solution[i]
            neighbours.append(neighbour)
    return neighbours
In [12]:
neighbours = getNeigbours(solution)
In [13]:
print(neighbours)
[[2, 3, 0, 1], [0, 2, 3, 1], [1, 2, 0, 3], [3, 0, 2, 1], [3, 1, 0, 2], [3, 2, 1, 0]]
In [19]:
def getBestNeigbour(neighbours):
    neigbourRoute = [0] * len(neighbours)
    for i in range(len(neighbours)):
        neigbourRoute[i] = routeLength(tsp, neighbours[i])
    bestNeighbour = neighbours[neigbourRoute.index(min(neigbourRoute))]
    bestRouteLength = min(neigbourRoute)
    return bestRouteLength, bestNeighbour
In [20]:
bestRouteLength, bestNeighbour = getBestNeigbour(neighbours)
In [22]:
print(bestRouteLength)
1400
In [21]:
print(bestNeighbour)
[2, 3, 0, 1]
In [55]:
def hillClimbing(tsp):
    currentSolution = randomSolution(tsp)
    currentRouteLength = routeLength(tsp, currentSolution)
    print(currentSolution)
    print(currentRouteLength)
    neighbors = getNeigbours(currentSolution)
    bestneighborroutelength, bestneighbor = getBestNeigbour(neighbors)
    # print(bestneighborroutelength)
    # print(currentRouteLength)
    while bestneighborroutelength < currentRouteLength:
        currentSolution = bestneighbor
        currentRouteLength = bestneighborroutelength
        neighbors = getNeigbours(currentSolution)
        bestneighborroutelength, bestneighbor = getBestNeigbour(neighbors)
In [56]:
print(hillClimbing(tsp))
[3, 0, 1, 2]
1400
None
Lab 4
Ananiya Sardana
199303079
CCE-B
A* Algorithm
Swapping of tiles
In [1]:
def move(arr, p, st):
    rh = 999999
    store_st = st.copy()
    print("store_st: ", store_st)
    for i in range(len(arr)):
        dupl_st = st.copy()
        temp = dupl_st[p]
        #print(temp)
        dupl_st[p] = dupl_st[arr[i]]
        print("x", dupl_st[p])
        dupl_st[arr[i]] = temp
        tmp_rh = count(dupl_st)
        if tmp_rh < rh:
            rh = tmp_rh
            store_st = dupl_st.copy()
    return store_st, rh
In [2]:
def print_in_format(matrix):
for i in range(9):
if i%3==0 and i>0:
print("")
print(str(matrix[i])+ " ", end = "")
In [3]:
start = [1,2,3,
0,5,6,
4,7,8]
In [4]:
def count(s):
    c = 0
    ideal = [1, 2, 3,
             4, 5, 6,
             7, 8, 0]
    for i in range(9):
        if s[i] != 0 and s[i] != ideal[i]:
            c = c + 1
    return c
In [5]:
h = count(start)
level = 1
print("\n............Level "+str(level)+"............")
print_in_format(start)
print("\nHeuristic Value(misplaced tiles): "+str(h))
while h > 0:
    pos = int(start.index(0))
    level = level + 1
    if pos == 0:
        arr = [1, 3]
    elif pos == 1:
        arr = [0, 2, 4]
    elif pos == 2:
        arr = [1, 5]
    elif pos == 3:
        arr = [0, 4, 6]
    elif pos == 4:
        arr = [1, 3, 5, 7]
    elif pos == 5:
        arr = [2, 4, 8]
    elif pos == 6:
        arr = [3, 7]
    elif pos == 7:
        arr = [4, 6, 8]
    elif pos == 8:
        arr = [5, 7]
    start, h = move(arr, pos, start)
    print("\n..............Level:"+str(level)+"...............")
    print_in_format(start)
    print("\nheuristic Value(no of misplaced tiles): "+str(h))
............Level 1............
1 2 3
0 5 6
4 7 8
Heuristic Value(misplaced tiles): 3
store_st: [1, 2, 3, 0, 5, 6, 4, 7, 8]
x 1
x 5
x 4
..............Level:2...............
1 2 3
4 5 6
0 7 8
heuristic Value(no of misplaced tiles): 2
store_st: [1, 2, 3, 4, 5, 6, 0, 7, 8]
x 4
x 7
..............Level:3...............
1 2 3
4 5 6
7 0 8
heuristic Value(no of misplaced tiles): 1
store_st: [1, 2, 3, 4, 5, 6, 7, 0, 8]
x 5
x 7
x 8
..............Level:4...............
1 2 3
4 5 6
7 8 0
heuristic Value(no of misplaced tiles): 0
Lab 5
Ananiya Sardana
199303079
CCE-B
# CSP
# find the values of x and y such that x is from [1,2,3], y is from 0 to 9, and x+y >= 5
# constraint module
import constraint
from constraint import *
problem=Problem()
problem.addVariable('x',[1,2,3])
problem.addVariable('y',range(10))
def my_constraint(x, y):
    if x + y >= 5:
        return True

problem.addConstraint(my_constraint, ['x', 'y'])
solutions = problem.getSolutions()
for solution in solutions:
print(solution)
{'x': 3, 'y': 9}
{'x': 3, 'y': 8}
{'x': 3, 'y': 7}
{'x': 3, 'y': 6}
{'x': 3, 'y': 5}
{'x': 3, 'y': 4}
{'x': 3, 'y': 3}
{'x': 3, 'y': 2}
{'x': 2, 'y': 9}
{'x': 2, 'y': 8}
{'x': 2, 'y': 7}
{'x': 2, 'y': 6}
{'x': 2, 'y': 5}
{'x': 2, 'y': 4}
{'x': 2, 'y': 3}
{'x': 1, 'y': 9}
{'x': 1, 'y': 8}
{'x': 1, 'y': 7}
{'x': 1, 'y': 6}
{'x': 1, 'y': 5}
{'x': 1, 'y': 4}
In [6]:
import constraint
from constraint import *
problem=Problem()
problem.addVariable('x',[1,2,3,4,5,6,7,8,9,10])
problem.addVariable('y',range(1,21))
def my_constraint(x,y):
if (y/x)%2==0:
return True
problem.addConstraint(my_constraint,['x','y'])
solutions=problem.getSolutions()
for solution in solutions:
print(solution)
In [ ]:
Lab 6 Part 1
Ananiya Sardana
199303079
CCE-B
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('canada_per_capita_income.csv')
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values
In [3]:
print(y)
print(x)
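The cells that train the regressor and form the prediction are missing from the export; a minimal sketch consistent with the Out[4] and Out[6] lines below (the query year 2020 is an assumption):

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(x, y)                   # echoed as Out[4]: LinearRegression()

y_pred = regressor.predict([[2020]])  # hypothetical query year; yields Out[6]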
Out[4]:
LinearRegression()
In [6]:
y_pred
Out[6]:
array([41288.69409442])
In [ ]:
In [ ]:
Lab 6 Part 2
Ananiya Sardana
199303079
CCE-B
In [1]:
! pip3 install word2number
Collecting word2number
Downloading word2number-1.1.zip (9.7 kB)
Preparing metadata (setup.py) ... done
Building wheels for collected packages: word2number
Building wheel for word2number (setup.py) ... done
Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5586 sha2
56=91dc8db272f453f02ecf59edfe402555a66b387ca007f41e65e602149c251ec2
Stored in directory: /Users/priyamthakkar/Library/Caches/pip/wheels/cb/f3/5a/d88198fdeb
46781ddd7e7f2653061af83e7adb2a076d8886d6
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1
WARNING: You are using pip version 21.3.1; however, version 22.0.4 is available.
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/
bin/python3.8 -m pip install --upgrade pip' command.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('hiring.csv')
dataset.experience = dataset.experience.fillna(str("zero"))
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,-1].values
In [96]:
print(x)
[['zero' 8.0 9]
['zero' 8.0 6]
['five' 6.0 7]
['two' 10.0 10]
['seven' 9.0 6]
['three' 7.0 10]
['ten' nan 7]
['eleven' 7.0 8]]
In [97]:
print(y)
In [98]:
print(x[0])
['zero' 8.0 9]
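The conversion cell is missing from the export; between In [98] and In [100] the textual experience column was evidently mapped to integers, presumably with word2number:

from word2number import w2n

# convert the textual experience column (column 0) to integers in place
for row in x:
    row[0] = w2n.word_to_num(row[0])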
In [100]:
print(x)
[[0 8.0 9]
[0 8.0 6]
[5 6.0 7]
[2 10.0 10]
[7 9.0 6]
[3 7.0 10]
[10 nan 7]
[11 7.0 8]]
In [101]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(x[:,:])
x[:,:] = (imputer.transform(x[:,:]))
In [102]:
print(x)
[[0.0 8.0 9.0]
[0.0 8.0 6.0]
[5.0 6.0 7.0]
[2.0 10.0 10.0]
[7.0 9.0 6.0]
[3.0 7.0 10.0]
[10.0 7.857142857142857 7.0]
[11.0 7.0 8.0]]
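The training cell is missing from the export; presumably it was:

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(x, y)   # echoed as Out[103]: LinearRegression()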
Out[103]:
LinearRegression()
Predicting the Test set results
In [108]:
x_test = [[2, 9, 6], [12, 10, 10]]
In [109]:
print(regressor.predict(x_test))
[53290.89 92268.07]
In [110]:
regressor.coef_
Out[110]:
array([2827.63, 1912.94, 2196.98])
In [111]:
regressor.intercept_
Out[111]:
17237.330313727172
In [ ]:
Lab 7 Part 1
Ananiya Sardana
199303079
CCE-B
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
df = pd.read_csv (r'insurancedata.csv')
df.head()
Out[2]:
age bought_insurance
0 22 0
1 25 0
2 47 1
3 52 0
4 46 1
In [3]:
plt.scatter(df.age,df.bought_insurance,marker='+',color='red')
Out[3]:
<matplotlib.collections.PathCollection at 0x7f9a78448940>
In [4]:
from sklearn.model_selection import train_test_split
In [5]:
df
Out[5]:
age bought_insurance
0 22 0
1 25 0
2 47 1
3 52 0
4 46 1
5 56 1
6 55 0
7 60 1
8 62 1
9 61 1
10 18 0
11 28 0
12 27 0
13 29 0
14 49 1
15 55 1
16 25 1
17 58 1
18 19 0
19 18 0
20 21 0
21 26 0
22 40 1
23 45 1
24 50 1
25 54 1
26 23 0
In [6]:
X_train, X_test, y_train, y_test = train_test_split(df[['age']], df.bought_insurance, train_size=0.8)
In [7]:
y_test
Out[7]:
16 1
3 0
18 0
1 0
20 0
4 1
Name: bought_insurance, dtype: int64
In [8]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
In [9]:
model.fit(X_train, y_train)
Out[9]:
LogisticRegression()
In [10]:
X_test
Out[10]:
age
16 25
3 52
18 19
1 25
20 21
4 46
In [11]:
y_predicted = model.predict(X_test)
y_predicted
Out[11]:
array([0, 1, 0, 0, 0, 1])
In [12]:
model.score(X_test,y_test)
Out[12]:
0.6666666666666666
In [ ]:
Lab 7 Part 2
Ananiya Sardana
199303079
CCE-B
In [2]:
from sklearn.datasets import load_digits

digits = load_digits()
print(digits.data.shape)
(1797, 64)
In [3]:
digits.data
Out[3]:
array([[ 0., 0., 5., ..., 0., 0., 0.],
[ 0., 0., 0., ..., 10., 0., 0.],
[ 0., 0., 0., ..., 16., 9., 0.],
...,
[ 0., 0., 1., ..., 6., 0., 0.],
[ 0., 0., 2., ..., 12., 0., 0.],
[ 0., 0., 10., ..., 12., 1., 0.]])
In [4]:
# data visualization
from matplotlib import pyplot as plt
In [5]:
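The visualization cell itself is empty in the export; a typical sketch for viewing one sample:

plt.gray()
plt.matshow(digits.images[0])  # render the first digit as an 8x8 image
plt.show()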
In [6]:
# dataset description
# checking the dataset directory
dir(digits)
Out[6]:
['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']
In [7]:
digits.data.shape
Out[7]:
(1797, 64)
In [8]:
digits.DESCR
Out[8]:
In [9]:
digits.feature_names[0:5]
Out[9]:
In [10]:
digits.frame
In [11]:
digits.images.shape
Out[11]:
(1797, 8, 8)
In [12]:
digits.target.shape
Out[12]:
(1797,)
In [13]:
# importing model
from sklearn.linear_model import LogisticRegression
In [14]:
model = LogisticRegression(max_iter = 10000)
In [15]:
# dataset splitting
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(digits.data, digits.target, train_size
= 0.8, random_state = 42)
In [16]:
x_train.shape
Out[16]:
(1437, 64)
In [17]:
y_train.shape
Out[17]:
(1437,)
In [18]:
y_test.shape
Out[18]:
(360,)
In [19]:
x_test.shape
Out[19]:
(360, 64)
In [21]:
# training model
model.fit(x_train,y_train)
Out[21]:
LogisticRegression(max_iter=10000)
In [22]:
y = model.predict(x_test)
In [23]:
model.score(x_test,y_test)
Out[23]:
0.9722222222222222
In [24]:
y[0:5]
Out[24]:
array([6, 9, 3, 7, 2])
Lab 8 Part 1
Ananiya Sardana
199303079
CCE-B
Naive Bayes
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
titanicdf = pd.read_csv('titanic.csv')
In [9]:
titanicdf[0:5]
Out[9]:
   PassengerId  Survived  Pclass                                                Name     Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked
1            2         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1      0          PC 17599  71.2833   C85        C
2            3         1       3                              Heikkinen, Miss. Laina  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S
In [10]:
titanicdf.drop(titanicdf.columns.difference(['Pclass','Age','Fare','Survived']), 1, inplace=True)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only
  """Entry point for launching an IPython kernel.
In [11]:
titanicdf
Out[11]:
   Survived  Pclass   Age     Fare
0         0       3  22.0   7.2500
1         1       1  38.0  71.2833
2         1       3  26.0   7.9250
3         1       1  35.0  53.1000
4         0       3  35.0   8.0500
In [12]:
target = titanicdf.iloc[:, 0].values
In [13]:
target
Out[13]:
array([0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1,
1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,
1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1,
1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0,
1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1,
1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1,
1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0])
In [20]:
x = titanicdf.iloc[:, 1:].values
In [21]:
x
Out[21]:
In [17]:
print("Number of missing values in Survived:",titanicdf['Survived'].isnull().sum()) print("Number of missing values in
Pclass:",titanicdf['Pclass'].isnull().sum()) print("Number of missing values in Age:",titanicdf['Age'].isnull().sum()) print("Number of
missing values in Fare:",titanicdf['Fare'].isnull().sum())
In [23]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
imputer.fit(x[:,:])
x[:,:] = imputer.transform(x[:,:])
In [24]:
x
Out[24]:
array([[ 3. , 22. , 7.25 ],
[ 1. , 38. , 71.2833 ],
[ 3. , 26. , 7.925 ],
...,
[ 3. , 29.69911765, 23.45 ],
[ 1. , 26. , 30. ],
[ 3. , 32. , 7.75 ]])
Splitting the dataset into the Training set and Test set
In [35]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, target, test_size = 0.2, random_state = 90)
Feature Scaling
In [36]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
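The cells that train and evaluate the classifier are missing from the export; a sketch consistent with the GaussianNB(), confusion-matrix and accuracy lines below:

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score

classifier = GaussianNB()
classifier.fit(x_train, y_train)         # echoed as GaussianNB()

y_pred = classifier.predict(x_test)
print(confusion_matrix(y_test, y_pred))  # the 2x2 matrix below
accuracy_score(y_test, y_pred)           # Out[38]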
GaussianNB()
[[106 17]
[ 30 26]]
Out[38]:
0.7374301675977654
Lab 8 Part 2
Ananiya Sardana
199303079
CCE-B
Support Vector
In [40]:
import pandas as pd
from sklearn.datasets import load_iris
iris=load_iris()
In [41]:
dir(iris)
Out[41]:
['DESCR',
'data',
'feature_names',
'filename',
'frame',
'target',
'target_names']
In [7]:
iris.target
Out[7]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [42]:
iris.feature_names
Out[42]:
['sepal length (cm)',
'sepal width (cm)',
'petal length (cm)',
'petal width (cm)']
In [9]:
iris.data
Out[9]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
[5.4, 3.9, 1.7, 0.4],
[4.6, 3.4, 1.4, 0.3],
[5. , 3.4, 1.5, 0.2],
[4.4, 2.9, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.1],
[5.4, 3.7, 1.5, 0.2],
[4.8, 3.4, 1.6, 0.2],
[4.8, 3. , 1.4, 0.1],
[4.3, 3. , 1.1, 0.1],
[5.8, 4. , 1.2, 0.2],
[5.7, 4.4, 1.5, 0.4],
[5.4, 3.9, 1.3, 0.4],
[5.1, 3.5, 1.4, 0.3],
[5.7, 3.8, 1.7, 0.3],
[5.1, 3.8, 1.5, 0.3],
[5.4, 3.4, 1.7, 0.2],
[5.1, 3.7, 1.5, 0.4],
[4.6, 3.6, 1. , 0.2],
[5.1, 3.3, 1.7, 0.5],
[4.8, 3.4, 1.9, 0.2],
[5. , 3. , 1.6, 0.2],
[5. , 3.4, 1.6, 0.4],
[5.2, 3.5, 1.5, 0.2],
[5.2, 3.4, 1.4, 0.2],
[4.7, 3.2, 1.6, 0.2],
[4.8, 3.1, 1.6, 0.2],
[5.4, 3.4, 1.5, 0.4],
[5.2, 4.1, 1.5, 0.1],
[5.5, 4.2, 1.4, 0.2],
[4.9, 3.1, 1.5, 0.2],
[5. , 3.2, 1.2, 0.2],
[5.5, 3.5, 1.3, 0.2],
[4.9, 3.6, 1.4, 0.1],
[4.4, 3. , 1.3, 0.2],
[5.1, 3.4, 1.5, 0.2],
[5. , 3.5, 1.3, 0.3],
[4.5, 2.3, 1.3, 0.3],
[4.4, 3.2, 1.3, 0.2],
[5. , 3.5, 1.6, 0.6],
[5.1, 3.8, 1.9, 0.4],
[4.8, 3. , 1.4, 0.3],
[5.1, 3.8, 1.6, 0.2],
[4.6, 3.2, 1.4, 0.2],
[5.3, 3.7, 1.5, 0.2],
[5. , 3.3, 1.4, 0.2],
[7. , 3.2, 4.7, 1.4],
[6.4, 3.2, 4.5, 1.5],
[6.9, 3.1, 4.9, 1.5],
[5.5, 2.3, 4. , 1.3],
[6.5, 2.8, 4.6, 1.5],
[5.7, 2.8, 4.5, 1.3],
[6.3, 3.3, 4.7, 1.6],
[4.9, 2.4, 3.3, 1. ],
[6.6, 2.9, 4.6, 1.3],
[5.2, 2.7, 3.9, 1.4],
[5. , 2. , 3.5, 1. ],
[5.9, 3. , 4.2, 1.5],
[6. , 2.2, 4. , 1. ],
[6.1, 2.9, 4.7, 1.4],
[5.6, 2.9, 3.6, 1.3],
[6.7, 3.1, 4.4, 1.4],
[5.6, 3. , 4.5, 1.5],
[5.8, 2.7, 4.1, 1. ],
[6.2, 2.2, 4.5, 1.5],
[5.6, 2.5, 3.9, 1.1],
[5.9, 3.2, 4.8, 1.8],
[6.1, 2.8, 4. , 1.3],
[6.3, 2.5, 4.9, 1.5],
[6.1, 2.8, 4.7, 1.2],
[6.4, 2.9, 4.3, 1.3],
[6.6, 3. , 4.4, 1.4],
[6.8, 2.8, 4.8, 1.4],
[6.7, 3. , 5. , 1.7],
[6. , 2.9, 4.5, 1.5],
[5.7, 2.6, 3.5, 1. ],
[5.5, 2.4, 3.8, 1.1],
[5.5, 2.4, 3.7, 1. ],
[5.8, 2.7, 3.9, 1.2],
[6. , 2.7, 5.1, 1.6],
[5.4, 3. , 4.5, 1.5],
[6. , 3.4, 4.5, 1.6],
[6.7, 3.1, 4.7, 1.5],
[6.3, 2.3, 4.4, 1.3],
[5.6, 3. , 4.1, 1.3],
[5.5, 2.5, 4. , 1.3],
[5.5, 2.6, 4.4, 1.2],
[6.1, 3. , 4.6, 1.4],
[5.8, 2.6, 4. , 1.2],
[5. , 2.3, 3.3, 1. ],
[5.6, 2.7, 4.2, 1.3],
[5.7, 3. , 4.2, 1.2],
[5.7, 2.9, 4.2, 1.3],
[6.2, 2.9, 4.3, 1.3],
[5.1, 2.5, 3. , 1.1],
[5.7, 2.8, 4.1, 1.3],
[6.3, 3.3, 6. , 2.5],
[5.8, 2.7, 5.1, 1.9],
[7.1, 3. , 5.9, 2.1],
[6.3, 2.9, 5.6, 1.8],
[6.5, 3. , 5.8, 2.2],
[7.6, 3. , 6.6, 2.1],
[4.9, 2.5, 4.5, 1.7],
[7.3, 2.9, 6.3, 1.8],
[6.7, 2.5, 5.8, 1.8],
[7.2, 3.6, 6.1, 2.5],
[6.5, 3.2, 5.1, 2. ],
[6.4, 2.7, 5.3, 1.9],
[6.8, 3. , 5.5, 2.1],
[5.7, 2.5, 5. , 2. ],
[5.8, 2.8, 5.1, 2.4],
[6.4, 3.2, 5.3, 2.3],
[6.5, 3. , 5.5, 1.8],
[7.7, 3.8, 6.7, 2.2],
[7.7, 2.6, 6.9, 2.3],
[6. , 2.2, 5. , 1.5],
[6.9, 3.2, 5.7, 2.3],
[5.6, 2.8, 4.9, 2. ],
[7.7, 2.8, 6.7, 2. ],
[6.3, 2.7, 4.9, 1.8],
[6.7, 3.3, 5.7, 2.1],
[7.2, 3.2, 6. , 1.8],
[6.2, 2.8, 4.8, 1.8],
[6.1, 3. , 4.9, 1.8],
[6.4, 2.8, 5.6, 2.1],
[7.2, 3. , 5.8, 1.6],
[7.4, 2.8, 6.1, 1.9],
[7.9, 3.8, 6.4, 2. ],
[6.4, 2.8, 5.6, 2.2],
[6.3, 2.8, 5.1, 1.5],
[6.1, 2.6, 5.6, 1.4],
[7.7, 3. , 6.1, 2.3],
[6.3, 3.4, 5.6, 2.4],
[6.4, 3.1, 5.5, 1.8],
[6. , 3. , 4.8, 1.8],
[6.9, 3.1, 5.4, 2.1],
[6.7, 3.1, 5.6, 2.4],
[6.9, 3.1, 5.1, 2.3],
[5.8, 2.7, 5.1, 1.9],
[6.8, 3.2, 5.9, 2.3],
[6.7, 3.3, 5.7, 2.5],
[6.7, 3. , 5.2, 2.3],
[6.3, 2.5, 5. , 1.9],
[6.5, 3. , 5.2, 2. ],
[6.2, 3.4, 5.4, 2.3],
[5.9, 3. , 5.1, 1.8]])
In [8]:
iris.target_names
Out[8]:
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
In [11]:
df=pd.DataFrame(iris.data,columns=iris.feature_names)
df
Out[11]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
In [12]:
iris.target
Out[12]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
In [13]:
df['target']=iris.target
df.head()
Out[13]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
In [18]:
df['flowername']=df.target.apply(lambda x: iris.target_names[x])
df.head()
Out[18]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target flowername
In [20]:
iris.target_names
Out[20]:
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
In [22]:
from sklearn.model_selection import train_test_split

x = df.drop(['target','flowername'], axis='columns')
y = df.target
In [38]:
x_test
Out[38]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
In [33]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
In [34]:
from sklearn.svm import SVC
model = SVC()
In [35]:
model.fit(x_train,y_train)
Out[35]:
SVC()
In [36]:
model.score(x_test,y_test)
Out[36]:
0.9666666666666667
In [39]:
model.predict([[5.0,2.0,3.5,1.0]])
Out[39]:
array([1])
In [ ]:
Lab 9
Ananiya Sardana
199303079
CCE-B
Decision Tree
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import requests
import io
In [3]:
url = "https://raw.githubusercontent.com/codebasics/py/master/ML/9_decision_tree/salaries
.csv"
In [4]:
download = requests.get(url).content
In [5]:
df = pd.read_csv(io.StringIO(download.decode('utf-8')))
In [6]:
df
Out[6]:
       company                  job     degree  salary_more_then_100k
4       google  computer programmer  bachelors                      0
5       google  computer programmer    masters                      1
6   abc pharma      sales executive    masters                      0
7   abc pharma  computer programmer  bachelors                      0
...
14    facebook  computer programmer  bachelors                      1
15    facebook  computer programmer    masters                      1
In [7]:
target = df.iloc[:, -1].values
In [8]:
target
Out[8]:
array([0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
In [9]:
x = df.iloc[:, 0:-1].values
In [10]:
x
Out[10]:
In [11]:
from sklearn.preprocessing import LabelEncoder

le_target = LabelEncoder()
target = le_target.fit_transform(target)
In [12]:
print(target)
[0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1]
In [13]:
le_company = LabelEncoder()
x[:,0] = le_company.fit_transform(x[:,0])
In [14]:
le_job = LabelEncoder()
x[:,1] = le_job.fit_transform(x[:,1])
In [15]:
le_degree = LabelEncoder()
x[:,2] = le_degree.fit_transform(x[:,2])
In [16]:
print(target)
[0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1]
In [17]:
print(x)
[[2 2 0]
 [2 2 1]
 [2 0 0]
 [2 0 1]
 [2 1 0]
 [2 1 1]
 [0 2 1]
 [0 1 0]
 [0 0 0]
 [0 0 1]
 [1 2 0]
 [1 2 1]
 [1 0 0]
 [1 0 1]
 [1 1 0]
 [1 1 1]]
Splitting the dataset into the Training set and Test set
In [18]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, target, test_size = 0.3, random_state = 40)
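The cell that builds and fits the classifier is missing from the export; presumably, matching the Out[19] echo:

from sklearn.tree import DecisionTreeClassifier

classifier = DecisionTreeClassifier(criterion='entropy', random_state=40)
classifier.fit(x_train, y_train)   # echoed as Out[19]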
Out[19]:
DecisionTreeClassifier(criterion='entropy', random_state=40)
Making the Confusion Matrix
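The evaluation code is absent from the export; a sketch matching the printed matrix and the accuracy below:

from sklearn.metrics import confusion_matrix, accuracy_score

y_pred = classifier.predict(x_test)
print(confusion_matrix(y_test, y_pred))
accuracy_score(y_test, y_pred)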
In [20]:
[[1 0]
 [0 4]]
Out[20]:
1.0
In [22]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5
In [ ]:
Lab 10
Ananiya Sardana
199303079
CCE-B
K-Nearest Neighbour
In [3]:
import pandas as pd
from sklearn.datasets import load_iris
iris=load_iris()
In [4]:
dir(iris)
Out[4]:
['DESCR',
'data',
'feature_names',
'filename',
'frame',
'target',
'target_names']
In [39]:
iris.frame
In [37]:
iris.feature_names
Out[37]:
['sepal length (cm)',
'sepal width (cm)',
'petal length (cm)',
'petal width (cm)']
In [49]:
df=pd.DataFrame(iris.data,columns=iris.feature_names)
df[9:12]
Out[49]:
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
9                 4.9               3.1                1.5               0.1
10                5.4               3.7                1.5               0.2
11                4.8               3.4                1.6               0.2
In [15]:
df[df.target==2].head()
Out[15]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
100                6.3               3.3                6.0               2.5       2
In [16]:
df['flowername']=df.target.apply(lambda x: iris.target_names[x])
df
Out[16]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target flowername
In [18]:
from sklearn.model_selection import train_test_split

x = df.drop(['target','flowername'], axis='columns')
y = df.target
In [32]:
y
Out[32]:
0 0
1 0
2 0
3 0
4 0
..
145 2
146 2
147 2
148 2
149 2
Name: target, Length: 150, dtype: int32
In [21]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
In [23]:
len(x_test)
Out[23]:
30
In [52]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=20)
In [53]:
knn.fit(x_train, y_train)
knn.score(x_test, y_test)
Out[53]:
0.9666666666666667
In [44]:
knn.predict([[4.8,3.0,1.5,0.3]])
Out[44]:
array([0])
In [46]:
from sklearn.metrics import confusion_matrix
y_pred = knn.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cm
Out[46]:
array([[10, 0, 0],
[ 0, 10, 1],
[ 0, 0, 9]], dtype=int64)
In [54]:
y_pred
Out[54]:
array([2, 0, 1, 1, 1, 0, 0, 2, 2, 2, 2, 1, 2, 0, 0, 1, 2, 1, 2, 2, 1, 0,
1, 0, 0, 0, 0, 1, 1, 2])
In [55]:
y_test
Out[55]:
121 2
21 0
90 1
55 1
72 1
33 0
49 0
149 2
119 2
117 2
135 2
84 1
130 2
13 0
47 0
67 1
83 1
82 1
145 2
116 2
85 1
45 0
86 1
48 0
0 0
34 0
36 0
87 1
91 1
124 2
Name: target, dtype: int32
In [47]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sn

plt.figure(figsize=(7,5))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
Out[47]:
In [48]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
precision recall f1-score support
accuracy 0.97 30
macro avg 0.97 0.97 0.97 30
weighted avg 0.97 0.97 0.97 30
In [ ]:
Lab 11
Ananiya Sardana
199303079
CCE-B
K-Means Clustering
In [29]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
In [30]:
url=("https://raw.githubusercontent.com/codebasics/py/master/ML/13_kmeans/income.csv")
df = pd.read_csv(url)
df.head()
Out[30]:
      Name  Age  Income($)
0      Rob   27      70000
1  Michael   29      90000
2    Mohan   29      61000
3   Ismail   28      60000
4     Kory   42     150000
In [31]:
plt.scatter(df.Age,df['Income($)'])
plt.xlabel('Age')
plt.ylabel('Income($)')
Out[31]:
In [32]:
km = KMeans(n_clusters=3)
y_predicted = km.fit_predict(df[['Age','Income($)']])
y_predicted
Out[32]:
array([2, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0])
In [33]:
df['cluster']=y_predicted
df.head()
Out[33]:
      Name  Age  Income($)  cluster
0      Rob   27      70000        2
1  Michael   29      90000        2
2    Mohan   29      61000        0
3   Ismail   28      60000        0
4     Kory   42     150000        1
In [34]:
km.cluster_centers_
Out[34]:
array([[3.29090909e+01, 5.61363636e+04],
[3.82857143e+01, 1.50000000e+05],
[3.40000000e+01, 8.05000000e+04]])
In [35]:
df1 = df[df.cluster==0]
df2 = df[df.cluster==1]
df3 = df[df.cluster==2]
plt.scatter(df1.Age,df1['Income($)'],color='green')
plt.scatter(df2.Age,df2['Income($)'],color='red')
plt.scatter(df3.Age,df3['Income($)'],color='black')
plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],color='purple',marker='*',label='centroid')
plt.xlabel('Age')
plt.ylabel('Income ($)')
plt.legend()
Out[35]:
<matplotlib.legend.Legend at 0x1aca98e40d0>
In [36]:
scaler = MinMaxScaler()
scaler.fit(df[['Income($)']])
df['Income($)'] = scaler.transform(df[['Income($)']])
scaler.fit(df[['Age']])
df['Age'] = scaler.transform(df[['Age']])
In [37]:
df.head()
Out[37]:
In [38]:
plt.scatter(df.Age,df['Income($)'])
Out[38]:
<matplotlib.collections.PathCollection at 0x1aca997a520>
In [39]:
km = KMeans(n_clusters=3)
y_predicted = km.fit_predict(df[['Age','Income($)']])
y_predicted
Out[39]:
array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2])
In [40]:
km.cluster_centers_
Out[40]:
array([[0.1372549 , 0.11633428],
[0.72268908, 0.8974359 ],
[0.85294118, 0.2022792 ]])
In [41]:
df['cluster']=y_predicted
df.head()
Out[41]:
In [42]:
df1 = df[df.cluster==0]
df2 = df[df.cluster==1]
df3 = df[df.cluster==2]
plt.scatter(df1.Age,df1['Income($)'],color='green')
plt.scatter(df2.Age,df2['Income($)'],color='red')
plt.scatter(df3.Age,df3['Income($)'],color='black')
plt.scatter(km.cluster_centers_[:,0],km.cluster_centers_[:,1],color='purple',marker='*',label='centroid')
plt.legend()
Out[42]:
<matplotlib.legend.Legend at 0x1aca99e0ee0>
In [43]:
sse = []
k_rng = range(1,10)
for k in k_rng:
    km = KMeans(n_clusters=k)
    km.fit(df[['Age','Income($)']])
    sse.append(km.inertia_)
plt.xlabel('K')
plt.ylabel('Sum of squared error')
plt.plot(k_rng,sse)
[<matplotlib.lines.Line2D at 0x1aca9a5d1c0>]
In [ ]:
Lab 12
Ananiya Sardana
199303079
CCE-B
# PCA
# import digit dataset to implement PCA
from sklearn.datasets import load_digits
import pandas as pd
dataset = load_digits()
# used to check what the dataset is all about
dataset.keys()
Out[1]:
dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])
In [2]:
# to check the shape of the data [it contains 1797 samples, each with 64 features]
dataset.data.shape
Out[2]:
(1797, 64)
In [3]:
# first element of the dataset, which is in the form of a 1-D array
dataset.data[0]
Out[3]:
array([ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
       15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
       12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
        0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
       10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.])
In [4]:
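The plotting cell itself is missing from the export; a typical sketch that would produce the AxesImage line below:

from matplotlib import pyplot as plt

plt.gray()
plt.matshow(dataset.data[0].reshape(8, 8))  # first sample rendered as an 8x8 image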
Out[5]:
<matplotlib.image.AxesImage at 0x1ae666ed4c0>
In [7]:
# check the dataset target
dataset.target[:5]
Out[7]:
array([0, 1, 2, 3, 4])
In [8]:
# create dataframe
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)
df.head()
Out[8]:
pixel_0_0 pixel_0_1 pixel_0_2 pixel_0_3 pixel_0_4 pixel_0_5 pixel_0_6 pixel_0_7 pixel_1_0 pixel_1_1 ... pixel_6_6 pixel
0        0.0       0.0       5.0      13.0       9.0       1.0       0.0       0.0       0.0       0.0  ...       0.0
1 0.0 0.0 0.0 12.0 13.0 5.0 0.0 0.0 0.0 0.0 ... 0.0
2 0.0 0.0 0.0 4.0 15.0 12.0 0.0 0.0 0.0 0.0 ... 5.0
3 0.0 0.0 7.0 15.0 13.0 1.0 0.0 0.0 0.0 8.0 ... 9.0
4 0.0 0.0 0.0 1.0 11.0 0.0 0.0 0.0 0.0 0.0 ... 0.0
5 rows × 64 columns
In [9]:
X = df
y = dataset.target
In [10]:
# scale the values using StandardScaler (zero mean, unit variance); you can use MinMaxScaler also
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled
Out[10]:
In [12]:
# splitting the dataset and use logistic regression
from sklearn.model_selection import train_test_split
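The rest of this cell is missing from the export; a sketch that would produce the accuracy reported as Out[12] (the split parameters are assumptions):

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=30)

from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
model.score(X_test, y_test)   # Out[12]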
Out[12]:
0.9722222222222222
In [14]:
# apply pca for retaining 95 percent useful features
from sklearn.decomposition import PCA
pca = PCA(0.95)
X_pca = pca.fit_transform(X)
X_pca.shape
Out[14]:
(1797, 29)
In [17]:
# split new dataframe and train the model
X_train_pca, X_test_pca, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=30)
In [18]:
from sklearn.linear_model import LogisticRegression
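The fit-and-score lines of this cell are missing; presumably:

model_pca = LogisticRegression(max_iter=1000)
model_pca.fit(X_train_pca, y_train)
model_pca.score(X_test_pca, y_test)   # Out[18]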
Out[18]:
0.9694444444444444
In [21]:
# from the above scenario we can see that despite dropping lots of features we obtained nearly the same accuracy
In [22]:
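The two-component experiment cells are missing from the export; a sketch consistent with the (1797, 2) shape and the score below:

pca2 = PCA(n_components=2)
X_pca2 = pca2.fit_transform(X)
X_pca2.shape                                  # Out[22]

X_train2, X_test2, y_train2, y_test2 = train_test_split(X_pca2, y, test_size=0.2, random_state=30)
model2 = LogisticRegression(max_iter=1000)
model2.fit(X_train2, y_train2)
model2.score(X_test2, y_test2)                # Out[23]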
Out[22]:
(1797, 2)
In [23]:
Out[23]:
0.6083333333333333
In [25]:
#We get less accuancy (~6O%) as using only 2 components did not retain much of the featur e information.
#However in real life you will find many cases where using 2 or few PCA components can st ill give you a pretty good accuracy
Lab 13
Ananiya Sardana
199303079
CCE-B
Neural Network
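The setup cells (In [1] through In [5]) are missing from the export; a sketch of the usual MNIST preparation the later names imply (the normalisation step is an assumption):

import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

# load MNIST and scale pixel values to [0, 1]
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
X_train, X_test = X_train / 255.0, X_test / 255.0

# flatten the 28x28 images into 784-long vectors for the Dense-only model
X_train_flattened = X_train.reshape(len(X_train), 784)
X_test_flattened = X_test.reshape(len(X_test), 784)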
In [6]:
# create a neural network with one input and one output layer
model = keras.Sequential([
keras.layers.Dense(10, input_shape=(784,), activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train_flattened, y_train, epochs=5)
Epoch 1/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.4679 - accuracy: 0.8770
Epoch 2/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.3044 - accuracy: 0.9156
Epoch 3/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.2836 - accuracy: 0.9208
Epoch 4/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.2733 - accuracy: 0.9234
Epoch 5/5
1875/1875 [==============================] - 2s 1ms/step - loss: 0.2669 - accuracy: 0.9254
Out[6]:
<keras.callbacks.History at 0x7f8bc51d0b50>
In [7]:
# evaluate the performance of the neural network
model.evaluate(X_test_flattened, y_test)
Out[7]:
[0.2720883786678314, 0.9243999719619751]
In [8]:
# some predictions
y_predicted = model.predict(X_test_flattened)
y_predicted[0]
Out[8]:
In [9]:
# display input image at particular index
plt.matshow(X_test[0])
Out[9]:
<matplotlib.image.AxesImage at 0x7f8bc55fd250>
In [10]:
#np.argmax finds a maximum element from an array and returns the index of it
np.argmax(y_predicted[0])
Out[10]:
7
In [11]:
#predictions
y_predicted_labels = [np.argmax(i) for i in y_predicted]
y_predicted_labels[:5]
Out[11]:
[7, 2, 1, 0, 4]
In [12]:
# create confusion matrix
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_predicted_labels)
cm
Out[12]:
In [13]:
# plot confusion matrix
import seaborn as sn

plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')
Out[13]:
In [15]:
# add a hidden layer to the neural network
model = keras.Sequential([
    keras.layers.Dense(100, input_shape=(784,), activation='relu'),
    keras.layers.Dense(10, activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train_flattened, y_train, epochs=5)
Epoch 1/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.2746 - accuracy: 0.9219
Epoch 2/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.1270 - accuracy: 0.9626
Epoch 3/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0889 - accuracy: 0.9733
Epoch 4/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0668 - accuracy: 0.9794
Epoch 5/5
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0533 - accuracy: 0.9831
Out[15]:
<keras.callbacks.History at 0x7f8bc8723cd0>
In [16]:
# evaluate the performance of the new neural network with a hidden layer
model.evaluate(X_test_flattened,y_test)
[0.07935800403356552, 0.9750999808311462]
In [17]:
# predictions
y_predicted = model.predict(X_test_flattened)
y_predicted_labels = [np.argmax(i) for i in y_predicted]

# create confusion matrix
cm = tf.math.confusion_matrix(labels=y_test, predictions=y_predicted_labels)

# plot confusion matrix
plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')
Out[17]:
Text(69.0, 0.5, 'Truth')
In [18]:
# Using a Flatten layer so that we don't have to call .reshape on the input dataset
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dense(10, activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10)
Epoch 1/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.2665 - accuracy: 0.9227
Epoch 2/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.1204 - accuracy: 0.9642
Epoch 3/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0852 - accuracy: 0.9743
Epoch 4/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0642 - accuracy: 0.9803
Epoch 5/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0510 - accuracy: 0.9845
Epoch 6/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0408 - accuracy: 0.9877
Epoch 7/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.0339 - accuracy: 0.9892
Epoch 8/10
1875/1875 [==============================] - 3s 1ms/step - loss: 0.0269 - accuracy: 0.9918
Epoch 9/10