Professional Documents
Culture Documents
Experiment No. 1
FIND-S Algorithm
Implement and demonstrate the FIND-S algorithm for finding the most specific hypothesis based on a
given set of training data samples. Read the training data from a .CSV file.
import numpy as np
import pandas as pd
# Load the training examples for FIND-S from the CSV file.
data =pd.read_csv('pgm1.csv')
# All columns except the last are taken as the attribute values of each example.
concepts = np.array(data.iloc[:,0:-1])
# The last column is taken as the label of each example.
target = np.array(data.iloc[:,-1])
# NOTE(review): `train` is not defined anywhere in the visible source; it is
# presumably the FIND-S routine returning the most specific hypothesis — confirm
# it is defined before this line or the call raises NameError.
print(train(concepts,target))
Output:
['Sunny' 'Warm' '?' 'Strong' '?' '?']
Experiment No. 2
Candidate-Elimination Algorithm
For a given set of training data examples stored in a .CSV file, implement and demonstrate the
Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with
the training examples.
import numpy as np
import pandas as pd
# Load the training examples; read_csv already returns a DataFrame, so the
# DataFrame(...) wrapper only re-wraps the same table.
data = pd.DataFrame(data=pd.read_csv('pgm2.csv'))
# All columns except the last are taken as the attribute values of each example.
concepts = np.array(data.iloc[:,0:-1])
# The last column is taken as the label; `learn` compares it against "Yes"/"No".
target = np.array(data.iloc[:,-1])
def learn(concepts, target):
    """Run the Candidate-Elimination update over the training examples.

    Args:
        concepts: 2-D sequence of attribute values, one row per example.
            The first row seeds the specific hypothesis and must support
            ``.copy()`` (e.g. a NumPy array row).
        target: Sequence of labels aligned with ``concepts``; only the
            values ``"Yes"`` and ``"No"`` are acted upon.

    Returns:
        A ``(specific_h, general_h)`` tuple. ``specific_h`` is the specific
        boundary (a copy of the first example with generalized attributes
        replaced by ``'?'``). ``general_h`` is a list of general hypotheses,
        with rows that consist solely of ``'?'`` removed.
    """
    specific_h = concepts[0].copy()
    n_attrs = len(specific_h)
    # One candidate general hypothesis per attribute, all initially '?'.
    general_h = [["?"] * n_attrs for _ in range(n_attrs)]

    for example, label in zip(concepts, target):
        if label == "Yes":
            # Positive example: generalize every attribute that disagrees.
            for x in range(n_attrs):
                if example[x] != specific_h[x]:
                    specific_h[x] = "?"
                    general_h[x][x] = "?"
        elif label == "No":
            # Negative example: specialize the general boundary on every
            # attribute where the example differs from the specific boundary.
            for x in range(n_attrs):
                if example[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = "?"

    # Drop fully general rows. The width is derived from the data instead of
    # being hard-coded to six attributes, so any attribute count works.
    general_h = [row for row in general_h if any(v != "?" for v in row)]
    return specific_h, general_h
# Run the learner on the loaded examples and unpack both boundaries.
s_final, g_final = learn(concepts, target)
# sep="\n" prints each boundary on the line below its label.
print("Final S:", s_final, sep="\n")
print("Final G:", g_final, sep="\n")
# Bare expression: has no effect when run as a script; presumably left over
# from a notebook cell that displayed the DataFrame.
data
Output:
Final S:
['Sunny' 'Warm' 'High' 'Strong' '?' '?']
Final G:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
Experiment No. 3
ID3 Algorithm
Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate
data set for building the decision tree and apply this knowledge to classify a new sample.
import pandas as pd
import numpy as np
# Load the ID3 training table and supply the column names explicitly; the last
# column, 'class', is the default target used by InfoGain.
dataset= pd.read_csv('pgm3.csv',names=['outlook','temperature','humidity','wind','class',])
def entropy(target_col):
    """Return the Shannon entropy (base 2) of the values in *target_col*.

    Args:
        target_col: 1-D array-like of class labels.

    Returns:
        The entropy in bits as a NumPy float; ``0.0`` for empty input.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # Normalise once instead of recomputing the total for every class.
    probabilities = counts / counts.sum()
    return np.sum(-probabilities * np.log2(probabilities))
def InfoGain(data, split_attribute_name, target_name="class"):
    """Return the information gain of splitting *data* on one attribute.

    Args:
        data: pandas DataFrame holding the attribute and target columns.
        split_attribute_name: Name of the column to split on.
        target_name: Name of the target column (default ``"class"``).

    Returns:
        Entropy of the target column minus the weighted entropy of the
        target within each partition induced by the split attribute.
    """
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    weights = counts / counts.sum()
    # Select each partition with a boolean mask. dropna() is kept so that rows
    # containing missing values are excluded exactly as with where().dropna(),
    # but without blanking the whole frame and casting it to float first.
    weighted_entropy = np.sum([
        weight * entropy(
            data[data[split_attribute_name] == val].dropna()[target_name]
        )
        for val, weight in zip(vals, weights)
    ])
    return total_entropy - weighted_entropy
# Build the decision tree over every column except the last one.
# NOTE(review): `ID3` is not defined anywhere in the visible source — confirm
# it is defined before this line or the call raises NameError.
tree = ID3(dataset,dataset,dataset.columns[:-1])
print(' \nDisplay Tree\n',tree)
Output:
Display Tree
{'outlook': {0: {'humidity': {0.0: 0.0, 1.0: 1.0}}, 1: 1.0, 2: {'wind': {0.0: 1.0,
1.0: 0.0}}}}
Experiment No. 4
Backpropagation Algorithm
Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same
using appropriate data sets.
import numpy as np
# Training inputs: one row per sample, two features each.
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
# Training targets, given as scores out of 100.
y = np.array(([92], [86], [89]), dtype=float)
# Scale every input feature into [0, 1] by its column maximum.
X = X / np.amax(X, axis=0)
# Scale the targets into [0, 1] as well: the network's output unit is a
# sigmoid, so it can never reach raw targets such as 92 and would simply
# saturate at ~1.
y = y / 100
def derivatives_sigmoid(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's derivative when *x* is already a sigmoid
    output (an activation), which is how the training loop uses it.
    """
    complement = 1 - x
    return x * complement
def sigmoid(x):
    """Logistic activation, applied element-wise."""
    return 1 / (1 + np.exp(-x))


# Training hyper-parameters.
epoch = 7000
lr = 0.1
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

# Random initial weights and biases for the hidden and output layers.
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad

    # Update both weight matrices; d_hiddenlayer was previously computed but
    # never applied, so the hidden layer did not learn. The biases are left
    # fixed, as in the original code.
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
Output:
Input: Actual Output: Predicted Output:
[[0.66666667 1. ] [[92.] [[0.99999813]
[0.33333333 0.55555556] [86.] [0.9999937 ]
[1. 0.66666667]] [89.]] [0.99999853]]
Experiment No. 5
Naive Bayesian Classifier
Write a program to implement the Naive Bayesian classifier for a sample training data set stored as a .CSV
file. Compute the accuracy of the classifier, considering a few test data sets.
import csv
import random
import math
import operator
def safe_div(x, y):
    """Return ``x / y``, or ``0`` when the divisor *y* equals zero."""
    return 0 if y == 0 else x / y
def loadCsv(filename):
    """Read a purely numeric CSV file into a list of float rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one list of floats per non-empty CSV row.

    Raises:
        ValueError: If any field cannot be converted to ``float``.
    """
    # The context manager closes the file even when a conversion fails;
    # newline="" is what the csv module expects for files it reads.
    with open(filename, "r", newline="") as handle:
        # Blank lines come back from csv.reader as empty rows; skip them so
        # later code that reads row[-1] does not hit an IndexError.
        return [[float(x) for x in row] for row in csv.reader(handle) if row]
def separateByClass(dataset):
    """Group the rows of *dataset* by their last element (the class value).

    Returns:
        A dict mapping each class value to the list of rows carrying it,
        in their original order.
    """
    grouped = {}
    for row in dataset:
        grouped.setdefault(row[-1], []).append(row)
    return grouped
def mean(numbers):
    """Return the arithmetic mean of *numbers* (``0`` for an empty sequence)."""
    total = sum(numbers)
    count = float(len(numbers))
    return safe_div(total, count)
def stdev(numbers):
    """Return the sample standard deviation of *numbers* (n - 1 denominator).

    A single-element sequence yields ``0`` because the zero denominator is
    absorbed by ``safe_div``.
    """
    centre = mean(numbers)
    squared_error = sum((value - centre) ** 2 for value in numbers)
    degrees_of_freedom = float(len(numbers) - 1)
    return math.sqrt(safe_div(squared_error, degrees_of_freedom))
def summarize(dataset):
    """Return ``(mean, stdev)`` for every column of *dataset* except the last.

    The last column is dropped from the result; ``separateByClass`` treats it
    as the class value.
    """
    per_column = []
    for attribute in zip(*dataset):
        per_column.append((mean(attribute), stdev(attribute)))
    # Discard the statistics of the final column.
    per_column.pop()
    return per_column
def summarizeByClass(dataset):
    """Return per-class column summaries.

    Returns:
        A dict mapping each class value to the list of ``(mean, stdev)``
        pairs produced by ``summarize`` for the rows of that class.
    """
    return {
        class_value: summarize(rows)
        for class_value, rows in separateByClass(dataset).items()
    }
def main():
    """Load the data, fit per-class summaries, predict the test split and
    print the actual values, the predictions and the accuracy."""
    csv_path = 'pgm5.csv'
    train_fraction = 0.9
    rows = loadCsv(csv_path)
    train_rows, test_rows = splitDataset(rows, train_fraction)
    model = summarizeByClass(train_rows)
    predicted = getPredictions(model, test_rows)
    # The class value is the last element of every test row.
    expected = [row[-1] for row in test_rows]
    print('Actual values: {0}%'.format(expected))
    print('Predictions: {0}%'.format(predicted))
    score = getAccuracy(test_rows, predicted)
    print('Accuracy: {0}%'.format(score))
# Run the Naive Bayes experiment as soon as this code is executed.
main()
Output:
Actual values: [5.0, 5.0]%
Predictions: [5.0, 5.0]%
Accuracy: 100.0%
Experiment No. 6
Naive Bayesian Classifier
Assuming a set of documents that need to be classified, use the Naive Bayesian Classifier model to perform
this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and
recall for your data set.
Output:
From: sd345@city.ac.uk (Michael Collier)
Subject: Converting images to HP LaserJet III?
Nntp-Posting-Host: hampton
Organization: The City University
Lines: 14
Accuracy: 0.8348868175765646
precision recall f1-score support
confusion matrix is
[[192 2 6 119]
[ 2 347 4 36]
[ 2 11 322 61]
[ 2 2 1 393]]
Experiment No. 7
Bayesian Network
Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate
the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library
classes/API.
import pandas as pd
# Load the heart-disease records.
data=pd.read_csv("pgm7.csv")
# read_csv already returns a DataFrame; this re-wraps the same table.
heart_disease=pd.DataFrame(data)
# Ask the inference engine for the distribution of 'heartdisease' given
# evidence typed in by the user; each prompt lists the integer code expected.
# NOTE(review): `HeartDisease_infer` is not defined anywhere in the visible
# source (no model construction or fitting is shown) — confirm it is created
# before this line or the call raises NameError.
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
'age':int(input('Enter age {SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4} :')),
'Gender':int(input('enter Gender {Male:0, Female:1} :')),
'Family':int(input('enter Family history {yes:1, No:0} :')),
'diet':int(input('enter diet {High:0, Medium:1} :')),
'Lifestyle':int(input('enter Lifestyle {Athlete:0, Active:1, Moderate:2, Sedentary:3} :')),
'cholestrol':int(input('enter cholestrol {High:0, BorderLine:1, Normal:2} :'))
})
# The query result is indexed by variable name to print that variable's table.
print(q['heartdisease'])
Output:
Enter age {SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4} :2
enter Gender {Male:0, Female:1} :0
enter Family history {yes:1, No:0} :0
enter diet {High:0, Medium:1} :1
enter Lifestyle {Athlete:0, Active:1, Moderate:2, Sedentary:3} :3
enter cholestrol {High:0, BorderLine:1, Normal:2} :1
+----------------+---------------------+
| heartdisease | phi(heartdisease) |
+================+=====================+
| heartdisease_0 | 0.0000 |
+----------------+---------------------+
| heartdisease_1 | 1.0000 |
+----------------+---------------------+
Experiment No. 8
EM Algorithm and k-Means Algorithm
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for clustering using
k-Means algorithm. Compare the results of these two algorithms and comment on the quality of clustering. You
can add Java/Python ML library classes/API in the program.
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.mixture import GaussianMixture
import pandas as pd
# Load the data set and keep the two feature columns used for clustering.
X = pd.read_csv("pgm8.csv")
x1 = X['Distance_Feature'].values
x2 = X['Speeding_Feature'].values
# Re-bind X to an (n_samples, 2) array of (distance, speeding) pairs.
X = np.array(list(zip(x1, x2))).reshape(len(x1), 2)

# Scatter plot of the raw data.
plt.plot()
plt.xlim([0, 100])
plt.ylim([0, 50])
plt.title('Dataset')
plt.scatter(x1, x2)

# EM clustering: fit a three-component Gaussian mixture and label every point.
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
em_predictions = gmm.predict(X)
print("\nEM predictions")
print(em_predictions)
print("mean:\n", gmm.means_)
print('\n')
print("Covariances\n", gmm.covariances_)
print(X)
plt.title('Exceptation Maximum')
plt.scatter(X[:, 0], X[:, 1], c=em_predictions, s=50)
plt.show()

# k-Means clustering on the same data with the same number of clusters.
# `kmeans` and `plt1` were used without ever being defined; fit the model
# here and draw with the pyplot module that is already imported as `plt`.
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
print(kmeans.cluster_centers_)
print(kmeans.labels_)
plt.title('KMEANS')
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], color='black')
plt.show()
Output:
EM predictions
[1 1 1 1 1 2 1 2 2 1 0 0 1 1 0 0 0 1 1 1 1 0 1]
mean:
[[51.49401384 38.73668604]
[55.82978127 23.99404542]
[42.4275654 18.53365945]]
Covariances
[[[ 20.64249138 3.27817009]
[ 3.27817009 21.07999619]]
[[ 47.03399108 10.16152267]
[ 10.16152267 14.81719721]]
[[ 51.65448747 -61.81920385]
[-61.81920385 101.39620353]]]
[[71.24 28. ]
[52.53 25. ]
[64.54 27. ]
[55.69 22. ]
[54.58 25. ]
[41.91 10. ]
[58.64 20. ]
[52.02 8. ]
[31.25 34. ]
[44.31 19. ]
[49.35 40. ]
[58.07 45. ]
[44.22 22. ]
[55.73 19. ]
[46.63 43. ]
[52.97 32. ]
[46.25 35. ]
[51.55 27. ]
[57.05 26. ]
[58.45 30. ]
[43.42 23. ]
[55.68 37. ]
[55.15 18. ]]
[[47.87166667 39. ]
[57.34333333 24.91666667]
[45.176 16.4 ]]
[1 1 1 1 1 2 1 2 0 2 0 0 2 1 0 1 0 1 1 1 2 0 1]
Experiment No. 9
k-Nearest Neighbour Algorithm
Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print both correct
and wrong predictions. Java/Python ML library classes can be used for this problem.
Output:
Feature Names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal
width (cm)'] Iris Data: [[5.1 3.5 1.4 0.2]
[4.9 3. 1.4 0.2]
[4.7 3.2 1.3 0.2]
[4.6 3.1 1.5 0.2]
[5.1 3.8 1.5 0.3]
[5.4 3.4 1.7 0.2]
[5.1 3.7 1.5 0.4]
[4.6 3.6 1. 0.2]
[5.1 3.3 1.7 0.5]
[4.8 3.4 1.9 0.2]
[5. 3. 1.6 0.2]
[5. 3.4 1.6 0.4]
[5.2 3.5 1.5 0.2]
[5.2 3.4 1.4 0.2]
[6.5 3. 5.2 2. ]
[6.2 3.4 5.4 2.3]
[5.9 3. 5.1 1.8]] Target Names: ['setosa' 'versicolor' 'virginica'] Target: [0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
2 2]
Accuracy= 0.9473684210526315
Predicted Data
[2 0 1 1 1 2 0 0 1 2 0 1 0 1 1 0 0 0 1 1 0 2 2 2 2 0 0 2 0 2 2 1 2 2 0 2 0
0]
Test data :
[2 0 1 1 1 2 0 0 1 2 0 1 0 2 2 0 0 0 1 1 0 2 2 2 2 0 0 2 0 2 2 1 2 2 0 2 0
0]
Result is
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Total no of samples misclassied = 2
Experiment No. 10
Locally Weighted Regression Algorithm
Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points. Select
appropriate data set for your experiment and draw graphs.
import operator
from os import listdir
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats.stats import pearsonr
def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix for one query point.

    Args:
        point: The query row (1 x n matrix or array-like of length n).
        xmat: The m x n design matrix whose rows are weighted.
        k: Bandwidth; the weight of row j is
            ``exp(-||point - xmat[j]||^2 / (2 * k**2))``.

    Returns:
        An m x m ``numpy.matrix`` with the weights on the diagonal and
        zeros elsewhere.
    """
    # Use the xmat argument rather than a module-level `X`, so the function
    # works for any design matrix passed in.
    rows = np.asarray(xmat, dtype=float)
    query = np.asarray(point, dtype=float).reshape(1, -1)
    sq_dist = np.sum((rows - query) ** 2, axis=1)
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    return np.asmatrix(np.diag(np.exp(sq_dist / (-2.0 * k ** 2))))
def localWeight(point, xmat, ymat, k):
    """Solve the weighted least-squares problem for one query point.

    Args:
        point: The query row (1 x n).
        xmat: The m x n design matrix.
        ymat: The targets as a 1 x m row matrix.
        k: Kernel bandwidth forwarded to ``kernel``.

    Returns:
        The n x 1 coefficient matrix ``(X^T W X)^-1 X^T W y^T``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T W X`` is singular.
    """
    # Work on the xmat argument instead of a module-level `X`.
    design = np.asmatrix(xmat)
    targets = np.asmatrix(ymat)
    wei = kernel(point, design, k)
    return (design.T * (wei * design)).I * (design.T * (wei * targets.T))
def localWeightRegression(xmat, ymat, k):
    """Predict every training point with locally weighted linear regression.

    Args:
        xmat: The m x n design matrix.
        ymat: The targets as a 1 x m row matrix.
        k: Kernel bandwidth.

    Returns:
        A 1-D float array of length m; entry i is the prediction for row i
        from the local model fitted around that row.
    """
    design = np.asmatrix(xmat)
    m = design.shape[0]
    ypred = np.zeros(m)
    for i in range(m):
        row = design[i]
        # row * coefficients is a 1x1 matrix; take its single element
        # explicitly rather than relying on implicit array-to-scalar
        # conversion, which newer NumPy deprecates.
        ypred[i] = (row * localWeight(row, design, ymat, k))[0, 0]
    return ypred
# Load the data set; colA is the predictor and colB the response.
data = pd.read_csv('lr.csv')
bill = np.array(data.colA)
tip = np.array(data.colB)

# Row-matrix views of both columns. np.asmatrix replaces the np.mat alias,
# which NumPy 2.0 removed; the duplicated assignments were dropped.
mbill = np.asmatrix(bill)
mtip = np.asmatrix(tip)

# Design matrix: a column of ones (intercept) next to the predictor column.
m = np.shape(mbill)[1]
one = np.asmatrix(np.ones(m))
X = np.hstack((one.T, mbill.T))

# Fit and predict with bandwidth 0.2.
ypred = localWeightRegression(X, mtip, 0.2)

# Order the points by predictor value so the fitted curve is drawn left to right.
SortIndex = X[:,1].argsort(0)
xsort = X[SortIndex][:,0]

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='green')
ax.plot(xsort[:,1],ypred[SortIndex], color = 'red', linewidth=5)
plt.xlabel('colA')
plt.ylabel('colB')
plt.show()
Output: