You are on page 1 of 22

Program 1

Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.


import csv


def find_s(rows, positive_label="Y"):
    """Return the most specific hypothesis consistent with the positive rows.

    Each row is a sequence of attribute values followed by a class label in
    the last position.  Only rows whose label equals ``positive_label``
    contribute.  Every attribute starts as '0' (nothing observed yet); the
    first positive row fills it in, and any later positive row that
    disagrees generalises that attribute to '?'.

    The number of attributes is taken from the first row rather than being
    fixed at six.
    """
    n_attrs = max(len(rows[0]) - 1, 0) if rows else 0
    hypothesis = ['0'] * n_attrs
    for row in rows:
        # Skip blank lines and negative examples.
        if not row or row[-1] != positive_label:
            continue
        for j, value in enumerate(row[:n_attrs]):
            if hypothesis[j] == '0':
                hypothesis[j] = value
            elif hypothesis[j] != value:
                hypothesis[j] = '?'
    return hypothesis


if __name__ == "__main__":
    with open('Data1.csv', 'r') as f:
        your_list = list(csv.reader(f))

    # Kept as a one-element list of hypotheses, as in the original script.
    h = [find_s(your_list)]

    print("Most specific hypothesis is")
    print(h)

Program 2
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.

import numpy as np
import csv
def candidateElimination(filename='Data2.csv'):
    """Run the candidate-elimination walk-through on a CSV of labelled rows.

    Every CSV row holds attribute values followed by a class label in the
    last column; the label "P" marks a positive example and anything else is
    treated as negative.  Progress is printed after every example.

    Args:
        filename: path of the CSV file to read (defaults to 'Data2.csv').

    Returns:
        A ``(specificH, generalH)`` tuple of NumPy object arrays holding the
        final specific hypothesis and the final general-hypothesis matrix.

    Raises:
        ValueError: if the file contains no rows.
    """
    # Read all non-empty rows; the context manager closes the file.
    with open(filename, 'r') as csvFile:
        data = [row for row in csv.reader(csvFile, delimiter=',') if row]
    if not data:
        raise ValueError("no training examples found in " + filename)

    # Convert To Numpy Array
    data = np.asarray(data, dtype='object')

    X = data[:, :-1]
    Y = data[:, -1].reshape(X.shape[0], 1)

    print("\nTraining Data :")
    print(X)
    print("\nLabels :")
    print(Y)

    print("\nShape Of X :")
    print(X.shape)
    print("\nShape Of Y :")
    print(Y.shape)

    n_attrs = X.shape[1]

    specificH = np.asarray([" % " for _ in range(n_attrs)], dtype='object')

    # The wildcard is written "?" everywhere so that the comparisons against
    # "?" further down can actually match it.
    generalH = np.asarray(
        [["?" for _ in range(n_attrs)] for _ in range(n_attrs)],
        dtype='object',
    )

    print("\nInitial Hypothesis :")
    print(specificH)

    print("\nInitial General Hypothesis :")
    print(generalH)

    # Seed the specific hypothesis from the positive examples; the last
    # positive row wins.  A copy is taken so that generalising the hypothesis
    # does not overwrite the training data it was taken from.
    for i in range(Y.shape[0]):
        if Y[i, 0] == "P":
            specificH = X[i].copy()

    print("\nCandidate Elimination : ")

    # For Each Training Example
    for i in range(X.shape[0]):
        if Y[i, 0] == "P":
            # Positive Example
            for j in range(n_attrs):
                if X[i][j] != specificH[j]:
                    specificH[j] = '?'

                if specificH[j] != generalH[j][j] and generalH[j][j] != "?":
                    generalH[j][j] = "?"
        else:
            # Negative Example
            for j in range(n_attrs):
                if X[i][j] != specificH[j]:
                    generalH[j][j] = specificH[j]

        print("\n---------Step " + str(i + 1) + "---------\n")
        print("\nSpecific Set : ")
        print(specificH)
        print("\nGeneral Set : ")
        print(generalH)
        print("\n------------------------\n")

    print("\nFinal Specific Hypothesis : ")
    print(specificH)
    print("\nFinal General Hypothesis : ")
    print(generalH)
    print("\n")

    return specificH, generalH


Program 3
Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample.


import pandas as pd
import numpy as np
import math

class Node:
    """One node of the decision tree; a node without branches is a leaf."""

    def __init__(self, l):
        # Attribute name for an internal node, class value for a leaf.
        # Callers overwrite this once the node's role is known.
        self.label = l
        # Maps each value of the tested attribute to the child Node.
        self.branches = {}

def entropy(data, target="Play Tennis"):
    """Return the binary entropy (base 2) of the class column of ``data``.

    Args:
        data: pandas DataFrame containing the column named by ``target``.
        target: name of the class column; its values are 'Y' and 'N'.

    Returns:
        0 for an empty or pure data set, otherwise the entropy in bits.
    """
    total_ex = len(data)
    if total_ex == 0:
        return 0

    result = 0
    for label in ('Y', 'N'):
        count = len(data.loc[data[target] == label])
        # A class with no examples contributes nothing (0 * log 0 := 0).
        if count > 0:
            proportion = count / float(total_ex)
            result -= proportion * math.log(proportion, 2)
    return result
def gain(s, data, attrib):
    """Return the information gain of splitting ``data`` on ``attrib``.

    Args:
        s: entropy of ``data`` before the split.
        data: pandas DataFrame holding the examples.
        attrib: name of the column to split on.

    Returns:
        ``s`` minus the size-weighted entropy of each subset produced by the
        distinct values of ``attrib``.
    """
    total = float(len(data))
    info_gain = s
    for val in set(data[attrib]):
        subset = data.loc[data[attrib] == val]
        info_gain -= len(subset) / total * entropy(subset)
    return info_gain

def get_attrib(data):
    """Return the name of the attribute column with the highest gain.

    Every column except the last (the class column) is a candidate.  The
    first candidate is always accepted, so a real column name is returned
    even when no split yields a positive gain; ties keep the earlier column.
    An empty string is returned only when there are no attribute columns.
    """
    entropy_s = entropy(data)
    attribute = ""
    max_gain = None
    for attr in data.columns[:-1]:
        g = gain(entropy_s, data, attr)
        if max_gain is None or g > max_gain:
            max_gain = g
            attribute = attr
    return attribute

def decision_tree(data):
    """Build an ID3 decision tree for ``data`` and return its root Node.

    The last column of ``data`` is the class column with values 'Y'/'N'.
    Leaves carry the class value as their label; internal nodes carry the
    attribute name and one branch per observed attribute value.
    """
    root = Node("NULL")
    target = data.columns[-1]
    positives = len(data.loc[data[target] == 'Y'])

    # Pure subset: emit a leaf with the common class.
    if entropy(data) == 0:
        root.label = "Y" if positives == len(data) else "N"
        return root

    # Only the class column is left but the examples still disagree: fall
    # back to the majority class instead of returning no node at all.
    if len(data.columns) == 1:
        root.label = "Y" if positives * 2 >= len(data) else "N"
        return root

    attrib = get_attrib(data)
    root.label = attrib

    for val in set(data[attrib]):
        # Recurse on the matching rows with the used attribute removed.
        subset = data.loc[data[attrib] == val].drop(attrib, axis=1)
        root.branches[val] = decision_tree(subset)
    return root

def get_rules(root, rule, rules):
    """Collect one textual rule per leaf of the tree rooted at ``root``.

    Args:
        root: node exposing ``branches`` (dict) and ``label`` (str).
        rule: rule text accumulated on the path so far ("" at the top).
        rules: list that receives the finished rules; it is mutated.

    Returns:
        The same ``rules`` list, for convenience.
    """
    if not root.branches:
        # Leaf: trim the tail of the last " ^ " joiner and add the outcome.
        rules.append(rule[:-2] + " => " + root.label)
        return rules

    for value, child in root.branches.items():
        get_rules(child, rule + root.label + "=" + value + " ^ ", rules)
    return rules

def test(tree, test_str):
    """Classify one sample by walking the tree from ``tree`` to a leaf.

    Args:
        tree: root node exposing ``branches`` (dict) and ``label``.
        test_str: dict mapping attribute name to the sample's value.

    Returns:
        The label of the leaf that is reached.

    Raises:
        KeyError: if the sample lacks a tested attribute or holds a value
            for which the tree has no branch.
    """
    node = tree
    while node.branches:
        node = node.branches[test_str[node.label]]
    return node.label

if __name__ == "__main__":
    # Build the tree from the training file; the class column is the last one.
    data = pd.read_csv('Data3.csv')

    tree = decision_tree(data)

    # Show the learned tree as one rule per leaf.
    rules = get_rules(tree, "", [])
    for rule in rules:
        print(rule)

    # Read one value per attribute from the user and classify that sample.
    test_str = {}
    print("Enter test case input")
    for i in data.columns[:-1]:
        test_str[i] = input(i + ": ")

    print(test(tree, test_str))


Program 4 (Mam)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.


#!/usr/bin/env python
# coding: utf-8

# In[1]:

import random
from math import exp
from random import seed

# Initialize a network

def initialize_network(n_inputs, n_hidden, n_outputs):
    """Create a network with one hidden layer and random initial weights.

    Each neuron is a dict with a 'weights' list holding one weight per
    input plus a trailing bias weight, drawn uniformly from [-0.5, 0.5].

    Returns:
        ``[hidden_layer, output_layer]``, each a list of neuron dicts.
    """
    network = []

    # for each hidden node list of weights which is equal to no of inputs plus 1(bias)
    hidden_layer = [
        {'weights': [random.uniform(-0.5, 0.5) for _ in range(n_inputs + 1)]}
        for _ in range(n_hidden)
    ]
    network.append(hidden_layer)

    output_layer = [
        {'weights': [random.uniform(-0.5, 0.5) for _ in range(n_hidden + 1)]}
        for _ in range(n_outputs)
    ]
    network.append(output_layer)

    print("\n The initialised Neural Network:\n")
    for i, layer in enumerate(network, start=1):
        # each layer consists of list of weight arrays for each node
        for j, sub in enumerate(layer, start=1):
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
    return network

# Calculate neuron activation (net) for an input

def activate(weights, inputs):
    """Return the weighted sum of ``inputs`` plus the bias.

    ``weights[-1]`` is the bias; the other weights pair up with the leading
    entries of ``inputs``.  Extra trailing inputs (such as a class label at
    the end of a data row) are ignored.
    """
    activation = weights[-1]  # initialise induced local field to the bias term
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation
# Transfer neuron activation to sigmoid function
def transfer(activation):
    """Return the logistic sigmoid of ``activation``.

    The two branches are algebraically identical; choosing by sign keeps the
    argument of ``exp`` non-positive so that large-magnitude activations
    cannot raise OverflowError.
    """
    if activation >= 0:
        return 1.0 / (1.0 + exp(-activation))
    scaled = exp(activation)
    return scaled / (1.0 + scaled)

# Forward propagate input to a network output

def forward_propagate(network, row):
    """Feed ``row`` through every layer and return the last layer's outputs.

    Each neuron's activation is stored on the neuron under 'output', and the
    collected outputs of one layer become the inputs of the next.
    """
    inputs = row
    print("inside the forward")

    for layer in network:  # traverse through the layer
        new_inputs = []  # inputs to the next layer
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of an neuron output

def transfer_derivative(output):
    """Return the sigmoid slope, expressed through the sigmoid's own output."""
    return output * (1.0 - output)

# Backpropagate error and store in neurons

def backward_propagate_error(network, expected):
    """Compute each neuron's error signal and store it under 'delta'.

    Layers are visited from last to first.  For the last layer the error is
    ``expected - output``; for earlier layers it is the sum of the next
    layer's deltas weighted by the connecting weights.  Each neuron must
    already carry an 'output' value.
    """
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = []

        if i != len(network) - 1:
            # Hidden layer: gather error from the downstream layer.
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += neuron['weights'][j] * neuron['delta']
                errors.append(error)
        else:
            # Output layer: compare against the expected values.
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])

        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with error

def update_weights(network, row, l_rate):
    """Adjust every weight in place using the stored 'delta' values.

    Args:
        network: list of layers, each a list of neuron dicts with 'weights'
            and 'delta' (and 'output' for all layers but the last).
        row: training row; its last element is skipped.
        l_rate: learning rate applied to every update.
    """
    for i in range(len(network)):
        inputs = row[:-1]  # all columns except for last one
        if i != 0:
            # output of the previous layer is input for the next
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            # updating bias link
            neuron['weights'][-1] += l_rate * neuron['delta']

# Train a network for a fixed number of epochs

def train_network(network, train, l_rate, n_epoch, n_outputs):
    """Train ``network`` in place for ``n_epoch`` passes over ``train``.

    The last element of each training row is used as an index into a
    one-hot target vector of length ``n_outputs``, so it must be an integer
    in ``range(n_outputs)``.  The summed squared error of each epoch is
    printed.
    """
    print("\n Network Training Begins:\n")

    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

    print("\n Network Training Ends:\n")

#Test training backprop algorithm

if __name__ == "__main__":
    # Two numeric inputs followed by an integer class label per row.
    # NOTE(review): only the first row survives in the source listing; the
    # remaining rows are reconstructed from the commonly circulated version
    # of this example and should be checked against the intended data set.
    dataset = [[2.7810836, 2.550537003, 0],
               [1.465489372, 2.362125076, 0],
               [3.396561688, 4.400293529, 0],
               [1.38807019, 1.850220317, 0],
               [3.06407232, 3.005305973, 0],
               [7.627531214, 2.759262235, 1],
               [5.332441248, 2.088626775, 1],
               [6.922596716, 1.77106367, 1],
               [8.675418651, -0.242068655, 1],
               [7.673756466, 3.508563011, 1]]

    print("\n The input Data Set :\n", dataset)

    n_inputs = len(dataset[0]) - 1
    print("\n Number of Inputs :\n", n_inputs)
    n_outputs = len(set([row[-1] for row in dataset]))
    print("\n Number of Outputs :\n", n_outputs)

    #Network Initialization
    network = initialize_network(n_inputs, 2, n_outputs)

    # Training the Network
    train_network(network, dataset, 0.5, 20, n_outputs)

    print("\n Final Neural Network :")

    # Layers and nodes are numbered from 1 in the printout.
    for i, layer in enumerate(network, start=1):
        for j, sub in enumerate(layer, start=1):
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)

# In[5]:

from math import exp

# Calculate neuron activation for an input

def activate(weights, inputs):
    """Return the weighted sum of ``inputs`` plus the bias.

    The last weight is the bias and is not paired with an input; likewise a
    trailing label at the end of ``inputs`` is never read.
    """
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation
# Transfer neuron activation
def transfer(activation):
    """Return the logistic sigmoid of ``activation``.

    The branch on sign keeps the argument of ``exp`` non-positive, so very
    negative activations return 0.0 instead of raising OverflowError.
    """
    if activation >= 0:
        return 1.0 / (1.0 + exp(-activation))
    scaled = exp(activation)
    return scaled / (1.0 + scaled)

# Forward propagate input to a network output

def forward_propagate(network, row):
    """Feed ``row`` through every layer and return the last layer's outputs.

    Each neuron's result is also stored on the neuron under 'output'.
    """
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            # Collect this layer's outputs as the next layer's inputs.
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Make a prediction with a network

def predict(network, row):
    """Return the index of the output neuron with the largest activation."""
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

# Test making predictions with the network

if __name__ == "__main__":
    # NOTE(review): only the first row survives in the source listing; the
    # remaining rows are reconstructed from the commonly circulated version
    # of this example and should be checked against the intended data set.
    dataset = [[2.7810836, 2.550537003, 0],
               [1.465489372, 2.362125076, 0],
               [3.396561688, 4.400293529, 0],
               [1.38807019, 1.850220317, 0],
               [3.06407232, 3.005305973, 0],
               [7.627531214, 2.759262235, 1],
               [5.332441248, 2.088626775, 1],
               [6.922596716, 1.77106367, 1],
               [8.675418651, -0.242068655, 1],
               [7.673756466, 3.508563011, 1]]

    # `network` is the one trained earlier in this file.  To predict with a
    # fixed, pre-trained network instead, uncomment the literal below.
    # NOTE(review): the third weight of the second hidden neuron is cut off
    # in the source; the value shown is from the commonly circulated version
    # and needs confirming.
    # network = [[{'weights': [-1.482313569067226, 1.8308790073202204, 1.078381922048799]},
    #             {'weights': [0.23244990332399884, 0.3621998343835864, 0.40289821191094327]}],
    #            [{'weights': [2.5001872433501404, 0.7887233511355132, -1.1026649757805829]},
    #             {'weights': [-2.429350576245497, 0.8357651039198697, 1.0699217181280656]}]]
    for row in dataset:
        prediction = predict(network, row)
        print('Expected=%d, Got=%d' % (row[-1], prediction))

Program 4 (Alt)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.


import numpy as np
import csv


def load_data(filename):
    """Read a numeric CSV and return ``(X, y)`` as float arrays.

    ``X`` holds every column except the last; ``y`` holds the last column
    as a single-column array.  Blank lines are skipped.
    """
    with open(filename, "r") as csv_file:
        rows = [row for row in csv.reader(csv_file) if row]
    features = [[float(x) for x in row[:-1]] for row in rows]
    # Convert the whole last field; iterating over it would split the text
    # into individual characters.
    targets = [[float(row[-1])] for row in rows]
    return np.array(features, dtype=float), np.array(targets, dtype=float)


#Sigmoid Function
def sigmoid (x):
    """Return the element-wise logistic sigmoid of ``x``."""
    return 1/(1 + np.exp(-x))


#Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    """Return the sigmoid slope given ``x``, the sigmoid's output."""
    return x * (1 - x)


if __name__ == "__main__":
    # NOTE(review): the source never defines `filename`; 'Data4.csv' follows
    # the naming of the other programs and must be confirmed.
    filename = 'Data4.csv'
    X, y = load_data(filename)
    X = X/np.amax(X,axis=0) # scale each column by its maximum
    y = y/100

    #Variable initialization
    epoch=1500 #Setting training iterations
    lr=0.1 #Setting learning rate
    inputlayer_neurons = X.shape[1] #number of features in data set
    hiddenlayer_neurons = 3 #number of hidden layers neurons
    output_neurons = 1 #number of neurons at output layer

    #weight and bias initialization
    #draws a random range of numbers uniformly of dim x*y
    # NOTE(review): the initialisation lines are missing from the source;
    # the shapes below are what the matrix products in the loop require.
    wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = np.random.uniform(size=(1, hiddenlayer_neurons))
    wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = np.random.uniform(size=(1, output_neurons))

    for i in range(epoch):
        #Forward Propagation
        hinp1 = np.dot(X, wh)
        hinp = hinp1 + bh
        hlayer_act = sigmoid(hinp)
        outinp1 = np.dot(hlayer_act, wout)
        outinp = outinp1 + bout
        output = sigmoid(outinp)

        #Backpropagation
        EO = y - output
        outgrad = derivatives_sigmoid(output)
        d_output = EO * outgrad
        EH = d_output.dot(wout.T)
        #how much hidden layer wts contributed to error
        hiddengrad = derivatives_sigmoid(hlayer_act)
        d_hiddenlayer = EH * hiddengrad

        wout += hlayer_act.T.dot(d_output) * lr
        wh += X.T.dot(d_hiddenlayer) * lr

    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n" ,output)
Program 5
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering few test
data sets.

import csv
import math
import random
#Handle data
def loadCsv(filename):
    """Read ``filename`` as CSV and return its rows as lists of floats.

    Raises:
        ValueError: if any field cannot be converted to ``float``.
    """
    # The context manager closes the file once the rows are read.
    with open(filename, "r") as csv_file:
        return [[float(x) for x in row] for row in csv.reader(csv_file)]
#Split dataset with ratio
def splitDataset(dataset, splitRatio):
    """Randomly split ``dataset`` into a training part and the remainder.

    Args:
        dataset: sequence of rows; it is not modified.
        splitRatio: fraction of rows to move into the training set.

    Returns:
        ``[trainSet, rest]`` where ``trainSet`` holds
        ``int(len(dataset) * splitRatio)`` randomly chosen rows (at most all
        of them) and ``rest`` holds the rows that were not chosen.
    """
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    remaining = list(dataset)
    # Move randomly chosen rows across until the training set is full or
    # nothing is left to draw from.
    while len(trainSet) < trainSize and remaining:
        index = random.randrange(len(remaining))
        trainSet.append(remaining.pop(index))
    return [trainSet, remaining]
#Separate by Class
def separateByClass(dataset):
    """Group the rows of ``dataset`` by class value.

    The class value is the last element of each row.

    Returns:
        Dict mapping each class value to the list of rows carrying it, in
        their original order.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated
#Calculate Mean
def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises:
        ZeroDivisionError: if ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    """Return the sample standard deviation (N - 1 denominator).

    Raises:
        ZeroDivisionError: if ``numbers`` has fewer than two values.
    """
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)
#Summarize Dataset
def summarize(dataset):
    """Return ``(mean, stdev)`` for every attribute column of ``dataset``.

    The last column holds the class value, so its entry is dropped from the
    result.
    """
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries
#Summarize attributes by class
def summarizeByClass(dataset):
    """Return the per-attribute ``(mean, stdev)`` summaries for each class.

    Returns:
        Dict mapping each class value to the list produced by
        ``summarize`` for the rows of that class.
    """
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries
#Calculate Gaussian Probability Density Function
def calculateProbability(x, mean, stdev):
    """Return the Gaussian probability density of ``x``.

    Args:
        x: value to evaluate.
        mean: mean of the distribution.
        stdev: standard deviation of the distribution; must be non-zero.

    Raises:
        ZeroDivisionError: if ``stdev`` is 0.
    """
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
#Calculate Class Probabilities
def calculateClassProbabilities(summaries, inputVector):
    """Return, per class, the product of the attribute densities.

    Args:
        summaries: dict mapping class value to a list of ``(mean, stdev)``
            pairs, one per attribute.
        inputVector: attribute values in the same order as the summaries;
            trailing extra elements are not read.

    Returns:
        Dict mapping each class value to the product of the Gaussian
        densities of the input's attributes under that class.
    """
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities
#Make a prediction
def predict(summaries, inputVector):
    """Return the class whose probability for ``inputVector`` is largest.

    Ties keep the class seen first; ``None`` is returned when ``summaries``
    contains no classes.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel
#Get predictions
def getPredictions(summaries, testSet):
    """Return the predicted class for every row of ``testSet``, in order."""
    predictions = []
    for row in testSet:
        result = predict(summaries, row)
        predictions.append(result)
    return predictions
#Get Accuracy
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose class matches its prediction.

    The true class is the last element of each test row.  An empty test set
    yields 0.0 rather than a division by zero.
    """
    if not testSet:
        return 0.0
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    """Train the classifier on 'Data5.csv' and print its test accuracy."""
    filename = 'Data5.csv'
    splitRatio = 0.68
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train = {1} and test = {2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    #prepare model
    summaries = summarizeByClass(trainingSet)
    #test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))


if __name__ == "__main__":
    main()



Program 6
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Calculate the accuracy, precision, and recall for
your data set.

from sklearn.datasets import fetch_20newsgroups  # Load filenames and data from the 20 newsgroups dataset
from sklearn.metrics import confusion_matrix  # Used when evaluating the classifier
from sklearn.metrics import classification_report  # Build a text report showing the main classification metrics
import numpy as np
import os

from sklearn.feature_extraction.text import CountVectorizer

from sklearn.feature_extraction.text import TfidfTransformer

from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score
from sklearn import metrics

if __name__ == "__main__":
    # NOTE(review): the data-loading, vectorising and training lines are
    # missing from the source listing; the pipeline below is reconstructed
    # from the imports and the surviving print statements.  The category
    # list in particular is an assumption to be confirmed.
    categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
    twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
    twenty_test = fetch_20newsgroups(subset='test', categories=categories, shuffle=True)

    print("Number of Training Examples: ",len(twenty_train.data))
    print("Number of Test Examples: ",len(twenty_test.data))

    # Bag-of-words counts, then TF-IDF weighting, fitted on training data only.
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(twenty_train.data)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

    model = MultinomialNB()
    model.fit(X_train_tfidf, twenty_train.target)

    # Reuse the fitted vocabulary and weights for the test documents.
    X_test_tfidf = tfidf_transformer.transform(count_vect.transform(twenty_test.data))
    predicted = model.predict(X_test_tfidf)

    print("Accuracy: ",accuracy_score(twenty_test.target,predicted))
    # Precision and recall per category.
    print(classification_report(twenty_test.target, predicted,
                                target_names=twenty_test.target_names))
    print("Confusion matrix \n",metrics.confusion_matrix(twenty_test.target,predicted))


Program 7
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease
Data Set.


import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

if __name__ == "__main__":
    #read attributes
    with open('Data7_Names.csv', 'r') as names_file:
        lines = list(csv.reader(names_file))
    attributes = lines[0]

    #read cleveland heart disease data
    heartDisease = pd.read_csv('Data7.csv')
    # Missing values are marked with "?" in the file.
    heartDisease = heartDisease.replace("?",np.nan)

    #display data
    print("Few examples from dataset are given below")
    print(heartDisease.head())
    print("Attributes and data types")
    print(heartDisease.dtypes)

    #Model Bayesian Network
    # NOTE(review): only the first three edges survive in the source; the
    # rest are reconstructed from the commonly circulated version of this
    # exercise and should be confirmed against the intended structure.
    model = BayesianModel([('age','trestbps'),('age','fbs'),('sex','trestbps'),
                           ('exang','trestbps'),('trestbps','heartdisease'),
                           ('fbs','heartdisease'),('heartdisease','restecg'),
                           ('heartdisease','thalach'),('heartdisease','chol')])

    #learning CPDs using maximum likelihood estimators
    print("Learning CPDs using maximum likelihood estimators...")
    model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)

    #inferencing with bayesian network
    print("\nInferencing the bayesian network:")
    HeartDisease_infer = VariableElimination(model)

    #Computing the probability of heart disease given evidence
    # NOTE(review): older pgmpy releases return a dict from query(), in which
    # case print(q['heartdisease']) is needed instead of print(q).
    print("\n1.Probability of heart disease given age=28")
    q = HeartDisease_infer.query(variables=['heartdisease'],evidence={'age':28})
    print(q)

    print("\n2.Probability of heart disease given chol(cholestrol)=100")
    q = HeartDisease_infer.query(variables=['heartdisease'],evidence={'chol':100})
    print(q)


Program 8
Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data
set for clustering using k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering.

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from sklearn import preprocessing
#from sklearn.mixture import GMM # Used for older versions of sklearn
from sklearn.mixture import GaussianMixture

if __name__ == "__main__":
    iris = datasets.load_iris()

    X = pd.DataFrame(iris.data)
    X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
    X_norm = preprocessing.normalize(X)

    y = pd.DataFrame(iris.target)
    y.columns = ['Targets']

    # K-Means Model
    # NOTE(review): the fit call is missing from the source; fitting on the
    # raw features is an assumption.
    model = KMeans(n_clusters = 3)
    model.fit(X)

    # EM Model
    #gmm = GMM(n_components = 3) # Used for older versions of sklearn
    gmm = GaussianMixture(n_components = 3)
    # The mixture must be fitted before it can predict; it is fitted on the
    # same normalised data it predicts on.
    gmm.fit(X_norm)
    gmm_y = gmm.predict(X_norm)

    plt.figure(figsize = (14, 14))

    colormap = np.array(['red', 'lime', 'black'])

    # Real Clusters
    plt.subplot(2, 2, 1)
    plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[y.Targets], s = 40)
    plt.title('Real Clusters')
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')

    # K-Means Output
    plt.subplot(2, 2, 2)
    plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[model.labels_], s = 40)
    plt.title('K-Means Clustering')
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')

    # EM Output
    plt.subplot(2, 2, 3)
    plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[gmm_y], s = 40)
    plt.title('GMM Clustering')
    plt.xlabel('Petal Length')
    plt.ylabel('Petal Width')

    plt.show()


Program 9
Write a program to implement k-Nearest Neighbour algorithm to classify the iris data
set. Print both correct and wrong predictions.


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #Use this for Sk Learn 0.20 version
#from sklearn.cross_validation import train_test_split # Used for older versions of sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

if __name__ == "__main__":
    #Input Data
    names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
    dataset = pd.read_csv("Data_8_9.csv", names = names)

    x = dataset.iloc[:, :-1].values
    y = dataset.iloc[:,4].values

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)

    # Fit the scaler on the training split only, then apply it to both.
    scaler = StandardScaler()
    scaler.fit(x_train)

    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    #Model Creation
    classifier = KNeighborsClassifier(n_neighbors = 5)
    classifier.fit(x_train, y_train)

    y_pred = classifier.predict(x_test)

    # Show every test example with its actual and predicted label, and
    # whether the prediction was correct or wrong.
    for i in range(len(y_pred)):
        print ("Training Example : ")
        print (x_test[i])
        print ("Actual Label : ")
        print (y_test[i])
        print ("Predicted Label : ")
        print (y_pred[i])
        print ("Prediction : " + ("Correct" if y_test[i] == y_pred[i] else "Wrong"))
        print ("--------------------------------------------")

    print ("Confusion Matrix : ")

    print(confusion_matrix(y_test, y_pred))
    print ("")
    print ("Classification Report : ")
    print(classification_report(y_test, y_pred))


Program 10
Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select appropriate data set for your experiment and draw graphs.

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    """Return the diagonal Gaussian weight matrix of ``point``.

    Args:
        point: query point as a 1 x n row (matrix or array-like).
        xmat: m x n matrix of training points.
        k: bandwidth; larger values give distant rows more weight.

    Returns:
        An m x m matrix whose j-th diagonal entry is
        ``exp(-|point - xmat[j]|^2 / (2 * k**2))``; all other entries are 0.
    """
    # Work on the argument rather than a module-level array so the function
    # is usable with any data set.
    xmat = np.asmatrix(xmat)
    point = np.asmatrix(point)
    m, n = np.shape(xmat)
    weights = np.asmatrix(np.eye(m))  # eye - identity matrix
    for j in range(m):
        diff = point - xmat[j]
        # diff * diff.T is a 1x1 matrix; take its single element.
        weights[j, j] = np.exp((diff * diff.T)[0, 0] / (-2.0 * k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    """Return the weighted least-squares coefficients local to ``point``.

    Args:
        point: query point as a 1 x n row.
        xmat: m x n matrix of training inputs.
        ymat: 1 x m matrix of training targets.
        k: kernel bandwidth passed on to ``kernel``.

    Returns:
        The n x 1 coefficient matrix ``(X'WX)^-1 X'Wy``.
    """
    wei = kernel(point, xmat, k)
    # Uses the xmat argument throughout, not a module-level array.
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    """Return the locally weighted prediction for every row of ``xmat``.

    A separate weighted regression is solved for each row, using that row
    as the query point.

    Returns:
        A 1-D float array with one prediction per row of ``xmat``.
    """
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        # The product is a 1x1 matrix; store its single element.
        ypred[i] = (xmat[i] * localWeight(xmat[i], xmat, ymat, k))[0, 0]
    return ypred

def graphPlot(X, ypred):
    """Plot the raw data points and the fitted curve.

    Args:
        X: m x 2 matrix whose second column holds the x-axis values.
        ypred: 1-D array of predictions, one per row of ``X``.

    The scatter points come from the module-level ``bill`` and ``tip``
    arrays, which must exist before this function is called.
    """
    # argsort gives the row order that sorts the second column ascending,
    # so the curve is drawn left to right.
    sortindex = X[:, 1].argsort(0)
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:, 1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()

# load data points

if __name__ == "__main__":
    # load data points
    data = pd.read_csv('Data10.csv')
    bill = np.array(data.total_bill)  # We use only Bill amount and Tips data
    tip = np.array(data.tip)

    # np.asmatrix turns each 1-D array into a 1 x m matrix (np.mat, its old
    # alias, no longer exists in NumPy 2.0).
    mbill = np.asmatrix(bill)
    mtip = np.asmatrix(tip)

    m = np.shape(mbill)[1]
    one = np.asmatrix(np.ones(m))
    X = np.hstack((one.T, mbill.T))  # one row per sample: [1, bill]

    ypred = localWeightRegression(X, mtip, 0.5)  # increase k to get smooth curves

    graphPlot(X, ypred)


You might also like