
Program 1

Implement and demonstrate the FIND-S algorithm for finding the most specific
hypothesis based on a given set of training data samples. Read the training data from
a .CSV file.

Code:

import csv

# Load the training examples from the CSV file
with open('Data1.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

# Start with the most specific hypothesis: six attributes, all '0'
h = [['0', '0', '0', '0', '0', '0']]

for i in your_list:
    print(i)
    if i[-1] == "Y":                 # consider only positive examples
        j = 0
        for x in i:
            if x != "Y":             # skip the class label itself
                if x != h[0][j] and h[0][j] == '0':
                    h[0][j] = x      # first positive example: copy the value
                elif x != h[0][j] and h[0][j] != '0':
                    h[0][j] = '?'    # conflicting values: generalise
            j = j + 1

print("Most specific hypothesis is")
print(h)
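FIND-S above assumes each row of Data1.csv carries six attribute values followed by a Y/N label; a hypothetical file in that layout (the classic EnjoySport example):

Sunny,Warm,Normal,Strong,Warm,Same,Y
Sunny,Warm,High,Strong,Warm,Same,Y
Rainy,Cold,High,Strong,Warm,Change,N
Sunny,Warm,High,Strong,Cool,Change,Y

With this input, the hypothesis should converge to ['Sunny', 'Warm', '?', 'Strong', '?', '?'].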

-------------------------------------------------------------------------------------------------------------------------------
Program 2
For a given set of training data examples stored in a .CSV file, implement and
demonstrate the Candidate-Elimination algorithm to output a description of the set
of all hypotheses consistent with the training examples.

Code:
import numpy as np
import csv

def candidateElimination():
    data = []
    csvFile = open('Data2.csv', 'r')
    reader = csv.reader(csvFile, delimiter=',')
    for row in reader:
        data.append(np.array(row))

    # Convert to a numpy array
    data = np.asarray(data, dtype='object')
    X = data[:, :-1]
    Y = data[:, -1].reshape(X.shape[0], 1)

    print("\nTraining Data :")
    print(X)
    print("\nLabels :")
    print(Y)

    print("\nShape Of X :")
    print(X.shape)
    print("\nShape Of Y :")
    print(Y.shape)

    # Most specific boundary: one placeholder per attribute
    specificH = ["%" for _ in range(X.shape[1])]
    specificH = np.asarray(specificH, dtype='object')

    # Most general boundary: one all-'?' row per attribute
    generalH = [["?" for _ in range(X.shape[1])] for _ in range(X.shape[1])]
    generalH = np.asarray(generalH, dtype='object')

    print("\nInitial Specific Hypothesis :")
    print(specificH)
    print("\nInitial General Hypothesis :")
    print(generalH)

    # Initialise the specific boundary with the first positive example
    if Y[0] == "P":
        specificH = X[0]
    else:
        for i in range(Y.shape[0]):
            if Y[i] == "P":
                specificH = X[i]
                break

    print("\nCandidate Elimination : ")

    # Process each training example in turn
    for i in range(X.shape[0]):
        # Positive example: generalise the specific boundary
        if Y[i] == "P":
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    specificH[j] = '?'
                if specificH[j] != generalH[j][j] and generalH[j][j] != "?":
                    generalH[j][j] = "?"
        # Negative example: specialise the general boundary
        else:
            for j in range(X.shape[1]):
                if X[i][j] != specificH[j]:
                    generalH[j][j] = specificH[j]

        print("\n---------Step " + str(i + 1) + "---------\n")
        print("\nSpecific Set : ")
        print(specificH)
        print("\nGeneral Set : ")
        print(generalH)
        print("\n------------------------\n")

    print("\nFinal Specific Hypothesis : ")
    print(specificH)
    print("\nFinal General Hypothesis : ")
    print(generalH)
    print("\n")

candidateElimination()
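The code assumes rows of Data2.csv end in a P/N class label (Y[i] == "P" marks a positive example); a hypothetical file in the same EnjoySport layout:

Sunny,Warm,Normal,Strong,Warm,Same,P
Sunny,Warm,High,Strong,Warm,Same,P
Rainy,Cold,High,Strong,Warm,Change,N
Sunny,Warm,High,Strong,Cool,Change,P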

-------------------------------------------------------------------------------------------------------------------------------
Program 3
Write a program to demonstrate the working of the decision tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample.

Code:

import pandas as pd
import numpy as np
import math

class Node:
    def __init__(self, l):
        self.label = l
        self.branches = {}

# Entropy of the data set with respect to the class column "Play Tennis"
def entropy(data):
    total_ex = len(data)
    positive_ex = len(data.loc[data["Play Tennis"] == 'Y'])
    negative_ex = len(data.loc[data["Play Tennis"] == 'N'])
    entropy = 0
    if positive_ex > 0:
        entropy = (-1) * (positive_ex / float(total_ex)) * (math.log(positive_ex, 2) - math.log(total_ex, 2))
    if negative_ex > 0:
        entropy += (-1) * (negative_ex / float(total_ex)) * (math.log(negative_ex, 2) - math.log(total_ex, 2))
    return entropy

# Information gain from splitting on an attribute
def gain(s, data, attrib):
    values = set(data[attrib])
    print(values)
    gain = s
    for val in values:
        gain -= len(data.loc[data[attrib] == val]) / float(len(data)) * entropy(data.loc[data[attrib] == val])
    return gain

# Attribute with the highest information gain
def get_attrib(data):
    entropy_s = entropy(data)
    attribute = ""
    max_gain = 0
    for attr in data.columns[:len(data.columns) - 1]:
        g = gain(entropy_s, data, attr)
        if g > max_gain:
            max_gain = g
            attribute = attr
    return attribute

# Recursively build the ID3 decision tree
def decision_tree(data):
    root = Node("NULL")

    if entropy(data) == 0:          # pure node: make a leaf
        if len(data.loc[data[data.columns[-1]] == 'Y']) == len(data):
            root.label = "Y"
        else:
            root.label = "N"
        return root

    if len(data.columns) == 1:      # no attributes left to split on
        return root

    attrib = get_attrib(data)
    root.label = attrib
    values = set(data[attrib])
    for val in values:
        root.branches[val] = decision_tree(data.loc[data[attrib] == val].drop(attrib, axis=1))
    return root

# Collect the decision rules from the tree
def get_rules(root, rule, rules):
    if not root.branches:
        rules.append(rule[:-2] + " => " + root.label)
        return rules
    for i in root.branches:
        get_rules(root.branches[i], rule + root.label + "=" + i + " ^ ", rules)
    return rules

# Classify a test sample by walking down the tree
def test(tree, test_str):
    if not tree.branches:
        return tree.label
    return test(tree.branches[test_str[tree.label]], test_str)

data = pd.read_csv('Data3.csv')

tree = decision_tree(data)

rules = get_rules(tree, "", [])
print(rules)

test_str = {}
print("Enter test case input")
for i in data.columns[:-1]:
    test_str[i] = input(i + ": ")

print(test_str)
print(test(tree, test_str))
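entropy() looks up a class column literally named "Play Tennis" with Y/N values, so Data3.csv needs a header row; a hypothetical fragment of the classic play-tennis data in that shape:

Outlook,Temperature,Humidity,Wind,Play Tennis
Sunny,Hot,High,Weak,N
Sunny,Hot,High,Strong,N
Overcast,Hot,High,Weak,Y
Rain,Mild,High,Weak,Y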

-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Mam)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.

Code:

#!/usr/bin/env python
# coding: utf-8


import random
from math import exp
from random import seed

# Initialize a network

def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    # each hidden node gets one weight per input plus one for the bias
    hidden_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    i = 1
    print("\n The initialised Neural Network:\n")
    for layer in network:
        j = 1
        for sub in layer:   # each layer is a list of weight dictionaries, one per node
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
            j = j + 1
        i = i + 1
    return network

# Calculate neuron activation (net input) for an input

def activate(weights, inputs):
    activation = weights[-1]              # initialise the induced local field with the bias term
    for i in range(len(weights) - 1):     # weighted summation over all inputs
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation through the sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output

def forward_propagate(network, row):
    inputs = row
    print("inside the forward")
    for layer in network:     # traverse the layers
        new_inputs = []       # outputs of this layer become inputs to the next
        for neuron in layer:  # each neuron holds its own weight vector
            activation = activate(neuron['weights'], inputs)
            print("activation", activation)
            neuron['output'] = transfer(activation)
            print(neuron['output'])
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Calculate the derivative of a neuron's output

def transfer_derivative(output):
    return output * (1.0 - output)

# Backpropagate error and store deltas in the neurons

def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:            # hidden layer
            for j in range(len(layer)):      # each neuron in the current layer
                error = 0.0
                for neuron in network[i + 1]:  # neurons in the downstream layer
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:                                # output layer
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# Update network weights with the stored deltas

def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]   # all columns except the label
        if i != 0:
            # the previous layer's outputs are this layer's inputs
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]  # j-th link weight
            neuron['weights'][-1] += l_rate * neuron['delta']                 # bias weight

# Train a network for a fixed number of epochs

def train_network(network, train, l_rate, n_epoch, n_outputs):
    print("\n Network Training Begins:\n")
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1   # one-hot encode the class label
            sum_error += sum([(expected[i] - outputs[i]) ** 2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
    print("\n Network Training Ends:\n")

# Test the backpropagation training algorithm


seed(2)
dataset = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]

print("\n The input Data Set :\n",dataset)


n_inputs = len(dataset[0]) - 1
print("\n Number of Inputs :\n",n_inputs)
n_outputs = len(set([row[-1] for row in dataset]))
print("\n Number of Outputs :\n",n_outputs)

#Network Initialization
network = initialize_network(n_inputs, 2, n_outputs)

# Training the Network


train_network(network, dataset, 0.5, 20, n_outputs)

print("\n Final Neural Network :")

i = 1
for layer in network:
    j = 1
    for sub in layer:
        print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
        j = j + 1
    i = i + 1


# Prediction
from math import exp

# Calculate neuron activation for an input

def activate(weights, inputs):
    activation = weights[-1]
    # skip the last weight (the bias) when pairing weights with inputs;
    # the last value of the row is the label and is likewise never paired
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation

# Transfer neuron activation
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# Forward propagate input to a network output

def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Make a prediction with a network

def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))   # class with the highest output

# Test making predictions with the network


dataset = [[2.7810836,2.550537003,0],
[1.465489372,2.362125076,0],
[3.396561688,4.400293529,0],
[1.38807019,1.850220317,0],
[3.06407232,3.005305973,0],
[7.627531214,2.759262235,1],
[5.332441248,2.088626775,1],
[6.922596716,1.77106367,1],
[8.675418651,-0.242068655,1],
[7.673756466,3.508563011,1]]
#network = [[{'weights': [-1.482313569067226, 1.8308790073202204, 1.078381922048799]}, {'weights': [0.23244990332399884, 0.3621998343835864, 0.40289821191094327]}],
#           [{'weights': [2.5001872433501404, 0.7887233511355132, -1.1026649757805829]}, {'weights': [-2.429350576245497, 0.8357651039198697, 1.0699217181280656]}]]
for row in dataset:
    prediction = predict(network, row)
    print('Expected=%d, Got=%d' % (row[-1], prediction))
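If a single accuracy figure is wanted in addition to the per-row Expected/Got lines, a minimal sketch using only the names defined above:

# count rows where the predicted class matches the label in the last column
correct = sum(1 for row in dataset if predict(network, row) == row[-1])
print('Accuracy: %.2f%%' % (100.0 * correct / len(dataset)))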

-------------------------------------------------------------------------------------------------------------------------------
Program 4 (Alt)
Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.

Code:

import numpy as np
import csv

filename = 'Data4.csv'
lines = csv.reader(open(filename, "r"))
lines2 = csv.reader(open(filename, "r"))
data = list(lines)
data2 = list(lines2)
for i in range(len(data)):
    data[i] = [float(x) for x in data[i][:-1]]   # feature columns
for i in range(len(data2)):
    data2[i] = [float(data2[i][-1])]             # target column (last value of each row)
X = np.array(data, dtype=float)
y = np.array(data2, dtype=float)
print(X)
print(y)
X = X / np.amax(X, axis=0)   # normalise each feature column by its maximum
print(X)
y = y / 100                  # scale the target into [0, 1]
print(y)

# Sigmoid activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of the sigmoid function
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 1500                 # number of training iterations
lr = 0.1                     # learning rate
inputlayer_neurons = 2       # number of features in the data set
hiddenlayer_neurons = 3      # number of hidden layer neurons
output_neurons = 1           # number of neurons at the output layer

# Weight and bias initialization: uniform random values of dimension x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    print("epoch", i + 1)
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)   # how much the hidden layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
-------------------------------------------------------------------------------------------------------------------------------
Program 5
Write a program to implement the naïve Bayesian classifier for a sample training data
set stored as a .CSV file. Compute the accuracy of the classifier, considering a few
test data sets.

Code:
import csv
import math
import random

# Load the CSV file into a list of float-valued rows
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

# Split the dataset into train and test sets with the given ratio
def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

# Group the rows by their class value (last column)
def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

# Summarize each attribute as (mean, stdev)
def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]   # drop the summary of the class column
    return summaries

# Summarize attributes separately for each class
def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

# Gaussian probability density function
def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# Probability of the input vector under each class
def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

# Predict the class with the highest probability
def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = 'Data5.csv'
    splitRatio = 0.68
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train = {1} and test = {2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # prepare the model
    summaries = summarizeByClass(trainingSet)
    # test the model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))

main()
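loadCsv() converts every field to float, so Data5.csv must be purely numeric with the class label in the last column (the Pima Indians Diabetes data is a common choice here). Two illustrative rows in that layout:

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0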

-------------------------------------------------------------------------------------------------------------------------------

Program 6
Assuming a set of documents that need to be classified, use the naïve Bayesian
Classifier model to perform this task. Calculate the accuracy, precision, and recall for
your data set.

Code:
from sklearn.datasets import fetch_20newsgroups        # load filenames and data from the 20 newsgroups dataset
from sklearn.metrics import confusion_matrix           # used to evaluate classification results
from sklearn.metrics import classification_report      # text report of the main classification metrics
import numpy as np
import os

#categories=['alt.atheism','soc.religion.christian','comp.graphics','sci.med']
#twenty_train=fetch_20newsgroups(subset='train',categories=categories,shuffle=True)
#twenty_test=fetch_20newsgroups(subset='test',categories=categories,shuffle=True)
twenty_train = fetch_20newsgroups(data_home='./scikit_learn_data', subset='train', shuffle=True)
#print(twenty_train)
twenty_test = fetch_20newsgroups(data_home='./scikit_learn_data', subset='test', shuffle=True)
#print(twenty_test)
print("Number of Training Examples: ", len(twenty_train.data))
print("Number of Test Examples: ", len(twenty_test.data))
print(twenty_train.target_names)

from sklearn.feature_extraction.text import CountVectorizer

count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(twenty_train.data)

from sklearn.feature_extraction.text import TfidfTransformer

tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)
print(X_train_tfidf.shape)

from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn import metrics

mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)
predicted = mod.predict(X_test_tfidf)

print("Accuracy: ", accuracy_score(twenty_test.target, predicted))
print(classification_report(twenty_test.target, predicted, target_names=twenty_test.target_names))
print("Confusion matrix \n", metrics.confusion_matrix(twenty_test.target, predicted))

-------------------------------------------------------------------------------------------------------------------------------

Program 7
Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using the standard Heart
Disease Data Set.

Code:

import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# read the attribute names
lines = list(csv.reader(open('Data7_Names.csv', 'r')))
attributes = lines[0]
#attributes = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','heartdisease']

# read the Cleveland heart disease data
heartDisease = pd.read_csv('Data7.csv')
heartDisease = heartDisease.replace("?", np.nan)   # mark missing values

# display the data
print("Few examples from the dataset are given below")
print(heartDisease.head())
print("Attributes and data types")
print(heartDisease.dtypes)

# model the Bayesian network
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
                       ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

# learning CPDs using maximum likelihood estimators
print("Learning CPDs using maximum likelihood estimators...")
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# inferencing with the Bayesian network
print("\nInferencing the bayesian network:")
HeartDisease_infer = VariableElimination(model)

# computing the probability of heartdisease given evidence
print("\n1. Probability of heart disease given age=28")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])

print("\n2. Probability of heart disease given chol(cholesterol)=100")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])
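Going by the commented attribute list above, Data7.csv is expected to carry the standard Cleveland columns with the class in the last position; a header row in that layout would look like:

age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease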

-------------------------------------------------------------------------------------------------------------------------------

Program 8
Apply the EM algorithm to cluster a set of data stored in a .CSV file. Use the same
data set for clustering using the k-Means algorithm. Compare the results of these two
algorithms and comment on the quality of clustering.

Code:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
from sklearn import preprocessing
#from sklearn.mixture import GMM # Used for older versions of sklearn
from sklearn.mixture import GaussianMixture

iris = datasets.load_iris()

X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
X_norm = preprocessing.normalize(X)

y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# K-Means Model
model = KMeans(n_clusters = 3)
model.fit(X_norm)

# EM Model
#gmm = GMM(n_components = 3) # Used for older versions of sklearn
gmm = GaussianMixture(n_components = 3)
gmm.fit(X_norm)
gmm_y = gmm.predict(X_norm)

plt.figure(figsize = (14, 14))


colormap = np.array(['red', 'lime', 'black'])

# Real Clusters
plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[y.Targets], s = 40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# K-Means Output
plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[model.labels_], s = 40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# EM Output
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c = colormap[gmm_y], s = 40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

plt.show()
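To back up the visual comparison with a number, a label-agnostic measure such as the adjusted Rand index can score both clusterings against the true species (a short sketch reusing the variables above):

from sklearn.metrics import adjusted_rand_score
# 1.0 means a perfect match with the true labels, 0.0 means random assignment
print('K-Means ARI :', adjusted_rand_score(iris.target, model.labels_))
print('GMM ARI     :', adjusted_rand_score(iris.target, gmm_y))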

-------------------------------------------------------------------------------------------------------------------------------

Program 9
Write a program to implement the k-Nearest Neighbour algorithm to classify the iris
data set. Print both correct and wrong predictions.

Code:

# Imports
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split   # use this for scikit-learn >= 0.20
#from sklearn.cross_validation import train_test_split # used for older versions of sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Input data
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv("Data_8_9.csv", names=names)
print(dataset.head())

# Preprocessing
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)

scaler = StandardScaler()
scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Model creation
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)

# Prediction
y_pred = classifier.predict(x_test)

for i in range(len(y_pred)):
    print("Test Example : ")
    print(x_test[i])
    print("Actual Label : ")
    print(y_test[i])
    print("Predicted Label : ")
    print(y_pred[i])
    # mark the prediction as correct or wrong, as the program statement asks
    print("Correct" if y_test[i] == y_pred[i] else "Wrong")
    print("--------------------------------------------")

print("Confusion Matrix : ")
print(confusion_matrix(y_test, y_pred))
print("")
print("Classification Report : ")
print(classification_report(y_test, y_pred))

-------------------------------------------------------------------------------------------------------------------------------

Program 10
Implement the non-parametric Locally Weighted Regression algorithm in order to fit
data points. Select an appropriate data set for your experiment and draw graphs.

Code:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Diagonal weight matrix giving higher weight to points near the query point
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))   # eye - identity matrix
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

# Locally weighted regression coefficients for one query point
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

def graphPlot(X, ypred):
    sortindex = X[:, 1].argsort(0)   # argsort - indices that sort by bill amount
    xsort = X[sortindex][:, 0]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(bill, tip, color='green')
    ax.plot(xsort[:, 1], ypred[sortindex], color='red', linewidth=5)
    plt.xlabel('Total bill')
    plt.ylabel('Tip')
    plt.show()

# load the data points
data = pd.read_csv('Data10.csv')
bill = np.array(data.total_bill)   # we use only the bill amount and tip columns
tip = np.array(data.tip)

mbill = np.mat(bill)   # np.mat converts the 1-D arrays into 2-D matrices
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))   # 244 rows, 2 cols: bias column plus bill amount

ypred = localWeightRegression(X, mtip, 0.5)   # increase k to get smoother curves

graphPlot(X, ypred)
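The references to data.total_bill and data.tip imply Data10.csv is the standard restaurant tips data set; a hypothetical first few lines in that layout:

total_bill,tip
16.99,1.01
10.34,1.66
21.01,3.50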
