You are on page 1 of 6

Maulana Azad National Institute of

Technology,
Bhopal
(Computer Science and Engineering)

Machine Learning lab


Submitted by:

Harsh Gajbhiye

Scholar number : 201112233

Section : CSE -2

Year : 3rd

Semester – 5th

Assignment no. – 7

Q) Find the optimal accuracy of an SVM classifier with the 'RBF' kernel function on the PIMA
dataset. Find the accuracy for each of the 5 given folds, find the average accuracy, and find
the best C (regularization parameter) and sigma (kernel width) for the fold with the best
accuracy.

Code

"""LIBSVM"""

!pip install libsvm-official

import sklearn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from libsvm.svmutil import *
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
# Load the five pre-split PIMA folds. Each fold ships as a pair of CSV
# files: a training split ("tra") and a test split ("tst").
p1train, p1test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-1tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-1tst.csv"),
)
p2train, p2test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-2tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-2tst.csv"),
)
p3train, p3test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-3tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-3tst.csv"),
)
p4train, p4test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-4tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-4tst.csv"),
)
p5train, p5test = (
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-5tra.csv"),
    pd.read_csv("/content/drive/MyDrive/pima-5fold/pima-5-5tst.csv"),
)

"""Regularization Parameter varies from 2^-18 , 2^-16 , .... 2^15


Kernel width (sigma) varies over 2^-18, 2^-16, ..., 2^0, then 2^1, 2^2, ..., 2^20
"""

# Candidate hyperparameter values for the grid search.
# Both grids share the coarse exponents -18, -16, ..., 0 (step 2) and then
# continue with every integer exponent from 1 upwards.
# pow(2, e) yields a float for negative e and an int otherwise.
_coarse_exponents = range(-18, 1, 2)

# Regularization parameter C: 2^-18 ... 2^0 (step 2), then 2^1 ... 2^15.
regular = [pow(2, e) for e in _coarse_exponents]
regular += [pow(2, e) for e in range(1, 16)]

# Kernel-width values: 2^-18 ... 2^0 (step 2), then 2^1 ... 2^20.
kwidth = [pow(2, e) for e in _coarse_exponents]
kwidth += [pow(2, e) for e in range(1, 21)]

# One [best_accuracy, best_C, best_kernel_width] entry is appended per fold.
results = []

def make_model(X_train,X_test,y_train,y_test):
    """Grid-search C and the kernel parameter for one fold with LIBSVM.

    Trains one SVM (``-t 2`` kernel option) for every combination of a value
    from the module-level ``regular`` list (passed as ``-c``) and a value
    from the module-level ``kwidth`` list (passed as ``-g``), scores each
    model on the test split, and appends the best
    ``[accuracy, C, kernel_value]`` triple to the module-level ``results``
    list.

    Args:
        X_train: training feature rows, in a form accepted by ``svm_train``.
        X_test: test feature rows, in a form accepted by ``svm_predict``.
        y_train: training labels.
        y_test: test labels.

    Returns:
        None. The outcome is recorded by appending to ``results``.

    NOTE(review): the kernel value is handed to LIBSVM as ``-g`` (gamma),
    while the report labels it "sigma" -- confirm which is intended.
    NOTE(review): the best combination is chosen by accuracy on the test
    split itself, so the recorded accuracy is an optimistic estimate.
    """
    # Best accuracy seen so far and the parameters that produced it.
    a, rp, kw = 0, 0, 0
    for x in regular:
        for y in kwidth:
            model = svm_train(y_train, X_train, f'-c {x} -t 2 -g {y}')
            # Only the accuracy tuple is needed; its first entry is compared.
            _, p_acc, _ = svm_predict(y_test, X_test, model)
            # Strict comparison: on ties the earliest combination is kept.
            if a < p_acc[0]:
                a = p_acc[0]
                rp = x
                kw = y
    # Record exactly one entry per call (i.e. per fold).
    results.append([a, rp, kw])
def Preprocess(ptrain, ptest):
    """Encode labels and standardize features for one train/test fold.

    The ``target`` column is label-encoded and every other column is
    standardized. Both the label encoder and the scaler are fitted on the
    training split only and then applied to the test split, so both splits
    share one label mapping and one scaling.

    Args:
        ptrain: training DataFrame containing a ``target`` column.
        ptest: test DataFrame containing a ``target`` column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``: the scaled feature
        matrices as returned by ``StandardScaler`` and the encoded
        ``target`` columns of the two splits.

    Raises:
        ValueError: if the test split contains a label that does not occur
            in the training split (raised by ``LabelEncoder.transform``).
    """
    from sklearn.preprocessing import LabelEncoder, StandardScaler

    # Work on copies so the caller's DataFrames are left untouched.
    ptrain = ptrain.copy()
    ptest = ptest.copy()

    le = LabelEncoder()
    ptrain['target'] = le.fit_transform(ptrain['target'])
    # Reuse the mapping learned from the training labels; re-fitting on the
    # test labels could assign different integers to the same class.
    ptest['target'] = le.transform(ptest['target'])

    X_train = ptrain.drop('target', axis=1)
    X_test = ptest.drop('target', axis=1)
    y_train = ptrain['target']
    y_test = ptest['target']

    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    return X_train, X_test, y_train, y_test

def fiveFold(ptrain,ptest):
    """Run preprocessing and the hyperparameter search for one fold.

    Passes the two splits through ``Preprocess``, converts the returned
    features and labels to plain Python lists, and hands them to
    ``make_model``.

    Args:
        ptrain: training DataFrame of the fold.
        ptest: test DataFrame of the fold.

    Returns:
        None.
    """
    X_train, X_test, y_train, y_test = Preprocess(ptrain, ptest)
    # ndarray.tolist() on the 2-D feature arrays yields one list per row,
    # the same structure the previous row-by-row append loops built.
    make_model(
        X_train.tolist(),
        X_test.tolist(),
        y_train.values.tolist(),
        y_test.values.tolist(),
    )

"""We are using Already Made 5 cross-fold Pima Database """

# Evaluate the folds in order 1..5; each call adds one entry to `results`.
for fold_train, fold_test in (
    (p1train, p1test),
    (p2train, p2test),
    (p3train, p3test),
    (p4train, p4test),
    (p5train, p5test),
):
    fiveFold(fold_train, fold_test)

# Print the best accuracy and parameters recorded for each fold, then the
# mean of the per-fold best accuracies.
total_acc = 0
for i, r in enumerate(results, start=1):
    print(f"The Accuracy for Pima DataSet {i} is : {round(r[0],2)} %")
    print(f"The Regularization Parameter for Pima DataSet {i} is : {r[1]} ")
    print(f"The Kernal Width(Sigma) for Pima DataSet {i} is : {round(r[2],8)} ")
    print()
    # Accumulate the unrounded accuracy so rounding happens only once, on
    # the final average.
    total_acc += r[0]

# Divide by the number of folds actually evaluated instead of a fixed 5;
# an empty result list reports 0.0 rather than dividing by zero.
avg_acc = total_acc / len(results) if results else 0.0
print(f"The Average Accuracy Comes Out to be {round(avg_acc,2)} %")

Accuracy for all 5 folds of the PIMA dataset:


The Average Accuracy Comes out to be 78.77% on all 5 datasets.

The Best Accuracy Comes out to be 81.17% on dataset 1 and dataset 3.

The Best C comes out to be 2048 or 1.

The Best Sigma Comes out to be 0.00390625 or 0.015625.

You might also like