You are on page 1 of 10

COMSATS University Islamabad, Lahore Campus

Fall 2022 – Assignment No. 2


Course Title: Machine Learning Course Code: CSC 354
Course Instructor/s: Dr. Allah Bux Sargano Program Name: BSCS
Topic Dataset Classifiers Max. marks: 30
Out Date: 1-11-2022 Due Date: 10-11-2022
Student’s Name: Muhammad Asad Reg. No. SP20-BCS-158
Important Instructions: 
1. Zero tolerance for plagiarism: Plagiarism from any sources, including internet sources and
your fellow students (except allowed sources by the instructor), will result in ZERO marks. 
2. Submission requirements: 
1. Start page(s): this sheet
2. Code
3.  Screenshots of results (Input & output)
3. Late submission policy: deduction of 20% of total marks per day
(CLO: 2)
Question No. 1

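| # | Machine Learning Algorithm | Accuracy (%age) | Training Time | Test Time | Dataset |
|---|----------------------------|-----------------|---------------|-----------|---------|
| 1 | Naïve Bayes                | 96.67           | 0.1           | 0.1       | IRIS    |
| 2 | ID3                        | 100             | 0.3           | 0.5       | IRIS    |
| 4 | Random Forest              | 100             | 0.2           | 0.4       | IRIS    |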

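| # | Machine Learning Algorithm | Accuracy (%age) | Training Time | Test Time | Dataset |
|---|----------------------------|-----------------|---------------|-----------|---------|
| 1 | Naïve Bayes                | 76.223          | 0.5           | 0.8       | Titanic |
| 2 | ID3                        | 74.82           | 0.7           | 0.9       | Titanic |
| 4 | Random Forest              | 70.94           | 0.6           | 1.1       | Titanic |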


# Naive Bayes Iris
# Trains a Gaussian Naive Bayes classifier on IRIS.csv using an 80/20
# train/test split, then prints the test accuracy and confusion matrix.

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

dataset = pd.read_csv('IRIS.csv')

# Encode the class names BEFORE slicing out the target. Previously the
# replacement ran after Y had already been copied out of the frame, so the
# encoded labels never reached the classifier.
dataset.replace(
    to_replace=['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'],
    value=['1', '2', '3'],
    inplace=True,
)
dataset.head()

# First four columns are the features, fifth column is the class label.
X = dataset.iloc[:, [0, 1, 2, 3]]
# Select the label as a 1-D Series; a one-column DataFrame makes
# scikit-learn emit a DataConversionWarning in fit().
Y = dataset.iloc[:, 4]

xtrain, xtest, ytrain, ytest = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Applying the naive Bayes model for classification.
classifier = GaussianNB()
classifier.fit(xtrain, ytrain)
ypred = classifier.predict(xtest)

print('accuracy is =', accuracy_score(ytest, ypred)*100)

print(confusion_matrix(ytest, ypred))
# Iris Id3
# Trains a decision tree on IRIS.csv using an 80/20 train/test split, plots
# the fitted tree, then prints the test accuracy and confusion matrix.
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix  # used below; was missing from this section
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

df = pd.read_csv('./IRIS.csv')

# First four columns are the features; the label is the 'species' column.
X = df.iloc[:, [0, 1, 2, 3]].values
y = df['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

# ID3 chooses splits by information gain, so request the entropy criterion
# instead of scikit-learn's default Gini impurity. scikit-learn's trees are
# CART-based, so this approximates ID3 rather than reproducing it exactly.
# random_state makes tie-breaking between equally good splits repeatable.
clf = DecisionTreeClassifier(criterion='entropy', random_state=0)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

plt.figure(figsize=(10, 10))
tree.plot_tree(clf)

# Accuracy of the model
ac = accuracy_score(y_test, y_pred)

print("Accuracy: ", ac*100)

print(confusion_matrix(y_test, y_pred))

# Iris random forest
# Trains a 10-tree random forest on IRIS.csv using an 80/20 train/test
# split, then prints the test accuracy and confusion matrix.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix  # used below; was missing from this section

df = pd.read_csv('./IRIS.csv')

# First four columns are the features; the label is the 'species' column.
X = df.iloc[:, [0, 1, 2, 3]].values
y = df['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

# n_estimators is the number of trees; random_state makes the run repeatable.
rand_forest = RandomForestClassifier(n_estimators=10, random_state=0)

# Fit and score the forest itself. The original called clf.fit / clf.predict,
# which re-used the classifier left over from the previous section and never
# trained the random forest at all.
rand_forest.fit(X_train, y_train)

y_pred = rand_forest.predict(X_test)

ac = accuracy_score(y_test, y_pred)
print("Accuracy: ", ac*100)
print(confusion_matrix(y_test, y_pred))

# TiTanic NBayes
# Gaussian Naive Bayes on titanic.csv: rows with a missing Age are dropped,
# the remainder is shuffled, and the model is scored on a 20% hold-out.
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

df = pd.read_csv('./titanic.csv')

# Keep only passengers with a known Age, shuffle with a fixed seed, and
# renumber the rows (the old index comes back as an 'index' column, which
# is discarded straight away).
has_age = df['Age'].notna()
dataset = (
    df[has_age]
    .sample(frac=1, random_state=5)
    .reset_index()
    .drop(columns='index')
)

# Feature columns by position; presumably Pclass, Age, SibSp, Parch and Fare
# in the usual Kaggle column order -- confirm against the CSV header.
feature_positions = [2, 5, 6, 7, 9]
X = dataset.iloc[:, feature_positions].to_numpy()
y = dataset['Survived']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

ac = accuracy_score(y_test, y_pred)
print('\nAccuracy = ', ac*100, '%')

print(confusion_matrix(y_test, y_pred))

# Titanic id3
# Trains a decision tree on titanic.csv (rows with a missing Age removed)
# using an 80/20 train/test split, plots the fitted tree, then prints the
# test accuracy and confusion matrix.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix  # used below; was missing from this section

# Importing the dataset
df = pd.read_csv('./titanic.csv')
dataset = df[df['Age'].notna()]  # Discarding the NaN values (from the Age column)
# Shuffle with a fixed seed and renumber the rows.
dataset = dataset.sample(frac = 1, random_state = 18).reset_index()
dataset = dataset.drop('index', axis = 1)

# Feature columns by position; presumably Pclass, Age, SibSp, Parch and Fare
# in the usual Kaggle column order -- confirm against the CSV header.
X = dataset.iloc[:, [2, 5, 6, 7, 9]].values  # Loading the attributes in X

y = dataset['Survived']  # Loading the Survived labels in y

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 2)

# ID3 chooses splits by information gain, so request the entropy criterion
# instead of scikit-learn's default Gini impurity. scikit-learn's trees are
# CART-based, so this approximates ID3 rather than reproducing it exactly.
# random_state makes tie-breaking between equally good splits repeatable.
cls = DecisionTreeClassifier(criterion='entropy', random_state=0)

cls.fit(X_train, y_train)

y_pred = cls.predict(X_test)

plt.figure(figsize=(15, 15))
tree.plot_tree(cls)

ac = accuracy_score(y_test, y_pred)
print("\nAccuracy: ", ac*100)

print(confusion_matrix(y_test, y_pred))
# Titanic Random Forest
# Trains a 10-tree random forest on titanic.csv using an 80/20 train/test
# split, then prints the test accuracy and confusion matrix.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix  # used below; was missing from this section

df = pd.read_csv('./titanic.csv')

# Feature columns by position; presumably Pclass, SibSp and Parch in the
# usual Kaggle column order -- confirm against the CSV header. Unlike the
# other Titanic sections, rows with a missing Age are not dropped here.
X = df.iloc[:, [2, 6, 7]].values  # Loading the attributes in X

y = df['Survived']  # Loading the classes in y

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

# n_estimators is the number of trees; random_state makes the run repeatable.
rand_forest = RandomForestClassifier(n_estimators=10, random_state=0)

# Fit and score the forest itself. The original called clf.fit / clf.predict,
# which re-used a classifier left over from an earlier section and never
# trained the random forest at all.
rand_forest.fit(X_train, y_train)

y_pred = rand_forest.predict(X_test)

ac = accuracy_score(y_test, y_pred)
print("Accuracy: ", ac*100)

print(confusion_matrix(y_test, y_pred))

You might also like