You are on page 1of 5

LAB - 5 (CB.EN.

U4ECE22115)
Experiment 1: Performance decision tree (DT) and Naive Bayes (NB) in IRIS dataset and note the accuracy using both classifiers 70-30% training-test split.

In [42]: import pandas as pd


from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load IRIS dataset


iris = pd.read_csv(r"C:\Users\Daejuswaram Gopinath\Downloads\Iris_Dataset.csv")
X = iris.iloc[:,0:4]
y = iris.Species

# Split data into training and testing sets (70-30 split)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Decision Tree Classifier


dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
dt_pred = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)

# Naive Bayes Classifier


nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
nb_pred = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_pred)

print("Decision Tree Accuracy:", dt_accuracy)


print("Naive Bayes Accuracy:", nb_accuracy)

Decision Tree Accuracy: 1.0


Naive Bayes Accuracy: 1.0

In [43]: iris.head()

Out[43]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species

0 1 5.1 3.5 1.4 0.2 Iris-setosa

1 2 4.9 3.0 1.4 0.2 Iris-setosa

2 3 4.7 3.2 1.3 0.2 Iris-setosa

3 4 4.6 3.1 1.5 0.2 Iris-setosa

4 5 5.0 3.6 1.4 0.2 Iris-setosa

Experiment 2: Compare the performance of both classifiers (DT and NB) using 10-fold, leave one out, and 10-fold stratified cross-validation.

In [44]: from sklearn.model_selection import cross_val_score, LeaveOneOut, StratifiedKFold

# Cross-validation methods
cv_methods = ['10-fold', 'Leave One Out', 'Stratified 10-fold']
classifiers = [DecisionTreeClassifier(), GaussianNB()]

for clf in classifiers:


for cv_method in cv_methods:
if cv_method == '10-fold':
scores = cross_val_score(clf, X, y, cv=10)
elif cv_method == 'Leave One Out':
loo = LeaveOneOut()
scores = cross_val_score(clf, X, y, cv=loo)
else:
skf = StratifiedKFold(n_splits=10)
scores = cross_val_score(clf, X, y, cv=skf)
print(f"{cv_method} Cross-validation Accuracy for {type(clf).__name__}: {scores.mean()}")

10-fold Cross-validation Accuracy for DecisionTreeClassifier: 0.9666666666666668


Leave One Out Cross-validation Accuracy for DecisionTreeClassifier: 0.9933333333333333
Stratified 10-fold Cross-validation Accuracy for DecisionTreeClassifier: 0.9333333333333332
10-fold Cross-validation Accuracy for GaussianNB: 0.9866666666666667
Leave One Out Cross-validation Accuracy for GaussianNB: 0.9866666666666667
Stratified 10-fold Cross-validation Accuracy for GaussianNB: 0.9866666666666667

In [45]: from sklearn.preprocessing import LabelEncoder, OneHotEncoder


import pandas as pd

# Assuming 'iris' is a DataFrame version of the Iris dataset


# And 'species' is the column with categorical data to be encoded

# Label Encoding
le = LabelEncoder()
iris['species_encoded'] = le.fit_transform(iris['Species'])

# One Hot Encoding


ohe = OneHotEncoder()
species_ohe = ohe.fit_transform(iris[['Species']]).toarray()
species_ohe_df = pd.DataFrame(species_ohe, columns=[f"species_{i}" for i in range(species_ohe.shape[1])])
iris = pd.concat([iris, species_ohe_df], axis=1)

In [46]: label_encoder = LabelEncoder()


y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Support Vector Machine Classifier without Standardization


svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train_encoded)
y_pred_no_standardization = svm_classifier.predict(X_test)
accuracy_no_standardization = accuracy_score(y_test_encoded, y_pred_no_standardization)

# Support Vector Machine Classifier with Standardization


scaler = StandardScaler()
X_train_standardized = scaler.fit_transform(X_train)
X_test_standardized = scaler.transform(X_test)

svm_classifier_standardized = SVC(kernel='linear')
svm_classifier_standardized.fit(X_train_standardized, y_train_encoded)
y_pred_standardization = svm_classifier_standardized.predict(X_test_standardized)
accuracy_standardization = accuracy_score(y_test_encoded, y_pred_standardization)

# Support Vector Machine Classifier without Normalization


svm_classifier_no_normalization = SVC(kernel='linear')
svm_classifier_no_normalization.fit(X_train, y_train_encoded)
y_pred_no_normalization = svm_classifier_no_normalization.predict(X_test)
accuracy_no_normalization = accuracy_score(y_test_encoded, y_pred_no_normalization)

# Support Vector Machine Classifier with Normalization


normalizer = MinMaxScaler()
X_train_normalized = normalizer.fit_transform(X_train)
X_test_normalized = normalizer.transform(X_test)
svm_classifier_normalized = SVC(kernel='linear')
svm_classifier_normalized.fit(X_train_normalized, y_train_encoded)
y_pred_normalization = svm_classifier_normalized.predict(X_test_normalized)
accuracy_normalization = accuracy_score(y_test_encoded, y_pred_normalization)

# Print accuracies
print("Accuracy without Standardization:", accuracy_no_standardization)
print("Accuracy with Standardization:", accuracy_standardization)
print("Accuracy without Normalization:", accuracy_no_normalization)
print("Accuracy with Normalization:", accuracy_normalization)

Accuracy without Standardization: 1.0


Accuracy with Standardization: 1.0
Accuracy without Normalization: 1.0
Accuracy with Normalization: 1.0

In [47]: iris.head()

Out[47]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species species_encoded species_0 species_1 species_2

0 1 5.1 3.5 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0

1 2 4.9 3.0 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0

2 3 4.7 3.2 1.3 0.2 Iris-setosa 0 1.0 0.0 0.0

3 4 4.6 3.1 1.5 0.2 Iris-setosa 0 1.0 0.0 0.0

4 5 5.0 3.6 1.4 0.2 Iris-setosa 0 1.0 0.0 0.0

Experiment 5: Perform colour classification for the above dataset using SVM, k-NN, DT, NB classifier.

Type - 1 (Considering specific colors)

In [39]: import numpy as np


import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Define the directory containing the image data


DATADIR = r'C:\Users\Daejuswaram Gopinath\Downloads\ColorClassification'

# Define the categories or labels


CATEGORIES = ['Black', 'Blue', 'Brown', 'Green','Violet', 'White']

# Function to load and preprocess images


def load_images_and_labels():
images = []
labels = []
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img))
img_array = cv2.resize(img_array, (100, 100)) # Resize images to a fixed size
images.append(img_array.flatten())
labels.append(class_num)
return images, labels
# Load and preprocess images
images, labels = load_images_and_labels()

# Convert to numpy arrays


X = np.array(images)
y = np.array(labels)

# Split the dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize classifiers
svm_classifier = SVC(kernel='linear')
knn_classifier = KNeighborsClassifier(n_neighbors=5)
dt_classifier = DecisionTreeClassifier()
nb_classifier = GaussianNB()

# Train classifiers
svm_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
dt_classifier.fit(X_train, y_train)
nb_classifier.fit(X_train, y_train)

# Predictions
y_pred_svm = svm_classifier.predict(X_test)
y_pred_knn = knn_classifier.predict(X_test)
y_pred_dt = dt_classifier.predict(X_test)
y_pred_nb = nb_classifier.predict(X_test)

# Calculate accuracies
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

# Print accuracies
print("SVM Accuracy:", accuracy_svm)
print("k-NN Accuracy:", accuracy_knn)
print("Decision Tree Accuracy:", accuracy_dt)
print("Naive Bayes Accuracy:", accuracy_nb)

SVM Accuracy: 0.8076923076923077


k-NN Accuracy: 0.5384615384615384
Decision Tree Accuracy: 0.5384615384615384
Naive Bayes Accuracy: 0.8076923076923077

Type - 2 (Considering all colors)

In [40]: import numpy as np


import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Define the directory containing the image data


DATADIR = r'C:\Users\Daejuswaram Gopinath\Downloads\ColorClassification'

# Define the categories or labels


CATEGORIES = ['orange','Violet','red','Blue','Green','Black','Brown','White']
# Function to load and preprocess images
def load_images_and_labels():
images = []
labels = []
for category in CATEGORIES:
path = os.path.join(DATADIR, category)
class_num = CATEGORIES.index(category)
for img in os.listdir(path):
img_array = cv2.imread(os.path.join(path,img))
img_array = cv2.resize(img_array, (100, 100)) # Resize images to a fixed size
images.append(img_array.flatten())
labels.append(class_num)
return images, labels

# Load and preprocess images


images, labels = load_images_and_labels()

# Convert to numpy arrays


X = np.array(images)
y = np.array(labels)

# Split the dataset into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Initialize classifiers
svm_classifier = SVC(kernel='linear')
knn_classifier = KNeighborsClassifier(n_neighbors=5)
dt_classifier = DecisionTreeClassifier()
nb_classifier = GaussianNB()

# Train classifiers
svm_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
dt_classifier.fit(X_train, y_train)
nb_classifier.fit(X_train, y_train)

# Predictions
y_pred_svm = svm_classifier.predict(X_test)
y_pred_knn = knn_classifier.predict(X_test)
y_pred_dt = dt_classifier.predict(X_test)
y_pred_nb = nb_classifier.predict(X_test)

# Calculate accuracies
accuracy_svm = accuracy_score(y_test, y_pred_svm)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
accuracy_nb = accuracy_score(y_test, y_pred_nb)

# Print accuracies
print("SVM Accuracy:", accuracy_svm)
print("k-NN Accuracy:", accuracy_knn)
print("Decision Tree Accuracy:", accuracy_dt)
print("Naive Bayes Accuracy:", accuracy_nb)

SVM Accuracy: 0.6363636363636364


k-NN Accuracy: 0.5454545454545454
Decision Tree Accuracy: 0.6666666666666666
Naive Bayes Accuracy: 0.5757575757575758

In [ ]:

You might also like