
PATEL SUCHI 21BEIT30101

PRACTICAL-3

AIM- Implement Logistic Regression in R or Python.


Code:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load dataset
df_sal = pd.read_csv('/content/drive/MyDrive/Dataset/this1.csv')
df_sal.head()
df_sal.describe()

# Drop 'Phone' column
df_sal.drop(columns=['Phone'], inplace=True)
df_sal.head()
df_sal.describe()

# Convert target variable 'Price ($)' to binary (e.g., high price = 1, low price = 0)
df_sal['Price_Class'] = np.where(df_sal['Price ($)'] > df_sal['Price ($)'].median(), 1, 0)

# Define features (X) and target variable (y)
X = df_sal[['Annual Income']]
y = df_sal['Price_Class']

# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100)


# Initialize and train the logistic regression model
log_reg = LogisticRegression()
log_reg.fit(x_train, y_train)

# Make predictions
y_pred_log_reg = log_reg.predict(x_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred_log_reg)
print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_log_reg))
print("\nClassification Report:\n", classification_report(y_test, y_pred_log_reg))

# Visualization (if applicable)
plt.scatter(x_test, y_test)
plt.plot(x_test, y_pred_log_reg, color='red')
plt.title("Annual Income vs Price Classification")
plt.xlabel("Annual Income")
plt.ylabel("Price Class")
plt.show()


Output:


PRACTICAL-4

AIM- Implement SVM classifier in R or Python.

Code:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

def plot_svc_decision_function(model, ax=None, plot_support=True):
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)
    # Plot decision boundary and margins
    ax.contour(X, Y, P, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
    # Highlight the support vectors
    if plot_support:
        ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=1.2)


fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

for axi, C in zip(ax, [90.0, 0.01]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='winter')
    plot_svc_decision_function(model, axi)
    axi.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none')
    axi.set_title('C = {0:.1f}'.format(C), size=14)

plt.show()

Output:


PRACTICAL-5

AIM- Implement KNN classifier in R or Python.

Code:

import numpy as np

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

iris = load_iris()
X, y = iris.data[:, :2], iris.target  # Only use the first two features for visualization
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

clf = KNeighborsClassifier(n_neighbors=5)

clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

print(predictions)
accuracy = accuracy_score(y_test, predictions)

print("Accuracy:", accuracy)

x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1

y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1


xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.4)

plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')

plt.xlabel('Sepal length')

plt.ylabel('Sepal width')

plt.title('KNN Classifier Decision Boundary')

plt.show()

Output:


PRACTICAL-6

AIM- Study and Implement K-Fold cross-validation and ROC.

K-Fold Cross-Validation:

K-Fold cross-validation is a technique used to evaluate the performance of a machine learning model. It involves splitting the dataset into k subsets (folds), using k-1 folds for training the model and the remaining fold for testing. This process is repeated k times, with each fold used once as the test set.
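
As a minimal, self-contained sketch of the idea (independent of the weather dataset used in the code below; the Iris data and logistic regression model here are only illustrative), scikit-learn's cross_val_score runs the full k-fold loop in one call:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score

# Illustrative data and model: 5 folds, each fold used once as the test set
X, y = load_iris(return_X_y=True)
cv = KFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=cv)
print("Per-fold accuracy:", scores)
print("Mean accuracy:", scores.mean())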

ROC:

ROC (Receiver Operating Characteristic) curve is a graphical plot that illustrates the diagnostic
ability of a binary classification model. It shows the trade-off between the true positive rate
(Sensitivity) and false positive rate (1 - Specificity) for different threshold values.
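
A small illustrative sketch of how roc_curve sweeps thresholds over predicted scores (the labels and scores below are made up for demonstration):

from sklearn.metrics import roc_curve, auc

# Hypothetical true labels and predicted probabilities for the positive class
y_true = [0, 0, 1, 1, 0, 1, 1, 0]
y_scores = [0.1, 0.4, 0.35, 0.8, 0.2, 0.7, 0.6, 0.5]

# One (FPR, TPR) point is produced per threshold on the scores
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
print("Thresholds:", thresholds)
print("FPR:", fpr)
print("TPR:", tpr)
print("AUC:", auc(fpr, tpr))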

Code:

import matplotlib.pyplot as plt

# Note: scipy.interp is deprecated; np.interp is used in the loop below instead

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import roc_curve, auc

from sklearn.model_selection import StratifiedKFold

import numpy as np

import pandas as pd


data = pd.read_csv('/content/drive/MyDrive/Dataset/weatherAUS.csv')

data.head()

data.describe()

print(data.columns)

label_value_count = data['RainTomorrow'].value_counts()

print(label_value_count)

print(data.info())

data.dropna(subset=['RainTomorrow'], inplace=True)

data.replace({'RainTomorrow': {'Yes': 1, 'No': 0}}, inplace=True)

X = data.loc[:, data.columns != 'RainTomorrow']

y = data.loc[:, 'RainTomorrow']

random_state = np.random.RandomState(0)

clf = RandomForestClassifier(random_state=random_state)

cv = StratifiedKFold(n_splits=5, shuffle=False)

fig, ax = plt.subplots(figsize=[12, 12])

tprs = []

aucs = []

mean_fpr = np.linspace(0, 1, 100)

i = 1

# Add the names of columns to be dropped
columns_to_drop = ['Date', 'Location', 'Evaporation', 'Sunshine', 'WindGustDir',
                   'WindDir9am', 'RainToday', 'Cloud3pm']


X = X.drop(columns_to_drop, axis=1)

from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

for col in X.columns:
    if X[col].dtype == 'object':  # Check if the column is categorical
        X[col] = label_encoder.fit_transform(X[col])

from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy='mean')  # Use mean imputation

X_imputed = imputer.fit_transform(X)

for train, test in cv.split(X_imputed, y):
    print("Train indices:", train)
    print("Test indices:", test)
    X_train, X_test = X_imputed[train], X_imputed[test]
    y_train, y_test = y.iloc[train], y.iloc[test]
    prediction = clf.fit(X_train, y_train).predict_proba(X_test)
    print("y_test indices:", y_test.index)
    fpr, tpr, _ = roc_curve(y_test, prediction[:, 1])
    tprs.append(np.interp(mean_fpr, fpr, tpr))  # np.interp replaces the deprecated scipy.interp


    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=2, alpha=0.3, label=f'ROC fold {i} (AUC = {roc_auc:.2f})')
    i += 1

plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='pink')

mean_tpr = np.mean(tprs, axis=0)

mean_auc = auc(mean_fpr, mean_tpr)

plt.plot(mean_fpr, mean_tpr, color='blue', label=f'Mean ROC (AUC = {mean_auc:.2f})', lw=2, alpha=1)

plt.xlabel('False Positive Rate')

plt.ylabel('True Positive Rate')

plt.title('Receiver Operating Characteristic (ROC)')

plt.legend(loc='lower right')

plt.text(0.32, 0.7, 'More accurate area', fontsize=12)
plt.text(0.4, 0.3, 'Less accurate area', fontsize=12)

plt.show()


Output:


PRACTICAL-7

AIM- Implement BPNN Classifier in R or Python.

Code:

import matplotlib.pyplot as plt

from sklearn.neural_network import MLPClassifier

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
import numpy as np

X, y = make_classification(n_samples=1000, n_features=20, random_state=123)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, alpha=0.0001,
                    solver='adam', verbose=10, random_state=42, tol=0.0001)

train_acc = []

test_acc = []

for epoch in range(1, clf.max_iter + 1):
    clf.partial_fit(X_train, y_train, classes=np.unique(y))
    train_acc.append(clf.score(X_train, y_train))
    test_acc.append(clf.score(X_test, y_test))

plt.figure(figsize=(10, 6))
plt.plot(range(1, clf.max_iter + 1), train_acc, label='Training Accuracy')
plt.plot(range(1, clf.max_iter + 1), test_acc, label='Testing Accuracy')

plt.xlabel('Epochs')


plt.ylabel('Accuracy')

plt.title('BPNN Classifier Training and Testing Accuracy')

plt.legend()

plt.grid(True)

plt.show()

Output:


PRACTICAL-8

AIM- Study and Implement various ensemble methods of classification: Bagging, Boosting, Stacking.

Bagging (Bootstrap Aggregating):

Bagging involves training multiple base models (usually decision trees) independently on
different random subsets of the training data (with replacement) and then averaging their
predictions to reduce variance.
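
The bootstrap-and-average idea can be sketched by hand (illustrative only; the main code below uses scikit-learn's BaggingClassifier, and the data here is synthetic):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
rng = np.random.RandomState(0)

# Train several trees on bootstrap samples drawn with replacement
trees = []
for _ in range(10):
    idx = rng.randint(0, len(X), size=len(X))
    trees.append(DecisionTreeClassifier(random_state=0).fit(X[idx], y[idx]))

# Aggregate by majority vote over the trees' predictions
votes = np.mean([t.predict(X) for t in trees], axis=0)
bagged_pred = (votes >= 0.5).astype(int)
print("Training accuracy of the bagged ensemble:", (bagged_pred == y).mean())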

Boosting:

Boosting trains multiple weak learners sequentially, where each learner focuses on correcting the
mistakes of its predecessors by giving more weight to misclassified instances. Common
algorithms include AdaBoost and Gradient Boosting.
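
The re-weighting step can be sketched with the standard discrete AdaBoost update (illustrative only; the labels and predictions below are hypothetical, and the main code uses scikit-learn's AdaBoostClassifier):

import numpy as np

# Hypothetical +1/-1 labels and one weak learner's predictions (one mistake at index 1)
y_true = np.array([1, 1, -1, -1, 1])
y_pred = np.array([1, -1, -1, -1, 1])

w = np.full(len(y_true), 1 / len(y_true))   # start with uniform sample weights
err = np.sum(w[y_pred != y_true])           # weighted error of the weak learner
alpha = 0.5 * np.log((1 - err) / err)       # weight of this learner in the ensemble
w = w * np.exp(-alpha * y_true * y_pred)    # misclassified samples get larger weights
w = w / w.sum()                             # renormalize
print("Updated sample weights:", w)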

Stacking:

Stacking combines multiple base classifiers with a meta-classifier (usually a linear model) that
learns to combine the predictions of the base classifiers.

Code:

import matplotlib.pyplot as plt

import numpy as np

from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier

from sklearn.tree import DecisionTreeClassifier

from sklearn.linear_model import LogisticRegression

from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score



X, y = make_classification(n_samples=1000, n_features=20, random_state=123)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

base_clf1 = DecisionTreeClassifier(random_state=42)

base_clf2 = DecisionTreeClassifier(max_depth=1, random_state=42)

base_clf3 = LogisticRegression(random_state=42)

# Note: in scikit-learn >= 1.2 the 'base_estimator' argument is named 'estimator'
bagging_clf = BaggingClassifier(base_estimator=base_clf1, n_estimators=10, random_state=42)

adaboost_clf = AdaBoostClassifier(base_estimator=base_clf2, n_estimators=50, random_state=42)

stacking_clf = StackingClassifier(estimators=[('dt', base_clf1), ('ab', adaboost_clf), ('lr', base_clf3)],
                                  final_estimator=LogisticRegression(), cv=5)

bagging_train_acc = []

bagging_test_acc = []

adaboost_train_acc = []

adaboost_test_acc = []

stacking_train_acc = []

stacking_test_acc = []

for epoch in range(1, 100):  # Adjust the number of epochs as needed
    bagging_clf.fit(X_train, y_train)
    bagging_train_acc.append(bagging_clf.score(X_train, y_train))
    bagging_test_acc.append(bagging_clf.score(X_test, y_test))

    adaboost_clf.fit(X_train, y_train)
    adaboost_train_acc.append(adaboost_clf.score(X_train, y_train))
    adaboost_test_acc.append(adaboost_clf.score(X_test, y_test))

    stacking_clf.fit(X_train, y_train)
    stacking_train_acc.append(stacking_clf.score(X_train, y_train))
    stacking_test_acc.append(stacking_clf.score(X_test, y_test))

epochs = np.arange(1, 100)

plt.figure(figsize=(10, 6))

plt.plot(epochs, bagging_train_acc, label='Bagging Train Accuracy')

plt.plot(epochs, bagging_test_acc, label='Bagging Test Accuracy')

plt.plot(epochs, adaboost_train_acc, label='AdaBoost Train Accuracy')

plt.plot(epochs, adaboost_test_acc, label='AdaBoost Test Accuracy')

plt.plot(epochs, stacking_train_acc, label='Stacking Train Accuracy')

plt.plot(epochs, stacking_test_acc, label='Stacking Test Accuracy')

plt.xlabel('Epochs')

plt.ylabel('Accuracy')

plt.title('Classifier Training and Testing Accuracy over Epochs')

plt.legend()

plt.grid(True)

plt.show()


Output:


PRACTICAL-9

AIM- Implement various clustering algorithms in R or Python.

K-Means Clustering:

Code:

import numpy as np

import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs

from sklearn.cluster import KMeans

X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)

kmeans = KMeans(n_clusters=4)

kmeans.fit(X)

y_kmeans = kmeans.predict(X)

plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='viridis')

centers = kmeans.cluster_centers_

plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75)

plt.title('K-Means Clustering')

plt.xlabel('Feature 1')

plt.ylabel('Feature 2')

plt.show()


Output:

Hierarchical clustering (Agglomerative):

Code:

from sklearn.cluster import AgglomerativeClustering

agg_clustering = AgglomerativeClustering(n_clusters=4)

agg_clusters = agg_clustering.fit_predict(X)

plt.scatter(X[:, 0], X[:, 1], c=agg_clusters, s=50, cmap='viridis')

plt.title('Hierarchical Clustering (Agglomerative)')

plt.xlabel('Feature 1')

plt.ylabel('Feature 2')

plt.show()

Output:


PRACTICAL-10

AIM- Study and Implement various dimensionality reduction techniques like PCA and LDA.

Principal Component Analysis (PCA):

PCA is a dimensionality reduction technique that is commonly used for data preprocessing and
feature extraction. It works by transforming the original features into a new set of uncorrelated
variables called principal components. The main goal of PCA is to reduce the dimensionality of
the dataset while retaining as much variance as possible.

Steps in PCA:

● Standardize the data to have zero mean and unit variance.
● Compute the covariance matrix of the standardized data.
● Calculate the eigenvectors and eigenvalues of the covariance matrix.
● Sort the eigenvectors by decreasing eigenvalues and choose the top k eigenvectors to form the new feature space.
● Project the original data onto the new feature space defined by the selected eigenvectors (a NumPy sketch of these steps follows the list).
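
The listed steps can be sketched directly with NumPy (an illustrative, hand-rolled version on the Iris data; the practical itself uses scikit-learn's PCA):

import numpy as np
from sklearn.datasets import load_iris

X = load_iris().data

# 1. Standardize to zero mean and unit variance
X_std = (X - X.mean(axis=0)) / X.std(axis=0)

# 2. Covariance matrix of the standardized data
cov = np.cov(X_std, rowvar=False)

# 3. Eigenvectors and eigenvalues (eigh works on the symmetric covariance matrix)
eigvals, eigvecs = np.linalg.eigh(cov)

# 4. Sort by decreasing eigenvalue and keep the top k = 2 eigenvectors
order = np.argsort(eigvals)[::-1]
W = eigvecs[:, order[:2]]

# 5. Project the data onto the new 2-D feature space
X_proj = X_std @ W
print(X_proj.shape)  # (150, 2)

The scikit-learn implementation used in this practical follows.

Code: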

import numpy as np

from sklearn.datasets import load_iris

from sklearn.preprocessing import StandardScaler

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

iris = load_iris()

X = iris.data

y = iris.target


scaler = StandardScaler()

X_scaled = scaler.fit_transform(X)
pca = PCA(n_components=2)

X_pca = pca.fit_transform(X_scaled)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')

plt.xlabel('Principal Component 1')

plt.ylabel('Principal Component 2')

plt.title('PCA on Iris Dataset')

plt.colorbar(label='Classes')

plt.show()

Output:


Linear Discriminant Analysis (LDA):

LDA is a supervised dimensionality reduction technique that is used for feature extraction and
classification. Unlike PCA, which focuses on maximizing variance, LDA aims to maximize the
separation between classes in the data. It does this by finding the linear combinations of features
that best discriminate between different classes.

Steps in LDA:

● Compute the mean vectors for each class and the overall mean vector of the data.
● Compute the between-class scatter matrix and within-class scatter matrix.
● Compute the eigenvectors and eigenvalues of the generalized eigenvalue problem.
● Sort the eigenvectors by decreasing eigenvalues and choose the top k eigenvectors as the discriminant directions.
● Project the original data onto the discriminant directions to obtain the new feature space (a NumPy sketch of these steps follows the list).
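
The scatter-matrix computation can likewise be sketched with NumPy (illustrative only, reusing the standardized Iris data X_scaled and labels y loaded above; the practical uses scikit-learn's LinearDiscriminantAnalysis):

import numpy as np

n_features = X_scaled.shape[1]
overall_mean = X_scaled.mean(axis=0)

# Within-class (S_W) and between-class (S_B) scatter matrices
S_W = np.zeros((n_features, n_features))
S_B = np.zeros((n_features, n_features))
for c in np.unique(y):
    Xc = X_scaled[y == c]
    mean_c = Xc.mean(axis=0)
    S_W += (Xc - mean_c).T @ (Xc - mean_c)
    diff = (mean_c - overall_mean).reshape(-1, 1)
    S_B += len(Xc) * (diff @ diff.T)

# Generalized eigenvalue problem: eigenvectors of inv(S_W) @ S_B
eigvals, eigvecs = np.linalg.eig(np.linalg.inv(S_W) @ S_B)
order = np.argsort(eigvals.real)[::-1]
W_lda = eigvecs[:, order[:2]].real  # top k = 2 discriminant directions

# Project the data onto the discriminant directions
X_lda_manual = X_scaled @ W_lda
print(X_lda_manual.shape)  # (150, 2)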

Code:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)

X_lda = lda.fit_transform(X_scaled, y)

plt.scatter(X_lda[:, 0], X_lda[:, 1], c=y, cmap='viridis')

plt.xlabel('LDA Component 1')
plt.ylabel('LDA Component 2')

plt.title('LDA on Iris Dataset')

plt.colorbar(label='Classes')

plt.show()


Output:
