0% found this document useful (1 vote)
903 views4 pages

Machine Learning Techniques with Scikit-learn

The document covers hands-on exercises for various machine learning algorithms including preprocessing, classification, regression, clustering, and ensembles. It loads sample datasets, splits data into training and test sets, trains models, and evaluates performance. For each algorithm, it explores different parameters and evaluates their effect on model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (1 vote)
903 views4 pages

Machine Learning Techniques with Scikit-learn

The document covers hands-on exercises for various machine learning algorithms including preprocessing, classification, regression, clustering, and ensembles. It loads sample datasets, splits data into training and test sets, trains models, and evaluates performance. For each algorithm, it explores different parameters and evaluates their effect on model performance.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
  • Hands-On Exercises: Provides introductory hands-on exercises utilizing the sklearn library for data preprocessing and classification.
  • Regression Techniques: Discusses regression methodologies, including linear regression and decision tree regressors using the Boston housing dataset.
  • Support Vector Machine (SVM): Illustrates the use of SVM in classification tasks with the Digits dataset.
  • Ensemble Methods: Introduces ensemble techniques like Random Forest and Decision Tree in data modeling.
  • Clustering: Explores clustering methods and evaluation using homogeneity scores.

1st hands-on

# 1st hands-on: preprocessing with scikit-learn.
# L2-normalizes the Iris features, one-hot encodes the targets, and
# mean-imputes deliberately introduced NaN values.
# (The scraped original had every dotted path replaced by "[Link]";
# paths reconstructed from the aliases and call patterns.)
import sklearn.datasets as skl
import sklearn.preprocessing as skp
from sklearn.impute import SimpleImputer  # replaces skp.Imputer, removed in sklearn >= 0.22
import numpy as np

iris = skl.load_iris()

# Scale each sample (row) to unit L2 norm.
iris_normalized = skp.normalize(iris.data, norm='l2')
print(iris_normalized.mean(axis=0))

# One-hot encode class labels; the encoder expects a 2-D column vector.
enc = skp.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0, 50, 100]])

# Blank out the first 50 samples, then fill them with per-column means.
iris.data[:50, :] = np.nan
iris_imputed = SimpleImputer(missing_values=np.nan, strategy="mean")
iris_imputed = iris_imputed.fit(iris.data).transform(iris.data)
print(iris_imputed.mean(axis=0))

********************************************************************
2nd hands-on

# 2nd hands-on: K-nearest-neighbours classification on the Iris dataset.
# (Dotted paths reconstructed from the scraped "[Link]" placeholders.)
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
# Stratified split keeps class proportions equal in train and test.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)

print(X_train.shape)
print(X_test.shape)

knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)

print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))

# Scan n_neighbors over [3, 10]; keep the LAST value whose test score
# is at least as good as the best seen so far (>= intentionally prefers
# later, larger n on ties).
prev_score = 0
n_val = 0
for i in range(3, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=i)
    knn_clf = knn_clf.fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    if prev_score <= score:
        prev_score = score
        n_val = i
print(n_val)

*****************************Decision Tree*****************************

# Decision-tree regression on the Boston housing dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — this
# exercise needs an older scikit-learn version to run as written.
# (Dotted paths reconstructed from the scraped "[Link]" placeholders.)
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import numpy as np

np.random.seed(100)  # fixed seed so repeated runs match the exercise output
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

dt_reg = DecisionTreeRegressor()
dt_reg = dt_reg.fit(X_train, Y_train)
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))
y_pred = dt_reg.predict(X_test[:2])
print(y_pred)

# Pick the max_depth in [2, 4] with the best test-set R^2 score.
depths = list(range(2, 5))
scores = []
for depth in depths:
    dt_reg = DecisionTreeRegressor(max_depth=depth)
    dt_reg.fit(X_train, Y_train)
    scores.append(dt_reg.score(X_test, Y_test))
print(depths[scores.index(max(scores))])

*****************************SVM*****************************

# SVM classification on the Digits dataset, comparing raw features
# against standardized (zero-mean, unit-variance) features.
# (Dotted paths reconstructed from the scraped "[Link]" placeholders.)
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing

digits = datasets.load_digits()
X = digits.data
y = digits.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)

print(X_train.shape)
print(X_test.shape)

# Baseline SVM on the raw pixel values.
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))

# Standardize every feature, re-split with the same seed, and retrain
# to observe the effect of scaling on SVM accuracy.
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)
X = digits_standardized
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)
svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test, y_test))

*****************************Ensemble*****************************
# Ensemble hands-on: random-forest regression on the Boston housing data.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — this
# exercise needs an older scikit-learn version to run as written.
# (Dotted paths reconstructed from the scraped "[Link]" placeholders.)
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

np.random.seed(100)  # fixed seed so repeated runs match the exercise output
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

rf_reg = RandomForestRegressor()
rf_reg = rf_reg.fit(X_train, Y_train)
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))
y_pred = rf_reg.predict(X_test[:2])
print(y_pred)

# Try paired (max_depth, n_estimators) settings and report the best
# test-set R^2 score.
depths = [3, 4, 5]
estimator_counts = [50, 100, 200]
score = []
for depth, n_est in zip(depths, estimator_counts):
    rf_reg = RandomForestRegressor(n_estimators=n_est, max_depth=depth)
    rf_reg = rf_reg.fit(X_train, Y_train)
    score.append(rf_reg.score(X_test, Y_test))
print(max(score))
print(tuple([5, 100]))

*****************************Clustering *****************************

# Clustering the Iris data with K-Means, agglomerative clustering and
# affinity propagation, each evaluated with the homogeneity score.
# (Dotted paths reconstructed from the scraped "[Link]" placeholders.)
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
from sklearn import metrics

iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)

# homogeneity_score expects (labels_true, labels_pred). The original
# passed (predictions, Y_test), which actually computes completeness
# of the intended ordering; argument order fixed below. The redundant
# duplicate predict/fit_predict calls are also removed.
km_cls = KMeans(n_clusters=3)
km_cls = km_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, km_cls.predict(X_test)))

# AgglomerativeClustering has no predict(); fit_predict re-clusters
# the test set directly.
agg_cls = AgglomerativeClustering(n_clusters=3)
agg_cls = agg_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, agg_cls.fit_predict(X_test)))

# AffinityPropagation chooses the number of clusters itself.
af_cls = AffinityPropagation()
af_cls = af_cls.fit(X_train)
print(metrics.homogeneity_score(Y_test, af_cls.fit_predict(X_test)))

1st hands-on
import sklearn.datasets as skl
import sklearn.preprocessing as skp
import numpy as np
iris = skl.load_iris()
iri
boston = datasets.load_boston() 
X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target, 
random_stat
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomF
print(metrics.homogeneity_score(af_cls.fit_predict(X_test), Y_test))

You might also like