
1st hands-on

import sklearn.datasets as skl
import sklearn.preprocessing as skp
import numpy as np
from sklearn.impute import SimpleImputer
iris = skl.load_iris()
iris_normalized = skp.normalize(iris.data, norm='l2')
print(iris_normalized.mean(axis=0))
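The same row-wise L2 scaling is also available as an estimator, which is convenient inside a Pipeline; a minimal sketch (the variable name is illustrative only):

from sklearn.preprocessing import Normalizer

# Normalizer rescales each sample (row) to unit L2 norm, same as skp.normalize(..., norm='l2')
iris_normalized_est = Normalizer(norm='l2').fit_transform(iris.data)
print(iris_normalized_est.mean(axis=0))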

enc = skp.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0,50,100]])

iris.data[:50, :] = np.nan
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")  # SimpleImputer replaces the removed skp.Imputer
iris_imputed = imputer.fit_transform(iris.data)                  # fills NaNs with the column-wise mean
print(iris_imputed.mean(axis=0))

********************************************************************
2nd hands-on

import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target,
                                                     stratify=iris.target, random_state=30)

print(X_train.shape)
print(X_test.shape)

knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)

print(knn_clf.score(X_train,Y_train))
print(knn_clf.score(X_test,Y_test))

prev_score = 0
n_val = 0
for i in range(3, 11):                     # try n_neighbors from 3 to 10
    knn_clf = KNeighborsClassifier(n_neighbors=i)
    knn_clf = knn_clf.fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    if prev_score <= score:                # keep the largest i that ties or beats the best score so far
        prev_score = score
        n_val = i
print(n_val)
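The same n_neighbors search can also be written with GridSearchCV, which cross-validates on the training split instead of reusing the test set; a minimal sketch under the split above:

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Same candidate range as the manual loop (3 to 10), scored by 5-fold cross-validation
param_grid = {"n_neighbors": list(range(3, 11))}
grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid.fit(X_train, Y_train)

print(grid.best_params_["n_neighbors"])   # selected neighbourhood size
print(grid.score(X_test, Y_test))         # accuracy of the refit best model on the held-out test set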

*****************************Decision Tree*****************************

import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import numpy as np

np.random.seed(100)
boston = datasets.load_boston()  # removed in scikit-learn 1.2; see the note after this section
X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target,
                                                     random_state=30)
print(X_train.shape)
print(X_test.shape)

dt_reg = DecisionTreeRegressor()
dt_reg = dt_reg.fit(X_train, Y_train)
print(dt_reg.score(X_train,Y_train))
print(dt_reg.score(X_test,Y_test))
y_pred = dt_reg.predict(X_test[:2])
print(y_pred)

myList = list(range(2, 5))
scores = []
for i in myList:
    dt_reg = DecisionTreeRegressor(max_depth=i)
    dt_reg.fit(X_train, Y_train)
    scores.append(dt_reg.score(X_test, Y_test))
print(myList[scores.index(max(scores))])   # max_depth with the best test score
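Note: load_boston was removed in scikit-learn 1.2 because of ethical concerns about the dataset. On newer versions, the same exercise can be run on the California housing data; a sketch, assuming scikit-learn >= 1.2:

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Same Bunch layout as load_boston(): .data and .target
housing = fetch_california_housing()
X_train, X_test, Y_train, Y_test = train_test_split(housing.data, housing.target,
                                                     random_state=30)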

*****************************SVM*****************************

import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing

digits = datasets.load_digits()
X = digits.data
y = digits.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30,
                                                     stratify=y)

print(X_train.shape)
print(X_test.shape)

svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))

standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)
X = digits_standardized
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=30,
                                                     stratify=y)
svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test,y_test))
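The scale-then-fit steps above can also be combined into a single Pipeline, which fits the scaler on the training split only instead of on the full dataset; a minimal, self-contained sketch:

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target,
                                                     random_state=30, stratify=digits.target)

# StandardScaler statistics are learned from X_train only and reused at predict time
svm_pipe = make_pipeline(StandardScaler(), SVC())
svm_pipe.fit(X_train, y_train)
print(svm_pipe.score(X_test, y_test))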

*****************************Ensemble*****************************
import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

np.random.seed(100)
max_depth = range(3,5)
boston = datasets.load_boston()  # removed in scikit-learn 1.2; see the California housing note above
X_train, X_test, Y_train, Y_test = train_test_split(boston.data, boston.target,
                                                     random_state=30)
print(X_train.shape)
print(X_test.shape)

rf_reg = RandomForestRegressor()
rf_reg = rf_reg.fit(X_train, Y_train)
print(rf_reg.score(X_train,Y_train))
print(rf_reg.score(X_test,Y_test))
y_pred = rf_reg.predict(X_test[:2])
print(y_pred)

a = [3, 4, 5]        # candidate max_depth values
b = [50, 100, 200]   # candidate n_estimators values
score = []

for i in range(3):
    rf_reg = RandomForestRegressor(n_estimators=b[i], max_depth=a[i])
    rf_reg = rf_reg.fit(X_train, Y_train)
    score.append(rf_reg.score(X_test, Y_test))

best = score.index(max(score))
print(max(score))
print((a[best], b[best]))   # best (max_depth, n_estimators) pair
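The loop above only tries the three paired settings (3, 50), (4, 100) and (5, 200). To search the full max_depth × n_estimators grid, GridSearchCV can be used; a sketch under the same split that scores all nine combinations by cross-validation on the training data:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {"max_depth": [3, 4, 5], "n_estimators": [50, 100, 200]}
rf_grid = GridSearchCV(RandomForestRegressor(random_state=100), param_grid, cv=3)
rf_grid.fit(X_train, Y_train)

print(rf_grid.best_params_)              # best (max_depth, n_estimators) combination found
print(rf_grid.score(X_test, Y_test))     # R^2 of the refit best model on the test split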

*****************************Clustering *****************************

import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
from sklearn import metrics
iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target,
                                                     stratify=iris.target, random_state=30)

km_cls = KMeans(n_clusters=3)
km_cls = km_cls.fit(X_train)
km_pred = km_cls.predict(X_test)                     # assign test samples to the learned clusters
print(metrics.homogeneity_score(Y_test, km_pred))    # homogeneity_score expects (labels_true, labels_pred)

agg_cls = AgglomerativeClustering(n_clusters=3)
agg_pred = agg_cls.fit_predict(X_test)               # no predict(); fit_predict clusters X_test directly
print(metrics.homogeneity_score(Y_test, agg_pred))

af_cls = AffinityPropagation()
af_pred = af_cls.fit_predict(X_test)                 # number of clusters is chosen automatically
print(metrics.homogeneity_score(Y_test, af_pred))
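Homogeneity needs the true labels. When no ground truth is available, an internal metric such as the silhouette score can compare the same clusterings; a minimal sketch reusing the predictions above:

from sklearn.metrics import silhouette_score

# Silhouette ranges from -1 to 1; higher means tighter, better-separated clusters
print(silhouette_score(X_test, km_pred))
print(silhouette_score(X_test, agg_pred))
print(silhouette_score(X_test, af_pred))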
