# You are on page 1 of 4

#1st hands on *Preprocessing*

#Write your code here


from sklearn.datasets import load_iris
import sklearn.preprocessing as preprocessing
from sklearn.impute import SimpleImputer
import numpy as np

# Load the iris dataset (150 samples x 4 numeric features, 3 classes).
iris = load_iris()

# L2-normalize every sample: each row is scaled to unit Euclidean norm.
normalizer = preprocessing.Normalizer(norm='l2').fit(iris.data)
iris_normalized = normalizer.transform(iris.data)
print(iris_normalized.mean(axis=0))

# One-hot encode the 3 target classes. reshape(-1, 1) turns the 1-D
# label vector into the 2-D column that OneHotEncoder expects.
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0, 50, 100]])

# Blank out the first 50 rows, then fill the gaps with the per-column
# mean computed from the remaining (non-NaN) values.
iris.data[:50, :] = np.nan
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)
print(iris_imputed.mean(axis=0))

#2nd hands on *Hands-On KNN*

#Write your code here


import sklearn.datasets as dataset
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

iris = dataset.load_iris()

# Stratified split keeps the 3-class proportions in both partitions.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Baseline classifier with the default n_neighbors=5.
knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)
print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))

# Search n_neighbors in [3, 10] for the best test-set accuracy.
# Bug fix: the original compared each score only against the *previous*
# iteration's score, so best_n_neighbour could be overwritten by a
# later, worse k. Track the true running maximum instead.
max_score = 0
best_n_neighbour = 0
for cluster in range(3, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=cluster).fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    if score > max_score:
        max_score = score
        best_n_neighbour = cluster
    print(str(cluster), score)

print(best_n_neighbour)

#3rd hands on *Decision Tree - Hands-On*

#Write your code here


import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.tree import DecisionTreeRegressor

np.random.seed(100)
# NOTE(review): load_boston was removed in scikit-learn 1.2; this
# exercise assumes an older scikit-learn where it is still available.
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Unconstrained tree: memorizes the training set (train score 1.0)
# and generalizes worse on the held-out data.
dt_regressor = DecisionTreeRegressor()
dt_reg = dt_regressor.fit(X_train, Y_train)
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))

y_pred = dt_reg.predict(X_test[:2])
print(y_pred)

# Search max_depth in [2, 5] for the best test-set R^2.
maxdepth = 2
maxscore = 0
for i in range(2, 6):
    dt_reg = DecisionTreeRegressor(max_depth=i).fit(X_train, Y_train)
    score = dt_reg.score(X_test, Y_test)
    if maxscore < score:
        maxdepth = i
        maxscore = score
print(maxdepth)

# 4th hands on *Hands-On - SVM*

#Write your code here


import sklearn.datasets as datasets
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

digits = datasets.load_digits()
X = digits.data
y = digits.target

# Stratified split preserves the digit-class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)
print(X_train.shape)
print(X_test.shape)

# SVC on the raw pixel values as a baseline.
svm_clf = SVC().fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))

# Standardize (zero mean, unit variance per feature) and refit:
# SVMs are scale-sensitive, so standardization typically improves
# the test accuracy.
scaler = StandardScaler()
scaler.fit(X)
digits_standardized = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    digits_standardized, y, random_state=30, stratify=y)

svm_clf2 = SVC().fit(X_train, y_train)
print(svm_clf2.score(X_test, y_test))

#5th hands on *Hands-On - Clustering*

#Write your code here


import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing

digits = datasets.load_digits()

# Plain (non-stratified) split of the raw pixel data.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    digits.data, digits.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# SVC on the raw features as a baseline.
classifier = SVC()
svm_clf = classifier.fit(X_train, Y_train)
print(svm_clf.score(X_test, Y_test))

# Standardize the full dataset, then re-split and refit; SVMs are
# scale-sensitive, so this usually lifts the score.
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    digits_standardized, digits.target, random_state=30)
classifier = SVC()
svm_clf2 = classifier.fit(X_train, Y_train)
print(svm_clf2.score(X_test, Y_test))

#6th hands on *Hands-On - Ensemble*


from sklearn.ensemble import RandomForestRegressor
import sklearn.datasets as datasets
import sklearn.model_selection as model_selection
import numpy as np

np.random.seed(100)
# NOTE(review): load_boston was removed in scikit-learn 1.2; this
# exercise assumes an older scikit-learn where it is still available.
boston = datasets.load_boston()

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Default random forest as a baseline.
rf_regressor = RandomForestRegressor()
rf_reg = rf_regressor.fit(X_train, Y_train)
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))

predicted = rf_reg.predict(X_test[:2])
print(predicted)

# Search max_depth in [3, 5] at a fixed 100 estimators; report the
# depth with the best test-set R^2. Compute each score once and reuse
# it (the original called score() twice per iteration).
depths = []
scores = []
c_estimators = 100
for x in range(3, 6):
    rf_reg = RandomForestRegressor(
        n_estimators=c_estimators, max_depth=x).fit(X_train, Y_train)
    score = rf_reg.score(X_test, Y_test)
    depths.append(x)
    scores.append(score)

print((depths[np.argmax(scores)], c_estimators))

# You might also like