# Scale each iris sample to unit L2 norm
import numpy as np
from sklearn import preprocessing
from sklearn.datasets import load_iris
iris = load_iris()
normalizer = preprocessing.Normalizer(norm='l2').fit(iris.data)
iris_normalized = normalizer.transform(iris.data)
print(iris_normalized.mean(axis=0))
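
# A quick check of what the transform did: after L2 normalization every row
# should have unit Euclidean norm. This check is ours, not part of the exercise:
row_norms = np.linalg.norm(iris_normalized, axis=1)
print(np.allclose(row_norms, 1.0))  # expected: True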

# One-hot encode the three iris class labels
enc = preprocessing.OneHotEncoder()
iris_target_onehot = enc.fit_transform(iris.target.reshape(-1, 1))
print(iris_target_onehot.toarray()[[0, 50, 100]])
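
# The encoding is equivalent to indexing a 3x3 identity matrix with the class
# labels; a minimal sketch verifying that (this comparison is ours):
manual_onehot = np.eye(3)[iris.target]
print(np.array_equal(manual_onehot, iris_target_onehot.toarray()))  # expected: True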

# Blank out the first 50 rows, then fill them back in with column means
# (SimpleImputer is the replacement for the deprecated preprocessing.Imputer)
from sklearn.impute import SimpleImputer
iris.data[:50, :] = np.nan
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(iris.data)
iris_imputed = imputer.transform(iris.data)
print(iris_imputed.mean(axis=0))
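
# With strategy="mean", the blanked rows are filled with the column means of the
# remaining rows (rows 50-149 here); a sketch confirming that, ours to add:
print(np.allclose(iris_imputed[:50], iris.data[50:].mean(axis=0)))  # expected: True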

# Reload iris and make a stratified train/test split
from sklearn import datasets
from sklearn.model_selection import train_test_split
iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data, iris.target, stratify=iris.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Baseline KNN classifier (default n_neighbors=5)
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()
knn_clf = knn_clf.fit(X_train, Y_train)
print(knn_clf.score(X_train, Y_train))
print(knn_clf.score(X_test, Y_test))
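
# Beyond a single split, k-fold cross-validation gives a less split-dependent
# accuracy estimate; a minimal sketch (cross_val_score is standard scikit-learn
# API, but the cv=5 choice is ours, not part of the original exercise):
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(KNeighborsClassifier(), iris.data, iris.target, cv=5)
print(cv_scores.mean())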

# Sweep n_neighbors from 3 to 10 and keep the value with the best test accuracy
best_score = 0
best_n_neighbors = 0
for k in range(3, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k).fit(X_train, Y_train)
    score = knn_clf.score(X_test, Y_test)
    print(k, score)
    if score > best_score:
        best_score = score
        best_n_neighbors = k
print(best_n_neighbors)
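
# The same sweep expressed with grid search; a sketch assuming the default
# 5-fold cross-validation is acceptable (GridSearchCV is standard scikit-learn API):
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(KNeighborsClassifier(), {'n_neighbors': range(3, 11)}, cv=5)
grid.fit(X_train, Y_train)
print(grid.best_params_, grid.best_score_)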

# Boston housing regression (note: load_boston was removed in scikit-learn 1.2,
# so this exercise assumes an older version)
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
print(X_train.shape)
print(X_test.shape)

# Unconstrained decision tree: fits the training set perfectly but overfits
from sklearn.tree import DecisionTreeRegressor
dt_regressor = DecisionTreeRegressor()
dt_reg = dt_regressor.fit(X_train, Y_train)
print(dt_reg.score(X_train, Y_train))
print(dt_reg.score(X_test, Y_test))
y_pred = dt_reg.predict(X_test[:2])
print(y_pred)
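
# The R^2 from .score() can be supplemented with an error in target units; a
# sketch using mean_squared_error (standard scikit-learn API) on the same model:
from sklearn.metrics import mean_squared_error
print(mean_squared_error(Y_test, dt_reg.predict(X_test)))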

# Search max_depth in [2, 5] for the best test-set R^2
maxdepth = 2
maxscore = 0
for i in range(2, 6):
    dt_regressor = DecisionTreeRegressor(max_depth=i)
    dt_reg = dt_regressor.fit(X_train, Y_train)
    score = dt_reg.score(X_test, Y_test)
    if maxscore < score:
        maxdepth = i
        maxscore = score
print(maxdepth)
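
# A sketch of the same depth search scored by cross-validation on the training
# split, so the test set stays untouched until the end (the random_state=0 and
# cv=5 choices here are ours, not from the original exercise):
from sklearn.model_selection import cross_val_score
depth_scores = {d: cross_val_score(DecisionTreeRegressor(max_depth=d, random_state=0),
                                   X_train, Y_train, cv=5).mean()
                for d in range(2, 6)}
print(max(depth_scores, key=depth_scores.get))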

digits = datasets.load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=30, stratify=y)
print(X_train.shape)
print(X_test.shape)

# Standardize the pixel features, re-split, and fit an SVM
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
scaler = StandardScaler()
scaler.fit(X)
digits_standardized = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    digits_standardized, y, random_state=30, stratify=y)
classifier = SVC()
svm_clf = classifier.fit(X_train, y_train)
print(svm_clf.score(X_test, y_test))
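
# The scaler above is fit on all of X before splitting, which leaks test-set
# statistics into training. A sketch of the leak-free pattern with a Pipeline,
# which fits the scaler on the training fold only (make_pipeline is standard
# scikit-learn API; the variable names here are ours):
from sklearn.pipeline import make_pipeline
Xtr, Xte, ytr, yte = train_test_split(X, y, random_state=30, stratify=y)
pipe = make_pipeline(StandardScaler(), SVC())
pipe.fit(Xtr, ytr)
print(pipe.score(Xte, yte))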

# Variant: the same standardize-then-SVM flow, without stratification
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(digits.data)
digits_standardized = standardizer.transform(digits.data)
X_train, X_test, Y_train, Y_test = train_test_split(
    digits_standardized, digits.target, random_state=30)
classifier = SVC()
svm_clf2 = classifier.fit(X_train, Y_train)
print(svm_clf2.score(X_test, Y_test))
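
# A per-class breakdown of the scaled-digits SVM; a sketch using
# classification_report (standard scikit-learn API), added for illustration:
from sklearn.metrics import classification_report
print(classification_report(Y_test, svm_clf2.predict(X_test)))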

# Random forest on boston: re-split the regression data first, since the
# previous split above was the digits data
from sklearn.ensemble import RandomForestRegressor
np.random.seed(100)
boston = datasets.load_boston()
X_train, X_test, Y_train, Y_test = train_test_split(
    boston.data, boston.target, random_state=30)
rf_regressor = RandomForestRegressor()
rf_reg = rf_regressor.fit(X_train, Y_train)
print(rf_reg.score(X_train, Y_train))
print(rf_reg.score(X_test, Y_test))
predicted = rf_reg.predict(X_test[:2])
print(predicted)
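
# A sketch of which features drive the forest's predictions, using the fitted
# model's feature_importances_ (standard on scikit-learn tree ensembles):
for name, imp in sorted(zip(boston.feature_names, rf_reg.feature_importances_),
                        key=lambda t: -t[1])[:5]:
    print(name, round(imp, 3))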

# Search max_depth in [3, 5] with 100 trees, keeping the best test-set R^2
depths = []
scores = []
c_estimators = 100
for x in range(3, 6):
    rf_regressor = RandomForestRegressor(n_estimators=c_estimators, max_depth=x)
    rf_reg = rf_regressor.fit(X_train, Y_train)
    score = rf_reg.score(X_test, Y_test)
    depths.append(x)
    scores.append(score)
print((depths[np.argmax(scores)], c_estimators))
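
# A sketch of validating the chosen depth with the forest's built-in out-of-bag
# estimate instead of the held-out split (oob_score=True is standard
# RandomForestRegressor API; the random_state=100 choice is ours):
rf_oob = RandomForestRegressor(n_estimators=c_estimators,
                               max_depth=depths[np.argmax(scores)],
                               oob_score=True, random_state=100)
rf_oob.fit(X_train, Y_train)
print(rf_oob.oob_score_)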