Submitted By :-
Name - Rishabh Kumar
CRN - 1921129
URN - 1905388
Table of Contents :-
Practical 1 : Implement Simple Linear Regression
Practical 2 : Implement Random Forest Regression
Practical 3 : Implement Logistic Regression
Practical 4 : Implement the Decision Tree classification algorithm
Practical 5 : Implement the k-nearest neighbours classification algorithm
Practical 6 : Implement the Naive Bayes classification algorithm
Practical 7 : Implement K-means clustering to Find Natural Patterns in Data
Practical 8 : Implement K-Mode Clustering
Handling Imbalanced Datasets
Performance Metrics in Machine Learning

Practical 1 : Implement Simple Linear Regression.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Figure - 1.1
dataset = pd.read_csv('Salary_Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
Splitting the dataset into the Training set and Test set
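The split and model-fitting cells did not survive extraction; below is a minimal sketch using scikit-learn, where the 1/3 test size and random_state=0 are assumptions rather than the original values.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Split the data (test size and seed are assumptions)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/3, random_state = 0)

# Fit a simple linear regressor on the training split
regressor = LinearRegression()
regressor.fit(X_train, y_train)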
y_pred = regressor.predict(X_test)
Figure - 1.2
Visualising the Test set results
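The plotting cell is missing; a sketch of the usual test-set visualisation (the title and axis labels are assumptions based on the Salary_Data columns):
plt.scatter(X_test, y_test, color = 'red')                      # actual test points
plt.plot(X_train, regressor.predict(X_train), color = 'blue')   # fitted regression line
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()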
Figure - 1.3
Practical 2 : Implement Random Forest Regression.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Figure - 2.1
dataset = pd.read_csv('Position_Salaries.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values
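The cell that builds and fits the forest is missing; a sketch consistent with the truncated model printout and the prediction below (n_estimators=10 and random_state=0 are assumptions):
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest on the whole (small) dataset
regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor.fit(X, y)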
Output :
RandomForestRegressor(..., min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0, ...)
regressor.predict([[6.5]])
array([167000.])
Figure - 2.2
Practical 3 : Implement Logistic Regression
import pandas as pd
import numpy as np
dataset = pd.read_csv('...\\User_Data.csv')
# input
x = dataset.iloc[:, [2, 3]].values
# output
y = dataset.iloc[:, 4].values
Figure - 3.1
Splitting the dataset into the Training set and Test set
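The split, scaling, and fitting cells are missing; a sketch that matches the variable names used below (xtest, classifier). The split ratio, seed, and scaler settings are assumptions.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Split into training and test sets (ratio and seed are assumptions)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = 0)

# Standardise the two features
sc_x = StandardScaler()
xtrain = sc_x.fit_transform(xtrain)
xtest = sc_x.transform(xtest)

# Fit the logistic regression classifier
classifier = LogisticRegression(random_state = 0)
classifier.fit(xtrain, ytrain)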
Output :
y_pred = classifier.predict(xtest)
Test the performance of our model
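The evaluation cell is missing; a sketch using scikit-learn's confusion matrix and accuracy score:
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(ytest, y_pred)   # rows: actual class, columns: predicted class
print("Confusion Matrix :\n", cm)
print("Accuracy :", accuracy_score(ytest, y_pred))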
Output :-
Output :
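Only the tail of the decision-boundary plot survived; a sketch of the missing mesh-grid and contour setup (plotting the test split and the red/green colours are assumptions):
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

X_set, y_set = xtest, ytest  # assumption: visualise the test split
# Build a fine grid over the feature plane and colour it by the model's prediction
X1, X2 = np.meshgrid(np.arange(X_set[:, 0].min() - 1, X_set[:, 0].max() + 1, 0.01),
                     np.arange(X_set[:, 1].min() - 1, X_set[:, 1].max() + 1, 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))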
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Logistic Regression (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
Output :-
Practical 4 : Implement the Decision Tree classification algorithm
Importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
Importing datasets
data_set= pd.read_csv('user_data.csv')
Figure - 4.1
x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values
Splitting the dataset into training and test set
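The split cell is missing; a minimal sketch (the 25% test size and seed are assumptions):
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)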
Feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
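The cell defining the classifier is missing; a sketch using scikit-learn's decision tree (the entropy criterion and seed are assumptions):
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)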
classifier.fit(x_train, y_train)
Output :-
y_pred= classifier.predict(x_test)
Output :-
Output :-
Visualising the training set result
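The mesh-grid and contour lines of the plot were lost; a sketch of the standard setup (the purple/green colours are assumptions):
from matplotlib.colors import ListedColormap

x_set, y_set = x_train, y_train
# Grid over the scaled feature plane, coloured by the tree's prediction
x1, x2 = nm.meshgrid(nm.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01),
                     nm.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
             alpha = 0.75, cmap = ListedColormap(('purple', 'green')))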
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
    mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Decision Tree Algorithm (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
Output :-
Visualising the test set result
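As above, the mesh-grid setup for the test-set plot is a reconstruction:
x_set, y_set = x_test, y_test
x1, x2 = nm.meshgrid(nm.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01),
                     nm.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
             alpha = 0.75, cmap = ListedColormap(('purple', 'green')))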
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
    mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('purple', 'green'))(i), label = j)
mtp.title('Decision Tree Algorithm (Test set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
Output :-
Practical 5 : Implement the k-nearest neighbours classification algorithm
Importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
Importing datasets
data_set= pd.read_csv('user_data.csv')
Figure 5.1
x= data_set.iloc[:, [2,3]].values
y= data_set.iloc[:, 4].values
Splitting the dataset into training and test set
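The split cell is missing; a minimal sketch (the 25% test size and seed are assumptions):
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)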
Feature Scaling
from sklearn.preprocessing import StandardScaler
st_x= StandardScaler()
x_train= st_x.fit_transform(x_train)
x_test= st_x.transform(x_test)
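The classifier definition is missing; a sketch with common K-NN settings (n_neighbors=5 and the Minkowski/Euclidean metric are assumptions):
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)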
classifier.fit(x_train, y_train)
Output :-
y_pred= classifier.predict(x_test)
Output :-
Output :-
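Only the tail of the decision-boundary plot survived; a reconstruction of the mesh-grid setup (plotting the training split and the red/green colours are assumptions):
from matplotlib.colors import ListedColormap

x_set, y_set = x_train, y_train
x1, x2 = nm.meshgrid(nm.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01),
                     nm.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01))
mtp.contourf(x1, x2, classifier.predict(nm.array([x1.ravel(), x2.ravel()]).T).reshape(x1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))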
mtp.xlim(x1.min(), x1.max())
mtp.ylim(x2.min(), x2.max())
for i, j in enumerate(nm.unique(y_set)):
    mtp.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
mtp.title('K-NN Algorithm (Training set)')
mtp.xlabel('Age')
mtp.ylabel('Estimated Salary')
mtp.legend()
mtp.show()
Output :-
Practical 6 : Implement the Naive Bayes classification algorithm.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
Figure 6.1
Splitting the dataset into the Training set and Test set
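The split cell is missing; a sketch matching the variable names printed below (the 25% test size and seed are assumptions); print(X_train) is included since the first Output below has no surviving print call:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
print(X_train)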
Output :
print(y_train)
Output :
print(X_test)
Output :
print(y_test)
Output :
Feature Scaling
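The scaling cell is missing; a sketch whose sc name is taken from the prediction call further below:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)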
print(X_train)
Output :
print(X_test)
Output :
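The fitting cell is missing; a sketch whose printed repr matches the line below:
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)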
GaussianNB(priors=None, var_smoothing=1e-09)
print(classifier.predict(sc.transform([[30,87000]])))
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
Output :
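The metric cell that produced the confusion matrix and accuracy below is missing; a standard sketch:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(accuracy_score(y_test, y_pred))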
[[65  3]
 [ 7 25]]
0.9
Visualising the Training set results
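The plotting cell is missing; a condensed sketch of the usual decision-boundary plot (the colours and title are assumptions):
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(X_set[:, 0].min() - 1, X_set[:, 0].max() + 1, 0.01),
                     np.arange(X_set[:, 1].min() - 1, X_set[:, 1].max() + 1, 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Naive Bayes (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()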
Output :
Output :
Practical 7 : Implement K-means clustering to Find Natural Patterns in Data.
K-Means Clustering
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values
Figure 7.1
Using the elbow method to find the optimal number of clusters
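The elbow-method cell is missing; a sketch computing the within-cluster sum of squares (WCSS) for k = 1..10 (the range, init method, and seed are assumptions):
from sklearn.cluster import KMeans

wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)  # inertia_ = within-cluster sum of squares
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()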
Output :
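Assuming the elbow appears at five clusters (a common result for these two Mall_Customers features), a sketch of fitting the final model and visualising the clusters:
kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
y_kmeans = kmeans.fit_predict(X)
# One scatter call per cluster, plus the centroids
for c in range(5):
    plt.scatter(X[y_kmeans == c, 0], X[y_kmeans == c, 1], s = 100, label = 'Cluster ' + str(c + 1))
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s = 300, c = 'yellow', label = 'Centroids')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()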
Output :
Practical 8 : Implement K-Mode Clustering
import pandas as pd
import numpy as np
from kmodes.kmodes import KModes
Figure 8.1
Elbow curve to find optimal K
cost = []
K = range(1,5)
for num_clusters in list(K):
    kmode = KModes(n_clusters=num_clusters, init = "random", n_init = 5, verbose=1)
    kmode.fit_predict(data)
    cost.append(kmode.cost_)
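A sketch of plotting the resulting elbow curve (assumes matplotlib; the marker style is arbitrary):
import matplotlib.pyplot as plt
plt.plot(K, cost, 'bx-')
plt.xlabel('Number of clusters (K)')
plt.ylabel('Cost')
plt.title('Elbow Method for optimal K')
plt.show()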
Output :
Balanced Dataset: Let's take a simple example: if our dataset contains approximately as many positive values as negative values, then we can say our dataset is balanced.
Consider orange as the positive class and blue as the negative class; the number of positive values and negative values is approximately the same.
Imbalanced Dataset: If there is a very high difference between the number of positive values and negative values, then we can say our dataset is imbalanced.
Precision: the number of true positives divided by the total number of positive predictions.
Recall: the number of true positives divided by the number of positive values in the test data. Recall is also called Sensitivity or the True Positive Rate; it equals the number of true positives divided by the sum of true positives and false negatives.
2. Over-sampling (Up Sampling): This technique is used to modify the unequal data classes to create balanced datasets. When the quantity of data is insufficient, the over-sampling method balances the dataset by increasing the size of the rare (minority) class.
3. Under-sampling (Down Sampling): This technique balances the imbalanced dataset by reducing the size of the class which is in abundance. There are various methods for classification problems, such as cluster centroids and Tomek links. The cluster centroid method replaces a cluster of samples with the cluster centroid of a K-means algorithm, and the Tomek link method removes unwanted overlap between classes until all minimally distanced nearest neighbours are of the same class.
4. Feature Selection: To tackle the imbalance problem, we can apply a one-sided metric such as the correlation coefficient (CC) and odds ratios (OR), or a two-sided metric such as information gain (IG) and chi-square (CHI), on both the positive class and negative class. Based on the scores, we then identify the significant features from each class and take the union of these features to obtain the final set of features.
5. Cost-Sensitive Learning: Cost-Sensitive Learning (CSL) takes the misclassification costs into consideration and minimises the total cost. The goal of this technique is mainly to pursue a high accuracy of classifying examples into a set of known classes; it plays an important role in machine learning algorithms, including real-world data mining applications.
6. Ensemble Learning Techniques
The ensemble-based method is another technique used to deal with imbalanced datasets; it combines the results of several classifiers, i.e., the outputs of multiple base learners. There are various approaches in ensemble learning, such as bagging and boosting.
Imbalanced data is one of the potential problems in the field of data mining and machine learning. This problem can be approached by properly analyzing the data. A few approaches that help us in tackling the problem at the data level are under-sampling, over-sampling, and feature selection. Moving forward, there is still a lot of research to be done on handling data imbalance.
Performance Metrics in Machine Learning
Performance metrics are a part of every machine learning pipeline. They tell you whether you are making progress, and they put a number on it. All machine learning models, whether linear regression or a SOTA technique like BERT, need a metric to judge performance.
Regression metrics
Regression models have continuous output, so we need a metric based on calculating some sort of distance between predictions and ground truth.
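The metric definitions that followed were lost in extraction; as standard examples, the two most common error distances are (notation assumed: y_i is the ground truth, \hat{y}_i the prediction, N the number of samples):
\mathrm{MAE} = \frac{1}{N}\sum_{i=1}^{N} \lvert y_i - \hat{y}_i \rvert
\qquad
\mathrm{MSE} = \frac{1}{N}\sum_{i=1}^{N} (y_i - \hat{y}_i)^2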
The point of calculating this coefficient, R² (the coefficient of determination), is to answer the question: "How much (what %) of the total variation in Y (the target) is explained by the variation in X (the regression line)?"
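For reference, the standard definition (a reconstruction; \bar{y} is the mean of the targets):
R^2 = 1 - \frac{SS_{\mathrm{res}}}{SS_{\mathrm{tot}}}
    = 1 - \frac{\sum_{i=1}^{N} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{N} (y_i - \bar{y})^2}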
Adjusted R²
The vanilla R² method suffers from some demons, like misleading the researcher into believing that the model is improving as the score increases when, in reality, no learning is happening. This can occur when a model overfits the data: the explained variance can look close to 100% even though nothing has actually been learned. To rectify this, R² is adjusted for the number of independent variables.
Adjusted R² is always lower than R², as it adjusts for the increasing predictors and
only shows improvement if there is a real improvement.
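The standard adjustment, where n is the number of samples and k the number of independent variables:
R^2_{\mathrm{adj}} = 1 - \frac{(1 - R^2)(n - 1)}{n - k - 1}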
Classification metrics
Classification problems are one of the world’s most widely researched areas. Use
cases are present in almost all production and industrial environments. Speech
recognition, face recognition, text classification – the list is endless.
Confusion Matrix
Confusion Matrix is a tabular visualization of the ground-truth labels versus model
predictions. Each row of the confusion matrix represents the instances in a predicted
class and each column represents the instances in an actual class. Confusion Matrix is
not exactly a performance metric but sort of a basis on which other metrics evaluate
the results.
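A minimal sketch of computing a confusion matrix with scikit-learn, using hypothetical labels (note that scikit-learn's convention puts actual classes on the rows and predicted classes on the columns):
from sklearn.metrics import confusion_matrix

y_true = [1, 0, 1, 1, 0, 1, 0, 0]  # hypothetical ground-truth labels
y_pred = [1, 0, 1, 0, 0, 1, 1, 0]  # hypothetical model predictions
print(confusion_matrix(y_true, y_pred))
# [[3 1]
#  [1 3]]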
In order to understand the confusion matrix, we need to set some value for the null hypothesis as an assumption. For example, from our Breast Cancer data, let's take our null hypothesis H₀ to be "The individual has cancer".