You are on page 1 of 11

NILESH PARAB

MACHINE
LEARNING
MODELS
Swipe Left

code_archer

Nilesh Parab

nilesh_parab42
Simple Linear Regression

Importing Libraries

In [1]: import pandas as pd


import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

Loading Data

In [3]: df = pd.read_csv("Salary_Data.csv")
df.head()

Out[3]: YearsExperience Salary

0 1.1 39343.0

1 1.3 46205.0

2 1.5 37731.0

3 2.0 43525.0

4 2.2 39891.0

Missing Value

In [4]: df['YearsExperience'].fillna(df['YearsExperience'].mean(),inplace=True)
df['Salary'].fillna(df['Salary'].mean(),inplace=True)

Separating Data

In [10]: x=df.iloc[:,[0]]
y=df.iloc[:,1]

Test Train Split

In [11]: from sklearn.model_selection import train_test_split


X_train, X_test, Y_train, Y_test = train_test_split(x,y,test_size=1/3, random_state=0)

Simple Linear Regression

In [12]: from sklearn.linear_model import LinearRegression


from sklearn.metrics import r2_score
reg = LinearRegression()
reg.fit(X_train, Y_train)

Out[12]: LinearRegression()

Evaluation of Result

In [13]: Y_pred = reg.predict(x)


# NOTE(review): x and y are the full dataset, including the rows reg was fitted
# on, so this score is not a held-out estimate; X_test / Y_test from the split
# are not used here. The printed value is R^2 (r2_score) expressed as a
# percentage, not a classification accuracy.
print("Accuracy of the model is "+str(r2_score(y,Y_pred)*100)+"%")

Accuracy of the model is 95.65349708076958%


Visualization of Result

In [14]: plt.scatter(y,Y_pred)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.show()
Multiple Linear Regression

Importing Libraries

In [2]: import pandas as pd


import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

Loading Data

In [3]: df = pd.read_csv("Data.csv")
df

Out[3]:
Country Age Salary Purchased

0 France 44.0 72000.0 No

1 Spain 27.0 48000.0 Yes

2 Germany 30.0 54000.0 No

3 Spain 38.0 61000.0 No

4 Germany 40.0 NaN Yes

5 France 35.0 58000.0 Yes

6 Spain NaN 52000.0 No

7 France 48.0 79000.0 Yes

8 Germany 50.0 83000.0 No

9 France 37.0 67000.0 Yes

Missing Value Treatment

In [4]: df['Age'].fillna(df['Age'].mean(),inplace=True)
df['Salary'].fillna(df['Salary'].mean(),inplace=True)
df['Purchased'].fillna(df['Purchased'].mode(),inplace=True)

Label Encoding

In [6]: le = LabelEncoder()
# NOTE(review): the same encoder is refitted on each column, so after this cell
# `le` only holds the Country classes; the Purchased mapping can no longer be
# inverted through it.
df['Purchased'] = le.fit_transform(df['Purchased'])
# NOTE(review): Country becomes integer codes and is later passed to
# LinearRegression as a numeric feature, which implies an ordering between
# countries. One-hot encoding may be more appropriate; confirm intent.
df['Country'] = le.fit_transform(df['Country'])

Separating Variables

In [7]: x=df.iloc[:,[0,1,3]]
y=df.iloc[:,2]

Scaling Variables

In [8]: scaler = StandardScaler()


# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so statistics of the future test rows influence the training
# features. Fitting on X_train only and then transforming X_test avoids this.
# After this line x is the array returned by fit_transform, replacing the
# DataFrame slice.
x = scaler.fit_transform(x)
Test Train Split

In [9]: from sklearn.model_selection import train_test_split


X_train, X_test, Y_train, Y_test = train_test_split(x,y,test_size=1/3, random_state=0)

Linear Regression

In [10]: from sklearn.linear_model import LinearRegression


from sklearn.metrics import r2_score
reg = LinearRegression()
reg.fit(X_train, Y_train)

Out[10]: LinearRegression()

Evaluating Model

In [11]: Y_pred = reg.predict(x)


# NOTE(review): x and y are the full dataset, including the rows reg was fitted
# on, so this score is not a held-out estimate; X_test / Y_test from the split
# are not used here. The printed value is R^2 (r2_score) expressed as a
# percentage, not a classification accuracy.
print("Accuracy of the model is "+str(r2_score(y,Y_pred)*100)+"%")

Accuracy of the model is 85.76396361824233%

Visualizing Results

In [12]: plt.scatter(y,Y_pred)
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.show()
Logistic Regression

Import Libraries

In [100]: import pandas as pd


import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

Loading Dataset

In [101]: df = pd.read_csv("Social_Network_Ads.csv")
df.head()

Out[101]: Age EstimatedSalary Purchased

0 19 19000 0

1 35 20000 0

2 26 43000 0

3 27 57000 0

4 19 76000 0

Missing Value Treatment

In [102]: df['Age'].fillna(df['Age'].mean(),inplace=True)
df['EstimatedSalary'].fillna(df['EstimatedSalary'].mean(),inplace=True)
df['Purchased'].fillna(df['Purchased'].mean(),inplace=True)

Separating Dataset

In [103]: x=df.iloc[:,[0,1]]
y=df.iloc[:,2]

Scaling Variables

In [104]: scaler = StandardScaler()


# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so statistics of the future test rows influence the training
# features. Fitting on X_train only and then transforming X_test avoids this.
x = scaler.fit_transform(x)

Train Test Split

In [105]: from sklearn.model_selection import train_test_split


X_train, X_test, Y_train, Y_test = train_test_split(x,y,test_size=1/3, random_state=0)

Logistic Regression

In [106]: from sklearn.linear_model import LogisticRegression


from sklearn.metrics import accuracy_score
reg = LogisticRegression()
reg.fit(X_train, Y_train)

Out[106]: LogisticRegression()
Evaluating Results

In [107]: Y_pred = reg.predict(x)


# NOTE(review): x and y cover all rows, including those reg was fitted on, so
# this is not a held-out accuracy; X_test / Y_test from the split are not used
# here. The full-length Y_pred is reused by the following cell, which joins it
# onto df for plotting.
print("Accuracy of the model is "+str(accuracy_score(y,Y_pred)*100)+"%")

Accuracy of the model is 84.5%

Visualizing Result

In [108]: Y_pred = pd.DataFrame(Y_pred,columns = ['Y_pred'])


df = pd.concat([df,Y_pred], axis=1)
df.head()

Out[108]: Age EstimatedSalary Purchased Y_pred

0 19 19000 0 0

1 35 20000 0 0

2 26 43000 0 0

3 27 57000 0 0

4 19 76000 0 0

In [109]: plt.rcParams["figure.figsize"] = [10.50, 4.50]


plt.rcParams["figure.autolayout"] = True
figure, axis = plt.subplots(1, 2)

# One colour per class label (0 / 1), shared by both panels.
colors = {0:'MEDIUMVIOLETRED', 1:'INDIGO'}

# Left panel: true labels; right panel: model predictions, on the same axes
# (Age vs. EstimatedSalary) so the two can be compared side by side.
df.plot.scatter("Age","EstimatedSalary",title="Actual",ax=axis[0],c=df['Purchased'].map(colors))
df.plot.scatter("Age","EstimatedSalary",title="Predicted",ax=axis[1],c=df['Y_pred'].map(colors))

Out[109]: <AxesSubplot:title={'center':'Predicted'}, xlabel='Age', ylabel='EstimatedSalary'>


Random Forest

Import Libraries

In [13]: import pandas as pd


import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

Loading Dataset

In [14]: df = pd.read_csv("Social_Network_Ads.csv")
df.head()

Out[14]:
Age EstimatedSalary Purchased

0 19 19000 0

1 35 20000 0

2 26 43000 0

3 27 57000 0

4 19 76000 0

Missing Value Treatment

In [15]: df['Age'].fillna(df['Age'].mean(),inplace=True)
df['EstimatedSalary'].fillna(df['EstimatedSalary'].mean(),inplace=True)
df['Purchased'].fillna(df['Purchased'].mean(),inplace=True)

Separating Dataset

In [16]: x=df.iloc[:,[0,1]]
y=df.iloc[:,2]

Scaling Variables

In [17]: scaler = StandardScaler()


# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so statistics of the future test rows influence the training
# features. Fitting on X_train only and then transforming X_test avoids this.
x = scaler.fit_transform(x)

Train Test Split

In [18]: from sklearn.model_selection import train_test_split


X_train, X_test, Y_train, Y_test = train_test_split(x,y,test_size=1/3, random_state=0)

Random Forest

In [19]: from sklearn.ensemble import RandomForestClassifier


from sklearn.metrics import accuracy_score
clf = RandomForestClassifier()
clf.fit(X_train, Y_train)

Out[19]: RandomForestClassifier()
Evaluating Results

In [20]: Y_pred = clf.predict(x)


# NOTE(review): x and y cover all rows, including those clf was fitted on, so
# this is not a held-out accuracy; X_test / Y_test from the split are not used
# here. The reported figure is presumably inflated by the training rows;
# confirm by scoring clf.predict(X_test) against Y_test. The full-length
# Y_pred is reused by the following cell, which joins it onto df for plotting.
print("Accuracy of the model is "+str(accuracy_score(y,Y_pred)*100)+"%")

Accuracy of the model is 96.5%

Visualizing Result

In [10]: Y_pred = pd.DataFrame(Y_pred,columns = ['Y_pred'])


df = pd.concat([df,Y_pred], axis=1)
df.head()

Out[10]:
Age EstimatedSalary Purchased Y_pred

0 19 19000 0 0

1 35 20000 0 0

2 26 43000 0 0

3 27 57000 0 0

4 19 76000 0 0

In [12]: plt.rcParams["figure.figsize"] = [10.50, 4.50]


plt.rcParams["figure.autolayout"] = True
figure, axis = plt.subplots(1, 2)

colors = {0:'MEDIUMVIOLETRED', 1:'INDIGO'}

df.plot.scatter("Age","EstimatedSalary",title="Acctual",ax=axis[0],c=df['Purchased'].map(colors))
df.plot.scatter("Age","EstimatedSalary",title="Prdicted",ax=axis[1],c=df['Y_pred'].map(colors))

Out[12]: <AxesSubplot:title={'center':'Prdicted'}, xlabel='Age', ylabel='EstimatedSalary'>


Support Vector Machine

Import Libraries

In [1]: import pandas as pd


import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler

Loading Dataset

In [2]: df = pd.read_csv("Social_Network_Ads.csv")
df.head()

Out[2]:
Age EstimatedSalary Purchased

0 19 19000 0

1 35 20000 0

2 26 43000 0

3 27 57000 0

4 19 76000 0

Missing Value Treatment

In [3]: df['Age'].fillna(df['Age'].mean(),inplace=True)
df['EstimatedSalary'].fillna(df['EstimatedSalary'].mean(),inplace=True)
df['Purchased'].fillna(df['Purchased'].mean(),inplace=True)

Separating Dataset

In [4]: x=df.iloc[:,[0,1]]
y=df.iloc[:,2]

Scaling Variables

In [5]: scaler = StandardScaler()


# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so statistics of the future test rows influence the training
# features. Fitting on X_train only and then transforming X_test avoids this.
x = scaler.fit_transform(x)

Train Test Split

In [6]: from sklearn.model_selection import train_test_split


X_train, X_test, Y_train, Y_test = train_test_split(x,y,test_size=1/3, random_state=0)

Support Vector Machine

In [9]: from sklearn import svm


from sklearn.metrics import accuracy_score
clf = svm.SVC()
clf.fit(X_train, Y_train)

Out[9]: SVC()
Evaluating Results

In [10]: Y_pred = clf.predict(x)


# NOTE(review): x and y cover all rows, including those clf was fitted on, so
# this is not a held-out accuracy; X_test / Y_test from the split are not used
# here. The full-length Y_pred is reused by the following cell, which joins it
# onto df for plotting.
print("Accuracy of the model is "+str(accuracy_score(y,Y_pred)*100)+"%")

Accuracy of the model is 91.75%

Visualizing Result

In [11]: Y_pred = pd.DataFrame(Y_pred,columns = ['Y_pred'])


df = pd.concat([df,Y_pred], axis=1)
df.head()

Out[11]:
Age EstimatedSalary Purchased Y_pred

0 19 19000 0 0

1 35 20000 0 0

2 26 43000 0 0

3 27 57000 0 0

4 19 76000 0 0

In [12]: plt.rcParams["figure.figsize"] = [10.50, 4.50]


plt.rcParams["figure.autolayout"] = True
figure, axis = plt.subplots(1, 2)

colors = {0:'MEDIUMVIOLETRED', 1:'INDIGO'}

df.plot.scatter("Age","EstimatedSalary",title="Acctual",ax=axis[0],c=df['Purchased'].map(colors))
df.plot.scatter("Age","EstimatedSalary",title="Prdicted",ax=axis[1],c=df['Y_pred'].map(colors))

Out[12]: <AxesSubplot:title={'center':'Prdicted'}, xlabel='Age', ylabel='EstimatedSalary'>

You might also like