You are on page 1 of 9

In 

[6]:
import pandas as pd

import numpy as np

# NOTE(review): hardcoded absolute Windows path — prefer a configurable data directory.
wcat = pd.read_csv(r"D:\Course\Python\Datasets\wc.at.csv")

In [7]:
wcat

Out[7]: Waist AT

0 74.75 25.72

1 72.60 25.89

2 81.80 42.60

3 83.95 42.80

4 74.65 29.84

... ... ...

104 100.10 124.00

105 93.30 62.20

106 101.80 133.00

107 107.90 208.00

108 108.50 208.00

109 rows × 2 columns

In [1]:
import matplotlib.pyplot as plt

In [4]:
# Scatter plot of the raw data: labelled axes and a title so the figure stands alone.
fig, ax = plt.subplots()
ax.scatter(wcat['Waist'], wcat['AT'])
ax.set(title='Adipose tissue vs waist circumference', xlabel='Waist', ylabel='AT')
plt.show()

Out[4]: <matplotlib.collections.PathCollection at 0x28b500acc48>


In [5]: # corealtion coefficent

np.corrcoef(x=wcat['Waist'],y=wcat['AT'])

Out[5]: array([[1. , 0.81855781],

[0.81855781, 1. ]])

In [9]:
# NOTE(review): dead cell of commented-out statsmodels OLS experiments (plain and
# log-transformed Waist models). The Out[] below shows the 95% confidence intervals
# produced by an earlier run. Consider deleting this cell or moving it to a separate
# exploration notebook — cells of commented-out code are a notebook anti-pattern.
#import statsmodels.formula.api as smf

#model=smf.ols('AT~Waist',data=wcat).fit()

#model.params

#model.summary()

#print (model.conf_int(0.05)) # 95% confidence interval`

#model2 = smf.ols('AT~np.log(Waist)',data=wcat).fit()
#model2.params

#model2.summary()

#pred = model2.predict(wcat)

#pred

0 1

Intercept -259.190053 -172.772923

Waist 2.993689 3.924030

In [5]:
wcat

Out[5]: Waist AT

0 74.75 25.72

1 72.60 25.89

2 81.80 42.60

3 83.95 42.80

4 74.65 29.84

... ... ...

104 100.10 124.00

105 93.30 62.20

106 101.80 133.00

107 107.90 208.00

108 108.50 208.00

109 rows × 2 columns

In [8]:
# Feature matrix and target as (n_samples, 1) column vectors — the 2-D shape
# scikit-learn estimators expect. Double brackets keep the column dimension,
# equivalent to .values.reshape(-1, 1).
X = wcat[['Waist']].values

# Alternative feature transformations explored earlier:
#X1 = np.log(wcat[['Waist']].values)

#X2 = np.sqrt(wcat[['Waist']].values)

#X3 = np.exp(wcat[['Waist']].values)

y = wcat[['AT']].values

In [9]:
X

Out[9]: array([[ 74.75],

[ 72.6 ],

[ 81.8 ],

[ 83.95],

[ 74.65],

[ 71.85],

[ 80.9 ],

[ 83.4 ],

[ 63.5 ],

[ 73.2 ],

[ 71.9 ],

[ 75. ],

[ 73.1 ],

[ 79. ],

[ 77. ],

[ 68.85],

[ 75.95],

[ 74.15],

[ 73.8 ],

[ 75.9 ],

[ 76.85],

[ 80.9 ],

[ 79.9 ],

[ 89.2 ],

[ 82. ],

[ 92. ],

[ 86.6 ],

[ 80.5 ],

[ 86. ],

[ 82.5 ],

[ 83.5 ],

[ 88.1 ],

[ 90.8 ],

[ 89.4 ],

[102. ],

[ 94.5 ],

[ 91. ],

[103. ],

[ 80. ],

[ 79. ],

[ 83.5 ],

[ 76. ],

[ 80.5 ],

[ 86.5 ],

[ 83. ],

[107.1 ],

[ 94.3 ],

[ 94.5 ],

[ 79.7 ],

[ 79.3 ],

[ 89.8 ],

[ 83.8 ],

[ 85.2 ],

[ 75.5 ],

[ 78.4 ],

[ 78.6 ],

[ 87.8 ],

[ 86.3 ],

[ 85.5 ],

[ 83.7 ],

[ 77.6 ],

[ 84.9 ],

[ 79.8 ],

[108.3 ],

[119.6 ],

[119.9 ],

[ 96.5 ],

[105.5 ],

[105. ],

[107. ],

[107. ],

[101. ],

[ 97. ],

[100. ],

[108. ],

[100. ],

[103. ],

[104. ],

[106. ],

[109. ],

[103.5 ],

[110. ],

[110. ],

[112. ],

[108.5 ],

[104. ],

[111. ],

[108.5 ],

[121. ],

[109. ],

[ 97.5 ],

[105.5 ],

[ 98. ],

[ 94.5 ],

[ 97. ],

[105. ],

[106. ],

[ 99. ],

[ 91. ],

[102.5 ],

[106. ],

[109.1 ],

[115. ],

[101. ],

[100.1 ],

[ 93.3 ],

[101.8 ],

[107.9 ],

[108.5 ]])

In [10]:
y

Out[10]: array([[ 25.72],

[ 25.89],

[ 42.6 ],

[ 42.8 ],

[ 29.84],

[ 21.68],

[ 29.08],

[ 32.98],

[ 11.44],

[ 32.22],

[ 28.32],

[ 43.86],

[ 38.21],

[ 42.48],

[ 30.96],

[ 55.78],

[ 43.78],

[ 33.41],

[ 43.35],

[ 29.31],

[ 36.6 ],

[ 40.25],

[ 35.43],

[ 60.09],

[ 45.84],

[ 70.4 ],

[ 83.45],

[ 84.3 ],

[ 78.89],

[ 64.75],

[ 72.56],

[ 89.31],

[ 78.94],

[ 83.55],

[127. ],

[121. ],

[107. ],

[129. ],

[ 74.02],

[ 55.48],

[ 73.13],

[ 50.5 ],

[ 50.88],

[140. ],

[ 96.54],

[118. ],

[107. ],

[123. ],

[ 65.92],

[ 81.29],

[111. ],

[ 90.73],

[133. ],

[ 41.9 ],

[ 41.71],

[ 58.16],

[ 88.85],

[155. ],

[ 70.77],

[ 75.08],

[ 57.05],

[ 99.73],

[ 27.96],

[123. ],

[ 90.41],

[106. ],

[144. ],

[121. ],

[ 97.13],

[166. ],

[ 87.99],

[154. ],

[100. ],

[123. ],

[217. ],

[140. ],

[109. ],

[127. ],

[112. ],

[192. ],

[132. ],

[126. ],

[153. ],

[158. ],

[183. ],

[184. ],

[121. ],

[159. ],

[245. ],

[137. ],

[165. ],

[152. ],

[181. ],

[ 80.95],

[137. ],

[125. ],

[241. ],

[134. ],

[150. ],

[198. ],

[151. ],

[229. ],

[253. ],

[188. ],

[124. ],

[ 62.2 ],

[133. ],

[208. ],

[208. ]])

In [11]:
# Partition the data into training and test sets (80/20 split).

In [12]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split; fixed random_state makes the split reproducible.
# (Fixes the truncated original line, which was missing its closing parenthesis
# and would raise a SyntaxError.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Model Building
In [13]:
from sklearn.linear_model import LinearRegression # Step1

from sklearn import metrics

In [14]:
import sklearn

from sklearn import linear_model

In [15]:
# Fit a simple linear regression of AT on Waist.
regressor = LinearRegression()

# Learn the intercept and slope from the training split.
regressor.fit(X_train, y_train)

Out[15]: LinearRegression()

In [16]:
# Predict AT for the held-out test set with the fitted model.
y_pred = regressor.predict(X_test)

In [17]:
y_pred # Predicted AT values for the test set

Out[17]: array([[160.65263299],

[ 34.49450742],

[131.35361476],

[ 68.61924631],

[ 69.30863497],

[152.03527469],

[160.65263299],

[ 74.13435562],

[ 48.45462788],

[169.26999129],

[148.58833137],

[ 62.070054 ],

[155.82691234],

[ 54.14208436],

[141.69444472],

[ 80.3388536 ],

[ 58.96780501],

[131.35361476],

[145.14138804],

[ 56.89963901],

[131.69830909],

[ 5.54018352]])
The flatten() function returns a copy of a given array collapsed into one dimension.

Here y_test and y_pred are 2-D column arrays, so flatten() converts each into a 1-D array
before building the comparison DataFrame.

In [14]:
y_test # Actual Output variable from Y test

Out[14]: array([[183. ],

[ 28.32],

[140. ],

[ 42.6 ],

[ 45.84],

[151. ],

[208. ],

[ 32.98],

[ 43.78],

[121. ],

[ 97.13],

[ 35.43],

[118. ],

[ 57.05],

[109. ],

[133. ],

[ 42.48],

[123. ],

[184. ],

[ 41.71],

[124. ],

[ 11.44]])

In [15]:
# Side-by-side comparison of actual vs predicted AT; ravel() collapses the
# (n, 1) column vectors to 1-D, as the DataFrame constructor expects.
df = pd.DataFrame({'Actual': y_test.ravel(), 'Predicted': y_pred.ravel()})

df

Out[15]: Actual Predicted

0 183.00 160.652633

1 28.32 34.494507
Actual Predicted

2 140.00 131.353615

3 42.60 68.619246

4 45.84 69.308635

5 151.00 152.035275

6 208.00 160.652633

7 32.98 74.134356

8 43.78 48.454628

9 121.00 169.269991

10 97.13 148.588331

11 35.43 62.070054

12 118.00 155.826912

13 57.05 54.142084

14 109.00 141.694445

15 133.00 80.338854

16 42.48 58.967805

17 123.00 131.353615

18 184.00 145.141388

19 41.71 56.899639

20 124.00 131.698309

21 11.44 5.540184

In [16]:
# Grouped bar chart of the first 25 actual-vs-predicted pairs.
df1 = df.head(25)

ax = df1.plot(kind='bar', figsize=(16, 10))

# Grid lines help read bar heights off the axis.
ax.grid(which='major', linestyle='-', linewidth='0.5', color='green')

ax.grid(which='minor', linestyle=':', linewidth='0.5', color='black')

plt.show()

In [31]:
# Mean squared error on the held-out test set.
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))

Mean Squared Error: 861.0892456209028

In [32]:
# RMSE is in the same units as AT, so it is easier to interpret than MSE.
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

Root Mean Squared Error: 29.344322204148842

In [33]:
# Lower error means a better fit; with the model accepted, we can read off the regression equation.

In [34]:
# Intercept (b0) of the fitted line: AT = b0 + b1 * Waist.

print('intercept:', regressor.intercept_)

intercept: [-213.34071739]

In [37]:
# Slope coefficient (b1): change in AT per unit increase in Waist.

print('slope:', regressor.coef_)

slope: [[3.44694332]]

You might also like