You are on page 1 of 6

2/10/23, 9:34 PM housing_21070126112 - Colaboratory

Importing necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

Exploring Dataset

# Load the housing dataset from the Colab session's /content directory and display it.
df = pd.read_csv('/content/Housing.csv')
df

price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating aircondit

13300000 7420 4 2 3 yes no no no

12250000 8960 4 4 4 yes no no no

12250000 9960 3 2 2 yes no yes no

12215000 7500 4 2 2 yes no yes no

11410000 7420 4 1 2 yes yes yes no

... ... ... ... ... ... ... ... ...

1820000 3000 2 1 1 yes no yes no

1767150 2400 3 1 1 no no no no

1750000 3620 2 1 1 yes no no no

1750000 2910 3 1 1 no no no no

1750000 3850 3 1 2 yes no no no

545 rows × 13 columns

# Count how often each distinct full row (all columns taken together) occurs.
df.value_counts()

price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
furnishingstatus
1750000 2910 3 1 1 0 0 0 0 0 0 0
0 1
5229000 7085 3 1 1 1 1 1 0 0 2 1
1 1
5110000 11410 2 1 2 1 0 0 0 0 0 1
0 1
5145000 3410 3 1 2 0 0 0 0 1 0 0
1 1
7980 3 1 1 1 0 0 0 0 1 1
1 1

..
3675000 3630 2 1 1 1 0 0 0 1 0 0
2 1
3600 2 1 1 1 0 0 0 0 0 0
0 1
3640000 5960 3 1 2 1 1 1 0 0 0 0
2 1
4280 2 1 1 1 0 0 0 1 2 0
1 1
13300000 7420 4 2 3 1 0 0 0 1 2 1
0 1
Length: 545, dtype: int64

# Dimensions of the DataFrame as (rows, columns).
df.shape

(545, 13)

# Summarise each column's name, non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):

https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 1/6
2/10/23, 9:34 PM housing_21070126112 - Colaboratory
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 545 non-null int64
1 area 545 non-null int64
2 bedrooms 545 non-null int64
3 bathrooms 545 non-null int64
4 stories 545 non-null int64
5 mainroad 545 non-null object
6 guestroom 545 non-null object
7 basement 545 non-null object
8 hotwaterheating 545 non-null object
9 airconditioning 545 non-null object
10 parking 545 non-null int64
11 prefarea 545 non-null object
12 furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB

# Count missing values per column.
df.isnull().sum()

price 0
area 0
bedrooms 0
bathrooms 0
stories 0
mainroad 0
guestroom 0
basement 0
hotwaterheating 0
airconditioning 0
parking 0
prefarea 0
furnishingstatus 0
dtype: int64

Encoding

# Replace each listed object-dtype column with integer label codes.
# A single encoder instance is reused and refit for every column.
label_encoder = LabelEncoder()

columns_to_encode = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]
for column in columns_to_encode:
    df[column] = label_encoder.fit_transform(df[column])
df

price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditi

13300000 7420 4 2 3 1 0 0 0

12250000 8960 4 4 4 1 0 0 0

12250000 9960 3 2 2 1 0 1 0

12215000 7500 4 2 2 1 0 1 0

11410000 7420 4 1 2 1 1 1 0

... ... ... ... ... ... ... ... ...

1820000 3000 2 1 1 1 0 1 0

1767150 2400 3 1 1 0 0 0 0

1750000 3620 2 1 1 1 0 0 0

1750000 2910 3 1 1 0 0 0 0

1750000 3850 3 1 2 1 0 0 0

545 rows × 13 columns

# Replace furnishingstatus with integer label codes, then display the frame.
df['furnishingstatus']= label_encoder.fit_transform(df['furnishingstatus'])
df

https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 2/6
2/10/23, 9:34 PM housing_21070126112 - Colaboratory

price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditi

13300000 7420 4 2 3 1 0 0 0

12250000 8960 4 4 4 1 0 0 0

12250000 9960 3 2 2 1 0 1 0

12215000 7500 4 2 2 1 0 1 0

11410000 7420 4 1 2 1 1 1 0

... ... ... ... ... ... ... ... ...

1820000 3000 2 1 1 1 0 1 0

1767150 2400 3 1 1 0 0 0 0

1750000 3620 2 1 1 1 0 0 0

1750000 2910 3 1 1 0 0 0 0

1750000 3850 3 1 2 1 0 0 0

545 rows × 13 columns

EDA-visualisation

# Heatmap of the pairwise column correlations; annot=True writes each
# coefficient into its cell.
fig, ax = plt.subplots(figsize=(14, 14))
sns.heatmap(df.corr(), annot=True)

<matplotlib.axes._subplots.AxesSubplot at 0x7fa992ac0c10>

# Joint plot of area against price, coloured by furnishingstatus.
sns.jointplot(data=df,x="area",y="price",hue="furnishingstatus",palette ='dark:salmon_r')

https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 3/6
2/10/23, 9:34 PM housing_21070126112 - Colaboratory

<seaborn.axisgrid.JointGrid at 0x7fa98ec74190>

# Scatter of area against price; colour encodes parking, marker style encodes
# guestroom. x and y are passed by keyword because seaborn deprecated (and later
# removed) positional x/y arguments.
sns.scatterplot(
    data=df,
    x="area",
    y="price",
    hue="parking",
    style="guestroom",
    palette="rainbow",
)
plt.show()

/usr/local/lib/python3.8/dist-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following var


warnings.warn(

# Line plot of area against parking, split by furnishingstatus.
sns.lineplot(data=df,x="parking",y="area",hue="furnishingstatus")

<matplotlib.axes._subplots.AxesSubplot at 0x7fa98f10e5e0>

# Show the column labels.
df.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',


'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
'parking', 'prefarea', 'furnishingstatus'],
dtype='object')

# Feature matrix: every column except 'price' (the target) and 'furnishingstatus'.
X = df[[
    'area',
    'bedrooms',
    'bathrooms',
    'stories',
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'parking',
    'prefarea',
]]
# Target vector: the price column.
y = df['price']

Building Predict Model

# Hold out 33% of the rows as a test set; random_state is fixed so the split is repeatable.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size =0.33,random_state =42)

# Fit a linear regression model on the training split.
lr = LinearRegression()
lr.fit(X_train,y_train)

LinearRegression()

# Coefficient of determination (R^2) of the model on the held-out test split.
lr.score(X_test, y_test)


https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 4/6
2/10/23, 9:34 PM housing_21070126112 - Colaboratory
# Coefficient of determination (R^2) of the model on the held-out test split.
lr.score(X_test,y_test)

0.6490698391713673

# Report the fitted intercept, then pair each feature name with its coefficient.
print("Coefficients: ")
print("Intercept: ", lr.intercept_)
# Iterating over the DataFrame X yields its column labels, so zip lines them up with coef_.
list(zip(X, lr.coef_))

Coefficients:
Intercept: -230797.8185174279
[('area', 252.74190153064856),
('bedrooms', 99525.89859410445),
('bathrooms', 1132688.6894603525),
('stories', 414361.566065939),
('mainroad', 458206.3753303046),
('guestroom', 339809.57305214717),
('basement', 518163.65271511185),
('hotwaterheating', 645620.7904595259),
('airconditioning', 698145.6458023346),
('parking', 308089.73272037157),
('prefarea', 533164.4687235687)]

# Predict prices for the test split and print the raw prediction array.
y_pred_lr = lr.predict(X_test)
print("Prediction for test set:{}".format(y_pred_lr))

Prediction for test set:[5578836.89137788 6908580.80418883 2994587.8917112 4449253.227643


3073894.43818285 3771462.08640744 5540722.62040569 6371932.44537064
2982087.30092889 2297233.26692393 9697039.57893372 2967797.25014895
3153649.75369342 3330855.24631724 4008947.07597921 5551831.75338622
2893355.97865461 5010084.86189402 4792191.5835772 3489962.01871127
5816195.66652906 5887283.32636132 2895765.80821271 5147044.72721509
5600590.04662556 7804903.68977246 3612456.31102434 5236285.69400698
8189244.40707374 3146097.88018526 6380528.13701138 3584042.53968342
6588854.95775209 4159285.75281266 3793766.55259037 5788934.88921146
5135789.63990682 4606952.51816643 3192517.59592017 4424004.3418078
4706096.3477768 3589109.35788597 6888361.45206638 3857003.79333652
3679518.44485926 4467132.07855795 6663406.95076588 4067736.12860411
4008082.9168491 3866079.43227518 7652380.72096433 3039826.04683969
4245404.91714089 4382839.55426248 4025068.1470682 2350309.06624536
7781285.2498793 3093978.63786098 4217109.23765421 2933929.83534384
5044261.66401105 3701484.3480854 5223583.2993482 4443419.73963684
4991438.60055601 4699507.87984293 7068708.66872651 3763787.43149448
6462948.27593197 6219128.15258008 4933522.9291185 5042362.60271185
4404632.3151052 7917078.66361503 3658105.25175834 5441913.95322197
4021030.64218442 4421772.97992335 4595723.29582808 4185672.32542952
7588902.30200939 4134764.49787321 6603122.81593197 5638876.24490147
2917059.30678952 7425180.14471842 2780713.8324073 3667447.36718853
7856808.71763188 8488526.24255365 3227880.47117632 5971817.41973317
3350953.97286941 3589187.22887896 7736532.23087004 4560024.22729941
5228436.48008617 6612858.57045975 5133094.30011587 6024675.7677169
3922461.30058747 6436834.19498398 3652403.08779939 6104987.38087438
5237718.50012127 4188308.70777899 7071022.58208035 6482583.12440652
6414958.38379821 6971496.68348625 7166045.38528107 4770462.46972652
4198382.55862109 3435180.20492735 4271798.65357108 3460454.39508042
2897748.02496075 2418549.37965864 3594542.75533599 3981530.87480738
3919623.25131337 4766499.01699714 3907850.31536695 3844243.5259876
6339504.4786813 2221410.69646473 5876093.28881428 3491186.00975471
7924606.18591173 6822873.82662272 3245623.77877937 4914163.17401507
7315892.85277877 6406040.5359875 3409906.01477429 2434021.04626098
3084004.11424408 5583137.15727179 2940768.55882993 3910214.03629887
2497502.01812246 3801698.71672408 5269017.02645636 4285145.22048054
4328809.744646 2508579.90721252 6782089.61872081 5280090.28241009
6837709.1804512 7328931.58282629 5433170.91082884 4851713.03307735
3748459.21931926 3995310.66932824 3216823.24890414 7295345.27126985
8078007.50398623 3828756.30912608 7400902.52894133 4003936.34820044
3912836.68405835 7101236.25560825 5378254.653404 5158774.16967538
5550176.10996035 3524958.37507275 4369571.05932163 6399639.62827692
6171921.18772574 6122631.64291549 3093277.85887843 4653453.13382059
8702222.32844801 5223122.6848213 7158540.60836056 7911441.17003324
3459632.99052759 6781577.88562695 5377642.29086021 4577112.44806688]

# Side-by-side table of actual and predicted prices for the test rows.
lr_diff = pd.DataFrame({'Actual value':y_test,'Predicted value':y_pred_lr})
lr_diff

https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 5/6
2/10/23, 9:34 PM housing_21070126112 - Colaboratory

Actual value Predicted value

316 4060000 5.578837e+06

77 6650000 6.908581e+06

360 3710000 2.994588e+06

90 6440000 4.449253e+06

493 2800000 3.073894e+06

... ... ...

17 8960000 7.911441e+06

402 3500000 3.459633e+06

66 6930000 6.781578e+06

238 4613000 5.377642e+06

272 4340000 4.577112e+06

180 rows × 2 columns

meanAbErr = metrics.mean_absolute_error(y_test, y_pred_lr)
meanSqErr = metrics.mean_squared_error(y_test, y_pred_lr)
rootMeanSqErr = np.sqrt(metrics.mean_squared_error(y_test, y_pred_lr))
print("R squared {:.2f}".format(lr.score(X,y)*100))
print("Mean Absolute Error: ",meanAbErr)
print("Mean Square Error: ",meanSqErr)
from sklearn import metrics
print("Root Mean Square Error",rootMeanSqErr)

R squared 66.73
Mean Absolute Error: 902928.2604905332
Mean Square Error: 1503987779975.0256
Root Mean Square Error 1226371.795164511

Colab paid products - Cancel contracts here


check 0s completed at 9:26 PM

https://colab.research.google.com/drive/1_eJyUtCOeeskUN6uNmHQYel6QJP98u0l#scrollTo=eLxfZUqG37F9&printMode=true 6/6

You might also like