from google.colab import files


Saving taiwan real estate2 - taiwan real estate2.csv to taiwan real estate2 - taiwan real estate2.csv

import pandas as pd
import seaborn as sns
import numpy as np

# Load the uploaded Taiwan real-estate CSV into a DataFrame.
taiwan = pd.read_csv('taiwan_real_estate2 - taiwan_real_estate2.csv')

dist_to_mrt_m n_convenience house_age_years price_twd_msq

0 84.87882 10 30 to 45 11.467474

1 306.59470 9 15 to 30 12.768533

2 561.98450 5 0 to 15 14.311649

3 561.98450 5 0 to 15 16.580938

4 390.56840 5 0 to 15 13.040847

... ... ... ... ...

409 4082.01500 0 0 to 15 4.659607

410 90.45606 9 0 to 15 15.128593

411 390.96960 7 15 to 30 12.284418

412 104.81010 5 0 to 15 15.885023

413 90.45606 9 0 to 15 19.334342

414 rows × 4 columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 414 entries, 0 to 413
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 dist_to_mrt_m 414 non-null float64
1 n_convenience 414 non-null int64
2 house_age_years 414 non-null object
3 price_twd_msq 414 non-null float64
dtypes: float64(2), int64(1), object(1)
memory usage: 13.1+ KB

from statsmodels.formula.api import ols

# Fit a linear regression of price per square metre on the number of
# nearby convenience stores, then show the fitted coefficients.
mdl_price_vs_conv = ols(
    'price_twd_msq~n_convenience',
    data=taiwan,
).fit()

print(mdl_price_vs_conv.params)

Intercept 8.224237
n_convenience 0.798080
dtype: float64

# Regress price on the house-age category alone. The `+0` term removes
# the global intercept, so each age category gets its own coefficient.
mdl_price_vs_age = ols(
    'price_twd_msq ~ house_age_years+0',
    data=taiwan,
).fit()

house_age_years[0 to 15] 12.637471

house_age_years[15 to 30] 9.876743
house_age_years[30 to 45] 11.393264
dtype: float64

# Regress price on both explanatory variables. With `+0` there is no
# global intercept: one coefficient per age category plus a single
# slope for n_convenience.
mdl_price_vs_both = ols(
    'price_twd_msq ~n_convenience + house_age_years+0',
    data=taiwan,
).fit()

house_age_years[0 to 15] 9.413325

house_age_years[15 to 30] 7.085169
house_age_years[30 to 45] 7.510958
n_convenience 0.791457
dtype: float64


<matplotlib.axes._subplots.AxesSubplot at 0x7f62e14d6fd0>


<matplotlib.axes._subplots.AxesSubplot at 0x7f62e1144280>

# Pull the fitted parameters out of the two-variable model.
coeffs = mdl_price_vs_both.params

# Positional unpacking relies on the parameter order: the three
# per-age-category coefficients first, then the n_convenience slope.
ic_0_15, ic_15_30, ic_30_45, slope = coeffs.tolist()

<matplotlib.axes._subplots.AxesSubplot at 0x7f62e10ce220>

from itertools import product

# Build a grid holding every combination of convenience-store count and
# house-age category to feed to the model for prediction.
# NOTE(review): the grid covers counts 0..9, but the data preview shows a
# row with n_convenience = 10 -- confirm the intended upper bound.
n_convenience = np.arange(10)
house_age_years = taiwan['house_age_years'].unique()

# Cartesian product of the two value sets, one row per pair.
p = product(n_convenience, house_age_years)
explanatory_data = pd.DataFrame(
    p,
    columns=['n_convenience', 'house_age_years'],
)

n_convenience house_age_years
0 0 30 to 45
1 0 15 to 30
2 0 0 to 15
3 1 30 to 45
4 1 15 to 30
5 1 0 to 15
6 2 30 to 45
7 2 15 to 30
8 2 0 to 15
9 3 30 to 45
# Add the model's predicted price as a new column next to the
# explanatory values it was predicted from.
prediction_data = explanatory_data.assign(
    price_twd_msq=mdl_price_vs_both.predict(explanatory_data)
)

n_convenience house_age_years price_twd_msq

0 0 30 to 45 7.510958
1 0 15 to 30 7.085169
2 0 0 to 15 9.413325
3 1 30 to 45 8.302415
4 1 15 to 30 7.876627
5 1 0 to 15 10.204782
6 2 30 to 45 9.093873
7 2 15 to 30 8.668084
8 2 0 to 15 10.996239
9 3 30 to 45 9.885330

