Professional Documents
Culture Documents
import pandas as pd
import numpy as np

# Read the training data from the CSV file into a DataFrame.
df = pd.read_csv('train (2).csv')
# Bare expression: in a notebook cell this displays the DataFrame.
df
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities ... PoolArea PoolQC Fence MiscFeatur
0 1 60 RL 65.0 8450 Pave NaN Reg Lvl AllPub ... 0 NaN NaN
1 2 20 RL 80.0 9600 Pave NaN Reg Lvl AllPub ... 0 NaN NaN
2 3 60 RL 68.0 11250 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN
3 4 70 RL 60.0 9550 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN
4 5 60 RL 84.0 14260 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 1456 60 RL 62.0 7917 Pave NaN Reg Lvl AllPub ... 0 NaN NaN
1456 1457 20 RL 85.0 13175 Pave NaN Reg Lvl AllPub ... 0 NaN MnPrv
1457 1458 70 RL 66.0 9042 Pave NaN Reg Lvl AllPub ... 0 NaN GdPrv She
1458 1459 20 RL 68.0 9717 Pave NaN Reg Lvl AllPub ... 0 NaN NaN
1459 1460 20 RL 75.0 9937 Pave NaN Reg Lvl AllPub ... 0 NaN NaN
# Predictor columns used by the model.
variables = df[
    ['MSSubClass', 'OverallQual', 'LotArea', 'OverallCond']
]
# Double brackets keep the target as a one-column DataFrame, not a Series.
outcome = df[['SalePrice']]

# Split predictors and target into train and test parts in one call.
# NOTE(review): train_test_split is not imported in the visible source --
# presumably sklearn.model_selection.train_test_split; confirm.
(
    x_train,
    x_test,
    y_train,
    y_test,
) = train_test_split(variables, outcome)
('variable train set:', (1095, 4)) ('variable test set:', (365, 4)) ('outcome train set:', (1095, 1)) ('outcome test set:', (365, 1))
# Create the regression estimator.
# NOTE(review): LinearRegression is not imported in the visible source --
# presumably sklearn.linear_model.LinearRegression; confirm.
my_model = LinearRegression()
# Fit on the training split; kept as a bare expression so a notebook cell
# still displays the value returned by fit().
my_model.fit(
    x_train,
    y_train,
)
LinearRegression()
# Predict the target for the held-out test rows.
y_pred = my_model.predict(x_test)
# Score the predictions. Arguments follow the documented (y_true, y_pred)
# order; squared error is symmetric, so the reported value is unchanged.
mean_squared_error(y_test, y_pred)
2930224777.5694823
# Predict for one hand-written row. The values are labelled with the same
# columns the model was fitted on, so the feature order is explicit and the
# input carries feature names like the training data did.
my_model.predict(
    pd.DataFrame([[60, 7, 10382, 6]], columns=variables.columns)
)
array([[219187.01024042]])
array([[219187.01024042]])
# Predict for a second hand-written row, again labelled with the training
# columns so the feature order is explicit.
my_model.predict(
    pd.DataFrame([[20, 5, 12968, 6]], columns=variables.columns)
)
array([[142030.94071849]])
# Predict for every row of the data set. `variables` already holds exactly
# the predictor columns the model was fitted on, so it is reused here rather
# than repeating the column list.
d = my_model.predict(variables)
print(d)
[[216458.93818563]
[182665.67710705]
[219913.85824051]
...
[216601.77099769]
[138019.53175477]
[138290.98975908]]
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js