# You are on page 1 of 2

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt #Data visualisation libraries

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

# Path of the CSV file to load (relative to the current working directory).
direktori = "realestate.csv"

# Column labels assigned to the CSV: six feature columns followed by the
# target column.
names = [
    'X1 transaction date',
    'X2 house age',
    'X3 dist to MRT',
    'X4 num stores',
    'X5 latitude',
    'X6 longitude',
    'Y price',
]

# Load the data with pandas, labelling the columns with `names`.
# NOTE(review): `names` is passed without `header=`, so pandas does not treat
# the first line as a header. If realestate.csv starts with a header row, that
# row would be read as data -- confirm against the actual file.
estate = pd.read_csv(direktori, names=names)

# Separate the feature columns (everything except the last label) from the
# target column.
x = estate[names[:-1]]
y = estate['Y price']

# Hold out 25% of the rows for testing; the rest is used for training.
# NOTE(review): no random_state is passed, so the split presumably differs
# from run to run -- confirm whether reproducibility is wanted.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Create the linear regression model and fit it on the training split.
lm = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows, then plot the actual values
# (horizontal axis) against the predicted values (vertical axis).
predictions = lm.predict(X_test)
plt.scatter(x=y_test, y=predictions)
plt.show()

# Number of rows in the test split (not the training split).
m = X_test.shape[0]

# Mean squared error: the sum of squared differences between the predictions
# and y_test, divided by the number of test rows. (Previously this variable
# held the un-averaged sum despite its name.)
mse = np.sum((predictions - y_test) ** 2) / m

# Root mean squared error: the square root of the mean squared error.
rmse = np.sqrt(mse)

# You might also like