# Professional Documents
# Culture Documents
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
# Load the housing dataset; the relative path resolves against the current
# working directory.
data = pd.read_csv('Housing.csv')

# Exploratory look at the frame. `data.info()` prints its summary; the other
# bare expressions only show output in a notebook/REPL.
data.head()
data.tail(5)
data.info()
data.columns
data.isnull().sum()

# `categorical_col` was referenced here without being defined anywhere above,
# which raises NameError. Derive it from the data: keep the columns whose
# non-null values are all 'yes'/'no', i.e. the columns a
# {'yes': 1, 'no': 0} mapping can encode without producing NaN.
# NOTE(review): confirm this matches the column list originally intended.
categorical_col = []
for column in data.columns:
    observed = set(data[column].dropna().unique())
    # Skip all-null columns (empty set) and anything with other values.
    if observed and observed <= {'yes', 'no'}:
        categorical_col.append(column)
data[categorical_col]
def binary_map(x):
    """Map 'yes'/'no' values to 1/0.

    Args:
        x: Object exposing ``.map`` (used below on DataFrame columns via
            ``DataFrame.apply``).

    Returns:
        The result of ``x.map({'yes': 1, 'no': 0})``. Values other than
        'yes'/'no' have no entry in the mapping; pandas maps them to NaN.
    """
    return x.map({'yes': 1, 'no': 0})
# Replace each selected column with its 1/0 encoding (binary_map is applied
# column by column), then display the encoded columns.
data[categorical_col] = data[categorical_col].apply(binary_map, axis=0)
data[categorical_col]

# One indicator column per distinct 'furnishingstatus' value.
# NOTE(review): `dummy_col` is not used again in the visible part of this
# file — confirm whether it should be joined back onto `data`.
furnishing = data['furnishingstatus']
dummy_col = pd.get_dummies(furnishing)
dummy_col.head(5)
# Fit a min-max scaler on the selected training columns, then write the
# scaled values back into the training frame.
# NOTE(review): `df_train` and `col_to_scale` are not defined in the visible
# part of this file — presumably a train/test split and a list of column
# names; confirm against the full source.
scaler = MinMaxScaler().fit(df_train[col_to_scale])
df_train[col_to_scale] = scaler.transform(df_train[col_to_scale])

# pop() removes the 'price' target from df_train, so x_train is the same
# frame minus the target column.
y_train = df_train.pop('price')
x_train = df_train

# Fit the linear model and print its learned coefficients.
linear_regression = LinearRegression().fit(x_train, y_train)
coefficients = linear_regression.coef_
print(coefficients)
# Scale the test columns with the scaler already fitted on the training data.
# Use transform(), not fit_transform(): re-fitting on the test set would put
# the test features on a different scale than the one the model was trained
# on and would leak test-set statistics into preprocessing.
# NOTE(review): `df_test` and `col_to_scale` are not defined in the visible
# part of this file — confirm against the full source.
df_test[col_to_scale] = scaler.transform(df_test[col_to_scale])

# pop() removes the 'price' target from df_test, so x_test is the same frame
# minus the target column.
y_test = df_test.pop('price')
x_test = df_test

# Score the linear model on the held-out data.
prediction = linear_regression.predict(x_test)
r2 = r2_score(y_test, prediction)

y_test.shape
# Target reshaped to a single-column 2-D array.
# NOTE(review): not used again in the visible part of this file.
y_test_matrix = y_test.values.reshape(-1, 1)
# Decision-tree regressor trained on the same training features/target as the
# linear model; random_state is fixed so repeated runs build the same tree.
# DecisionTreeRegressor comes from sklearn.tree (imported at the top of the file).
dt_model = DecisionTreeRegressor(random_state=42).fit(x_train, y_train)

# Predictions on the held-out features, used by the scatter plot further down.
dt_y_pred = dt_model.predict(x_test)
# Bar chart of one R-squared score per algorithm.
# NOTE(review): `algorithms` and `r2_scores` are not defined in the visible
# part of this file; three bar colours are supplied — confirm both sequences
# and their lengths against the full source.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(algorithms, r2_scores, color=['blue', 'green', 'red'])
ax.set_title('R-squared Comparison')
ax.set_xlabel('Algorithm')
ax.set_ylabel('R-squared')
# The y-axis is pinned to [0, 1]; any score outside that range is cut off.
ax.set_ylim(0, 1)
plt.show()
# Scatter plot of the actual test targets against `knn_y_pred`.
# NOTE(review): `knn_y_pred` is not defined in the visible part of this file
# (presumably predictions from a KNN model fitted elsewhere) — confirm.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, knn_y_pred, color='blue')
plt.title('Actual vs. Predicted Median House Value')
plt.xlabel('Actual Median House Value')
plt.ylabel('Predicted Median House Value')
# The call must be closed here; an unclosed `plt.show(` swallows the
# following statements and makes the whole file a SyntaxError.
plt.show()
# Scatter plot of the actual test targets against the decision-tree
# predictions (`dt_y_pred`).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, dt_y_pred, color='blue')
ax.set_title('Actual vs. Predicted Median House Value')
ax.set_xlabel('Actual Median House Value')
ax.set_ylabel('Predicted Median House Value')
plt.show()