# Professional Documents
# Culture Documents
# 2nd Code
# 2nd Code
### Question: create a full pipeline that includes a numerical pipeline
### (imputer, attribute adder, and standard scaler)
### such that the extra columns are "rooms_per_household",
### "population_per_household" and "bedrooms_per_room",
### plus a one-hot encoder as the categorical pipeline;
### then cast the resulting values from the full pipeline
### into a DataFrame and display that table.
# Numerical branch: median-impute missing values, run add_extra_features,
# then standardize every resulting column.
num_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("attribs_adder", FunctionTransformer(add_extra_features, validate=False)),
    ("std_scaler", StandardScaler()),
])

num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]

# Route the numeric columns through num_pipeline and one-hot encode the
# categorical column; the outputs are concatenated in this order.
full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
])
housing_prepared = full_pipeline.fit_transform(housing)

# NOTE(review): assumes add_extra_features appends exactly these three
# columns, in this order, after the original numeric columns -- confirm
# against its definition.
extra_columns = [
    "rooms_per_household",
    "population_per_household",
    "bedrooms_per_room",
]

# Take the category labels from the encoder that was actually fitted inside
# full_pipeline, so the labels always match the one-hot columns it produced.
fitted_cat_encoder = full_pipeline.named_transformers_["cat"]

# The numeric output columns follow num_attribs (what the "num" branch was
# given), not the positional layout of housing.columns.
columns = num_attribs + extra_columns + list(fitted_cat_encoder.categories_[0])

housing_pipeline = pd.DataFrame(
    housing_prepared,
    columns=columns,
    index=housing.index,
)
housing_pipeline
# In [37]:
# Fit an ordinary least-squares model on the prepared features, then
# predict on the same data it was fitted on.
lin_reg = LinearRegression().fit(housing_prepared, housing_labels)
housing_predictions = lin_reg.predict(housing_prepared)
# In [7]:
import numpy as np
# In [8]: