
from catboost import CatBoostRegressor
from sklearn.ensemble import BaggingRegressor, StackingRegressor
from sklearn.preprocessing import MinMaxScaler
from skopt import BayesSearchCV
from skopt.space import Categorical, Integer, Real


def run_model(train_df, test_df, features, target):
    # `features` is the list of feature column names; `target` is the
    # training target (e.g. a column of train_df). Passing them in makes
    # the function self-contained instead of relying on globals.

    # Scale the features, fitting the scaler on the training data only
    # so that no test-set statistics leak into training
    scaler = MinMaxScaler()
    train_df[features] = scaler.fit_transform(train_df[features])
    test_df[features] = scaler.transform(test_df[features])
    # Define the search space for the CatBoostRegressor hyperparameters
    search_space = {
        'learning_rate': Real(0.001, 0.3, 'log-uniform'),
        'depth': Integer(3, 10),
        'l2_leaf_reg': Real(1e-6, 1e-2, 'log-uniform'),
        'random_strength': Real(1e-1, 1e2, 'log-uniform'),
        'grow_policy': Categorical(['Lossguide', 'Depthwise']),
        'max_bin': Integer(32, 512),
        'min_data_in_leaf': Integer(1, 50),
        'bootstrap_type': Categorical(['Bayesian', 'Bernoulli', 'MVS']),
    }

    # Run several Bayesian hyperparameter searches, each with a different
    # seed so the searches (and hence the tuned models) can actually differ
    models = []
    for i in range(5):
        opt = BayesSearchCV(
            estimator=CatBoostRegressor(verbose=False),
            search_spaces=search_space,
            scoring='neg_mean_squared_error',
            cv=5,
            n_jobs=-1,
            n_iter=200,
            verbose=1,
            random_state=42 + i,  # a fixed shared seed would yield 5 identical models
        )
        opt.fit(train_df[features], target)
        best_params = opt.best_params_
        # Refit a fresh model on the full training set with the best parameters
        model = CatBoostRegressor(verbose=False, **best_params)
        model.fit(train_df[features], target)
        models.append(model)

    # Bag the best configuration: BaggingRegressor retrains clones of it on
    # bootstrap samples (best_params here comes from the last search above)
    bagger = BaggingRegressor(
        estimator=CatBoostRegressor(verbose=False, **best_params),  # named base_estimator in scikit-learn < 1.2
        n_estimators=5,
        random_state=42,
    )
    bagger.fit(train_df[features], target)

    # Combine the five tuned models with a StackingRegressor, using another
    # CatBoostRegressor as the meta-learner
    stacker = StackingRegressor(
        estimators=[('model%d' % i, model) for i, model in enumerate(models)],
        final_estimator=CatBoostRegressor(verbose=False, **best_params),
        cv=5,
        passthrough=True,  # also feed the raw features to the meta-learner
    )
    stacker.fit(train_df[features], target)

    # Use the trained bagger and stacker to make predictions on the test set,
    # then average the two
    test_df['pred1'] = bagger.predict(test_df[features])
    test_df['pred2'] = stacker.predict(test_df[features])
    test_df['pred'] = (test_df['pred1'] + test_df['pred2']) / 2

    return test_df['pred']
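
For reference, a minimal sketch of how run_model might be called. The toy data and the column names f1, f2, and y are illustrative assumptions, not part of the original code. Note that each of the five searches fits 200 x 5 = 1,000 cross-validated models, so lower n_iter for a quick smoke test.

import numpy as np
import pandas as pd

# Toy data with hypothetical feature columns 'f1', 'f2' and target 'y'
rng = np.random.default_rng(0)
train_df = pd.DataFrame({'f1': rng.normal(size=200), 'f2': rng.normal(size=200)})
train_df['y'] = 3 * train_df['f1'] - train_df['f2'] + rng.normal(size=200)
test_df = pd.DataFrame({'f1': rng.normal(size=50), 'f2': rng.normal(size=50)})

features = ['f1', 'f2']
preds = run_model(train_df, test_df, features, train_df['y'])
print(preds.head())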
