Pipeline Prediction
[1]:
import matplotlib.pyplot as plt
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.plots import plot_fitness_evolution, plot_search_space
from sklearn_genetic.callbacks import LogbookSaver, ProgressBar
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
Import the data and split it into train and test sets
[2]:
data = load_diabetes()
y = data["target"]
X = data["data"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
Define the regressor to tune and wrap it in a pipeline with a scaler
[3]:
rg = GradientBoostingRegressor()
pipe = Pipeline([("scaler", StandardScaler()), ("rg", rg)])
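The step name "rg" becomes the prefix of every tunable parameter (rg__&lt;param&gt;). If in doubt, the valid names can be listed from the pipeline itself with standard scikit-learn introspection:
[ ]:
# List the tunable parameters exposed by the "rg" step of the pipeline.
print([p for p in pipe.get_params() if p.startswith("rg__")])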
Create the CV strategy and define the param grid
[4]:
cv = KFold(n_splits=5, shuffle=True)  # pass random_state for reproducible folds
# Keys use the pipeline step name as prefix: rg__<parameter>
param_grid = {
    "rg__n_estimators": Integer(50, 600),
    "rg__loss": Categorical(["absolute_error", "squared_error"]),
    "rg__max_depth": Integer(2, 10),
    "rg__learning_rate": Continuous(0.001, 0.01, distribution="log-uniform"),
}
Define the GASearchCV options. Since the scoring is neg_mean_squared_error, maximizing the fitness (criteria="max") minimizes the MSE
[5]:
evolved_estimator = GASearchCV(
    estimator=pipe,
    cv=cv,
    scoring="neg_mean_squared_error",
    population_size=8,
    generations=15,
    tournament_size=3,
    elitism=True,
    keep_top_k=4,  # keep the 4 best solutions found in the hall of fame
    crossover_probability=0.9,
    mutation_probability=0.08,
    param_grid=param_grid,
    criteria="max",  # maximize the (negative) MSE score
    algorithm="eaMuCommaLambda",  # evolutionary algorithm variant from DEAP
    n_jobs=-1,
)
Optionally, create some callbacks. LogbookSaver stores the evolution logbook on disk; other callbacks such as the ProgressBar can be added to the same list, as shown below
[6]:
callbacks = [LogbookSaver(checkpoint_path="./logbook.pkl")]
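The ProgressBar imported above can go in the same list to display a per-generation progress bar during fitting (a sketch, assuming a recent sklearn-genetic-opt version):
[ ]:
# Optional: show a progress bar while the search runs.
callbacks = [ProgressBar(), LogbookSaver(checkpoint_path="./logbook.pkl")]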
Fit the model and see some results
[7]:
evolved_estimator.fit(X_train, y_train, callbacks=callbacks)
y_predict_ga = evolved_estimator.predict(X_test)
mse = mean_squared_error(y_test, y_predict_ga)
gen   nevals   fitness    fitness_std   fitness_max   fitness_min
0     8        -4238.26   565.058       -3756.78      -5523.09
1     16       -4051.7    352.254       -3756.29      -4631.79
2     16       -3665.2    70.1723       -3572.13      -3749.68
3     16       -3587.2    49.7041       -3507.56      -3644.64
4     16       -3513.45   102.322       -3420.14      -3698.88
5     16       -3678.71   164.066       -3521.83      -4011.67
6     15       -3547.3    35.2384       -3500.55      -3604.69
7     16       -3468.1    51.9507       -3379.15      -3542.45
8     16       -3472.2    51.3204       -3410.31      -3581.8
9     16       -3434.18   43.9115       -3371.14      -3514.54
10    16       -3410.84   94.4055       -3325.11      -3560.57
11    16       -3493.9    93.8796       -3393.02      -3662.27
12    16       -3506.98   32.4934       -3478.73      -3588.94
13    16       -3515.91   133.628       -3300.73      -3696.27
14    16       -3481.82   58.9704       -3399.49      -3616.37
15    16       -3468.11   10.7602       -3450.02      -3476.26
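Each generation evaluates roughly twice the population size (the nevals column), and the mean fitness improves from about -4238 to about -3468. As an optional sanity check, not part of the original run, the tuned result can be compared against the same pipeline with default hyperparameters:
[ ]:
# Optional baseline: the untuned pipeline, for comparison with the GA result.
baseline = Pipeline([("scaler", StandardScaler()), ("rg", GradientBoostingRegressor())])
baseline.fit(X_train, y_train)
print("baseline mse: {:.2f}".format(mean_squared_error(y_test, baseline.predict(X_test))))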
[8]:
print(evolved_estimator.best_params_)
print("mse: ", "{:.2f}".format(mse))
print("Best k solutions: ", evolved_estimator.hof)
{'rg__n_estimators': 561, 'rg__loss': 'squared_error', 'rg__max_depth': 3, 'rg__learning_rate': 0.006219524925263899}
mse: 3169.70
Best k solutions: {0: {'rg__n_estimators': 561, 'rg__loss': 'squared_error', 'rg__max_depth': 3, 'rg__learning_rate': 0.006219524925263899}, 1: {'rg__n_estimators': 504, 'rg__loss': 'squared_error', 'rg__max_depth': 3, 'rg__learning_rate': 0.006219524925263899}, 2: {'rg__n_estimators': 517, 'rg__loss': 'squared_error', 'rg__max_depth': 3, 'rg__learning_rate': 0.006219524925263899}, 3: {'rg__n_estimators': 561, 'rg__loss': 'squared_error', 'rg__max_depth': 3, 'rg__learning_rate': 0.004507098298712037}}
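The per-generation statistics printed during fitting are also kept on the fitted estimator as evolved_estimator.history (a dict of lists); pandas, if available, gives a convenient tabular view (a sketch, assuming pandas is installed). Afterwards, plot the fitness evolution and the sampled search space:
[ ]:
import pandas as pd  # assumed available; used only for display

# Same statistics as the training log: gen, fitness, fitness_std, etc.
print(pd.DataFrame(evolved_estimator.history).tail())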
[9]:
plot = plot_fitness_evolution(evolved_estimator, metric="fitness")
plt.show()
[10]:
plot_search_space(evolved_estimator)
plt.show()
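To keep a figure on disk, the usual matplotlib call works; note that savefig must run before plt.show(), since the figure is released once the window is closed (a sketch):
[ ]:
# Sketch: persist the search-space plot instead of only displaying it.
plot_search_space(evolved_estimator)
plt.savefig("search_space.png", dpi=150, bbox_inches="tight")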