MLflow Logger
[1]:
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn_genetic.mlflow_log import MLflowConfig
Import the data and split it into train and test sets
[2]:
# Load the digits dataset and hold out a test split for final evaluation.
data = load_digits()
X, y = data["data"], data["target"]
label_names = data["target_names"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
Define the classifier to tune and the param grid
[3]:
# Base estimator and the search space explored by the genetic algorithm.
clf = DecisionTreeClassifier()

params_grid = {
    # Fraction of total sample weight required at a leaf, searched continuously.
    "min_weight_fraction_leaf": Continuous(0, 0.5),
    # Split-quality measure, chosen from a fixed set.
    "criterion": Categorical(["gini", "entropy"]),
    "max_depth": Integer(2, 20),
    "max_leaf_nodes": Integer(2, 30),
}
Create the CV strategy
[4]:
cv = StratifiedKFold(n_splits=3, shuffle=True)
Create the MLflowConfig object and define its options
[5]:
# MLflow logging configuration for the search.
# NOTE(review): the tracking URI is hardcoded to a local server on port 5000 —
# a tracking server must already be running there for logging to succeed.
mlflow_config = MLflowConfig(
    tracking_uri="http://localhost:5000",
    experiment="Digits-sklearn-genetic-opt",
    run_name="Decision Tree",
    # Log every fitted model as an MLflow artifact, not just the metrics.
    save_models=True,
    tags={"team": "sklearn-genetic-opt", "version": "0.5.0"},
)
INFO: 'Digits-sklearn-genetic-opt' does not exist. Creating a new experiment
Define the GASearchCV options
[6]:
# Genetic-algorithm hyperparameter search over params_grid, logged to MLflow.
evolved_estimator = GASearchCV(
    clf,
    param_grid=params_grid,
    cv=cv,
    scoring="accuracy",
    # Evolution settings: a small population evolved for 10 generations
    # with the mu-plus-lambda selection strategy.
    population_size=4,
    generations=10,
    crossover_probability=0.9,
    mutation_probability=0.05,
    algorithm="eaMuPlusLambda",
    n_jobs=-1,
    verbose=True,
    log_config=mlflow_config,
)
Fit the model and see some results
[7]:
# Run the evolutionary search on the training split.
evolved_estimator.fit(X_train, y_train)

# Evaluate the tuned estimator on the held-out test data.
y_predict_ga = evolved_estimator.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict_ga)
gen nevals fitness fitness_std fitness_max fitness_min
0 4 0.261638 0.046403 0.310889 0.18537
1 8 0.32419 0.0275257 0.344971 0.276808
2 8 0.342893 0.0133196 0.353283 0.320033
3 8 0.35079 0.00249377 0.353283 0.348296
4 8 0.341854 0.013305 0.353283 0.319202
5 8 0.335619 0.0104549 0.348296 0.319202
6 7 0.339983 0.011291 0.349958 0.322527
7 7 0.354115 0.00275696 0.356608 0.349958
8 8 0.352452 0.0054509 0.356608 0.343308
9 7 0.351621 0.00498753 0.356608 0.343308
10 8 0.349543 0.00552957 0.356608 0.34414
[8]:
# Report the winning hyperparameters and the held-out accuracy.
print(evolved_estimator.best_params_)
print("accuracy score: ", format(accuracy, ".2f"))
{'min_weight_fraction_leaf': 0.22010341437935194, 'criterion': 'gini', 'max_depth': 18, 'max_leaf_nodes': 12}
accuracy score: 0.32