MLflow Logger
[1]:
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn_genetic.mlflow_log import MLflowConfig
Import the data and split it into train and test sets
[2]:
# Load the digits dataset and hold out a test split for final evaluation.
data = load_digits()
X, y = data["data"], data["target"]
label_names = data["target_names"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)
Define the classifier to tune and the param grid
[3]:
# Base estimator and the search space explored by the genetic algorithm.
clf = DecisionTreeClassifier()

params_grid = {
    # Fraction of total sample weight required at a leaf, searched continuously.
    "min_weight_fraction_leaf": Continuous(0, 0.5),
    # Split-quality measure, chosen from a fixed set.
    "criterion": Categorical(["gini", "entropy"]),
    "max_depth": Integer(2, 20),
    "max_leaf_nodes": Integer(2, 30),
}
Create the CV strategy
[4]:
cv = StratifiedKFold(n_splits=3, shuffle=True)
Create the MLflowConfig object and define its options
[5]:
# MLflow logging configuration for the search.
# NOTE(review): the tracking URI is hardcoded to a local server on port 5000 —
# a tracking server must already be running there for logging to succeed.
mlflow_config = MLflowConfig(
    tracking_uri="http://localhost:5000",
    experiment="Digits-sklearn-genetic-opt",
    run_name="Decision Tree",
    # Log every fitted model as an MLflow artifact, not just the metrics.
    save_models=True,
    tags={"team": "sklearn-genetic-opt", "version": "0.5.0"},
)
INFO: 'Digits-sklearn-genetic-opt' does not exist. Creating a new experiment
Define the GASearchCV options
[6]:
# Genetic-algorithm hyperparameter search over params_grid, logged to MLflow.
evolved_estimator = GASearchCV(
    clf,
    param_grid=params_grid,
    cv=cv,
    scoring="accuracy",
    # Evolution settings: a small population evolved for 10 generations
    # with the mu-plus-lambda selection strategy.
    population_size=4,
    generations=10,
    crossover_probability=0.9,
    mutation_probability=0.05,
    algorithm="eaMuPlusLambda",
    n_jobs=-1,
    verbose=True,
    log_config=mlflow_config,
)
Fit the model and see some results
[7]:
# Run the evolutionary search on the training split.
evolved_estimator.fit(X_train, y_train)

# Evaluate the tuned estimator on the held-out test data.
y_predict_ga = evolved_estimator.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict_ga)
gen nevals fitness fitness_std fitness_max fitness_min
0 4 0.261638 0.046403 0.310889 0.18537
1 8 0.32419 0.0275257 0.344971 0.276808
2 8 0.342893 0.0133196 0.353283 0.320033
3 8 0.35079 0.00249377 0.353283 0.348296
4 8 0.341854 0.013305 0.353283 0.319202
5 8 0.335619 0.0104549 0.348296 0.319202
6 7 0.339983 0.011291 0.349958 0.322527
7 7 0.354115 0.00275696 0.356608 0.349958
8 8 0.352452 0.0054509 0.356608 0.343308
9 7 0.351621 0.00498753 0.356608 0.343308
10 8 0.349543 0.00552957 0.356608 0.34414
[8]:
# Report the winning hyperparameters and the held-out accuracy.
print(evolved_estimator.best_params_)
print("accuracy score: ", format(accuracy, ".2f"))
{'min_weight_fraction_leaf': 0.22010341437935194, 'criterion': 'gini', 'max_depth': 18, 'max_leaf_nodes': 12}
accuracy score: 0.32