Digits Classification

[1]:
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Integer, Continuous
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn_genetic.callbacks import DeltaThreshold, TimerStopping

Load the data and split it into train and test sets

[2]:
# Load the digits dataset and pull out the feature matrix, the targets
# and the class names in one step.
data = load_digits()
X, y, label_names = data["data"], data["target"], data["target_names"]

# Hold out a third of the samples for the final evaluation; the fixed seed
# keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

Define the classifier to tune and the param grid

[3]:
# Base estimator to tune. The seed is fixed because a decision tree permutes
# the candidate features at every split; without it the same hyperparameters
# can yield different trees (and fitness values) from one run to the next.
clf = DecisionTreeClassifier(random_state=42)

# Search space explored by the genetic algorithm: one entry per
# DecisionTreeClassifier hyperparameter, each with its sampling range.
params_grid = {
    "min_weight_fraction_leaf": Continuous(0, 0.5),
    "criterion": Categorical(["gini", "entropy"]),
    "max_depth": Integer(2, 20),
    "max_leaf_nodes": Integer(2, 30),
}

Create the CV strategy and optionally some callbacks

[4]:
# Stratified 3-fold cross-validation used to score every candidate.
# shuffle=True needs a fixed random_state: without it the fold assignment
# changes on every run, so fitness values are not reproducible.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Early-stopping callbacks passed to fit():
# - DeltaThreshold watches the "fitness" metric with a 0.001 threshold
#   (presumably the change between consecutive generations; confirm in the
#   sklearn-genetic-opt docs).
# - TimerStopping is configured with a 60 second budget.
delta_callback = DeltaThreshold(threshold=0.001, metric="fitness")
timer_callback = TimerStopping(total_seconds=60)

callbacks = [delta_callback, timer_callback]

Define the GASearchCV options

[5]:
# Genetic-algorithm hyperparameter search wrapped around the decision tree.
evolved_estimator = GASearchCV(
    estimator=clf,
    param_grid=params_grid,
    # How each candidate is evaluated.
    cv=cv,
    scoring="accuracy",
    # Evolution settings.
    algorithm="eaSimple",
    population_size=16,
    generations=30,
    crossover_probability=0.9,
    mutation_probability=0.05,
    # Execution settings.
    n_jobs=-1,
    verbose=True,
)

Fit the model and see some results

[6]:
# Run the search on the training split; the callbacks may stop it early.
evolved_estimator.fit(
    X_train,
    y_train,
    callbacks=callbacks,
)

# Score the fitted search object on the held-out test split.
y_predict_ga = evolved_estimator.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict_ga)
gen     nevals  fitness         fitness_std     fitness_max     fitness_min
0       16      0.363259        0.136399        0.639235        0.189526
1       14      0.450592        0.119266        0.620116        0.27847
2       12      0.54707         0.1376          0.75478         0.26517
3       12      0.625052        0.113433        0.768911        0.346633
4       16      0.667654        0.11493         0.755611        0.400665
5       14      0.727504        0.0156019       0.759767        0.689111
6       16      0.71462         0.0486477       0.758105        0.607648
7       14      0.701164        0.132646        0.764755        0.190357
8       12      0.735661        0.0115332       0.758936        0.715711
9       16      0.735141        0.00947264      0.748961        0.704073
INFO: DeltaThreshold callback met its criteria
INFO: Stopping the algorithm
[7]:
# Show the best hyperparameters found, then the held-out accuracy rounded
# to two decimals.
best_params = evolved_estimator.best_params_
accuracy_text = "{:.2f}".format(accuracy)

print(best_params)
print("accuracy score: ", accuracy_text)
{'min_weight_fraction_leaf': 0.027793264515431237, 'criterion': 'entropy', 'max_depth': 17, 'max_leaf_nodes': 26}
accuracy score:  0.77