Scikit-learn Comparison
[1]:
# All imports for the notebook live in this cell: stdlib first, then third-party.
import warnings

import numpy as np
import scipy.stats as stats
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Categorical, Continuous

try:
    # Preferred source: SciPy provides loguniform directly (SciPy >= 1.4).
    from scipy.stats import loguniform
except ImportError:
    # Fallback for old SciPy: scikit-learn's compatibility shim. Newer
    # scikit-learn releases deprecate/drop this shim, hence it is not the default.
    from sklearn.utils.fixes import loguniform

# Silences every warning for the rest of the notebook to keep the outputs compact.
warnings.filterwarnings("ignore")
[2]:
# Load the digits dataset via scikit-learn's loader; the next cell reads its
# 'data', 'target' and 'target_names' entries.
data = load_digits()
[3]:
# Unpack the feature matrix and targets first, then the class names.
X, y = data['data'], data['target']
label_names = data['target_names']
[4]:
# Keep 33% of the samples aside for the final evaluation; the fixed seed makes
# the split identical on every run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts
[5]:
# Base estimator shared by the three searches below (hinge loss, trained with SGD).
# penalty='elasticnet' is needed because every search tunes `l1_ratio`, which the
# estimator only uses with the elastic-net penalty; with the default penalty the
# searches would be tuning a parameter that has no effect.
# random_state pins the estimator's randomness so the comparison is repeatable.
clf = SGDClassifier(
    loss='hinge',
    penalty='elasticnet',
    fit_intercept=True,
    random_state=42,
)
1. Random Search
[6]:
# Sampling space for the randomized search:
#   average  - picked from the two boolean options
#   l1_ratio - uniform on [0, 1]
#   alpha    - log-uniform on [1e-4, 1]
param_dist = dict(
    average=[True, False],
    l1_ratio=stats.uniform(loc=0, scale=1),
    alpha=loguniform(1e-4, 1e0),
)
[7]:
# Number of hyperparameter candidates drawn by the randomized search.
n_iter_search = 30

# random_state seeds the candidate sampling, so the same 30 settings are drawn
# on every Restart & Run All; without it the results change from run to run.
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
    n_jobs=-1,
    random_state=42,
)
[8]:
# Fit the randomized search on the training split only; the test split is
# reserved for the accuracy check in the next cell.
random_search.fit(X_train,y_train)
[8]:
RandomizedSearchCV(estimator=SGDClassifier(), n_iter=30, n_jobs=-1,
param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A62568BD60>,
'average': [True, False],
'l1_ratio': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A61065B400>})
[9]:
# Accuracy of the randomized search's predictions on the held-out test split.
accuracy_score(y_test, random_search.predict(X_test))
[9]:
0.9629629629629629
[10]:
# Hyperparameter setting selected by the randomized search.
random_search.best_params_
[10]:
{'alpha': 0.020380435883006108,
'average': True,
'l1_ratio': 0.01937382409973476}
2. Grid Search
[11]:
# Exhaustive grid over the same three hyperparameters:
# 2 (average) x 10 (l1_ratio) x 5 (alpha) = 100 combinations.
param_grid = dict(
    average=[True, False],
    l1_ratio=np.linspace(0, 1, 10),
    # Powers of ten from 1e-4 up to 1e0.
    alpha=10.0 ** np.arange(-4, 1, dtype=float),
)
[12]:
# Exhaustive search over `param_grid`, using all available cores.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    n_jobs=-1,
)
[13]:
# Fit the grid search on the training split only; the test split is reserved
# for the accuracy check in the next cell.
grid_search.fit(X_train,y_train)
[13]:
GridSearchCV(estimator=SGDClassifier(), n_jobs=-1,
param_grid={'alpha': array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00]),
'average': [True, False],
'l1_ratio': array([0. , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
0.55555556, 0.66666667, 0.77777778, 0.88888889, 1. ])})
[14]:
# Accuracy of the grid search's predictions on the held-out test split.
accuracy_score(y_test, grid_search.predict(X_test))
[14]:
0.9528619528619529
[15]:
# Hyperparameter setting selected by the grid search.
grid_search.best_params_
[15]:
{'alpha': 0.001, 'average': True, 'l1_ratio': 0.4444444444444444}
3. Genetic Algorithm
[16]:
# Search space for the genetic algorithm, expressed with sklearn-genetic's
# space types. NOTE(review): this rebinds `param_grid`, the name the grid
# search cell used for its own grid.
param_grid = dict(
    l1_ratio=Continuous(0, 1),
    alpha=Continuous(1e-4, 1),
    average=Categorical([True, False]),
)

# Genetic search over the same base estimator.
# NOTE(review): cv=3 is set explicitly here, while the other two searches leave
# `cv` at its default - keep that in mind when comparing the scores.
evolved_estimator = GASearchCV(
    clf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    population_size=10,
    generations=8,
    tournament_size=3,
    elitism=True,
    verbose=True,
)
[17]:
# Run the genetic search on the training split; with verbose=True it prints a
# row of fitness statistics per generation (see the table below).
evolved_estimator.fit(X_train,y_train)
gen nevals fitness fitness_std fitness_max fitness_min
0 10 0.939817 0.00313682 0.945137 0.934331
1 10 0.940482 0.00433848 0.9468 0.932668
2 10 0.940482 0.00226736 0.943475 0.935162
3 10 0.942228 0.00244479 0.945137 0.938487
4 10 0.939734 0.00420996 0.945137 0.934331
5 10 0.937323 0.00362717 0.944306 0.931837
6 10 0.943475 0.00313241 0.949293 0.939318
7 10 0.940399 0.0042394 0.950125 0.934331
8 10 0.943724 0.00257689 0.948462 0.938487
[17]:
<sklearn_genetic_opt.GASearchCV at 0x1a625628ee0>
[18]:
y_predicy_ga = evolved_estimator.predict(X_test)
[19]:
accuracy_score(y_test,y_predicy_ga)
[19]:
0.968013468013468
[20]:
# Hyperparameter setting selected by the genetic search.
# NOTE(review): the scikit-learn searches above expose `best_params_` (trailing
# underscore); confirm which attribute name the installed sklearn-genetic
# version provides.
evolved_estimator.best_params
[20]:
{'l1_ratio': 0.9918490625641972, 'alpha': 0.5633014570910942, 'average': False}