{ "cells": [ { "cell_type": "markdown", "source": [ "# Scikit-learn Comparison" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from sklearn_genetic import GASearchCV\n", "from sklearn.linear_model import SGDClassifier\n", "from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV\n", "from sklearn_genetic.space import Categorical, Continuous\n", "import scipy.stats as stats\n", "from scipy.stats import loguniform\n", "from sklearn.datasets import load_digits\n", "from sklearn.metrics import accuracy_score\n", "import numpy as np\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "data = load_digits() " ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "label_names = data['target_names'] \n", "y = data['target']\n", "X = data['data'] " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Base estimator shared by every search strategy in this notebook.\n", "# SGDClassifier only uses l1_ratio when penalty='elasticnet'; with the default\n", "# 'l2' penalty the l1_ratio values explored by the searches would be ignored.\n", "# random_state fixes the SGD data shuffling so repeated fits are comparable.\n", "clf = SGDClassifier(loss='hinge', penalty='elasticnet', fit_intercept=True,\n", "                    random_state=42)" ] }, { "cell_type": "markdown", "source": [ "### 1. 
Random Search" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Sampling distributions: uniform l1_ratio on [0, 1], log-uniform alpha on [1e-4, 1].\n", "param_dist = {'average': [True, False],\n", "              'l1_ratio': stats.uniform(0, 1),\n", "              'alpha': loguniform(1e-4, 1e0)}" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "n_iter_search = 30\n", "# random_state seeds the candidate sampling so that a fresh\n", "# Restart & Run All evaluates the same 30 parameter settings.\n", "random_search = RandomizedSearchCV(clf, param_distributions=param_dist,\n", "                                   n_iter=n_iter_search, n_jobs=-1,\n", "                                   random_state=42)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomizedSearchCV(estimator=SGDClassifier(), n_iter=30, n_jobs=-1,\n", " param_distributions={'alpha': ,\n", " 'average': [True, False],\n", " 'l1_ratio': })" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "random_search.fit(X_train,y_train)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9629629629629629" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accuracy_score(y_test, random_search.predict(X_test))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'alpha': 0.020380435883006108,\n", " 'average': True,\n", " 'l1_ratio': 0.01937382409973476}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "random_search.best_params_" ] }, { "cell_type": "markdown", "source": [ "### 2. 
Grid Search" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Exhaustive grid: both averaging modes, ten evenly spaced l1_ratio values\n", "# and five alphas at the powers of ten from 1e-4 to 1e0.\n", "param_grid = {\n", "    'average': [True, False],\n", "    'l1_ratio': np.linspace(0, 1, num=10),\n", "    'alpha': np.logspace(-4, 0, num=5),\n", "}" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, n_jobs=-1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GridSearchCV(estimator=SGDClassifier(), n_jobs=-1,\n", " param_grid={'alpha': array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00]),\n", " 'average': [True, False],\n", " 'l1_ratio': array([0. , 0.11111111, 0.22222222, 0.33333333, 0.44444444,\n", " 0.55555556, 0.66666667, 0.77777778, 0.88888889, 1. ])})" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_search.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9528619528619529" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accuracy_score(y_test, grid_search.predict(X_test))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'alpha': 0.001, 'average': True, 'l1_ratio': 0.4444444444444444}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_search.best_params_" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. 
Genetic Algorithm" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# The GA search space gets its own name so it does not overwrite the\n", "# `param_grid` dict that the grid-search cells depend on when re-run.\n", "# alpha is sampled log-uniformly, matching the scale used by the random\n", "# and grid searches so the three strategies explore the same space.\n", "ga_param_space = {'l1_ratio': Continuous(0, 1),\n", "                  'alpha': Continuous(1e-4, 1, distribution='log-uniform'),\n", "                  'average': Categorical([True, False])}\n", "\n", "evolved_estimator = GASearchCV(clf,\n", "                               cv=3,\n", "                               scoring='accuracy',\n", "                               param_grid=ga_param_space,\n", "                               population_size=10,\n", "                               generations=8,\n", "                               tournament_size=3,\n", "                               elitism=True,\n", "                               verbose=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n", "0 \t10 \t0.939817\t0.00313682 \t0.945137 \t0.934331 \n", "1 \t10 \t0.940482\t0.00433848 \t0.9468 \t0.932668 \n", "2 \t10 \t0.940482\t0.00226736 \t0.943475 \t0.935162 \n", "3 \t10 \t0.942228\t0.00244479 \t0.945137 \t0.938487 \n", "4 \t10 \t0.939734\t0.00420996 \t0.945137 \t0.934331 \n", "5 \t10 \t0.937323\t0.00362717 \t0.944306 \t0.931837 \n", "6 \t10 \t0.943475\t0.00313241 \t0.949293 \t0.939318 \n", "7 \t10 \t0.940399\t0.0042394 \t0.950125 \t0.934331 \n", "8 \t10 \t0.943724\t0.00257689 \t0.948462 \t0.938487 \n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evolved_estimator.fit(X_train,y_train)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "y_pred_ga = evolved_estimator.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.968013468013468" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accuracy_score(y_test, y_pred_ga)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "{'l1_ratio': 0.9918490625641972, 'alpha': 0.5633014570910942, 'average': False}" ] }, 
"execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evolved_estimator.best_params_" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 2 }