{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Scikit-learn Comparison"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn_genetic import GASearchCV\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV\n",
    "from sklearn_genetic.space import Categorical, Continuous\n",
    "import scipy.stats as stats\n",
    "from scipy.stats import loguniform\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn.metrics import accuracy_score\n",
    "import numpy as np\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = load_digits() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_names = data['target_names'] \n",
    "y = data['target']\n",
    "X = data['data'] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = SGDClassifier(loss='hinge',fit_intercept=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 1. Random Search"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_dist = {'average': [True, False],\n",
    "              'l1_ratio': stats.uniform(0, 1),\n",
    "              'alpha': loguniform(1e-4, 1e0)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_iter_search = 30\n",
    "random_search = RandomizedSearchCV(clf, param_distributions=param_dist,\n",
    "                                   n_iter=n_iter_search,n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomizedSearchCV(estimator=SGDClassifier(), n_iter=30, n_jobs=-1,\n",
       "                   param_distributions={'alpha': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A62568BD60>,\n",
       "                                        'average': [True, False],\n",
       "                                        'l1_ratio': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A61065B400>})"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "random_search.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9629629629629629"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score(y_test, random_search.predict(X_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'alpha': 0.020380435883006108,\n",
       " 'average': True,\n",
       " 'l1_ratio': 0.01937382409973476}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "random_search.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 2. Grid Search"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_grid = {'average': [True, False],\n",
    "              'l1_ratio': np.linspace(0, 1, num=10),\n",
    "              'alpha': np.power(10, np.arange(-4, 1, dtype=float))}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search = GridSearchCV(clf, param_grid=param_grid,n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(estimator=SGDClassifier(), n_jobs=-1,\n",
       "             param_grid={'alpha': array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00]),\n",
       "                         'average': [True, False],\n",
       "                         'l1_ratio': array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,\n",
       "       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])})"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9528619528619529"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score(y_test, grid_search.predict(X_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'alpha': 0.001, 'average': True, 'l1_ratio': 0.4444444444444444}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3. Genetic Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_grid = {'l1_ratio': Continuous(0,1),\n",
    "              'alpha': Continuous(1e-4,1),\n",
    "              'average': Categorical([True, False])}\n",
    "\n",
    "evolved_estimator = GASearchCV(clf,\n",
    "                    cv=3,\n",
    "                    scoring='accuracy',\n",
    "                    param_grid=param_grid,\n",
    "                    population_size=10,\n",
    "                    generations=8,\n",
    "                    tournament_size=3,\n",
    "                    elitism=True,\n",
    "                    verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n",
      "0  \t10    \t0.939817\t0.00313682 \t0.945137   \t0.934331   \n",
      "1  \t10    \t0.940482\t0.00433848 \t0.9468     \t0.932668   \n",
      "2  \t10    \t0.940482\t0.00226736 \t0.943475   \t0.935162   \n",
      "3  \t10    \t0.942228\t0.00244479 \t0.945137   \t0.938487   \n",
      "4  \t10    \t0.939734\t0.00420996 \t0.945137   \t0.934331   \n",
      "5  \t10    \t0.937323\t0.00362717 \t0.944306   \t0.931837   \n",
      "6  \t10    \t0.943475\t0.00313241 \t0.949293   \t0.939318   \n",
      "7  \t10    \t0.940399\t0.0042394  \t0.950125   \t0.934331   \n",
      "8  \t10    \t0.943724\t0.00257689 \t0.948462   \t0.938487   \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<sklearn_genetic_opt.GASearchCV at 0x1a625628ee0>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evolved_estimator.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predicy_ga = evolved_estimator.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.968013468013468"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "accuracy_score(y_test,y_predicy_ga)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'l1_ratio': 0.9918490625641972, 'alpha': 0.5633014570910942, 'average': False}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "evolved_estimator.best_params_"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}