{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# MLflow Logger\n",
    "\n",
    "Tune a `DecisionTreeClassifier` on the digits dataset with `GASearchCV` and log the search to MLflow through `MLflowConfig`."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn_genetic import GASearchCV\n",
    "from sklearn_genetic.space import Categorical, Integer, Continuous\n",
    "from sklearn.model_selection import train_test_split, StratifiedKFold\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn_genetic.mlflow_log import MLflowConfig\n",
    "\n",
    "# Single seed shared by the train/test split, the CV folds and the tree.\n",
    "RANDOM_STATE = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Import the data and split it into train and test sets"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "data = load_digits()\n",
    "label_names = data[\"target_names\"]\n",
    "y = data[\"target\"]\n",
    "X = data[\"data\"]\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=RANDOM_STATE)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Define the classifier to tune and the param grid"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# Seed the tree so that ties between equally good splits are broken the same way on every run.\n",
    "clf = DecisionTreeClassifier(random_state=RANDOM_STATE)\n",
    "\n",
    "params_grid = {\n",
    "    \"min_weight_fraction_leaf\": Continuous(0, 0.5),\n",
    "    \"criterion\": Categorical([\"gini\", \"entropy\"]),\n",
    "    \"max_depth\": Integer(2, 20),\n",
    "    \"max_leaf_nodes\": Integer(2, 30)}"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create the CV strategy"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [],
   "source": [
    "# shuffle=True without a seed would produce different folds on every run.\n",
    "cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Create the MLflowConfig object and define its options\n",
    "\n",
    "The configuration below points at the MLflow tracking server given by `tracking_uri` (`http://localhost:5000`); make sure a server is reachable at that address before running the cell."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO: 'Digits-sklearn-genetic-opt' does not exist. Creating a new experiment\n"
     ]
    }
   ],
   "source": [
    "mlflow_config = MLflowConfig(\n",
    "    tracking_uri=\"http://localhost:5000\",\n",
    "    experiment=\"Digits-sklearn-genetic-opt\",\n",
    "    run_name=\"Decision Tree\",\n",
    "    save_models=True,\n",
    "    tags={\"team\": \"sklearn-genetic-opt\", \"version\": \"0.5.0\"})"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Define the GASearchCV options"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [],
   "source": [
    "evolved_estimator = GASearchCV(\n",
    "    clf,\n",
    "    cv=cv,\n",
    "    scoring=\"accuracy\",\n",
    "    population_size=4,\n",
    "    generations=10,\n",
    "    crossover_probability=0.9,\n",
    "    mutation_probability=0.05,\n",
    "    param_grid=params_grid,\n",
    "    algorithm=\"eaMuPlusLambda\",\n",
    "    n_jobs=-1,\n",
    "    verbose=True,\n",
    "    log_config=mlflow_config)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Fit the model and see some results\n",
    "\n",
    "The search is very small (a population of 4 over 10 generations), so expect modest scores; the focus of this notebook is the MLflow logging. The data split, the CV folds and the tree are seeded, but the genetic search itself is not, so the numbers below may vary between runs."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gen\tnevals\tfitness \tfitness_std\tfitness_max\tfitness_min\n",
      "0 \t4 \t0.261638\t0.046403 \t0.310889 \t0.18537 \n",
      "1 \t8 \t0.32419 \t0.0275257 \t0.344971 \t0.276808 \n",
      "2 \t8 \t0.342893\t0.0133196 \t0.353283 \t0.320033 \n",
      "3 \t8 \t0.35079 \t0.00249377 \t0.353283 \t0.348296 \n",
      "4 \t8 \t0.341854\t0.013305 \t0.353283 \t0.319202 \n",
      "5 \t8 \t0.335619\t0.0104549 \t0.348296 \t0.319202 \n",
      "6 \t7 \t0.339983\t0.011291 \t0.349958 \t0.322527 \n",
      "7 \t7 \t0.354115\t0.00275696 \t0.356608 \t0.349958 \n",
      "8 \t8 \t0.352452\t0.0054509 \t0.356608 \t0.343308 \n",
      "9 \t7 \t0.351621\t0.00498753 \t0.356608 \t0.343308 \n",
      "10 \t8 \t0.349543\t0.00552957 \t0.356608 \t0.34414 \n"
     ]
    }
   ],
   "source": [
    "evolved_estimator.fit(X_train, y_train)\n",
    "y_predict_ga = evolved_estimator.predict(X_test)\n",
    "accuracy = accuracy_score(y_test, y_predict_ga)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'min_weight_fraction_leaf': 0.22010341437935194, 'criterion': 'gini', 'max_depth': 18, 'max_leaf_nodes': 12}\n",
      "accuracy score: 0.32\n"
     ]
    }
   ],
   "source": [
    "print(evolved_estimator.best_params_)\n",
    "# One format string avoids the extra separator space print() inserts between arguments.\n",
    "print(\"accuracy score: {:.2f}\".format(accuracy))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}