Source code for sklearn_genetic.plots

import logging

logger = logging.getLogger(__name__)  # noqa

# Check if seaborn is installed as an extra requirement
try:
    import seaborn as sns
except ModuleNotFoundError:  # noqa
    logger.error(
        "seaborn not found, pip install seaborn to use plots functions"
    )  # noqa
import pandas as pd
import numpy as np

from .utils import logbook_to_pandas
from .parameters import Metrics
from .space import Categorical

"""
This module contains some useful functions to explore the results of the optimization routines
"""


def plot_fitness_evolution(estimator, metric="fitness"):
    """
    Draw a line plot of a logged metric across the generations of a search.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    metric: {"fitness", "fitness_std", "fitness_max", "fitness_min"}, default="fitness"
        Logged metric into the estimator history to plot

    Returns
    -------
    Lines plot with the fitness value in each generation

    Raises
    ------
    ValueError
        If ``metric`` is not one of the values returned by ``Metrics.list()``
    """
    # Guard clause: reject unknown metrics before touching any plotting state.
    if not (metric in Metrics.list()):
        message = f"metric must be one of {Metrics.list()}, but got {metric} instead"
        raise ValueError(message)

    sns.set_style("white")

    history_values = estimator.history[metric]
    rocket_palette = sns.color_palette("rocket")

    figure_settings = {"figure.figsize": (10, 10)}
    sns.set(rc=figure_settings)

    # One x position per entry reported by len(estimator).
    generations = range(len(estimator))
    axes = sns.lineplot(
        x=generations,
        y=history_values,
        markers=True,
        palette=rocket_palette,
    )

    axes.set_title(f"{metric.capitalize()} average evolution over generations")
    axes.set(xlabel="generations", ylabel=f"fitness ({estimator.scoring})")
    return axes
def plot_search_space(estimator, height=2, s=25, features: list = None):
    """
    Draw a pair grid of the hyperparameter values sampled during the search.

    The upper triangle holds scatter plots, the lower triangle and the
    diagonal hold kernel density estimates.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    height: float, default=2
        Height of each facet
    s: float, default=25
        Size of the markers in scatter plot
    features: list, default=None
        Subset of features to plot, if ``None`` it plots all the features by default

    Returns
    -------
    Pair plot of the used hyperparameters during the search
    """
    sns.set_style("white")

    logbook_df = logbook_to_pandas(estimator.logbook)

    # Without an explicit subset, plot every search-space parameter plus the score.
    columns = features if features else [*estimator.space.parameters, "score"]
    stats = logbook_df[columns]

    grid = sns.PairGrid(stats, diag_sharey=False, height=height)

    grid = grid.map_upper(sns.scatterplot, s=s, color="r", alpha=0.2)

    lower_cmap = sns.color_palette("ch:s=.25,rot=-.25", as_cmap=True)
    grid = grid.map_lower(sns.kdeplot, shade=True, cmap=lower_cmap)

    diagonal_options = {
        "shade": True,
        "palette": "crest",
        "alpha": 0.2,
        "color": "red",
    }
    return grid.map_diag(sns.kdeplot, **diagonal_options)
def noise(score):
    """
    Generate a tiny, zero-centred random jitter for a score column.

    The jitter is drawn uniformly from
    ``[-std / (2 * 1e7), +std / (2 * 1e7))``, where ``std`` is ``score.std()``,
    so it breaks ties between equal scores without visibly changing them.

    Parameters
    ----------
    score: Series
        The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV`

    Returns
    -------
    Noise to be added to each element of the score to avoid non-unique bin edges
    """
    noise_ratio = 1e7
    # Width of the jitter interval, proportional to the spread of the scores.
    amplitude = score.std() / noise_ratio

    # Shift the [0, amplitude) draws by half the width so they are centred on 0.
    # The previous expression `score_std / 2 * noise_ratio` multiplied by the
    # ratio instead of dividing, which produced a huge constant offset.
    return np.random.random(len(score)) * amplitude - amplitude / 2
def plot_parallel_coordinates(estimator, features: list = None):
    """
    Draw a parallel coordinates plot coloured by score quartile.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    features: list, default=None
        Subset of features to plot, if ``None`` it plots all the features by default.
        Categorical parameters are dropped with a warning. Names that are not
        in the search space (for example ``"score"``) are passed through as
        logbook columns.

    Returns
    -------
    Parallel Coordinates plot of the non-categorical values
    """
    df = logbook_to_pandas(estimator.logbook)
    param_grid = estimator.space.param_grid
    score = df["score"]

    def _is_categorical(name):
        # Columns such as "score" are not search-space parameters; looking them
        # up directly in param_grid would raise KeyError, so test membership first.
        return name in param_grid and isinstance(param_grid[name], Categorical)

    if features:
        columns = []
        for feature in features:
            if _is_categorical(feature):
                logger.warning(
                    "`%s` is Categorical variable! It was dropped from the plot feature list",
                    feature,
                )
            else:
                columns.append(feature)
    else:
        columns = [
            variable
            for variable, var_type in param_grid.items()
            if not isinstance(var_type, Categorical)
        ]
        columns.append("score")

    # Work on an explicit copy so that adding the quartile column neither
    # writes into a slice of ``df`` nor triggers chained-assignment warnings.
    stats = df[columns].copy()

    # A little noise keeps qcut from failing on repeated scores (duplicate bin edges).
    stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1, 2, 3, 4])

    return pd.plotting.parallel_coordinates(
        stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000")
    )