import logging
logger = logging.getLogger(__name__) # noqa
# Check if seaborn is installed as an extra requirement
try:
import seaborn as sns
except ModuleNotFoundError: # noqa
logger.error(
"seaborn not found, pip install seaborn to use plots functions"
) # noqa
import pandas as pd
import numpy as np
from .utils import logbook_to_pandas
from .parameters import Metrics
from .space import Categorical
"""
This module contains some useful functions to explore the results of the optimization routines
"""
def plot_fitness_evolution(estimator, metric="fitness"):
    """
    Plot a logged fitness metric against the generation index.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    metric: {"fitness", "fitness_std", "fitness_max", "fitness_min"}, default="fitness"
        Logged metric into the estimator history to plot

    Returns
    -------
    Lines plot with the fitness value in each generation

    Raises
    ------
    ValueError
        If ``metric`` is not one of the values returned by ``Metrics.list()``
    """
    if metric not in Metrics.list():
        raise ValueError(
            f"metric must be one of {Metrics.list()}, but got {metric} instead"
        )

    fitness_history = estimator.history[metric]

    # Set the style and the figure size in a single call: ``sns.set`` resets
    # every theme option it is not given explicitly, so a separate
    # ``sns.set_style("white")`` issued before it would be silently replaced
    # by the default style.
    sns.set(style="white", rc={"figure.figsize": (10, 10)})
    palette = sns.color_palette("rocket")

    # One point per generation; ``len(estimator)`` provides the x-axis length.
    ax = sns.lineplot(
        x=range(len(estimator)), y=fitness_history, markers=True, palette=palette
    )
    ax.set_title(f"{metric.capitalize()} average evolution over generations")
    ax.set(xlabel="generations", ylabel=f"fitness ({estimator.scoring})")
    return ax
def plot_search_space(estimator, height=2, s=25, features: list = None):
    """
    Draw a pair plot of the hyperparameter values recorded in the logbook.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    height: float, default=2
        Height of each facet
    s: float, default=25
        Size of the markers in scatter plot
    features: list, default=None
        Subset of features to plot, if ``None`` it plots all the parameters of
        ``estimator.space`` together with the ``score`` column

    Returns
    -------
    Pair plot of the used hyperparameters during the search
    """
    sns.set_style("white")

    df = logbook_to_pandas(estimator.logbook)
    if features:
        stats = df[features]
    else:
        variables = [*estimator.space.parameters, "score"]
        stats = df[variables]

    g = sns.PairGrid(stats, diag_sharey=False, height=height)
    # Upper triangle: raw scatter of each pair of columns.
    g = g.map_upper(sns.scatterplot, s=s, color="r", alpha=0.2)
    # Lower triangle and diagonal: density estimates. ``fill`` is the current
    # name of the deprecated ``shade`` argument of ``kdeplot``.
    g = g.map_lower(
        sns.kdeplot,
        fill=True,
        cmap=sns.color_palette("ch:s=.25,rot=-.25", as_cmap=True),
    )
    g = g.map_diag(sns.kdeplot, fill=True, palette="crest", alpha=0.2, color="red")
    return g
def noise(score, noise_ratio=1e7):
    """
    Generate a small random jitter, centred on zero, for a score vector.

    Parameters
    ----------
    score: Series
        The `score` column from the logbook data of :class:`~sklearn_genetic.GASearchCV`
    noise_ratio: float, default=1e7
        Factor by which the standard deviation of ``score`` is divided to
        obtain the width of the jitter interval

    Returns
    -------
    Noise to be added to each element of the score to avoid non-unique bin edges.
    Each value lies in ``[-std / (2 * noise_ratio), std / (2 * noise_ratio))``
    where ``std`` is ``score.std()``
    """
    score_std = score.std()
    # Width of the interval the uniform samples are scaled to.
    width = score_std / noise_ratio
    # Shift by half the width so the jitter is centred on zero. The divisor
    # must be parenthesised: ``score_std / 2 * noise_ratio`` would multiply by
    # the ratio and move every score by a huge constant instead.
    half_width = score_std / (2 * noise_ratio)
    return np.random.random(len(score)) * width - half_width
def plot_parallel_coordinates(estimator, features: list = None):
    """
    Draw a parallel coordinates plot coloured by score quartile.

    Parameters
    ----------
    estimator: estimator object
        A fitted estimator from :class:`~sklearn_genetic.GASearchCV`
    features: list, default=None
        Subset of features to plot, if ``None`` it plots all the features by default.
        Categorical features are dropped from the subset with a logged warning

    Returns
    -------
    Parallel Coordinates plot of the non-categorical values
    """
    df = logbook_to_pandas(estimator.logbook)
    param_grid = estimator.space.param_grid
    score = df["score"]

    if features:
        columns = []
        for feature in features:
            if isinstance(param_grid[feature], Categorical):
                logger.warning(
                    "`%s` is Categorical variable! It was dropped from the plot feature list",
                    feature,
                )
            else:
                columns.append(feature)
    else:
        columns = [
            variable
            for variable, var_type in param_grid.items()
            if not isinstance(var_type, Categorical)
        ]
        # Only the default selection also plots the score as a coordinate.
        columns.append("score")

    # Work on an explicit copy: adding a column to a selection of ``df``
    # would otherwise trigger pandas' SettingWithCopyWarning.
    stats = df[columns].copy()

    # Jitter the scores before binning so that ties do not produce
    # non-unique quartile edges.
    stats["score_quartile"] = pd.qcut(score + noise(score), 4, labels=[1, 2, 3, 4])

    g = pd.plotting.parallel_coordinates(
        stats, "score_quartile", color=("#8E8E8D", "#4ECDC4", "#C7F464", "#FF0000")
    )
    return g