from collections import OrderedDict
import numpy as np
from bokeh.embed import components
from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.plot.whisker_quantiles import whisker_quantiles
from cave.utils.hpbandster_helpers import format_budgets
class BaseParameterImportance(BaseAnalyzer):
    """Common driver for parameter-importance analyzers built on the PIMP-package.

    The class runs one importance method (``modus``) on every aggregated
    per-budget run, stores the post-processed results in ``self.result`` and,
    if requested in the analyzing-options, gathers per-folder importances for
    a whisker-quantiles plot.

    NOTE(review): ``self.logger``, ``self.result`` and ``self.postprocess`` are
    not defined in this class; they are presumably provided by ``BaseAnalyzer``
    and/or by concrete subclasses -- confirm before relying on this class
    directly.
    """

    def __init__(self,
                 runscontainer,
                 ):
        """Calculate parameter-importance using the PIMP-package.

        Parameters
        ----------
        runscontainer:
            container of the runs to be analyzed, handed on unchanged to the base-class constructor
        """
        super().__init__(runscontainer)
        # Always define the attribute, so plot_whiskers() can be called safely even if
        # parameter_importance() has not (yet) generated any whisker-data.
        self.importance_per_budget = None

    @staticmethod
    def _format_pimp_error(error, run, modus):
        """Build the message that is logged when the importance-evaluation of ``run`` fails."""
        return ("Encountered error '{}' for '{}' in '{}', (for fANOVA this can e.g. happen with too few "
                "data-points).".format(error, run.get_identifier(), modus))

    def parameter_importance(self, modus):
        """Evaluate parameter importance with method ``modus``.

        For every aggregated per-budget run the result is written to
        ``self.result['Importances Per Parameter'][<formatted budget>]``. That entry is either the
        output of ``self.postprocess``, a dict with an explanatory message under the key ``'else'``
        (fewer configurations than hyperparameters) or a dict holding the error message under the key
        ``modus + '_error'`` (evaluation raised a ``RuntimeError``).

        If the option ``whisker_quantiles_plot`` is enabled, ``self.importance_per_budget`` is
        additionally filled with ``dict[budget][hyperparameter][folder] -> importance`` (``np.nan`` where
        no importance is available). With at most one budget and at most one folder the option is
        switched off instead and ``self.importance_per_budget`` is set to ``None``.

        Parameters
        ----------
        modus: str
            modus for parameter importance, from
            [forward-selection, ablation, fanova, lpi]
        """
        runs_by_budget = self.runscontainer.get_aggregated(keep_budgets=True, keep_folders=False)
        formatted_budgets = format_budgets(self.runscontainer.get_budgets(), allow_whitespace=True)

        self.result['Importances Per Parameter'] = {}
        result = self.result['Importances Per Parameter']

        # NOTE(review): the zip assumes that the formatted budgets and the aggregated runs are
        # returned in the same order -- confirm against the runscontainer.
        for budget, run in zip(formatted_budgets.values(), runs_by_budget):
            self.logger.info("... parameter importance {} on {}".format(modus, run.get_identifier()))
            if budget not in result:
                result[budget] = OrderedDict()

            n_configs = len(run.original_runhistory.get_all_configs())
            n_params = len(run.scenario.cs.get_hyperparameters())
            if n_configs < n_params:
                result[budget] = {
                    'else': "For this run there are only {} configs, "
                            "but {} parameters. No reliable parameter importance analysis "
                            "can be performed.".format(n_configs, n_params)}
                continue

            try:
                run.pimp.evaluate_scenario([modus], run.output_dir)
            except RuntimeError as e:
                err = self._format_pimp_error(e, run, modus)
                self.logger.info(err, exc_info=1)
                result[budget][modus + '_error'] = err
                continue

            individual_result = self.postprocess(run.pimp, run.output_dir)
            result[budget] = individual_result

            # Interactive Plots
            if self.runscontainer.analyzing_options['Parameter Importance'].getboolean('interactive_bokeh_plots'):
                try:
                    result[budget]['Interactive Plots'] = {
                        'bokeh': components(run.pimp.evaluator.plot_bokeh(show_plot=False))
                    }
                except AttributeError as err:
                    # Missing bokeh-plot is not fatal, the remaining results stay available
                    self.logger.debug(err, exc_info=1)

            # Make importances and evaluator available through the run's shared information
            run.share_information['parameter_importance'][modus] = run.pimp.evaluator.evaluated_parameter_importance
            run.share_information['evaluators'][modus] = run.pimp.evaluator

        if self.runscontainer.analyzing_options['Parameter Importance'].getboolean('whisker_quantiles_plot'):
            if len(self.runscontainer.get_budgets()) <= 1 and len(self.runscontainer.get_folders()) <= 1:
                self.logger.info("The Whisker-Quantiles Plot for Parameter Importance makes only sense with multiple "
                                 "budgets and/or folders, but not with only one budget and one folder.")
                # Disable the option, so get_html() does not try to embed a plot that does not exist
                self.runscontainer.analyzing_options.set('Parameter Importance', 'whisker_quantiles_plot', 'False')
                self.importance_per_budget = None
                return

            hyperparameters = self.runscontainer.scenario.cs.get_hyperparameter_names()
            # Generate data - for each parallel folder and each budget, perform an importance-analysis
            importance_per_budget = OrderedDict()  # dict[budget][param_name][folder] -> float
            for budget in self.runscontainer.get_budgets():
                importance_per_budget[budget] = {hp: {} for hp in hyperparameters}
                for folder in self.runscontainer.get_folders():
                    cr = self.runscontainer.get_run(folder, budget)
                    try:
                        importance = cr.pimp.evaluate_scenario([modus],
                                                               cr.output_dir,
                                                               plot_pyplot=False,
                                                               plot_bokeh=False)[0][modus]['imp']
                    except RuntimeError as e:
                        importance = {}
                        err = self._format_pimp_error(e, cr, modus)
                        self.logger.debug(err, exc_info=1)
                        self.logger.error(err)
                    self.logger.debug("Importance for folder %s: %s", folder, importance)
                    for hp in hyperparameters:
                        # Read-only lookup: do not remove entries from the mapping returned by pimp
                        importance_per_budget[budget][hp][folder] = importance.get(hp, np.nan)
            self.importance_per_budget = importance_per_budget

    def plot_whiskers(self):
        """Return the whisker-quantiles plot of the collected importances.

        Returns ``None`` if no whisker-data is available (``self.importance_per_budget`` is ``None``).
        """
        if self.importance_per_budget is None:
            return None
        return whisker_quantiles(self.importance_per_budget)

    def get_html(self, d=None, tooltip=None):
        """Add the whisker plot to ``self.result`` (if enabled and available) and defer to the base class.

        Parameters
        ----------
        d, tooltip:
            passed on unchanged to the ``get_html`` of the base class

        Returns
        -------
        whatever the ``get_html`` of the base class returns
        """
        if self.runscontainer.analyzing_options['Parameter Importance'].getboolean('whisker_quantiles_plot'):
            whisker_plot = self.plot_whiskers()
            if whisker_plot is None:
                # Nothing to embed, e.g. if parameter_importance() has not generated whisker-data
                self.logger.debug("Whisker-Quantiles Plot requested, but no data available. Skipping plot.")
            else:
                self.result['Whisker Plot'] = {'bokeh': components(whisker_plot),
                                               'tooltip': "Each dot is a parallel run (or folder) of the input data "
                                                          "and the whiskers are quartiles."}
        return super().get_html(d, tooltip)