# Source code for cave.analyzer.parameter_importance.fanova

import operator
import os
from collections import OrderedDict

from pandas import DataFrame

from cave.analyzer.parameter_importance.base_parameter_importance import BaseParameterImportance


class Fanova(BaseParameterImportance):
    """
    fANOVA (functional analysis of variance) computes the fraction of the variance in the cost space explained by
    changing a parameter by marginalizing over all other parameters, for each parameter (or for pairs of
    parameters). Parameters with high importance scores will have a large impact on the performance. To this end,
    a random forest is trained as an empirical performance model on the available empirical data from the
    available runhistories.
    """

    def __init__(self, runscontainer, marginal_threshold=0.05):
        """Wrapper for parameter_importance to save the importance-object / extract the results.
        We want to show the top X most important parameter-fanova-plots.

        Parameters
        ----------
        runscontainer: RunsContainer
            contains all important information about the configurator runs
        marginal_threshold: float
            parameter/s must be at least this important to be mentioned
        """
        super().__init__(runscontainer)

        self.marginal_threshold = marginal_threshold
        self.parameter_importance("fanova")

    def get_name(self):
        """Return the display name of this analyzer."""
        return 'fANOVA'

    def postprocess(self, pimp, output_dir):
        """Turn the evaluated fANOVA importances into a table and plot paths.

        Parameters
        ----------
        pimp: object
            importance object; ``pimp.evaluator.evaluated_parameter_importance`` is read as a mapping from
            parameter name (or stringified list of names for pairs) to importance. If the evaluator also has
            ``evaluated_parameter_importance_uncertainty``, it is used for the "+/-" part of the table.
        output_dir: str
            directory below which the ``fanova`` plot folder is expected

        Returns
        -------
        result: OrderedDict
            ``'Importance'`` (HTML table), ``'Marginals'`` (single-parameter plot paths) and
            ``'Pairwise Marginals'`` (pairwise plot paths, only those that exist on disk)
        """
        result = OrderedDict()

        def parse_pairwise(p):
            """parse pimp's way of having pairwise parameters as key as str and return list of individuals"""
            return [tmp.strip('\' ') for tmp in p.strip('[]').split(',')]

        def format_importance(key, value):
            """Render an importance value, appending its uncertainty when one is available for ``key``."""
            text = str(round(value, 4))
            if key in param_imp_std:
                text += " +/- " + str(round(param_imp_std[key], 4))
            return text

        # Importances are scaled by 100 here; the threshold below is compared against these scaled values.
        parameter_imp = {k: v * 100 for k, v in pimp.evaluator.evaluated_parameter_importance.items()}
        param_imp_std = {}
        if hasattr(pimp.evaluator, 'evaluated_parameter_importance_uncertainty'):
            param_imp_std = {k: v * 100 for k, v in pimp.evaluator.evaluated_parameter_importance_uncertainty.items()}

        for k, v in parameter_imp.items():
            # Look up per key, so a parameter without an uncertainty entry does not raise a KeyError
            self.logger.debug("fanova-importance for %s: mean (over trees): %f, std: %s", k, v,
                              str(param_imp_std[k]) if k in param_imp_std else 'N/A')

        # Split single and pairwise (pairwise are string: "['p1','p2']")
        single_imp = {k: v for k, v in parameter_imp.items()
                      if not k.startswith('[') and v > self.marginal_threshold}
        pairwise_imp = {k: v for k, v in parameter_imp.items()
                        if k.startswith('[') and v > self.marginal_threshold}

        # Set internal parameter importance for further analysis (such as parallel coordinates)
        self.fanova_single_importance = single_imp
        # Fixed: this attribute was previously assigned the single-parameter dict by mistake
        self.fanova_pairwise_importance = pairwise_imp

        # Dicts to ordered dicts, sorted descending after importance
        single_imp = OrderedDict(sorted(single_imp.items(), key=operator.itemgetter(1), reverse=True))
        pairwise_imp = OrderedDict(sorted(pairwise_imp.items(), key=operator.itemgetter(1), reverse=True))

        # Create table
        table = []
        if single_imp:
            table.append((20 * "-" + " Single importance: " + 20 * "-", 20 * "-"))
            for k, v in single_imp.items():
                table.append((k, format_importance(k, v)))
        if pairwise_imp:
            table.append((20 * "-" + " Pairwise importance: " + 20 * "-", 20 * "-"))
            for k, v in pairwise_imp.items():
                table.append((' & '.join(parse_pairwise(k)), format_importance(k, v)))

        keys, fanova_table = [row[0] for row in table], [row[1:] for row in table]
        df = DataFrame(data=fanova_table, index=keys)
        result['Importance'] = {'table': df.to_html(escape=False, header=False, index=True, justify='left')}

        # Get plot-paths
        result['Marginals'] = {p: {'figure': os.path.join(output_dir, "fanova", p + '.png')}
                               for p in single_imp}
        # Right now no way to access paths of the plots -> file issue
        pairwise_plots = {" & ".join(parse_pairwise(p)):
                          os.path.join(output_dir, 'fanova', '_'.join(parse_pairwise(p)) + '.png')
                          for p in pairwise_imp}
        # Only pairwise plots that actually exist on disk are reported
        result['Pairwise Marginals'] = {p: {'figure': path}
                                        for p, path in pairwise_plots.items() if os.path.exists(path)}

        return result

    def get_jupyter(self):
        """Display the importance table and the marginal plots of every entry in ``self.result``.

        An entry that has an ``'else'`` key is shown as HTML instead (treated as an error message).
        """
        # IPython.display is the public module; importing display from IPython.core.display is deprecated.
        from IPython.display import HTML, Image, display

        for result in self.result.values():
            error = result['else'] if 'else' in result else None
            if error:
                display(HTML(error))
            else:
                # Show table
                display(HTML(result["Importance"]["table"]))
                # Show plots
                display(*[Image(filename=d["figure"]) for d in result['Marginals'].values()])
                display(*[Image(filename=d["figure"]) for d in result['Pairwise Marginals'].values()])
                # While working for a prettier solution, this might be an option:
                # display(HTML(figure_to_html([d["figure"] for d in result['Marginals'].values()] +
                #                             [d["figure"] for d in result['Pairwise Marginals'].values()],
                #                             max_in_a_row=3, true_break_between_rows=True)))