Source code for cave.analyzer.parameter_importance.pimp_comparison_table

import os
from collections import OrderedDict

import numpy as np
from bokeh.embed import components
from bokeh.io import output_notebook
from bokeh.plotting import show
from pandas import DataFrame

from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.utils.bokeh_routines import array_to_bokeh_table
from cave.utils.hpbandster_helpers import format_budgets


class PimpComparisonTable(BaseAnalyzer):
    # NOTE: the class docstring is emitted verbatim as the tooltip text by
    # get_html() (via self.__doc__), so its wording is runtime-visible output.
    """ Parameters are initially sorted by pimp_sort_table_by. Only parameters with an importance greater than 5 in any of the methods are shown. Note, that the values of the used methods are not directly comparable. For more information on the metrics, see respective tooltips."""

    def __init__(self, runscontainer, sort_table_by, threshold=0.05):
        """Create a html-table over all evaluated parameter-importance-methods.

        Parameters
        ----------
        runscontainer:
            Handed to ``BaseAnalyzer.__init__``; later queried for budgets,
            aggregated runs and ``scenario.cs``.
        sort_table_by: str
            Either ``"average"`` (sort by mean importance over all evaluators)
            or the lower-cased name of one of the evaluators.
        threshold: float
            A parameter is only listed if at least one evaluator reports a raw
            importance strictly greater than this value.
        """
        super().__init__(runscontainer)
        self.sort_table_by = sort_table_by
        self.threshold = threshold

    def get_name(self):
        """Return the display name of this analyzer."""
        return "Importance Table"

    def run(self):
        """Build one comparison table per budget and store it in ``self.result``.

        Results are keyed by the formatted budget; the latex table for each run
        is written to ``pimp.tex`` inside that run's output directory.
        """
        formatted_budgets = list(format_budgets(self.runscontainer.get_budgets(),
                                                allow_whitespace=True).values())
        aggregated_runs = self.runscontainer.get_aggregated(keep_budgets=True, keep_folders=False)
        for budget, run in zip(formatted_budgets, aggregated_runs):
            self.result[budget] = self.plot(
                pimp=run.pimp,
                evaluators=list(run.share_information['evaluators'].values()),
                cs=self.runscontainer.scenario.cs,
                out_fn=os.path.join(run.output_dir, 'pimp.tex'),
            )

    def _order_parameters(self, parameters, evaluators, columns):
        """Return ``parameters`` sorted by descending importance.

        The sort criterion is ``self.sort_table_by``: ``"average"`` uses the
        mean over all evaluators that evaluated the parameter, any other value
        must match a lower-cased entry of ``columns`` (the evaluator names).

        Raises
        ------
        ValueError
            If ``self.sort_table_by`` is neither ``"average"`` nor an evaluator.
        """
        self.logger.debug("Sort pimp-table by %s", self.sort_table_by)
        columns_lower = [c.lower() for c in columns]

        if self.sort_table_by == "average":
            def sort_key(p):
                # Parameters that no evaluator looked at count as 0.
                imps = [e.evaluated_parameter_importance[p] for e in evaluators
                        if p in e.evaluated_parameter_importance]
                return np.mean(imps) if imps else 0
        elif self.sort_table_by in columns_lower:
            imp = evaluators[columns_lower.index(self.sort_table_by)].evaluated_parameter_importance

            def sort_key(p):
                return imp[p] if p in imp else 0
        else:
            raise ValueError("Trying to sort importance table after {}, which "
                             "was not evaluated.".format(self.sort_table_by))

        return sorted(parameters, key=sort_key, reverse=True)

    def plot(self, pimp, evaluators, cs, out_fn):
        """Write the latex comparison table and build the bokeh table.

        Parameters
        ----------
        pimp:
            Object providing ``table_for_comparison`` (writes the latex file).
        evaluators: list
            Evaluators exposing ``name`` and ``evaluated_parameter_importance``
            (and optionally ``evaluated_parameter_importance_uncertainty``).
        cs:
            Configuration space; only ``get_hyperparameters()`` is used.
        out_fn: str
            Path of the latex file to write.

        Returns
        -------
        dict
            ``{'bokeh': <table>}`` where the table is whatever
            ``array_to_bokeh_table`` returns.

        Raises
        ------
        ValueError
            If ``self.sort_table_by`` does not name a valid sort criterion.
        """
        pimp.table_for_comparison(evaluators, out_fn, style='latex')
        self.logger.info('Creating pimp latex table at %s', out_fn)

        parameters = [p.name for p in cs.get_hyperparameters()]
        columns = [e.name for e in evaluators]

        # SORT
        p_order = self._order_parameters(parameters, evaluators, columns)

        # PREPROCESS
        values = []
        for p in p_order:
            values_for_p = [p]
            # Only add parameters where at least one evaluator shows importance > threshold
            add_parameter = False
            for e in evaluators:
                if p not in e.evaluated_parameter_importance:
                    values_for_p.append('-')
                    continue
                value_to_add = e.evaluated_parameter_importance[p]
                # The threshold is checked against the raw (unscaled) value.
                if value_to_add > self.threshold:
                    add_parameter = True
                # All but forward-selection use values between 0 and 1
                if e.name != 'Forward-Selection':
                    value_to_add = value_to_add * 100
                # Create string and add uncertainty, if available
                value_to_add = format(value_to_add, '05.2f')  # (leading zeros for sorting!)
                if (hasattr(e, 'evaluated_parameter_importance_uncertainty')
                        and p in e.evaluated_parameter_importance_uncertainty):
                    value_to_add += ' +/- ' + format(
                        e.evaluated_parameter_importance_uncertainty[p] * 100, '.2f')
                values_for_p.append(value_to_add)
            if add_parameter:
                values.append(values_for_p)

        # CREATE TABLE
        comp_table = DataFrame(values, columns=['Parameters'] + columns)
        sortable = {c: True for c in columns}
        width = {**{'Parameters': 150}, **{c: 100 for c in columns}}
        bokeh_table = array_to_bokeh_table(comp_table, sortable=sortable, width=width, logger=self.logger)
        return {'bokeh': bokeh_table}

    def get_html(self, d=None, tooltip=None):
        """Run the analysis and return the bokeh ``(script, div)`` html snippets.

        Parameters
        ----------
        d: dict, optional
            If given, ``d[self.name]`` is filled with the per-budget (or, for a
            single un-budgeted result, the flat) components and tooltip.
        tooltip:
            NOTE(review): accepted but not used; the tooltip is always the
            class docstring. Confirm against the base-class contract.

        Returns
        -------
        tuple of str
            Concatenated script and div strings of all generated tables.
        """
        self.run()
        # Unwrap the single None-keyed result on a local name only. Rebinding
        # self.result here would make the next run() write into the unwrapped
        # dict, leaving a stale 'bokeh' entry next to the fresh one and
        # breaking subsequent get_html()/get_jupyter() calls.
        result = self.result
        if len(result) == 1 and None in result:
            self.logger.debug("Detected None-key, abstracting away...")
            result = result[None]

        if d is not None:
            d[self.name] = OrderedDict()

        script, div = "", ""
        for b, t in result.items():
            # In the unwrapped case the value is the table itself.
            s_, d_ = components(t) if b == 'bokeh' else components(t['bokeh'])
            script += s_
            div += d_
            if d is not None:
                entry = {
                    "bokeh": (s_, d_),
                    "tooltip": self.__doc__,
                }
                if b == 'bokeh':
                    d[self.name] = entry
                else:
                    d[self.name][b] = entry
        return script, div

    def get_jupyter(self):
        """Run the analysis and display every resulting table in the notebook."""
        self.run()
        output_notebook()
        for t in self.result.values():
            show(t['bokeh'])