# Source code for cave.analyzer.configurator.configurator_footprint

import os

import numpy as np
from bokeh.embed import components
from bokeh.io import output_notebook
from bokeh.plotting import show

from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.plot.configurator_footprint import ConfiguratorFootprintPlotter


class ConfiguratorFootprint(BaseAnalyzer):
    """
    Analysis of the iteratively sampled configurations during the optimization procedure.  Multi-dimensional scaling
    (MDS) is used to reduce dimensionality of the search space and plot the distribution of evaluated
    configurations. The larger the dot, the more often the configuration was evaluated on instances from the set.
    Configurations that were incumbents at least once during optimization are marked as red squares.  Configurations
    acquired through local search are marked with a 'x'.  The downward triangle denotes the final incumbent, whereas
    the orange upward triangle denotes the default configuration.  The heatmap and the colorbar correspond to the
    predicted performance in that part of the search space.
    """

    # NOTE: the class docstring above is emitted verbatim as the "tooltip" entry in get_html(), so editing it
    # changes the generated report text.

    def __init__(self,
                 runscontainer,
                 max_configurations_to_plot=None,
                 time_slider=None,
                 number_quantiles=None,
                 timeslider_log: bool = None,
                 ):
        """Set up the configurator-footprint plotter for the given runs.

        The keyword options are forwarded to ``BaseAnalyzer.__init__`` and then read back from ``self.options``;
        the original runhistories of the selected runs are handed to the plotter.

        Parameters
        ----------
        runscontainer: RunsContainer
            contains all important information about the configurator runs
        max_configurations_to_plot: int
            maximum number of data-points to plot
        time_slider: bool
            whether or not to have a time_slider-widget on cfp-plot
            INCREASES FILE-SIZE DRAMATICALLY
        number_quantiles: int
            number of quantiles for the slider / number of static pictures; the slider is only requested from
            the plotter if ``time_slider`` is set and this value is greater than 1
        timeslider_log: bool
            whether to use a logarithmic scale for the timeslider/quantiles
        """
        super().__init__(runscontainer,
                         max_configurations_to_plot=max_configurations_to_plot,
                         time_slider=time_slider,
                         number_quantiles=number_quantiles,
                         timeslider_log=timeslider_log)

        # Filled by plot(); initialised here so get_plots() is safe to call before any plot was generated.
        self.cfp_paths = []

        self.logger.info("... visualizing explored configspace (this may take "
                         "a long time, if there is a lot of data - deactive with --no_configurator_footprint)")
        self.output_dir = self.runscontainer.output_dir
        self.scenario = self.runscontainer.scenario
        # Run-specific / budget specific infos
        if len(self.runscontainer.get_budgets()) > 1:
            # Several budgets: aggregate over folders, keep one run per budget and label by budget
            self.runs = self.runscontainer.get_aggregated(keep_folders=False, keep_budgets=True)
            rh_labels = ["Budget " + str(r.reduced_to_budgets[0]) for r in self.runs]
        else:
            # Single (or no) budget: keep one run per folder and label by folder name
            self.runs = self.runscontainer.get_aggregated(keep_folders=True, keep_budgets=False)
            rh_labels = [os.path.basename(r.path_to_folder).replace('_', ' ') for r in self.runs]
        self.logger.debug("Analyzing runs: {}".format([r.get_identifier() for r in self.runs]))

        self.max_confs = self.options.getint('max_configurations_to_plot')
        self.use_timeslider = self.options.getboolean('time_slider')
        self.num_quantiles = self.options.getint('number_quantiles')
        self.timeslider_log = self.options.getboolean('timeslider_log')

        # Map the last incumbent of every run to its cost; the one with the lowest cost is the final incumbent
        incumbents = {r.trajectory[-1]['incumbent']: r.trajectory[-1]['cost'] for r in self.runs}
        self.final_incumbent = min(incumbents, key=incumbents.get)

        # Replace a missing feature array by an empty 2D array (presumably what the plotter expects - confirm)
        if self.scenario.feature_array is None:
            self.scenario.feature_array = np.array([[]])

        self.cfp = ConfiguratorFootprintPlotter(
                       scenario=self.scenario,
                       rhs=[r.original_runhistory for r in self.runs],
                       incs=[list(incumbents.keys())],
                       final_incumbent=self.final_incumbent,
                       rh_labels=rh_labels,
                       max_plot=self.max_confs,
                       use_timeslider=self.use_timeslider and self.num_quantiles > 1,
                       num_quantiles=self.num_quantiles,
                       timeslider_log=self.timeslider_log,
                       output_dir=self.output_dir)

    def get_name(self):
        """Return the display name of this analyzer."""
        return "Configurator Footprint"

    def plot(self):
        """Run the footprint plotter and return the bokeh plot.

        The second element of the plotter's result is stored in ``self.cfp_paths``.

        Raises
        ------
        MemoryError
            if the plotter runs out of memory; re-raised with a hint, chained to the original error
        """
        try:
            res = self.cfp.run()
        except MemoryError as err:
            self.logger.exception(err)
            raise MemoryError("Memory Error occured in configurator footprint. "
                              "You may want to reduce the number of plotted "
                              "configs (using the '--cfp_max_plot'-argument)") from err

        bokeh_plot, self.cfp_paths = res
        return bokeh_plot

    def get_jupyter(self):
        """Generate the plot and show it inline in a notebook."""
        bokeh_plot = self.plot()
        output_notebook()
        show(bokeh_plot)

    def get_html(self, d=None, tooltip=None):
        """Generate the plot and, if ``d`` is given, register it under ``self.name``.

        Parameters
        ----------
        d: dict, optional
            dictionary that receives the entry for this analyzer
        tooltip: optional
            unused in this method; kept for interface compatibility

        Returns
        -------
        bokeh_components
            result of ``bokeh.embed.components`` for the generated plot
        """
        bokeh_components = components(self.plot())
        if d is None:
            return bokeh_components

        # No need for "Static" with one plot / time slider activated
        if self.num_quantiles == 1 or self.use_timeslider:
            d[self.name] = {
                "bokeh": bokeh_components,
                "tooltip": self.__doc__,
            }
            return bokeh_components

        d[self.name] = {
            "tooltip": self.__doc__,
            "Interactive": {"bokeh": (bokeh_components)},
        }
        # Static plots only count as generated if there is at least one path and every path exists on disk
        static_available = bool(self.cfp_paths) and all(os.path.exists(p) for p in self.cfp_paths)
        if static_available:
            d[self.name]["Static"] = {"figure": self.cfp_paths}
        else:
            d[self.name]["Static"] = {
                    "else": "This plot is missing. Maybe it was not generated? "
                            "Check if you installed selenium and phantomjs "
                            "correctly to activate bokeh-exports. "
                            "(https://automl.github.io/CAVE/stable/faq.html)"}
        return bokeh_components

    def get_plots(self):
        """Return the paths stored by the last call to plot() (empty list if plot() was never called)."""
        return self.cfp_paths