import os
from typing import List, Union
import numpy as np
from ConfigSpace.configuration_space import Configuration
from smac.runhistory.runhistory import RunHistory
from cave.analyzer.base_analyzer import BaseAnalyzer
from cave.plot.scatter import plot_scatter_plot
from cave.utils.helpers import get_cost_dict_for_config, NotApplicable
from cave.utils.hpbandster_helpers import format_budgets


class PlotScatter(BaseAnalyzer):
"""
Scatter plots show the costs of the default and optimized parameter configuration on each instance. Since this
looses detailed information about the individual cost on each instance by looking at aggregated cost values in
tables, scatter plots provide a more detailed picture. They provide insights whether overall performance
improvements can be explained only by some outliers or whether they are due to improvements on the entire
instance set. On the left side the training-data is scattered, on the right side the test-data is scattered.
"""

    def __init__(self,
                 runscontainer,
                 ):
        """
        Creates scatter plots of the default and incumbent configuration on the given set of instances
        and saves them to file.
        """
        super().__init__(runscontainer)
        formatted_budgets = format_budgets(self.runscontainer.get_budgets())
        for budget, run in zip(self.runscontainer.get_budgets(),
                               self.runscontainer.get_aggregated(keep_budgets=True, keep_folders=False)):
            self.result[formatted_budgets[budget]] = self._plot_scatter(
                default=run.default,
                incumbent=run.incumbent,
                rh=run.epm_runhistory,
                train=run.scenario.train_insts,
                test=run.scenario.test_insts,
                run_obj=run.scenario.run_obj,
                cutoff=run.scenario.cutoff,
                output_dir=run.output_dir,
            )

    def get_name(self):
        return "Scatter Plot"

    def _plot_scatter(self,
                      default: Configuration,
                      incumbent: Configuration,
                      rh: RunHistory,
                      train: List[str],
                      test: Union[List[str], None],
                      run_obj: str,
                      cutoff,
                      output_dir):
"""
Parameters
----------
default, incumbent: Configuration
configurations to be compared
rh: RunHistory
runhistory to use for cost-estimations
train[, test]: list(str)
instance-names
run_obj: str
run-objective (time or quality)
cutoff: float
maximum runtime of ta
output_dir: str
output directory
"""
        out_fn_base = os.path.join(output_dir, 'scatter_')
        self.logger.info("... plotting scatter")
        metric = run_obj
        timeout = cutoff
        labels = ["default {}".format(run_obj), "incumbent {}".format(run_obj)]
        def_costs = get_cost_dict_for_config(rh, default).items()
        inc_costs = get_cost_dict_for_config(rh, incumbent).items()
        out_fns = []
        if len(train) <= 1 and (not test or len(test) <= 1):
            raise NotApplicable("No instances, so no scatter-plot.")
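        # Create one scatter plot each for the train and the test instances;
        # an instance set with at most one entry is skipped.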
        for insts, name in [(train, 'train'), (test, 'test')]:
            if not insts or len(insts) <= 1:
                self.logger.debug("No %s instances, skipping scatter", name)
                continue
            # Costs of the default and incumbent configuration on this instance set
            def_values = np.array([v for k, v in def_costs if k in insts])
            inc_values = np.array([v for k, v in inc_costs if k in insts])
            min_val = min(min(def_values), min(inc_values))
            out_fn = out_fn_base + name + '.png'
            out_fns.append(plot_scatter_plot((def_values,), (inc_values,), labels, metric=metric,
                                             min_val=min_val, max_val=timeout, out_fn=out_fn))
            self.logger.debug("Plotted scatter to %s", out_fn)
        return {'figure': out_fns if len(out_fns) > 0 else None}
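

# Example usage (a minimal sketch, not part of the module: it assumes a finished CAVE
# analysis and that the CAVE facade exposes its RunsContainer as `runscontainer`):
#
#     from cave.cavefacade import CAVE
#     cave = CAVE(folders=["smac3-output/run_1"], output_dir="cave-output", ta_exec_dir=["."])
#     analyzer = PlotScatter(cave.runscontainer)
#     analyzer.result  # maps formatted budgets to {'figure': [... scatter_train.png, scatter_test.png]}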