Source code for deepcave.plugins.hyperparameter.ablation_paths

# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#  noqa: D400
"""
# Ablation Paths

This module provides a plugin for the visualization of the ablation paths.

Provided utilities include getting input and output layout (filtered or non-filtered),
processing the data and loading the outputs.

## Classes
    - AblationPaths: This class provides a plugin for the visualization of the ablation paths.
"""

from typing import Any, Callable, Dict, List, Union

import math
from io import StringIO

import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from dash import dcc, html
from dash.exceptions import PreventUpdate

from deepcave import config
from deepcave.evaluators.ablation import Ablation
from deepcave.evaluators.mo_ablation import MOAblation
from deepcave.plugins.static import StaticPlugin
from deepcave.runs import AbstractRun
from deepcave.utils.cast import optional_int
from deepcave.utils.layout import get_checklist_options, get_select_options, help_button
from deepcave.utils.styled_plotty import get_color, save_image



[docs]
class AblationPaths(StaticPlugin):
    """
    Provide a plugin for the visualization of the ablation paths.

    Provided utilities include getting input/output layout, data processing
    and loading outputs. With a single objective two figures are produced
    (performance along the path and change per step); when a second objective
    is selected, one stacked figure plus a blank placeholder is produced.
    """

    # Class-level plugin metadata. Presumably consumed by the plugin framework
    # (StaticPlugin and its bases) -- confirm the exact semantics there.
    id = "ablation_paths"
    name = "Ablation Paths"
    icon = "fas fa-sort-amount-up"
    help = "plugins/ablation_paths.html"
    activate_run_selection = True


[docs]
    @staticmethod
    def get_input_layout(register: Callable) -> List[Any]:
        """
        Build the components of the input block.

        The block is a single row with three columns: the selector for the first
        objective, the selector for the second objective and a numeric field for
        the number of trees.

        Parameters
        ----------
        register : Callable
            Method to register (user) variables.
            The register_input function is located in the Plugin superclass.

        Returns
        -------
        List[Any]
            Layout for the input block.
        """
        # The columns are created in display order so that the variables are
        # registered in the order objective_id1, objective_id2, n_trees.
        first_objective_column = dbc.Col(
            [
                dbc.Label("Objective 1"),
                dbc.Select(
                    id=register("objective_id1", ["value", "options"], type=int),
                    placeholder="Select objective ...",
                ),
            ],
            md=6,
        )

        second_objective_column = dbc.Col(
            [
                dbc.Label("Objective 2"),
                dbc.Select(
                    id=register("objective_id2", ["value", "options"], type=int),
                    placeholder="Select objective ...",
                ),
            ],
            md=6,
        )

        trees_help = help_button(
            "The number of trees of the internal random forest to estimate the "
            "ablation path importance. "
            "The more trees are used the more accurate the results. "
            "However, also it takes longer to compute."
        )
        trees_column = dbc.Col(
            [
                dbc.Label("Trees"),
                trees_help,
                dbc.Input(id=register("n_trees", type=optional_int), type="number"),
            ],
            md=6,
        )

        return [dbc.Row([first_objective_column, second_objective_column, trees_column])]



[docs]
    @staticmethod
    def get_filter_layout(register: Callable) -> List[dbc.Row]:
        """
        Build the components of the filter block.

        The first row holds the limit on the number of displayed hyperparameters
        and the confidence toggle; the second row holds the budget selection.

        Parameters
        ----------
        register : Callable
            Method to register (user) variables.
            The register_input function is located in the Plugin superclass.

        Returns
        -------
        List[dbc.Row]
            Layout for the filter block.
        """
        # First row, left: how many hyperparameters are displayed
        limit_hps_column = dbc.Col(
            [
                html.Div(
                    [
                        dbc.Label("Limit Hyperparameters"),
                        help_button(
                            "Shows only the n most important (i.e. first selected) "
                            "hyperparameters."
                        ),
                        dbc.Input(id=register("n_hps", "value"), type="number"),
                    ],
                    className="mb-3",
                ),
            ],
            md=6,
        )

        # First row, right: toggle for the confidence bands
        confidence_column = dbc.Col(
            [
                html.Div(
                    [
                        dbc.Label("Show confidence"),
                        help_button("Displays the confidence bands."),
                        dbc.Select(id=register("show_confidence", ["value", "options"])),
                    ]
                ),
            ],
            md=6,
        )

        # Second row: budget selection
        budget_column = dbc.Col(
            [
                html.Div(
                    [
                        dbc.Label("Budgets"),
                        help_button(
                            "Budget refers to the multi-fidelity budget. "
                            "The hyperparameters are sorted by the highest budget."
                        ),
                        dbc.RadioItems(
                            id=register("budget_id", ["value", "options"]), inline=True
                        ),
                    ]
                ),
            ],
            md=6,
        )

        return [
            dbc.Row([limit_hps_column, confidence_column]),
            dbc.Row([budget_column]),
        ]



[docs]
    def load_inputs(self) -> Dict[str, Dict[str, Any]]:
        """
        Provide the initial content for the inputs of both layout blocks.

        The values returned here are used when the plugin is opened for the first
        time or when nothing has been cached yet. They pre-fill the inputs defined
        in 'get_input_layout' and 'get_filter_layout'.

        Returns
        -------
        Dict[str, Dict[str, Any]]
            Content to be filled.
        """
        defaults: Dict[str, Dict[str, Any]] = {}
        defaults["n_trees"] = {"value": 100}
        defaults["n_hps"] = {"value": 0}
        # No budget is chosen yet; options are created without arguments here
        defaults["budget_id"] = {"options": get_checklist_options(), "value": None}
        defaults["show_confidence"] = {
            "options": get_select_options(binary=True),
            "value": "false",
        }
        return defaults



[docs]
    def load_dependency_inputs(self, run, _, inputs) -> Dict[str, Any]:  # type: ignore # noqa: E501
        """
        Works like 'load_inputs' but called after inputs have changed.

        Note
        ----
        Only the changes have to be returned.
        The returned dictionary will be merged with the inputs.

        Parameters
        ----------
        run:
            The selected run.
        _:
            Unused positional argument; it is never read by this method.
        inputs : Dict[str, Any]
            Current content of the inputs.

        Returns
        -------
        Dict[str, Any]
            A dictionary with the changes.
        """
        # Prepare objectives
        objective_names = run.get_objective_names()
        objective_ids = run.get_objective_ids()
        objective_value1 = inputs["objective_id1"]["value"]
        objective_value2 = inputs["objective_id2"]["value"]  # in the multi-objective case

        # Pre-set values
        if objective_value1 is None:
            objective_value1 = objective_ids[0]

        objective_options = get_select_options(objective_names, objective_ids)
        # Make sure the same objective cannot be chosen twice
        objective_options2 = [
            option for option in objective_options if option["value"] != objective_value1
        ]
        # Add the option to deselect the second objective
        objective_options2.append({"label": "Select objective ...", "value": -1})

        # Prepare budgets
        budgets = run.get_budgets(human=True)
        budget_ids = run.get_budget_ids()
        budget_options = get_checklist_options(budgets, budget_ids)
        budget_value = inputs["budget_id"]["value"]

        hp_names = list(run.configspace.keys())
        n_hps = inputs["n_hps"]["value"]

        # A limit of 0 means "no limit": show all hyperparameters
        if n_hps == 0:
            n_hps = len(hp_names)

        # Pre-select the last budget id if none has been chosen yet.
        # (`run` has already been dereferenced above, so it cannot be None here.)
        if budget_value is None:
            budget_value = budget_ids[-1]

        return {
            "objective_id1": {
                "options": objective_options,
                "value": objective_value1,
            },
            "objective_id2": {
                "options": objective_options2,
                "value": objective_value2,
            },
            "budget_id": {
                "options": budget_options,
                "value": budget_value,
            },
            "n_hps": {"value": n_hps},
            "n_trees": {"value": inputs["n_trees"]["value"]},
        }



[docs]
    @staticmethod
    def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return raw data based on the run and input data.

        For every budget of the run (including the combined one) the ablation is
        calculated. With a single objective, the ablation performances and
        improvements are stored; with two objectives, the importances of the
        multi-objective ablation are stored.

        Warning
        -------
        The returned data must be JSON serializable.

        Note
        ----
        The passed inputs are cleaned and therefore differ
        compared to 'load_inputs' or 'load_dependency_inputs'.
        Please see '_clean_inputs' for more information.

        Parameters
        ----------
        run : AbstractRun
            The run to process.
        inputs :  Dict[str, Any]
            The input data.

        Returns
        -------
        Dict[str, Any]
            A serialized dictionary, keyed by the budget id.

        Raises
        ------
        RuntimeError
            If the number of trees is not specified.
        """
        objective: Any = run.get_objective(inputs["objective_id1"])
        # A second objective of None or -1 means that it was deselected
        multi_objective = inputs["objective_id2"] not in (None, -1)
        if multi_objective:
            objective = [objective, run.get_objective(inputs["objective_id2"])]

        n_trees = inputs["n_trees"]
        if n_trees is None:
            raise RuntimeError("Please specify the number of trees.")

        budgets = run.get_budgets(include_combined=True)

        evaluator: Union[Ablation, MOAblation]
        if multi_objective:
            evaluator = MOAblation(run)
        else:
            evaluator = Ablation(run)

        # Collect data
        data: Dict[Any, Any] = {}
        for budget_id, budget in enumerate(budgets):
            assert isinstance(budget, (int, float))
            # Forward the user-selected number of trees; previously the value was
            # validated but never handed to the evaluator, so it had no effect.
            evaluator.calculate(objective, budget, n_trees=n_trees)
            if isinstance(evaluator, MOAblation):
                data[budget_id] = evaluator.get_importances()
            else:
                performances = evaluator.get_ablation_performances()
                improvements = evaluator.get_ablation_improvements()
                data[budget_id] = [performances, improvements]
        return data  # type: ignore



[docs]
    @staticmethod
    def get_output_layout(register: Callable) -> List[dcc.Graph]:
        """
        Build the components of the output block.

        Two graphs are created, one registered as "perf_graph" and one as
        "impr_graph". Both share the same height and download scale.

        Parameters
        ----------
        register : Callable
            Method to register outputs.

        Returns
        -------
        List[dcc.Graph]
            Layout for the output block.
        """
        graph_names = ("perf_graph", "impr_graph")
        return [
            dcc.Graph(
                register(graph_name, "figure"),
                style={"height": config.FIGURE_HEIGHT},
                config={"toImageButtonOptions": {"scale": config.FIGURE_DOWNLOAD_SCALE}},
            )
            for graph_name in graph_names
        ]



[docs]
    @staticmethod
    def load_outputs(run, inputs, outputs) -> List[go.Figure]:  # type: ignore
        """
        Turn the raw data into the figures of the output block.

        If a second objective is selected, the work is delegated to
        'load_outputs_mo'. Otherwise a line plot of the performance along the
        ablation path and a bar plot of the change per ablation step are created
        for the selected budget. Both figures are also passed to 'save_image'.

        Note
        ----
        The passed inputs are cleaned and therefore differ
        compared to 'load_inputs' or 'load_dependency_inputs'.
        Please see '_clean_inputs' for more information.

        Parameters
        ----------
        run
            The selected run.
        inputs
            Input and filter values from the user.
        outputs
            Raw output from the run.

        Returns
        -------
        List[go.Figure]
            The performance figure followed by the improvement figure.

        Raises
        ------
        PreventUpdate
            If the number of hyperparameters to show is empty or None.
        """
        if inputs["objective_id2"] not in (None, -1):
            # MO case: other plot
            return AblationPaths.load_outputs_mo(run, inputs, outputs)

        selected_budget_id = inputs["budget_id"]
        objective = run.get_objective(inputs["objective_id1"])
        n_hps = inputs["n_hps"]
        show_confidence = inputs["show_confidence"]

        if n_hps is None or n_hps == "":
            raise PreventUpdate
        n_hps = int(n_hps)

        # Gather (names, values, errors) per budget; only the selected budget is kept
        performance_data, improvement_data = {}, {}
        for raw_budget_id, results in outputs.items():
            # The keys have to be cast because of the json serialization
            budget_id = int(raw_budget_id)
            if budget_id == selected_budget_id:
                performances, improvements = results[0], results[1]

                hp_names = np.array(list(performances))
                perf_values = np.array([entry[0] for entry in performances.values()])
                perf_errors = np.array([entry[1] for entry in performances.values()])
                impr_values = np.array([entry[0] for entry in improvements.values()])
                impr_errors = np.array([entry[1] for entry in improvements.values()])

                performance_data[budget_id] = (hp_names, perf_values, perf_errors)
                improvement_data[budget_id] = (hp_names, impr_values, impr_errors)

        # Only the first n_hps entries of the path are displayed
        performance_traces = [
            go.Scatter(
                name=run.get_budget(budget_id, human=True),
                x=list(names[:n_hps]),
                y=values[:n_hps],
                error_y=dict(array=errors[:n_hps]) if show_confidence else None,
                line=dict(color=get_color(0)),
            )
            for budget_id, (names, values, errors) in performance_data.items()
        ]

        improvement_traces = [
            go.Bar(
                name=run.get_budget(budget_id, human=True),
                x=list(names[:n_hps]),
                y=values[:n_hps],
                error_y_array=errors[:n_hps] if show_confidence else None,
                marker_color=get_color(0),
            )
            for budget_id, (names, values, errors) in improvement_data.items()
        ]

        title_font = {"size": config.FIGURE_FONT_SIZE + 2}

        figure1 = go.Figure(data=performance_traces)
        figure1.update_layout(
            barmode="group",
            title={
                "text": "Ablation Path when Iteratively Setting the Hyperparameters to Their "
                "Incumbent Value",
                "font": title_font,
            },
            yaxis_title=objective.name,
            legend={"title": "Budget"},
            margin={"t": 50, "b": 0, "l": 0, "r": 0},
            xaxis={"tickangle": -45},
            font={"size": config.FIGURE_FONT_SIZE},
        )
        save_image(figure1, "ablation_path_performance.pdf")

        figure2 = go.Figure(data=improvement_traces)
        figure2.update_layout(
            barmode="group",
            title={
                "text": "Change in Objective with Respect to the Previous Ablation Step",
                "font": title_font,
            },
            yaxis_title=f"Change in {objective.name}",
            legend={"title": "Budget"},
            margin={"t": 50, "b": 0, "l": 0, "r": 0},
            xaxis={"tickangle": -45},
            font={"size": config.FIGURE_FONT_SIZE},
        )
        save_image(figure2, "ablation_path_improvement.pdf")

        return [figure1, figure2]



[docs]
    @staticmethod
    def load_outputs_mo(run, inputs, outputs) -> List[go.Figure]:  # type: ignore
        """
        Turn the raw data into figures for the multi-objective case.

        A stacked area plot over the weight of the first objective is created for
        the selected budget and passed to 'save_image'. Since there is no second
        plot in this case, an empty white figure is returned in its place.

        Note
        ----
        The passed inputs are cleaned and therefore differ
        compared to 'load_inputs' or 'load_dependency_inputs'.
        Please see '_clean_inputs' for more information.

        Parameters
        ----------
        run
            The selected run.
        inputs
            Input and filter values from the user.
        outputs
            Raw output from the run.

        Returns
        -------
        List[go.Figure]
            The figure of the ablation paths followed by a blank placeholder figure.

        Raises
        ------
        PreventUpdate
            If the number of hyperparameters to show is empty or None.
        """
        first_objective_name = run.get_objective(inputs["objective_id1"]).name
        selected_budget_id = inputs["budget_id"]
        n_hps = inputs["n_hps"]

        if n_hps is None or n_hps == "":
            raise PreventUpdate
        n_hps = int(n_hps)

        # Parse only the selected budget; keys are cast because of the json serialization
        frames = {
            int(raw_budget_id): pd.read_json(StringIO(importances_json))
            for raw_budget_id, importances_json in outputs.items()
            if int(raw_budget_id) == selected_budget_id
        }
        selected_frame = frames[selected_budget_id]

        # Rank the hyperparameters by their maximal importance and keep the first n_hps
        max_importances = selected_frame.groupby("hp_name")["importance"].max()
        ranked_hps = max_importances.sort_values(ascending=False).index
        kept_hps = list(ranked_hps[:n_hps]) + ["Default"]

        df = selected_frame[selected_frame["hp_name"].isin(kept_hps)].copy()

        # The "Default" rows use 1 - new_performance, all other rows their importance
        default_rows = df["hp_name"] == "Default"
        df.loc[default_rows, "accuracy"] = 1 - df["new_performance"]
        other_rows = df["hp_name"] != "Default"
        df.loc[other_rows, "accuracy"] = df["importance"]

        summed = df.groupby(["weight", "hp_name"])["accuracy"].sum()
        grouped_df = summed.unstack(fill_value=0)

        # One color per hyperparameter of the configspace plus one for "Default"
        palette = px.colors.qualitative.Plotly
        legend_entries = list(run.configspace.keys()) + ["Default"]
        colors = {
            hp: palette[position % len(palette)] for position, hp in enumerate(legend_entries)
        }

        # One stacked trace per hp_name
        traces = [
            go.Scatter(
                x=grouped_df.index,
                y=grouped_df[column],
                mode="lines",
                stackgroup="one",  # This makes the traces stacked
                name=column,
                hoverinfo="skip",
                showlegend=True,
                opacity=0.2,
                fillcolor=colors[column],
                line=dict(color=colors[column]),
            )
            for column in grouped_df.columns
        ]

        fig = go.Figure(data=traces)

        x_title = "Weight for " + first_objective_name
        # Lower end of the y-axis, rounded down to one decimal
        max_default_performance = df[df["hp_name"] == "Default"]["new_performance"].max()
        y_lower = math.floor(10 * (1 - (max_default_performance + 0.01))) / 10

        fig.update_layout(
            xaxis_title=x_title,
            yaxis_title="Sum of the weighted <br> normalized performance",
            xaxis={"range": [0, 1], "tickangle": -45},
            yaxis={"range": [y_lower, 1]},
            margin=config.FIGURE_MARGIN,
            font={"size": config.FIGURE_FONT_SIZE},
        )

        save_image(fig, "ablation_path_performance.pdf")

        # Blank figure that hides the second graph, which does not exist for MO
        hidden_axis = {"showgrid": False, "zeroline": False, "visible": False}
        white_fig = go.Figure()
        white_fig.update_layout(
            paper_bgcolor="white",
            plot_bgcolor="white",
            xaxis=dict(hidden_axis),
            yaxis=dict(hidden_axis),
            font={"color": "white"},
            showlegend=False,
        )

        return [fig, white_fig]
DeepCAVE Documentation

Source code for deepcave.plugins.hyperparameter.ablation_paths