Source code for deepcave.evaluators.mo_fanova

# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#  noqa: D400
"""
# fANOVA

This module provides a tool for assessing the importance of an algorithm's hyperparameters.

Utilities provide calculation of the data wrt the budget and train the forest on the encoded data.

## Classes
    - MOfANOVA: Multi-objective fANOVA; calculate and provide midpoints and sizes.
"""

from typing import List, Optional, Union

import pandas as pd

from deepcave.evaluators.epm.fanova_forest import FanovaForest
from deepcave.evaluators.fanova import fANOVA
from deepcave.runs import AbstractRun
from deepcave.runs.objective import Objective
from deepcave.utils.multi_objective_importance import get_weightings



[docs]
class MOfANOVA(fANOVA):
    """
    Multi-objective variant of fANOVA.

    Calculate and provide midpoints and sizes from the forest's split values in order to get
    the marginals.
    Override: the random forest is trained on an arbitrary weighting of the objectives
    (multi-objective case).
    """

    def __init__(self, run: AbstractRun):
        """
        Set up the evaluator for an initialized run.

        Parameters
        ----------
        run : AbstractRun
            The run to analyze. Its `configspace` must not be None.

        Raises
        ------
        RuntimeError
            If `run.configspace` is None.
        """
        configspace = run.configspace
        if configspace is None:
            raise RuntimeError("The run needs to be initialized.")

        super().__init__(run)

        # No importance table yet; `get_importances_` raises while this is None.
        self.importances_ = None


[docs]
    def calculate(
        self,
        objectives: Optional[Union[Objective, List[Objective]]] = None,
        budget: Optional[Union[int, float]] = None,
        n_trees: int = 100,
        seed: int = 0,
    ) -> None:
        """
        Get the data with respect to budget and train the forest on the encoded data.

        Calculates weighted fanova for multiple objectives: every objective is min-max
        normalized (and flipped if its `optimize` attribute is "upper"), one forest is trained
        per weighting returned by `get_weightings`, and the resulting importances are stored
        in `self.importances_` as a DataFrame with the columns `hp_name`, `importance`,
        `variance` and `weight` (the weight of the first objective in that weighting).

        Note
        ----
        Of the forest's settings, only `n_trees` and `seed` are passed on to `FanovaForest`.
        It can be further specified if needed.

        Parameters
        ----------
        objectives : Optional[Union[Objective, List[Objective]]], optional
            Considered objectives. By default None. If None, all objectives are considered.
            Must be a list by the time the objectives are normalized.
        budget : Optional[Union[int, float]], optional
            Considered budget. By default None. If None, the highest budget is chosen.
        n_trees : int, optional
            How many trees should be used. By default 100.
        seed : int
            Random seed. By default 0.
        """
        if objectives is None:
            objectives = self.run.get_objectives()

        if budget is None:
            budget = self.run.get_highest_budget()

        self.n_trees = n_trees

        # Get data
        df = self.run.get_encoded_data(
            objectives, budget, specific=True, include_combined_cost=True
        )

        # normalize objectives
        assert isinstance(objectives, list)
        objectives_normed = []
        for obj in objectives:
            normed = obj.name + "_normed"
            lowest = df[obj.name].min()
            value_range = df[obj.name].max() - lowest
            shifted = df[obj.name] - lowest
            # A constant objective has a range of zero. Dividing would give 0/0 = NaN in
            # every row and the `dropna` below would then discard the whole data set, so
            # such an objective is mapped to 0 instead (rows with a missing value stay NaN).
            df[normed] = shifted if value_range == 0 else shifted / value_range
            if obj.optimize == "upper":
                df[normed] = 1 - df[normed]
            objectives_normed.append(normed)

        # Rows without a value for one of the normalized objectives cannot be weighted.
        df = df.dropna(subset=objectives_normed)
        X = df[self.hp_names].to_numpy()
        weightings = get_weightings(objectives_normed, df)

        # calculate importance for each weighting generated from the pareto efficient points
        results = []
        for w in weightings:
            Y = sum(df[obj] * weighting for obj, weighting in zip(objectives_normed, w)).to_numpy()

            self._model = FanovaForest(self.cs, n_trees=n_trees, seed=seed)
            self._model.train(X, Y)
            # Keep only the first two entries per hyperparameter (renamed to importance and
            # variance below) and turn the hyperparameter names into a regular column.
            df_res = (
                pd.DataFrame(super().get_importances(hp_names=None))
                .loc[0:1]
                .T.reset_index()
            )
            df_res["weight"] = w[0]
            results.append(df_res)

        # Concatenate once instead of growing a DataFrame inside the loop.
        df_all = pd.concat(results) if results else pd.DataFrame([])
        self.importances_ = df_all.rename(
            columns={0: "importance", 1: "variance", "index": "hp_name"}
        ).reset_index(drop=True)



[docs]
    def get_importances_(self, hp_names: Optional[List[str]] = None) -> str:
        """
        Return the calculated importance scores serialized as JSON.

        Parameters
        ----------
        hp_names : Optional[List[str]]
            Selected Hyperparameter names to get the importance scores from. If None (or
            empty), the scores of all stored Hyperparameters are returned.

        Returns
        -------
        str
            The stored importance table (optionally restricted to the rows whose `hp_name`
            is in `hp_names`), serialized with `DataFrame.to_json`.

        Raises
        ------
        RuntimeError
            If the importance scores are not calculated.
        """
        scores = self.importances_
        if scores is None:
            raise RuntimeError("Importance scores must be calculated first.")

        # No selection given: hand back the full table.
        if not hp_names:
            return scores.to_json()

        is_selected = scores["hp_name"].isin(hp_names)
        return scores.loc[is_selected].to_json()
DeepCAVE Documentation

Source code for deepcave.evaluators.mo_fanova