Source code for deepcave.evaluators.mo_ablation

# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# noqa: D400
"""
# Ablation Paths

This module evaluates the ablation paths.

Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
Starting from a default configuration, the default configuration is iteratively changed to the
incumbent configuration by changing one hyperparameter at a time, choosing the
hyperparameter that leads to the largest improvement in the objective function at each step.

## Classes:
    - Ablation: Provide an evaluator of the ablation paths.
"""

from typing import Any, List, Optional, Tuple, Union

import copy

import numpy as np
import pandas as pd

from deepcave.evaluators.ablation import Ablation
from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate
from deepcave.runs import AbstractRun
from deepcave.runs.objective import Objective
from deepcave.utils.multi_objective_importance import get_weightings


[docs] class MOAblation(Ablation): """ Provide an evaluator of the ablation paths. Override: Multi-Objective case Properties ---------- run : AbstractRun The run to analyze. cs : ConfigurationSpace The configuration space of the run. hp_names : List[str] A list of the hyperparameter names. performances : Optional[Dict[Any, Any]] A dictionary containing the performances for each HP. improvements : Optional[Dict[Any, Any]] A dictionary containing the improvements over the respective previous step for each HP. objectives : Optional[Union[Objective, List[Objective]]] The objective(s) of the run. default_config : Configurations The default configuration of this configuration space. Gets changed step by step towards the incumbent configuration. """ def __init__(self, run: AbstractRun): super().__init__(run) self.models: List = [] self.df_importances = pd.DataFrame([])
[docs] def get_importances(self) -> str: """ Return the importance scores. Returns ------- Dict Dictionary with Hyperparameter names and the corresponding importance scores and variances. Raises ------ RuntimeError If the important scores are not calculated. """ if self.df_importances is None: raise RuntimeError("Importance scores must be calculated first.") return self.df_importances.to_json()
[docs] def predict(self, cfg: list[Any], weighting: np.ndarray) -> Tuple[float, float]: """ Predict the performance of the input configuration. The model results are weighted by the input weightings and summed. Parameters ---------- cfg : Dict Configuration. weighting : List[float] Weightings. Returns ------- mean : float The mean of the weighted sum of predictions. var : float The variance of the weighted sum of predictions. """ mean, var = 0, 0 for model, w in zip(self.models, weighting): pred, var_ = model.predict(np.array([cfg])) mean += w * pred[0] var += w * var_[0] return mean, var
[docs] def calculate( self, objectives: Optional[Union[Objective, List[Objective]]], # noqa budget: Optional[Union[int, float]] = None, # noqa n_trees: int = 50, # noqa seed: int = 0, # noqa ) -> None: """ Calculate the MO ablation path performances and improvements. Parameters ---------- objectives : Optional[Union[Objective, List[Objective]]] The objective(s) to be considered. budget : Optional[Union[int, float]] The budget to be considered. If None, all budgets of the run are considered. Default is None. n_trees : int The number of trees for the surrogate model. Default is 50. seed : int The seed for the surrogate model. Default is 0. """ assert isinstance(objectives, list) for objective in objectives: assert isinstance(objective, Objective) df = self.run.get_encoded_data(objectives, budget, specific=True, include_config_ids=True) # Obtain all configurations with theirs costs df = df.dropna(subset=[obj.name for obj in objectives]) X = df[list(self.run.configspace.keys())].to_numpy() # normalize objectives objectives_normed = list() for obj in objectives: normed = obj.name + "_normed" df[normed] = (df[obj.name] - df[obj.name].min()) / ( df[obj.name].max() - df[obj.name].min() ) if obj.optimize == "upper": df[normed] = 1 - df[normed] objectives_normed.append(normed) # train one model per objective Y = df[normed].to_numpy() model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees) model._fit(X, Y) self.models.append(model) weightings = get_weightings(objectives_normed, df) # calculate importance for each weighting generated from the pareto efficient points for w in weightings: df_res = self.calculate_ablation_path(df, objectives_normed, w, budget) if df_res is None: columns = ["hp_name", "importance", "variance", "new_performance", "weight"] self.df_importances = pd.DataFrame( 0, index=np.arange(len(self.hp_names) + 1), columns=columns ) self.df_importances["hp_name"] = ["Default"] + self.hp_names return df_res["weight"] = w[0] self.df_importances = pd.concat([self.df_importances, df_res]) self.df_importances = self.df_importances.reset_index(drop=True)
[docs] def calculate_ablation_path( self, df: pd.DataFrame, objectives_normed: List[str], weighting: np.ndarray, budget: Optional[Union[int, float]], ) -> pd.DataFrame: """ Calculate the ablation path performances. Parameters ---------- df : pd.DataFrame Dataframe with encoded data. objectives_normed : List[str] The normed objective names to be considered. weighting : np.ndarray The weighting of the objective values. budget : Optional[Union[int, float]] The budget to be considered. If None, all budgets of the run are considered. Default is None. Returns ------- df : pd.DataFrame Dataframe with results of the ablation calculation. """ # Get the incumbent configuration incumbent_cfg_id = np.argmin( sum(df[obj] * w for obj, w in zip(objectives_normed, weighting)) ) incumbent_config = self.run.get_config(df.iloc[incumbent_cfg_id]["config_id"]) # Get the default configuration self.default_config = self.cs.get_default_configuration() default_encode = self.run.encode_config(self.default_config, specific=True) # Obtain the predicted cost of the default and incumbent configuration def_cost, def_std = self.predict(default_encode, weighting) inc_cost, _ = self.predict( self.run.encode_config(incumbent_config, specific=True), weighting ) if inc_cost > def_cost: self.logger.warning( "The predicted incumbent objective is worse than the predicted default " f"objective for budget: {budget}. Aborting ablation path calculation." ) return None else: # Copy the hps names as to not remove objects from the original list hp_it = self.hp_names.copy() df_abl = pd.DataFrame([]) df_abl = pd.concat( [ df_abl, pd.DataFrame( { "hp_name": "Default", "importance": 0, "variance": def_std, "new_performance": def_cost, }, index=[0], ), ] ) for i in range(len(hp_it)): # Get the results of the current ablation iteration continue_ablation, max_hp, max_hp_cost, max_hp_std = self.ablation( budget, incumbent_config, def_cost, hp_it, weighting ) if not continue_ablation: break diff = def_cost - max_hp_cost def_cost = max_hp_cost df_abl = pd.concat( [ df_abl, pd.DataFrame( { "hp_name": max_hp, "importance": diff, "variance": max_hp_std, "new_performance": max_hp_cost, }, index=[i + 1], ), ] ) # Remove the current best hp for keeping the order right hp_it.remove(max_hp) return df_abl.reset_index(drop=True)
[docs] def ablation( self, budget: Optional[Union[int, float]], incumbent_config: Any, def_cost: Any, hp_it: List[str], weighting: np.ndarray[Any, Any], ) -> Tuple[Any, Any, Any, Any]: """ Calculate the ablation importance for each hyperparameter. Parameters ---------- budget: Optional[Union[int, float]] The budget of the run. incumbent_config: Any The incumbent configuration. def_cost: Any The default cost. hp_it: List[str] A list of the HPs that still have to be looked at. weighting : np.ndarray[Any, Any] The weighting of the objective values. Returns ------- Tuple[Any, Any, Any, Any] continue_ablation, max_hp, max_hp_performance, max_hp_std """ max_hp = "" max_hp_difference = 0 for hp in hp_it: if hp in incumbent_config.keys() and hp in self.default_config.keys(): config_copy = copy.copy(self.default_config) config_copy[hp] = incumbent_config[hp] new_cost, _ = self.predict( self.run.encode_config(config_copy, specific=True), weighting ) difference = def_cost - new_cost # Check for the maximum difference hyperparameter in this round if difference > max_hp_difference: max_hp = hp max_hp_difference = difference else: continue hp_count = len(list(self.cs.keys())) if max_hp != "": # For the maximum impact hyperparameter, switch the default with the incumbent value self.default_config[max_hp] = incumbent_config[max_hp] max_hp_cost, max_hp_std = self.predict( self.run.encode_config(self.default_config, specific=True), weighting ) return True, max_hp, max_hp_cost, max_hp_std else: self.logger.info( f"End ablation at step {hp_count - len(hp_it) + 1}/{hp_count} " f"for budget {budget} (remaining hyperparameters not activate in incumbent or " "default configuration)." ) return False, None, None, None