from typing import Any, Iterator, List, Optional, Tuple
import logging
import numpy as np
from smac.configspace import Configuration
from smac.configspace.util import convert_configurations_to_array
from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances
from smac.optimizer.acquisition import AbstractAcquisitionFunction
from smac.optimizer.acquisition.maximizer import (
AcquisitionFunctionMaximizer,
RandomSearch,
)
from smac.optimizer.configuration_chooser.random_chooser import (
ChooserNoCoolDown,
RandomChooser,
)
from smac.runhistory.runhistory import RunHistory
from smac.runhistory.runhistory2epm import AbstractRunHistory2EPM
from smac.scenario.scenario import Scenario
from smac.stats.stats import Stats
__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
__license__ = "3-clause BSD"
class EPMChooser:
"""Interface to train the EPM and generate/choose next configurations.
Parameters
----------
scenario: smac.scenario.scenario.Scenario
Scenario object
stats: smac.stats.stats.Stats
statistics object with configuration budgets
runhistory: smac.runhistory.runhistory.RunHistory
runhistory with all runs so far
runhistory2epm: smac.runhistory.runhistory2epm.AbstractRunHistory2EPM
converts the runhistory into training data for the EPM
model: smac.epm.random_forest.rf_with_instances.RandomForestWithInstances
empirical performance model (right now, we support only
RandomForestWithInstances)
acq_optimizer: smac.optimizer.acquisition.maximizer.AcquisitionFunctionMaximizer
Optimizer of the acquisition function.
acquisition_func: smac.optimizer.acquisition.AbstractAcquisitionFunction
acquisition function to be maximized
restore_incumbent: Configuration
incumbent to be used from the start. ONLY used to restore states.
rng: np.random.RandomState
Random number generator
random_configuration_chooser: smac.optimizer.configuration_chooser.random_chooser.RandomChooser
Chooser that decides when to interleave a random configuration -- one of
* ChooserNoCoolDown(modulus)
* ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
predict_x_best: bool
Choose x_best for computing the acquisition function via the model instead of via the observations.
min_samples_model: int
Minimum number of samples to build a model
epm_chooser_kwargs: Any
Additional arguments passed to the EPMChooser (might be used by subclasses)
"""
def __init__(
self,
scenario: Scenario,
stats: Stats,
runhistory: RunHistory,
runhistory2epm: AbstractRunHistory2EPM,
model: RandomForestWithInstances,
acq_optimizer: AcquisitionFunctionMaximizer,
acquisition_func: AbstractAcquisitionFunction,
rng: np.random.RandomState,
restore_incumbent: Optional[Configuration] = None,
random_configuration_chooser: RandomChooser = ChooserNoCoolDown(modulus=2.0),
predict_x_best: bool = True,
min_samples_model: int = 1,
**epm_chooser_kwargs: Any,
):
self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
self.incumbent = restore_incumbent
self.scenario = scenario
self.stats = stats
self.runhistory = runhistory
self.rh2EPM = runhistory2epm
self.model = model
self.acq_optimizer = acq_optimizer
self.acquisition_func = acquisition_func
self.rng = rng
self.random_configuration_chooser = random_configuration_chooser
self._random_search = RandomSearch(
acquisition_func,
self.scenario.cs, # type: ignore[attr-defined] # noqa F821
rng,
)
self.initial_design_configs = [] # type: List[Configuration]
self.predict_x_best = predict_x_best
self.min_samples_model = min_samples_model
self.currently_considered_budgets = [
0.0,
]
def _collect_data_to_train_model(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
# If budgets are used, train the model only on the highest budget for which
# at least ``min_samples_model`` samples are available
available_budgets = []
for run_key in self.runhistory.data.keys():
available_budgets.append(run_key.budget)
# Sort available budgets from highest to lowest budget
available_budgets = sorted(list(set(available_budgets)), reverse=True)
# Count the points per budget and, starting from the highest budget, build the model on the first one with enough samples
for b in available_budgets:
X, Y = self.rh2EPM.transform(
self.runhistory,
budget_subset=[
b,
],
)
if X.shape[0] >= self.min_samples_model:
self.currently_considered_budgets = [
b,
]
configs_array = self.rh2EPM.get_configurations(
self.runhistory, budget_subset=self.currently_considered_budgets
)
return X, Y, configs_array
return (
np.empty(shape=[0, 0]),
np.empty(
shape=[
0,
]
),
np.empty(shape=[0, 0]),
)
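# Worked example of the budget fallback above (hypothetical numbers): with runs on
# budgets {5, 25, 100} and ``min_samples_model=3``, the budgets are scanned as
# [100, 25, 5]; if only 2 runs finished on budget 100 but 4 finished on budget 25,
# the model is trained on the budget-25 data and ``currently_considered_budgets``
# becomes [25].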
def _get_evaluated_configs(self) -> List[Configuration]:
return self.runhistory.get_all_configs_per_budget(budget_subset=self.currently_considered_budgets)
def choose_next(self, incumbent_value: Optional[float] = None) -> Iterator[Configuration]:
"""Choose next candidate solution with Bayesian optimization. The suggested configurations
depend on the argument ``acq_optimizer`` to the ``SMBO`` class.
Parameters
----------
incumbent_value: float
Cost value of the incumbent configuration (required for the acquisition function).
If not given, it is inferred from the runhistory or predicted by the model;
if not given and the runhistory is empty, a ValueError is raised.
Returns
-------
Iterator[Configuration]
"""
self.logger.debug("Search for next configuration")
X, Y, X_configurations = self._collect_data_to_train_model()
if X.shape[0] == 0:
# Only return a single point to avoid an overly high number of
# random search iterations
return self._random_search.maximize(runhistory=self.runhistory, stats=self.stats, num_points=1)
self.model.train(X, Y)
if incumbent_value is not None:
best_observation = incumbent_value
x_best_array = None # type: Optional[np.ndarray]
else:
if self.runhistory.empty():
raise ValueError("Runhistory is empty and the cost value of " "the incumbent is unknown.")
x_best_array, best_observation = self._get_x_best(self.predict_x_best, X_configurations)
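# Update the acquisition function with the freshly trained model and the
# incumbent information; ``eta`` is the best cost found/predicted so far and is
# used, e.g., by Expected Improvement.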
self.acquisition_func.update(
model=self.model,
eta=best_observation,
incumbent_array=x_best_array,
num_data=len(self._get_evaluated_configs()),
X=X_configurations,
)
challengers = self.acq_optimizer.maximize(
runhistory=self.runhistory,
stats=self.stats,
num_points=self.scenario.acq_opt_challengers, # type: ignore[attr-defined] # noqa F821
random_configuration_chooser=self.random_configuration_chooser,
)
return challengers
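# Sketch of how a caller might consume the returned challengers (illustrative
# only; in SMAC the intensifier pulls configurations from this iterator).
# Depending on ``random_configuration_chooser``, random configurations are
# interleaved with the acquisition-optimized ones:
#
#     for challenger in chooser.choose_next():
#         if challenger not in runhistory.get_all_configs():
#             break  # evaluate this configuration next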
def _get_x_best(self, predict: bool, X: np.ndarray) -> Tuple[np.ndarray, float]:
"""Get value, configuration, and array representation of the "best" configuration.
The definition of best varies depending on the argument ``predict``. If set to ``True``,
this function will return the stats of the best configuration as predicted by the model,
otherwise it will return the stats for the best observed configuration.
Parameters
----------
predict : bool
Whether to use the predicted or observed best.
Returns
-------
float
np.ndarry
Configuration
"""
if predict:
costs = list(
map(
lambda x: (
self.model.predict_marginalized_over_instances(x.reshape((1, -1)))[0][0][0],
x,
),
X,
)
)
costs = sorted(costs, key=lambda t: t[0])
x_best_array = costs[0][1]
best_observation = costs[0][0]
# won't need log(y) if EPM was already trained on log(y)
else:
all_configs = self.runhistory.get_all_configs_per_budget(budget_subset=self.currently_considered_budgets)
x_best = self.incumbent
x_best_array = convert_configurations_to_array(all_configs)
best_observation = self.runhistory.get_cost(x_best)
best_observation_as_array = np.array(best_observation).reshape((1, 1))
# It's unclear how to do this for inv scaling and potential future scaling.
# This line should be changed if necessary
best_observation = self.rh2EPM.transform_response_values(best_observation_as_array)
best_observation = best_observation[0][0]
return x_best_array, best_observation
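# Illustrative example of the two branches above (hypothetical numbers): with
# ``predict=True`` and three evaluated configurations whose marginalized model
# predictions are [0.42, 0.17, 0.33], the array of the second configuration is
# returned together with 0.17. With ``predict=False``, the observed cost of
# ``self.incumbent`` is looked up in the runhistory and passed through
# ``rh2EPM.transform_response_values`` (e.g. a log transformation) before being
# returned as ``best_observation``.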