from typing import Any, Iterator, List, Optional, Tuple
import logging
import numpy as np
from smac.configspace import Configuration
from smac.configspace.util import convert_configurations_to_array
from smac.epm.random_forest.rf_with_instances import RandomForestWithInstances
from smac.optimizer.acquisition import AbstractAcquisitionFunction
from smac.optimizer.acquisition.maximizer import (
AcquisitionFunctionMaximizer,
RandomSearch,
)
from smac.optimizer.configuration_chooser.random_chooser import (
ChooserNoCoolDown,
RandomChooser,
)
from smac.runhistory.runhistory import RunHistory
from smac.runhistory.runhistory2epm import AbstractRunHistory2EPM
from smac.scenario.scenario import Scenario
from smac.stats.stats import Stats
__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
__license__ = "3-clause BSD"
class EPMChooser:
"""Interface to train the EPM and generate/choose next configurations.
Parameters
----------
scenario: smac.scenario.scenario.Scenario
Scenario object
stats: smac.stats.stats.Stats
statistics object with configuration budgets
runhistory: smac.runhistory.runhistory.RunHistory
runhistory with all runs so far
runhistory2epm: smac.runhistory.runhistory2epm.AbstractRunHistory2EPM
converts the runhistory into training data for the EPM
model: smac.epm.random_forest.rf_with_instances.RandomForestWithInstances
empirical performance model (right now, we support only
RandomForestWithInstances)
acq_optimizer: smac.optimizer.acquisition.maximizer.AcquisitionFunctionMaximizer
Optimizer of the acquisition function.
acquisition_func: smac.optimizer.acquisition.AbstractAcquisitionFunction
acquisition function to be maximized
restore_incumbent: Configuration
incumbent to be used from the start. ONLY used to restore states.
rng: np.random.RandomState
Random number generator
random_configuration_chooser: smac.optimizer.configuration_chooser.random_chooser.RandomChooser
Chooser that decides when to interleave a random configuration -- one of
* ChooserNoCoolDown(modulus)
* ChooserLinearCoolDown(start_modulus, modulus_increment, end_modulus)
predict_x_best: bool
Choose x_best for computing the acquisition function via the model instead of via the observations.
min_samples_model: int
Minimum number of samples to build a model
epm_chooser_kwargs: Any
Additional arguments passed to the EPMChooser (might be used by subclasses)
"""
def __init__(
self,
scenario: Scenario,
stats: Stats,
runhistory: RunHistory,
runhistory2epm: AbstractRunHistory2EPM,
model: RandomForestWithInstances,
acq_optimizer: AcquisitionFunctionMaximizer,
acquisition_func: AbstractAcquisitionFunction,
rng: np.random.RandomState,
restore_incumbent: Optional[Configuration] = None,
random_configuration_chooser: RandomChooser = ChooserNoCoolDown(modulus=2.0),
predict_x_best: bool = True,
min_samples_model: int = 1,
**epm_chooser_kwargs: Any,
):
self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
self.incumbent = restore_incumbent
self.scenario = scenario
self.stats = stats
self.runhistory = runhistory
self.rh2EPM = runhistory2epm
self.model = model
self.acq_optimizer = acq_optimizer
self.acquisition_func = acquisition_func
self.rng = rng
self.random_configuration_chooser = random_configuration_chooser
self._random_search = RandomSearch(
acquisition_func,
self.scenario.cs, # type: ignore[attr-defined] # noqa F821
rng,
)
self.initial_design_configs = [] # type: List[Configuration]
self.predict_x_best = predict_x_best
self.min_samples_model = min_samples_model
self.currently_considered_budgets = [
0.0,
]
def _collect_data_to_train_model(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
# If budgets are used, train the model only on the highest budget for which
# at least ``min_samples_model`` samples are available
available_budgets = []
for run_key in self.runhistory.data.keys():
available_budgets.append(run_key.budget)
# Sort available budgets from highest to lowest budget
available_budgets = sorted(list(set(available_budgets)), reverse=True)
# Count the points per budget and, starting from the highest budget, build the model on the first one with enough samples
for b in available_budgets:
X, Y = self.rh2EPM.transform(
self.runhistory,
budget_subset=[
b,
],
)
if X.shape[0] >= self.min_samples_model:
self.currently_considered_budgets = [
b,
]
configs_array = self.rh2EPM.get_configurations(
self.runhistory, budget_subset=self.currently_considered_budgets
)
return X, Y, configs_array
return (
np.empty(shape=[0, 0]),
np.empty(
shape=[
0,
]
),
np.empty(shape=[0, 0]),
)
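# Worked example of the budget fallback above (hypothetical numbers): with runs on
# budgets {5, 25, 100} and ``min_samples_model=3``, the budgets are scanned as
# [100, 25, 5]; if only 2 runs finished on budget 100 but 4 finished on budget 25,
# the model is trained on the budget-25 data and ``currently_considered_budgets``
# becomes [25].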
def _get_evaluated_configs(self) -> List[Configuration]:
return self.runhistory.get_all_configs_per_budget(budget_subset=self.currently_considered_budgets)
def choose_next(self, incumbent_value: Optional[float] = None) -> Iterator[Configuration]:
"""Choose next candidate solution with Bayesian optimization. The suggested configurations
depend on the argument ``acq_optimizer`` to the ``SMBO`` class.
Parameters
----------
incumbent_value: float
Cost value of the incumbent configuration (required for the acquisition function).
If not given, it is inferred from the runhistory or predicted by the model;
if not given and the runhistory is empty, a ValueError is raised.
Returns
-------
Iterator[Configuration]
"""
self.logger.debug("Search for next configuration")
X, Y, X_configurations = self._collect_data_to_train_model()
if X.shape[0] == 0:
# Only return a single point to avoid an overly high number of
# random search iterations
return self._random_search.maximize(runhistory=self.runhistory, stats=self.stats, num_points=1)
self.model.train(X, Y)
if incumbent_value is not None:
best_observation = incumbent_value
x_best_array = None # type: Optional[np.ndarray]
else:
if self.runhistory.empty():
raise ValueError("Runhistory is empty and the cost value of " "the incumbent is unknown.")
x_best_array, best_observation = self._get_x_best(self.predict_x_best, X_configurations)
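# Update the acquisition function with the freshly trained model and the
# incumbent information; ``eta`` is the best cost found/predicted so far and is
# used, e.g., by Expected Improvement.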
self.acquisition_func.update(
model=self.model,
eta=best_observation,
incumbent_array=x_best_array,
num_data=len(self._get_evaluated_configs()),
X=X_configurations,
)
challengers = self.acq_optimizer.maximize(
runhistory=self.runhistory,
stats=self.stats,
num_points=self.scenario.acq_opt_challengers, # type: ignore[attr-defined] # noqa F821
random_configuration_chooser=self.random_configuration_chooser,
)
return challengers
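# Sketch of how a caller might consume the returned challengers (illustrative
# only; in SMAC the intensifier pulls configurations from this iterator).
# Depending on ``random_configuration_chooser``, random configurations are
# interleaved with the acquisition-optimized ones:
#
#     for challenger in chooser.choose_next():
#         if challenger not in runhistory.get_all_configs():
#             break  # evaluate this configuration next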
def _get_x_best(self, predict: bool, X: np.ndarray) -> Tuple[np.ndarray, float]:
"""Get value, configuration, and array representation of the "best" configuration.
The definition of best varies depending on the argument ``predict``. If set to ``True``,
this function will return the stats of the best configuration as predicted by the model,
otherwise it will return the stats for the best observed configuration.
Parameters
----------
predict : bool
Whether to use the predicted or observed best.
Returns
-------
float
np.ndarry
Configuration
"""
if predict:
costs = list(
map(
lambda x: (
self.model.predict_marginalized_over_instances(x.reshape((1, -1)))[0][0][0],
x,
),
X,
)
)
costs = sorted(costs, key=lambda t: t[0])
x_best_array = costs[0][1]
best_observation = costs[0][0]
# won't need log(y) if EPM was already trained on log(y)
else:
all_configs = self.runhistory.get_all_configs_per_budget(budget_subset=self.currently_considered_budgets)
x_best = self.incumbent
x_best_array = convert_configurations_to_array(all_configs)
best_observation = self.runhistory.get_cost(x_best)
best_observation_as_array = np.array(best_observation).reshape((1, 1))
# It's unclear how to do this for inv scaling and potential future scaling.
# This line should be changed if necessary
best_observation = self.rh2EPM.transform_response_values(best_observation_as_array)
best_observation = best_observation[0][0]
return x_best_array, best_observation
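# Illustrative example of the two branches above (hypothetical numbers): with
# ``predict=True`` and three evaluated configurations whose marginalized model
# predictions are [0.42, 0.17, 0.33], the array of the second configuration is
# returned together with 0.17. With ``predict=False``, the observed cost of
# ``self.incumbent`` is looked up in the runhistory and passed through
# ``rh2EPM.transform_response_values`` (e.g. a log transformation) before being
# returned as ``best_observation``.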