Algorithm configuration facade

smac.facade.algorithm_configuration_facade #

AlgorithmConfigurationFacade #

AlgorithmConfigurationFacade(
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction
    | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer
    | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm
    | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder
    | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int
    | Path
    | Literal[False]
    | None = None,
    callbacks: list[Callback] = None,
    overwrite: bool = False,
    dask_client: Client | None = None
)

Bases: AbstractFacade

Source code in smac/facade/abstract_facade.py

def __init__(
    self,
    scenario: Scenario,
    target_function: Callable | str | AbstractRunner,
    *,
    model: AbstractModel | None = None,
    acquisition_function: AbstractAcquisitionFunction | None = None,
    acquisition_maximizer: AbstractAcquisitionMaximizer | None = None,
    initial_design: AbstractInitialDesign | None = None,
    random_design: AbstractRandomDesign | None = None,
    intensifier: AbstractIntensifier | None = None,
    multi_objective_algorithm: AbstractMultiObjectiveAlgorithm | None = None,
    runhistory_encoder: AbstractRunHistoryEncoder | None = None,
    config_selector: ConfigSelector | None = None,
    logging_level: int | Path | Literal[False] | None = None,
    callbacks: list[Callback] | None = None,
    overwrite: bool = False,
    dask_client: Client | None = None,
):
    """Assembles all components of the facade and creates the optimizer.

    Every component that is not passed explicitly is created by calling the corresponding ``get_*`` method of
    this facade with ``scenario``.

    Parameters
    ----------
    scenario : Scenario
        The scenario of the run. Its configuration space is seeded with ``scenario.seed`` and the meta data
        of the facade is written back to it.
    target_function : Callable | str | AbstractRunner
        An ``AbstractRunner`` is used as is. A string is wrapped in a ``TargetFunctionScriptRunner`` and
        everything else in a ``TargetFunctionRunner``.
    model : AbstractModel | None, defaults to None
        If None, ``get_model`` is used.
    acquisition_function : AbstractAcquisitionFunction | None, defaults to None
        If None, ``get_acquisition_function`` is used.
    acquisition_maximizer : AbstractAcquisitionMaximizer | None, defaults to None
        If None, ``get_acquisition_maximizer`` is used.
    initial_design : AbstractInitialDesign | None, defaults to None
        If None, ``get_initial_design`` is used.
    random_design : AbstractRandomDesign | None, defaults to None
        If None, ``get_random_design`` is used.
    intensifier : AbstractIntensifier | None, defaults to None
        If None, ``get_intensifier`` is used.
    multi_objective_algorithm : AbstractMultiObjectiveAlgorithm | None, defaults to None
        If None and the scenario has more than one objective, ``get_multi_objective_algorithm`` is used.
        Stays None for a single objective.
    runhistory_encoder : AbstractRunHistoryEncoder | None, defaults to None
        If None, ``get_runhistory_encoder`` is used.
    config_selector : ConfigSelector | None, defaults to None
        If None, ``get_config_selector`` is used.
    logging_level : int | Path | Literal[False] | None, defaults to None
        Passed to ``setup_logging`` before anything else happens.
    callbacks : list[Callback] | None, defaults to None
        Callbacks which are registered on the optimizer. None is treated as an empty list.
    overwrite : bool, defaults to False
        Stored on the facade as ``_overwrite``.
    dask_client : Client | None, defaults to None
        If a client is passed (or ``scenario.n_workers`` is greater than one), the runner is wrapped in a
        ``DaskParallelRunner``.
    """
    setup_logging(logging_level)

    if callbacks is None:
        callbacks = []

    if model is None:
        model = self.get_model(scenario)

    if acquisition_function is None:
        acquisition_function = self.get_acquisition_function(scenario)

    if acquisition_maximizer is None:
        acquisition_maximizer = self.get_acquisition_maximizer(scenario)

    if initial_design is None:
        initial_design = self.get_initial_design(scenario)

    if random_design is None:
        random_design = self.get_random_design(scenario)

    if intensifier is None:
        intensifier = self.get_intensifier(scenario)

    # A multi-objective algorithm is only created by default if there is more than one objective
    if multi_objective_algorithm is None and scenario.count_objectives() > 1:
        multi_objective_algorithm = self.get_multi_objective_algorithm(scenario=scenario)

    if runhistory_encoder is None:
        runhistory_encoder = self.get_runhistory_encoder(scenario)

    if config_selector is None:
        config_selector = self.get_config_selector(scenario)

    # Initialize empty stats and runhistory object
    runhistory = RunHistory(multi_objective_algorithm=multi_objective_algorithm)

    # Set the seed for configuration space
    scenario.configspace.seed(scenario.seed)

    # Set variables globally
    self._scenario = scenario
    self._model = model
    self._acquisition_function = acquisition_function
    self._acquisition_maximizer = acquisition_maximizer
    self._initial_design = initial_design
    self._random_design = random_design
    self._intensifier = intensifier
    self._multi_objective_algorithm = multi_objective_algorithm
    self._runhistory = runhistory
    self._runhistory_encoder = runhistory_encoder
    self._config_selector = config_selector
    self._callbacks = callbacks
    self._overwrite = overwrite

    # Prepare the algorithm executer
    runner: AbstractRunner
    if isinstance(target_function, AbstractRunner):
        runner = target_function
    elif isinstance(target_function, str):
        runner = TargetFunctionScriptRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )
    else:
        runner = TargetFunctionRunner(
            scenario=scenario,
            target_function=target_function,
            required_arguments=self._get_signature_arguments(),
        )

    # In case of multiple jobs, we need to wrap the runner again using DaskParallelRunner
    if (n_workers := scenario.n_workers) > 1 or dask_client is not None:
        if dask_client is not None and n_workers > 1:
            logger.warning(
                "Provided `dask_client`. Ignore `scenario.n_workers`, directly set `n_workers` in `dask_client`."
            )
        else:
            available_workers = joblib.cpu_count()
            if n_workers > available_workers:
                # Cap first so that the message reports the reduced number and not the requested one
                n_workers = available_workers
                logger.info(f"Workers are reduced to {n_workers}.")

        # NOTE(review): the capped `n_workers` is not passed on below; presumably `DaskParallelRunner`
        # determines the number of workers itself - confirm.
        # We use a dask runner for parallelization
        runner = DaskParallelRunner(single_worker=runner, dask_client=dask_client)

    # Set the runner to access it globally
    self._runner = runner

    # Adding dependencies of the components
    self._update_dependencies()

    # We have to update our meta data (basically arguments of the components)
    self._scenario._set_meta(self.meta)

    # We have to validate if the object compositions are correct and actually make sense
    self._validate()

    # Finally we configure our optimizer
    self._optimizer = self._get_optimizer()
    assert self._optimizer

    # Register callbacks here
    for callback in callbacks:
        self._optimizer.register_callback(callback)

    # Additionally, we register the runhistory callback from the intensifier to efficiently update our incumbent
    # every time new information are available
    self._optimizer.register_callback(self._intensifier.get_callback(), index=0)

intensifier `property` #

intensifier: AbstractIntensifier

The intensifier used by the facade. It is asked for the next trial and is updated whenever the result of a trial is reported.

meta `property` #

meta: dict[str, Any]

Generates a hash based on all components of the facade. This is used for the run name or to determine whether a run should be continued or not.

optimizer `property` #

optimizer: SMBO

The optimizer which is responsible for the BO loop. Keeps track of useful information like status.

runhistory `property` #

runhistory: RunHistory

The runhistory which is filled with all trials during the optimization process.

scenario `property` #

scenario: Scenario

The scenario object which holds all environment information.

ask #

ask() -> TrialInfo

Asks the intensifier for the next trial.

Source code in smac/facade/abstract_facade.py

def ask(self) -> TrialInfo:
    """Returns the next trial to run, as proposed by the optimizer of this facade."""
    next_trial = self._optimizer.ask()
    return next_trial

get_acquisition_function `staticmethod` #

get_acquisition_function(
    scenario: Scenario, *, xi: float = 0.0
) -> EI

Returns an Expected Improvement acquisition function.

Parameters#

scenario : Scenario xi : float, defaults to 0.0 Controls the balance between exploration and exploitation of the acquisition function.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_acquisition_function(  # type: ignore
    scenario: Scenario,
    *,
    xi: float = 0.0,
) -> EI:
    """Creates the default acquisition function, which is Expected Improvement (``EI``).

    Parameters
    ----------
    scenario : Scenario
        Not used by this implementation.
    xi : float, defaults to 0.0
        Passed to ``EI``. Controls the balance between exploration and exploitation of the
        acquisition function.
    """
    expected_improvement = EI(xi=xi)
    return expected_improvement

get_acquisition_maximizer `staticmethod` #

get_acquisition_maximizer(
    scenario: Scenario,
) -> LocalAndSortedRandomSearch

Returns local and sorted random search as acquisition maximizer.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_acquisition_maximizer(  # type: ignore
    scenario: Scenario,
) -> LocalAndSortedRandomSearch:
    """Creates the default acquisition maximizer, which is local and sorted random search.

    The maximizer is built from the configuration space and the seed of ``scenario``.
    """
    return LocalAndSortedRandomSearch(scenario.configspace, seed=scenario.seed)

get_config_selector `staticmethod` #

get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16
) -> ConfigSelector

Returns the default configuration selector.

Source code in smac/facade/abstract_facade.py

@staticmethod
def get_config_selector(
    scenario: Scenario,
    *,
    retrain_after: int = 8,
    retries: int = 16,
) -> ConfigSelector:
    """Creates the default configuration selector.

    ``retrain_after`` and ``retries`` are passed to ``ConfigSelector`` unchanged.
    """
    selector = ConfigSelector(
        scenario,
        retrain_after=retrain_after,
        retries=retries,
    )
    return selector

get_initial_design `staticmethod` #

get_initial_design(
    scenario: Scenario,
    *,
    additional_configs: list[Configuration] = None
) -> DefaultInitialDesign

Returns an initial design, which returns the default configuration.

Parameters#

additional_configs: list[Configuration], defaults to None Adds additional configurations to the initial design. If None, an empty list is used.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_initial_design(  # type: ignore
    scenario: Scenario,
    *,
    additional_configs: list[Configuration] = None,
) -> DefaultInitialDesign:
    """Creates the default initial design, which is a ``DefaultInitialDesign``.

    Parameters
    ----------
    scenario : Scenario
        Passed to ``DefaultInitialDesign``.
    additional_configs: list[Configuration], defaults to None
        Adds additional configurations to the initial design. If None, an empty list is used.
    """
    extra_configs = [] if additional_configs is None else additional_configs
    return DefaultInitialDesign(scenario=scenario, additional_configs=extra_configs)

get_intensifier `staticmethod` #

get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 2000,
    max_incumbents: int = 10
) -> Intensifier

Returns Intensifier as intensifier. Supports budgets.

Parameters#

max_config_calls : int, defaults to 2000 Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at maximum for a configuration. max_incumbents : int, defaults to 10 How many incumbents to keep track of in the case of multi-objective.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_intensifier(
    scenario: Scenario,
    *,
    max_config_calls: int = 2000,
    max_incumbents: int = 10,
) -> Intensifier:
    """Returns ``Intensifier`` as intensifier. Supports budgets.

    Parameters
    ----------
    scenario : Scenario
        Passed to ``Intensifier``.
    max_config_calls : int, defaults to 2000
        Maximum number of configuration evaluations. Basically, how many instance-seed keys should be evaluated at
        maximum for a configuration.
    max_incumbents : int, defaults to 10
        How many incumbents to keep track of in the case of multi-objective.
    """
    return Intensifier(
        scenario=scenario,
        max_config_calls=max_config_calls,
        max_incumbents=max_incumbents,
    )

get_model `staticmethod` #

get_model(
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 5.0 / 6.0,
    min_samples_split: int = 3,
    min_samples_leaf: int = 3,
    max_depth: int = 20,
    bootstrapping: bool = True,
    pca_components: int = 4
) -> RandomForest

Returns a random forest as surrogate model.

Parameters#

n_trees : int, defaults to 10 The number of trees in the random forest. ratio_features : float, defaults to 5.0 / 6.0 The ratio of features that are considered for splitting. min_samples_split : int, defaults to 3 The minimum number of data points to perform a split. min_samples_leaf : int, defaults to 3 The minimum number of data points in a leaf. max_depth : int, defaults to 20 The maximum depth of a single tree. bootstrapping : bool, defaults to True Enables bootstrapping. pca_components : int, defaults to 4 Number of components to keep when using PCA to reduce dimensionality of instance features.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_model(  # type: ignore
    scenario: Scenario,
    *,
    n_trees: int = 10,
    ratio_features: float = 5.0 / 6.0,
    min_samples_split: int = 3,
    min_samples_leaf: int = 3,
    max_depth: int = 20,
    bootstrapping: bool = True,
    pca_components: int = 4,
) -> RandomForest:
    """Creates the default surrogate model, which is a random forest.

    The configuration space, the instance features and the seed are taken from ``scenario``. ``log_y`` is
    always passed as ``False``.

    Parameters
    ----------
    scenario : Scenario
        Provides ``configspace``, ``instance_features`` and ``seed`` for the model.
    n_trees : int, defaults to 10
        The number of trees in the random forest.
    ratio_features : float, defaults to 5.0 / 6.0
        The ratio of features that are considered for splitting.
    min_samples_split : int, defaults to 3
        The minimum number of data points to perform a split.
    min_samples_leaf : int, defaults to 3
        The minimum number of data points in a leaf.
    max_depth : int, defaults to 20
        The maximum depth of a single tree.
    bootstrapping : bool, defaults to True
        Enables bootstrapping.
    pca_components : int, defaults to 4
        Number of components to keep when using PCA to reduce dimensionality of instance features.
    """
    # Options which are handed through from the caller unchanged
    forest_options = {
        "n_trees": n_trees,
        "ratio_features": ratio_features,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf,
        "max_depth": max_depth,
        "bootstrapping": bootstrapping,
        "pca_components": pca_components,
    }

    surrogate = RandomForest(
        configspace=scenario.configspace,
        log_y=False,
        instance_features=scenario.instance_features,
        seed=scenario.seed,
        **forest_options,
    )
    return surrogate

get_multi_objective_algorithm `staticmethod` #

get_multi_objective_algorithm(
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None
) -> MeanAggregationStrategy

Returns the mean aggregation strategy for the multi objective algorithm.

Parameters#

scenario : Scenario objective_weights : list[float] | None, defaults to None Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of objectives.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_multi_objective_algorithm(  # type: ignore
    scenario: Scenario,
    *,
    objective_weights: list[float] | None = None,
) -> MeanAggregationStrategy:
    """Creates the default multi-objective algorithm, which is the mean aggregation strategy.

    Parameters
    ----------
    scenario : Scenario
        Passed to ``MeanAggregationStrategy``.
    objective_weights : list[float] | None, defaults to None
        Weights for averaging the objectives in a weighted manner. Must be of the same length as the number of
        objectives.
    """
    strategy = MeanAggregationStrategy(scenario=scenario, objective_weights=objective_weights)
    return strategy

get_random_design `staticmethod` #

get_random_design(
    scenario: Scenario, *, probability: float = 0.5
) -> ProbabilityRandomDesign

Returns ProbabilityRandomDesign for interleaving configurations.

Parameters#

probability : float, defaults to 0.5 Probability that a configuration will be drawn at random.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_random_design(  # type: ignore
    scenario: Scenario,
    *,
    probability: float = 0.5,
) -> ProbabilityRandomDesign:
    """Creates the default random design, a ``ProbabilityRandomDesign`` for interleaving configurations.

    Parameters
    ----------
    scenario : Scenario
        Only its ``seed`` is used.
    probability : float, defaults to 0.5
        Probability that a configuration will be drawn at random.
    """
    random_design = ProbabilityRandomDesign(
        probability=probability,
        seed=scenario.seed,
    )
    return random_design

get_runhistory_encoder `staticmethod` #

get_runhistory_encoder(
    scenario: Scenario,
) -> RunHistoryEncoder

Returns the default runhistory encoder.

Source code in smac/facade/algorithm_configuration_facade.py

@staticmethod
def get_runhistory_encoder(scenario: Scenario) -> RunHistoryEncoder:
    """Creates the default runhistory encoder, a ``RunHistoryEncoder`` built from ``scenario``."""
    encoder = RunHistoryEncoder(scenario)
    return encoder

optimize #

optimize(
    *, data_to_scatter: dict[str, Any] | None = None
) -> Configuration | list[Configuration]

Optimizes the configuration of the algorithm.

Parameters#

data_to_scatter: dict[str, Any] | None Note that this argument is only valid when the dask runner is used! When a user scatters data from their local process to the distributed network, this data is distributed in a round-robin fashion grouping by number of cores. Roughly speaking, we can keep this data in memory and then we do not have to (de-)serialize the data every time we would like to execute a target function with a big dataset. For example, when your target function has a big dataset shared across all target function calls, this argument is very useful.

Returns#

incumbent : Configuration Best found configuration.

Source code in smac/facade/abstract_facade.py

def optimize(self, *, data_to_scatter: dict[str, Any] | None = None) -> Configuration | list[Configuration]:
    """
    Runs the optimization and returns the result of the optimizer.

    The optimizer is saved afterwards in any case, even if the optimization raises.

    Parameters
    ----------
    data_to_scatter: dict[str, Any] | None
        Note that this argument is only valid when the dask runner is used!
        When a user scatters data from their local process to the distributed network,
        this data is distributed in a round-robin fashion grouping by number of cores.
        Roughly speaking, we can keep this data in memory and then we do not have to (de-)serialize the data
        every time we would like to execute a target function with a big dataset.
        For example, when your target function has a big dataset shared across all target function calls,
        this argument is very useful.

    Returns
    -------
    incumbents : Configuration | list[Configuration]
        Best found configuration(s), as returned by the optimizer.

    Raises
    ------
    ValueError
        If ``data_to_scatter`` is an empty dict.
    """
    # An empty dict is rejected up front; None means that nothing is scattered
    if isinstance(data_to_scatter, dict) and not data_to_scatter:
        raise ValueError("data_to_scatter must be None or dict with some elements, but got an empty dict.")

    try:
        return self._optimizer.optimize(data_to_scatter=data_to_scatter)
    finally:
        self._optimizer.save()

tell #

tell(
    info: TrialInfo, value: TrialValue, save: bool = True
) -> None

Adds the result of a trial to the runhistory and updates the intensifier.

Parameters#

info: TrialInfo Describes the trial from which to process the results. value: TrialValue Contains relevant information regarding the execution of a trial. save : bool, defaults to True Whether the runhistory should be saved.

Source code in smac/facade/abstract_facade.py

def tell(self, info: TrialInfo, value: TrialValue, save: bool = True) -> None:
    """Reports the result of a trial by handing it over to the optimizer.

    Adds the result of a trial to the runhistory and updates the intensifier.

    Parameters
    ----------
    info: TrialInfo
        Describes the trial from which to process the results.
    value: TrialValue
        Contains relevant information regarding the execution of a trial.
    save : bool, defaults to True
        Whether the runhistory should be saved.
    """
    outcome = self._optimizer.tell(info, value, save=save)
    return outcome

validate #

validate(
    config: Configuration, *, seed: int | None = None
) -> float | list[float]

Validates a configuration on seeds different from the ones used in the optimization process and on the highest budget (if budget type is real-valued).

Parameters#

config : Configuration Configuration to validate. instances : list[str] | None, defaults to None Which instances to validate. If None, all instances specified in the scenario are used. In case that the budget type is real-valued, this argument is ignored. (Note: the signature shown above does not accept an instances argument.) seed : int | None, defaults to None If None, the seed from the scenario is used.

Returns#

cost : float | list[float] The averaged cost of the configuration. In case of multi-objective, the cost of each objective is averaged.

Source code in smac/facade/abstract_facade.py

def validate(
    self,
    config: Configuration,
    *,
    seed: int | None = None,
) -> float | list[float]:
    """Validates a configuration on seeds different from the ones used in the optimization process and on the
    highest budget (if budget type is real-valued).

    The call is forwarded to the optimizer of this facade.

    Parameters
    ----------
    config : Configuration
        Configuration to validate
    seed : int | None, defaults to None
        If None, the seed from the scenario is used.

    Returns
    -------
    cost : float | list[float]
        The averaged cost of the configuration. In case of multi-objective, the cost of each objective is
        averaged.
    """
    return self._optimizer.validate(config, seed=seed)

Algorithm configuration facade

smac.facade.algorithm_configuration_facade #