Skip to content

AutoML-Toolkit

Smac

Smac

The SMACOptimizer is a wrapper around the SMAC optimizer.

Requirements

This requires smac which can be installed with:

pip install amltk[smac]

# Or directly
pip install smac

This uses ConfigSpace as its search_space() to optimize.

Users should report results using trial.success().

Visit their documentation for what you can pass to SMACOptimizer.create().

The below example shows how you can use SMAC to optimize an sklearn pipeline.

```python exec="True" source="material-block" result="python"
from __future__ import annotations

import logging

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial
from amltk.pipeline import Component, Node

logging.basicConfig(level=logging.INFO)


def target_function(trial: Trial, pipeline: Node) -> Trial.Report:
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = pipeline.configure(trial.config).build("sklearn")

    with trial.begin():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        loss = 1 - accuracy
        return trial.success(loss=loss, accuracy=accuracy)

    return trial.fail()

from amltk._doc import make_picklable; make_picklable(target_function) # markdown-exec: hide

pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})
space = pipeline.search_space(parser=SMACOptimizer.preferred_parser())
optimizer = SMACOptimizer.create(space=space)

N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
task = scheduler.task(target_function)

history = History()


@scheduler.on_start(repeat=N_WORKERS)
def on_start():
    trial = optimizer.ask()
    task.submit(trial, pipeline)


@task.on_result
def tell_and_launch_trial(_, report: Trial.Report):
    if scheduler.running():
        optimizer.tell(report)
        trial = optimizer.ask()
        task.submit(trial, pipeline)


@task.on_result
def add_to_history(_, report: Trial.Report):
    history.add(report)


scheduler.run(timeout=3, wait=False)

print(history.df())
```

`class SMACOptimizer(*, facade, bucket=None, metrics, fidelities=None)` #

Bases: Optimizer[TrialInfo]

An optimizer that uses SMAC to optimize a config space.

PARAMETER	DESCRIPTION
`facade`	The SMAC facade to use. TYPE: `AbstractFacade`
`bucket`	The bucket given to trials generated by this optimizer. TYPE: `PathBucket \| None` DEFAULT: `None`
`metrics`	The metrics to optimize. TYPE: `Metric \| Sequence[Metric]`
`fidelities`	The fidelities to use, if any. TYPE: `Mapping[str, FidT] \| None` DEFAULT: `None`

Source code in src/amltk/optimization/optimizers/smac.py

def __init__(
    self,
    *,
    facade: AbstractFacade,
    bucket: PathBucket | None = None,
    metrics: Metric | Sequence[Metric],
    fidelities: Mapping[str, FidT] | None = None,
) -> None:
    """Wrap an already constructed SMAC facade as an optimizer.

    Args:
        facade: The SMAC facade to use.
        bucket: The bucket given to trials generated by this optimizer.
        metrics: The metrics to optimize. A single metric is stored as a
            one-element list.
        fidelities: The fidelities to use, if any.
    """
    # Users may pass in a facade they constructed themselves, so verify that
    # its scenario uses the crash cost we derive from the metrics. This must
    # happen before `metrics` is normalised to a list below, as the crash
    # cost of a lone metric is a scalar rather than a list.
    expected_crash_cost = self.crash_cost(metrics)
    assert expected_crash_cost == facade.scenario.crash_cost

    if not isinstance(metrics, Sequence):
        metrics = [metrics]

    super().__init__(metrics=metrics, bucket=bucket)
    self.facade = facade
    self.metrics = metrics
    self.fidelities = fidelities

`def create(*, space, metrics, bucket=None, deterministic=True, seed=None, fidelities=None, continue_from_last_run=False, logging_level=False)`
`classmethod` #

Create a new SMAC optimizer using either the HPO facade or a multi-fidelity facade.

PARAMETER	DESCRIPTION
`space`	The config space to optimize. TYPE: `ConfigurationSpace \| Node`
`metrics`	The metrics to optimize. TYPE: `Metric \| Sequence[Metric]`
`bucket`	The bucket given to trials generated by this optimizer. TYPE: `PathBucket \| str \| Path \| None` DEFAULT: `None`
`deterministic`	Whether the function you're optimizing is deterministic, given a seed and config. TYPE: `bool` DEFAULT: `True`
`seed`	The seed to use for the optimizer. TYPE: `Seed \| None` DEFAULT: `None`
`fidelities`	The fidelities to use, if any. TYPE: `Mapping[str, FidT] \| None` DEFAULT: `None`
`continue_from_last_run`	Whether to continue from a previous run. TYPE: `bool` DEFAULT: `False`
`logging_level`	The logging level to use. This argument is passed forward to SMAC, use False to disable SMAC's handling of logging. TYPE: `int \| Path \| Literal[False] \| None` DEFAULT: `False`

Source code in src/amltk/optimization/optimizers/smac.py

@classmethod
def create(
    cls,
    *,
    space: ConfigurationSpace | Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | str | Path | None = None,
    deterministic: bool = True,
    seed: Seed | None = None,
    fidelities: Mapping[str, FidT] | None = None,
    continue_from_last_run: bool = False,
    logging_level: int | Path | Literal[False] | None = False,
) -> Self:
    """Create a new SMAC optimizer using either the HPO facade or
    a mutli-fidelity facade.

    Args:
        space: The config space to optimize.
        metrics: The metrics to optimize.
        bucket: The bucket given to trials generated by this optimizer.
        deterministic: Whether the function your optimizing is deterministic, given
            a seed and config.
        seed: The seed to use for the optimizer.
        fidelities: The fidelities to use, if any.
        continue_from_last_run: Whether to continue from a previous run.
        logging_level: The logging level to use.
            This argument is passed forward to SMAC, use False to disable
            SMAC's handling of logging.
    """
    seed = as_int(seed)
    match bucket:
        case None:
            bucket = PathBucket(
                f"{cls.__class__.__name__}-{datetime.now().isoformat()}",
            )
        case str() | Path():
            bucket = PathBucket(bucket)
        case bucket:
            bucket = bucket  # noqa: PLW0127

    # NOTE SMAC always minimizes! Hence we make it a minimization problem
    metric_names: str | list[str]
    if isinstance(metrics, Sequence):
        metric_names = [metric.name for metric in metrics]
    else:
        metric_names = metrics.name

    if isinstance(space, Node):
        space = space.search_space(parser=cls.preferred_parser())

    facade_cls: type[AbstractFacade]
    if fidelities:
        if len(fidelities) == 1:
            v = next(iter(fidelities.values()))
            min_budget, max_budget = v
        else:
            min_budget, max_budget = 1.0, 100.0

        scenario = Scenario(
            objectives=metric_names,
            configspace=space,
            output_directory=bucket.path / "smac3_output",
            seed=seed,
            min_budget=min_budget,
            max_budget=max_budget,
            crash_cost=cls.crash_cost(metrics),
        )
        facade_cls = MultiFidelityFacade
    else:
        scenario = Scenario(
            configspace=space,
            seed=seed,
            output_directory=bucket.path / "smac3_output",
            deterministic=deterministic,
            objectives=metric_names,
            crash_cost=cls.crash_cost(metrics),
        )
        facade_cls = HyperparameterOptimizationFacade

    facade = facade_cls(
        scenario=scenario,
        target_function="dummy",  # NOTE: https://github.com/automl/SMAC3/issues/946
        overwrite=not continue_from_last_run,
        logging_level=logging_level,
        multi_objective_algorithm=facade_cls.get_multi_objective_algorithm(
            scenario=scenario,
        ),
    )
    return cls(facade=facade, fidelities=fidelities, bucket=bucket, metrics=metrics)

`def ask()` #

Ask the optimizer for a new config.

RETURNS	DESCRIPTION
`Trial[TrialInfo]`	The trial info for the new config.

Source code in src/amltk/optimization/optimizers/smac.py

@override
def ask(self) -> Trial[SMACTrialInfo]:
    """Sample the next configuration from SMAC and wrap it in a trial.

    Returns:
        A trial holding the sampled config, with SMAC's own trial info
        attached as its `info`.
    """
    info = self.facade.ask()

    # NOTE: These four names are rendered into the trial name through the
    # `{name=}` f-string specifier below, so they must not be renamed.
    config, seed = info.config, info.seed
    budget, instance = info.budget, info.instance

    # Fidelities are only attached when the optimizer has some configured
    # and SMAC handed out a truthy budget.
    trial_fids = None
    if self.fidelities and budget:
        # A lone fidelity keeps its own name, otherwise "budget" is used.
        fid_name = (
            next(iter(self.fidelities)) if len(self.fidelities) == 1 else "budget"
        )
        trial_fids = {fid_name: budget}

    config_id = self.facade.runhistory.config_ids[config]
    trial: Trial[SMACTrialInfo] = Trial(
        name=f"{config_id=}_{seed=}_{budget=}_{instance=}",
        config=dict(config),
        info=info,
        seed=seed,
        fidelities=trial_fids,
        bucket=self.bucket,
        metrics=self.metrics,
    )
    logger.debug(f"Asked for trial {trial.name}")
    return trial

`def tell(report)` #

Tell the optimizer the result of the sampled config.

PARAMETER	DESCRIPTION
`report`	The report of the trial. TYPE: `Report[TrialInfo]`

Source code in src/amltk/optimization/optimizers/smac.py

@override
def tell(self, report: Trial.Report[SMACTrialInfo]) -> None:
    """Tell the optimizer the result of the sampled config.

    Args:
        report: The report of the trial.
    """
    assert report.trial.info is not None

    cost: float | list[float]
    match self.metrics:
        case [metric]:  # Single obj
            val: Metric.Value = first_true(
                report.metric_values,
                pred=lambda m: m.metric == metric,
                default=metric.worst,
            )
            cost = self.cost(val)
        case metrics:
            # NOTE: We need to make sure that there sorted in the order
            # that SMAC expects, with any missing metrics filled in
            _lookup = {v.metric.name: v for v in report.metric_values}
            cost = [
                self.cost(_lookup.get(metric.name, metric.worst))
                for metric in metrics
            ]

    logger.debug(f"Telling report for trial {report.trial.name}")

    # If we're successful, get the cost and times and report them
    params: dict[str, Any]
    match report.status:
        case Trial.Status.SUCCESS:
            params = {
                "time": report.time.duration,
                "starttime": report.time.start,
                "endtime": report.time.end,
                "cost": cost,
                "status": StatusType.SUCCESS,
            }
        case Trial.Status.FAIL:
            params = {
                "time": report.time.duration,
                "starttime": report.time.start,
                "endtime": report.time.end,
                "cost": cost,
                "status": StatusType.CRASHED,
            }
        case Trial.Status.CRASHED | Trial.Status.UNKNOWN:
            params = {
                "cost": cost,
                "status": StatusType.CRASHED,
            }

    match report.exception:
        case None:
            pass
        case MemoryLimitException():
            params["status"] = StatusType.MEMORYOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case TimeoutException():
            params["status"] = StatusType.TIMEOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case _:
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }

    self.facade.tell(report.trial.info, value=SMACTrialValue(**params), save=True)

`def preferred_parser()`
`classmethod` #

The preferred parser for this optimizer.

Source code in src/amltk/optimization/optimizers/smac.py

@override
@classmethod
def preferred_parser(cls) -> Literal["configspace"]:
    """The preferred parser for this optimizer."""
    return "configspace"

`def crash_cost(metric)`
`classmethod` #

Get the crash cost for a metric for SMAC.

Source code in src/amltk/optimization/optimizers/smac.py

@classmethod
def crash_cost(cls, metric: Metric | Sequence[Metric]) -> float | list[float]:
    """Get the crash cost for a metric for SMAC."""
    match metric:
        case Metric(bounds=(lower, upper)):  # Bounded metrics
            return abs(upper - lower)
        case Metric():  # Unbounded metric
            return np.inf
        case metrics:
            return [cls.crash_cost(m) for m in metrics]

`def cost(value)`
`classmethod` #

Get the cost for a metric value for SMAC.

Source code in src/amltk/optimization/optimizers/smac.py

@classmethod
def cost(cls, value: Metric.Value) -> float:
    """Get the cost SMAC should see for a metric value.

    Args:
        value: The metric value to convert.

    Returns:
        The value's `distance_to_optimal` when it is not `None`,
        otherwise its `loss`.
    """
    distance = value.distance_to_optimal
    # Only `None` means "no distance available"; a distance of 0 is valid.
    return value.loss if distance is None else distance