Skip to content

Smac

The SMACOptimizer is a wrapper around the SMAC optimizer.

Requirements

This requires smac which can be installed with:

pip install amltk[smac]

# Or directly
pip install smac

This uses ConfigSpace as its search_space() to optimize.

Users should report results using trial.success().

Visit their documentation for what you can pass to SMACOptimizer.create().

The below example shows how you can use SMAC to optimize an sklearn pipeline.

from __future__ import annotations

import logging

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component, Node

logging.basicConfig(level=logging.INFO)


def target_function(trial: Trial, pipeline: Node) -> Trial.Report:
    """Evaluate one sampled configuration on a split of the iris dataset.

    The pipeline is configured with `trial.config`, built with the
    "sklearn" builder, fitted on the training part of the split and scored
    with accuracy on the held-out part.

    Args:
        trial: The trial carrying the configuration to evaluate.
        pipeline: The pipeline node to configure and build.

    Returns:
        A success report with the `accuracy` metric, or a fail report if the
        `with trial.begin()` block exits without returning.
    """
    features, labels = load_iris(return_X_y=True)
    train_X, test_X, train_y, test_y = train_test_split(features, labels)
    model = pipeline.configure(trial.config).build("sklearn")

    with trial.begin():
        model.fit(train_X, train_y)
        predictions = model.predict(test_X)
        return trial.success(accuracy=accuracy_score(test_y, predictions))

    # Only reached when the block above did not return; presumably
    # `trial.begin()` suppresses and records exceptions -- TODO confirm.
    return trial.fail()

# Pipeline with a single RandomForestClassifier component and a search space
# over `n_estimators` and `max_samples` (tuples are presumably (low, high)
# ranges -- confirm against the amltk pipeline docs).
pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100), "max_samples": (0.1, 0.9)})

# Accuracy is to be maximized (minimize=False) and is declared bounded in (0, 1).
metric = Metric("accuracy", minimize=False, bounds=(0, 1))
optimizer = SMACOptimizer.create(space=pipeline, metrics=metric, bucket="smac-doc-example")

N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
task = scheduler.task(target_function)

# Collects every report produced by the task.
history = History()

# NOTE(review): `repeat=N_WORKERS` presumably fires this callback once per
# worker at scheduler start so each worker gets an initial trial -- confirm.
@scheduler.on_start(repeat=N_WORKERS)
def on_start():
    """Ask the optimizer for a trial and submit it to the task."""
    trial = optimizer.ask()
    task.submit(trial, pipeline)

@task.on_result
def tell_and_launch_trial(_, report: Trial.Report):
    """Feed the report back to the optimizer and submit the next trial.

    Nothing is told or submitted once the scheduler is no longer running.
    """
    if scheduler.running():
        optimizer.tell(report)
        trial = optimizer.ask()
        task.submit(trial, pipeline)

@task.on_result
def add_to_history(_, report: Trial.Report):
    """Record the report in the shared history."""
    history.add(report)

# NOTE(review): presumably runs for at most 3 seconds and does not wait for
# in-flight tasks when stopping -- confirm against the Scheduler.run docs.
scheduler.run(timeout=3, wait=False)

print(history.df())
                                                     status  ...  time:unit
name                                                         ...           
config_id=2_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=1_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=3_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=4_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=6_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=5_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=7_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=8_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=10_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=9_seed=907842988_budget=None_instance...  success  ...    seconds
config_id=11_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=12_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=14_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=13_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=15_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=16_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=18_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=17_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=20_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=19_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=21_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=22_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=23_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=24_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=25_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=26_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=27_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=28_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=30_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=29_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=31_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=32_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=33_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=34_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=36_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=35_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=37_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=39_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=38_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=40_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=41_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=42_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=43_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=44_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=45_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=46_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=47_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=48_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=49_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=50_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=51_seed=907842988_budget=None_instanc...  success  ...    seconds
config_id=52_seed=907842988_budget=None_instanc...  success  ...    seconds

[52 rows x 20 columns]

class SMACOptimizer(*, facade, bucket=None, metrics, fidelities=None) #

Bases: Optimizer[TrialInfo]

An optimizer that uses SMAC to optimize a config space.

PARAMETER DESCRIPTION
facade

The SMAC facade to use.

TYPE: AbstractFacade

bucket

The bucket given to trials generated by this optimizer.

TYPE: PathBucket | None DEFAULT: None

metrics

The metrics to optimize.

TYPE: Metric | Sequence[Metric]

fidelities

The fidelities to use, if any.

TYPE: Mapping[str, FidT] | None DEFAULT: None

Source code in src/amltk/optimization/optimizers/smac.py
def __init__(
    self,
    *,
    facade: AbstractFacade,
    bucket: PathBucket | None = None,
    metrics: Metric | Sequence[Metric],
    fidelities: Mapping[str, FidT] | None = None,
) -> None:
    """Initialize the optimizer.

    Args:
        facade: The SMAC facade to use.
        bucket: The bucket given to trials generated by this optimizer.
        metrics: The metrics to optimize.
        fidelities: The fidelities to use, if any.

    Raises:
        ValueError: If the crash cost configured on the facade's scenario
            does not equal the crash cost derived from `metrics`.
    """
    # We need to verify that the scenario is correct in case users pass in
    # their own facade construction. This is an explicit check rather than an
    # `assert` so that it still runs when Python is started with `-O`.
    expected_crash_cost = self.crash_cost(metrics)
    scenario_crash_cost = facade.scenario.crash_cost
    if not (expected_crash_cost == scenario_crash_cost):
        raise ValueError(
            f"Crash cost of the facade's scenario ({scenario_crash_cost!r})"
            " does not match the crash cost derived from the metrics"
            f" ({expected_crash_cost!r}).",
        )

    # Normalize a single metric into a one-element list.
    metrics = metrics if isinstance(metrics, Sequence) else [metrics]
    super().__init__(metrics=metrics, bucket=bucket)
    self.facade = facade
    self.metrics = metrics
    self.fidelities = fidelities

def create(*, space, metrics, bucket=None, deterministic=True, seed=None, fidelities=None, continue_from_last_run=False, logging_level=False)
classmethod
#

Create a new SMAC optimizer using either the HPO facade or a multi-fidelity facade.

PARAMETER DESCRIPTION
space

The config space to optimize.

TYPE: ConfigurationSpace | Node

metrics

The metrics to optimize.

TYPE: Metric | Sequence[Metric]

bucket

The bucket given to trials generated by this optimizer.

TYPE: PathBucket | str | Path | None DEFAULT: None

deterministic

Whether the function you're optimizing is deterministic, given a seed and config.

TYPE: bool DEFAULT: True

seed

The seed to use for the optimizer.

TYPE: Seed | None DEFAULT: None

fidelities

The fidelities to use, if any.

TYPE: Mapping[str, FidT] | None DEFAULT: None

continue_from_last_run

Whether to continue from a previous run.

TYPE: bool DEFAULT: False

logging_level

The logging level to use. This argument is passed forward to SMAC, use False to disable SMAC's handling of logging.

TYPE: int | Path | Literal[False] | None DEFAULT: False

Source code in src/amltk/optimization/optimizers/smac.py
@classmethod
def create(
    cls,
    *,
    space: ConfigurationSpace | Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | str | Path | None = None,
    deterministic: bool = True,
    seed: Seed | None = None,
    fidelities: Mapping[str, FidT] | None = None,
    continue_from_last_run: bool = False,
    logging_level: int | Path | Literal[False] | None = False,
) -> Self:
    """Create a new SMAC optimizer using either the HPO facade or
    a multi-fidelity facade.

    The multi-fidelity facade is used when `fidelities` is non-empty,
    otherwise the hyperparameter optimization facade is used.

    Args:
        space: The config space to optimize. A `Node` is converted with its
            `search_space()` using this optimizer's preferred parser.
        metrics: The metrics to optimize.
        bucket: The bucket given to trials generated by this optimizer.
            With `None`, a new `PathBucket` named after the class and the
            current ISO timestamp is created; a `str` or `Path` is wrapped
            in a `PathBucket`.
        deterministic: Whether the function you're optimizing is
            deterministic, given a seed and config.
        seed: The seed to use for the optimizer.
        fidelities: The fidelities to use, if any. With exactly one entry,
            its value is unpacked as `(min_budget, max_budget)`; with more
            entries the budget range `1.0` to `100.0` is used.
        continue_from_last_run: Whether to continue from a previous run.
        logging_level: The logging level to use.
            This argument is passed forward to SMAC, use False to disable
            SMAC's handling of logging.

    Returns:
        The optimizer wrapping the constructed facade.
    """
    seed = as_int(seed)

    # Normalize `bucket` into a PathBucket; an existing bucket is kept as is.
    if bucket is None:
        bucket = PathBucket(f"{cls.__name__}-{datetime.now().isoformat()}")
    elif isinstance(bucket, (str, Path)):
        bucket = PathBucket(bucket)

    # NOTE SMAC always minimizes! Hence we make it a minimization problem
    metric_names: str | list[str] = (
        [m.name for m in metrics] if isinstance(metrics, Sequence) else metrics.name
    )

    if isinstance(space, Node):
        space = space.search_space(parser=cls.preferred_parser())

    # Collect the scenario arguments that differ between the two facades.
    facade_cls: type[AbstractFacade]
    facade_specific: dict[str, Any]
    if fidelities:
        if len(fidelities) == 1:
            min_budget, max_budget = next(iter(fidelities.values()))
        else:
            min_budget, max_budget = 1.0, 100.0

        # NOTE(review): `deterministic` is not forwarded to the scenario in
        # this branch, so SMAC's own default applies -- confirm intended.
        facade_specific = {"min_budget": min_budget, "max_budget": max_budget}
        facade_cls = MultiFidelityFacade
    else:
        facade_specific = {"deterministic": deterministic}
        facade_cls = HyperparameterOptimizationFacade

    scenario = Scenario(
        objectives=metric_names,
        configspace=space,
        output_directory=bucket.path / "smac3_output",
        seed=seed,
        crash_cost=cls.crash_cost(metrics),
        **facade_specific,
    )

    multi_objective_algorithm = facade_cls.get_multi_objective_algorithm(
        scenario=scenario,
    )
    facade = facade_cls(
        scenario=scenario,
        target_function="dummy",  # NOTE: https://github.com/automl/SMAC3/issues/946
        overwrite=not continue_from_last_run,
        logging_level=logging_level,
        multi_objective_algorithm=multi_objective_algorithm,
    )
    return cls(facade=facade, fidelities=fidelities, bucket=bucket, metrics=metrics)

def ask() #

Ask the optimizer for a new config.

RETURNS DESCRIPTION
Trial[TrialInfo]

The trial info for the new config.

Source code in src/amltk/optimization/optimizers/smac.py
@override
def ask(self) -> Trial[SMACTrialInfo]:
    """Request the next configuration from SMAC and wrap it in a trial.

    The trial name is built from the SMAC config id, seed, budget and
    instance. When fidelities are configured and SMAC supplied a budget,
    the budget is attached under the single fidelity's name, or under
    `"budget"` when several fidelities are configured.

    Returns:
        The trial info for the new config.
    """
    smac_trial_info = self.facade.ask()
    config = smac_trial_info.config
    seed = smac_trial_info.seed
    budget = smac_trial_info.budget
    instance = smac_trial_info.instance

    trial_fids = None
    if self.fidelities and budget:
        if len(self.fidelities) == 1:
            fidelity_name = next(iter(self.fidelities))
            trial_fids = {fidelity_name: budget}
        else:
            trial_fids = {"budget": budget}

    # The local names below are part of the generated trial name.
    config_id = self.facade.runhistory.config_ids[config]
    unique_name = f"{config_id=}_{seed=}_{budget=}_{instance=}"

    trial: Trial[SMACTrialInfo] = Trial(
        name=unique_name,
        config=dict(config),
        info=smac_trial_info,
        seed=seed,
        fidelities=trial_fids,
        bucket=self.bucket,
        metrics=self.metrics,
    )
    logger.debug(f"Asked for trial {trial.name}")
    return trial

def tell(report) #

Tell the optimizer the result of the sampled config.

PARAMETER DESCRIPTION
report

The report of the trial.

TYPE: Report[TrialInfo]

Source code in src/amltk/optimization/optimizers/smac.py
@override
def tell(self, report: Trial.Report[SMACTrialInfo]) -> None:
    """Tell the optimizer the result of the sampled config.

    Args:
        report: The report of the trial.
    """
    assert report.trial.info is not None

    cost: float | list[float]
    match self.metrics:
        case [metric]:  # Single obj
            val: Metric.Value = first_true(
                report.metric_values,
                pred=lambda m: m.metric == metric,
                default=metric.worst,
            )
            cost = self.cost(val)
        case metrics:
            # NOTE: We need to make sure that there sorted in the order
            # that SMAC expects, with any missing metrics filled in
            _lookup = {v.metric.name: v for v in report.metric_values}
            cost = [
                self.cost(_lookup.get(metric.name, metric.worst))
                for metric in metrics
            ]

    logger.debug(f"Telling report for trial {report.trial.name}")

    # If we're successful, get the cost and times and report them
    params: dict[str, Any]
    match report.status:
        case Trial.Status.SUCCESS:
            params = {
                "time": report.time.duration,
                "starttime": report.time.start,
                "endtime": report.time.end,
                "cost": cost,
                "status": StatusType.SUCCESS,
            }
        case Trial.Status.FAIL:
            params = {
                "time": report.time.duration,
                "starttime": report.time.start,
                "endtime": report.time.end,
                "cost": cost,
                "status": StatusType.CRASHED,
            }
        case Trial.Status.CRASHED | Trial.Status.UNKNOWN:
            params = {
                "cost": cost,
                "status": StatusType.CRASHED,
            }

    match report.exception:
        case None:
            pass
        case MemoryLimitException():
            params["status"] = StatusType.MEMORYOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case TimeoutException():
            params["status"] = StatusType.TIMEOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case _:
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }

    self.facade.tell(report.trial.info, value=SMACTrialValue(**params), save=True)

def preferred_parser()
classmethod
#

The preferred parser for this optimizer.

Source code in src/amltk/optimization/optimizers/smac.py
@override
@classmethod
def preferred_parser(cls) -> Literal["configspace"]:
    """Name the search-space parser this optimizer prefers.

    Returns:
        The literal string `"configspace"`.
    """
    return "configspace"

def crash_cost(metric)
classmethod
#

Get the crash cost for a metric for SMAC.

Source code in src/amltk/optimization/optimizers/smac.py
@classmethod
def crash_cost(cls, metric: Metric | Sequence[Metric]) -> float | list[float]:
    """Get the crash cost for a metric for SMAC.

    Args:
        metric: A single metric or a sequence of metrics.

    Returns:
        For a single metric whose `bounds` is a two-element sequence, the
        absolute width of those bounds; for any other single metric,
        `np.inf`. For anything that is not a `Metric`, a list with the
        crash cost of each element, in order.
    """
    match metric:
        # Case order matters: the bounded pattern must come before the
        # catch-all `Metric()` pattern, which matches every Metric.
        case Metric(bounds=(lower, upper)):  # Bounded metrics
            return abs(upper - lower)
        case Metric():  # Unbounded metric
            return np.inf
        case metrics:  # Not a Metric: treated as an iterable of metrics
            return [cls.crash_cost(m) for m in metrics]

def cost(value)
classmethod
#

Get the cost for a metric value for SMAC.

Source code in src/amltk/optimization/optimizers/smac.py
@classmethod
def cost(cls, value: Metric.Value) -> float:
    """Convert a metric value into the cost handed to SMAC.

    Args:
        value: The metric value to convert.

    Returns:
        The value's `distance_to_optimal` when it is not `None`, otherwise
        the value's `loss`.
    """
    distance = value.distance_to_optimal
    if distance is not None:
        # The distance could be computed, so use it
        return distance

    # No distance available, so use the loss
    return value.loss