Smac

amltk.optimization.optimizers.smac #

The SMACOptimizer is a wrapper around the SMAC optimizer.

Requirements

This requires smac which can be installed with:

pip install amltk[smac]

# Or directly
pip install smac

This uses ConfigSpace as its search_space() to optimize.

Users should report results using trial.success().

Visit the SMAC documentation for what you can pass to SMACOptimizer.create().

The below example shows how you can use SMAC to optimize an sklearn pipeline.

from __future__ import annotations

import logging

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component, Node

logging.basicConfig(level=logging.INFO)


def target_function(trial: Trial, pipeline: Node) -> Trial.Report:
    """Fit the configured pipeline on the iris data and report its accuracy.

    Args:
        trial: The trial whose `config` is applied to the pipeline.
        pipeline: The pipeline to configure and build into an sklearn estimator.

    Returns:
        A success report carrying `accuracy` if fitting and scoring worked,
        otherwise a fail report.
    """
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    # Apply the sampled configuration, then build the sklearn estimator.
    clf = pipeline.configure(trial.config).build("sklearn")

    # Everything inside this block is recorded under the profile name "trial".
    with trial.profile("trial"):
        try:
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            # The keyword name matches the name of the metric being optimized.
            return trial.success(accuracy=accuracy)
        except Exception as e:
            # Report the failure instead of letting the exception propagate.
            return trial.fail(e)

    # Fallback return; both branches above already return a report.
    return trial.fail()

# Search space: a random forest with `n_estimators` in (10, 100) and
# `max_samples` in (0.1, 0.9).
pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100), "max_samples": (0.1, 0.9)})

# Accuracy is maximized (`minimize=False`) and is bounded to (0, 1).
metric = Metric("accuracy", minimize=False, bounds=(0, 1))
optimizer = SMACOptimizer.create(space=pipeline, metrics=metric, bucket="smac-doc-example")

N_WORKERS = 2
# Presumably a scheduler backed by N_WORKERS worker processes.
scheduler = Scheduler.with_processes(N_WORKERS)
task = scheduler.task(target_function)

# Collects every report produced by the task.
history = History()

# NOTE(review): `repeat=N_WORKERS` presumably fires this callback once per
# worker so that each one starts with a trial -- confirm in the Scheduler docs.
@scheduler.on_start(repeat=N_WORKERS)
def on_start():
    """Ask the optimizer for a trial and submit it to the task."""
    trial = optimizer.ask()
    task.submit(trial, pipeline)

@task.on_result
def tell_and_launch_trial(_, report: Trial.Report):
    """Report the result to the optimizer and launch the next trial."""
    # Only tell/ask while the scheduler is still running.
    if scheduler.running():
        optimizer.tell(report)
        trial = optimizer.ask()
        task.submit(trial, pipeline)

@task.on_result
def add_to_history(_, report: Trial.Report):
    """Record the report in the history."""
    history.add(report)

# NOTE(review): `timeout` is presumably in seconds and `wait=False` presumably
# means running trials are not waited for at the end -- confirm.
scheduler.run(timeout=3, wait=False)

print(history.df())

                                                     status  ...  profile:trial:time:unit
name                                                         ...                         
config_id=1_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=3_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=2_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=4_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=5_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=7_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=6_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=9_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=8_seed=1947329455_budget=None_instanc...  success  ...                  seconds
config_id=11_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=10_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=13_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=12_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=15_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=14_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=17_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=16_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=19_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=18_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=20_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=21_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=22_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=23_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=24_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=26_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=25_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=27_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=28_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=29_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=30_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=31_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=32_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=33_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=34_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=35_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=36_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=37_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=38_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=40_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=39_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=41_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=42_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=43_seed=1947329455_budget=None_instan...  success  ...                  seconds
config_id=44_seed=1947329455_budget=None_instan...  success  ...                  seconds

[44 rows x 22 columns]

SMACOptimizer #

SMACOptimizer(
    *,
    facade: AbstractFacade,
    bucket: PathBucket | None = None,
    metrics: Metric | Sequence[Metric],
    fidelities: Mapping[str, FidT] | None = None,
    time_profile: str | None = None
)

Bases: Optimizer[TrialInfo]

An optimizer that uses SMAC to optimize a config space.

PARAMETER	DESCRIPTION
`facade`	The SMAC facade to use. TYPE: `AbstractFacade`
`bucket`	The bucket given to trials generated by this optimizer. TYPE: `PathBucket \| None` DEFAULT: `None`
`metrics`	The metrics to optimize. TYPE: `Metric \| Sequence[Metric]`
`fidelities`	The fidelities to use, if any. TYPE: `Mapping[str, FidT] \| None` DEFAULT: `None`
`time_profile`	The profile to use to get time information to the optimizer. Must use `trial.profile(time_profile)` in your target function then. TYPE: `str \| None` DEFAULT: `None`

Source code in src/amltk/optimization/optimizers/smac.py

def __init__(
    self,
    *,
    facade: AbstractFacade,
    bucket: PathBucket | None = None,
    metrics: Metric | Sequence[Metric],
    fidelities: Mapping[str, FidT] | None = None,
    time_profile: str | None = None,
) -> None:
    """Set up the optimizer around an already constructed SMAC facade.

    Args:
        facade: The SMAC facade to use. Its scenario's `crash_cost` must equal
            the crash costs derived from `metrics`.
        bucket: The bucket given to trials generated by this optimizer.
        metrics: The metric, or sequence of metrics, to optimize.
        fidelities: The fidelities to use, if any.
        time_profile: Name of the profile from which timing information is
            taken when reporting to the optimizer. The target function must
            then use `trial.profile(time_profile)`.
    """
    # Users may construct the facade themselves, so verify that its scenario
    # was configured with the crash costs these metrics imply.
    expected_crash_cost = list(self.crash_costs(metrics).values())
    assert expected_crash_cost == facade.scenario.crash_cost

    # The base class is always handed a sequence of metrics.
    if not isinstance(metrics, Sequence):
        metrics = [metrics]
    super().__init__(metrics=metrics, bucket=bucket)

    self.time_profile = time_profile
    self.fidelities = fidelities
    self.facade = facade

bucket `instance-attribute` #

bucket: PathBucket = (
    bucket
    if bucket is not None
    else PathBucket(f"{__name__}-{isoformat()}")
)

The bucket to give to trials generated by this optimizer.

metrics `instance-attribute` #

metrics: MetricCollection = from_collection(metrics)

The metrics to optimize.

CreateSignature #

Bases: Protocol

A Protocol which defines the keywords required to create an optimizer with deterministic behavior at a desired location.

This protocol matches the Optimizer.create classmethod, however we also allow any function which accepts the keyword arguments to create an Optimizer.

call #

__call__(
    *,
    space: Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | None = None,
    seed: Seed | None = None
) -> Optimizer

A function which creates an optimizer for node.optimize should accept the following keyword arguments.

PARAMETER	DESCRIPTION
`space`	The node to optimize TYPE: `Node`
`metrics`	The metrics to optimize TYPE: `Metric \| Sequence[Metric]`
`bucket`	The bucket to store the results in TYPE: `PathBucket \| None` DEFAULT: `None`
`seed`	The seed to use for the optimization TYPE: `Seed \| None` DEFAULT: `None`

Source code in src/amltk/optimization/optimizer.py

def __call__(
    self,
    *,
    space: Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | None = None,
    seed: Seed | None = None,
) -> Optimizer:
    """Create an optimizer from the given keyword arguments.

    A function which creates an optimizer for `node.optimize` should
    accept the following keyword arguments.

    Args:
        space: The node to optimize.
        metrics: The metric, or sequence of metrics, to optimize.
        bucket: The bucket to store the results in.
        seed: The seed to use for the optimization.

    Returns:
        The created optimizer.
    """
    ...

ask #

ask(
    n: int | None = None,
) -> Trial[TrialInfo] | Iterable[Trial[TrialInfo]]

Ask the optimizer for a new config.

PARAMETER	DESCRIPTION
`n`	The number of configs to ask for. If `None`, ask for a single config. TYPE: `int \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`Trial[TrialInfo] \| Iterable[Trial[TrialInfo]]`	The trial info for the new config.

Source code in src/amltk/optimization/optimizers/smac.py

@override
def ask(
    self,
    n: int | None = None,
) -> Trial[SMACTrialInfo] | Iterable[Trial[SMACTrialInfo]]:
    """Ask the optimizer for one new trial, or for `n` of them.

    Args:
        n: The number of configs to ask for. If `None`, ask for a single config.

    Returns:
        A single trial when `n` is `None`, otherwise a lazy iterable that asks
        for one trial each time it is advanced, `n` times in total.
    """
    if n is not None:
        # Lazily evaluated: nothing is asked of SMAC until iteration starts.
        return (self.ask(n=None) for _ in range(n))

    info = self.facade.ask()
    # NOTE: these four local names are embedded verbatim in `unique_name`
    # through the `{name=}` f-string syntax, so they must not be renamed.
    config = info.config
    budget = info.budget
    instance = info.instance
    seed = info.seed

    # A budget only becomes a trial fidelity when fidelities were configured.
    # With exactly one fidelity the budget is stored under that fidelity's
    # name, otherwise under the generic key "budget".
    trial_fids = None
    if self.fidelities and budget:
        if len(self.fidelities) == 1:
            fidelity_name = next(iter(self.fidelities))
            trial_fids = {fidelity_name: budget}
        else:
            trial_fids = {"budget": budget}

    config_id = self.facade.runhistory.config_ids[config]
    unique_name = f"{config_id=}_{seed=}_{budget=}_{instance=}"

    trial: Trial[SMACTrialInfo] = Trial.create(
        name=unique_name,
        config=dict(config),
        info=info,
        seed=seed,
        fidelities=trial_fids,
        bucket=self.bucket / unique_name,
        metrics=self.metrics,
    )
    logger.debug(f"Asked for trial {trial.name}")
    return trial

crash_costs `classmethod` #

crash_costs(
    metric: Metric | Iterable[Metric],
) -> dict[str, float]

Get the crash cost for a metric for SMAC.

Source code in src/amltk/optimization/optimizers/smac.py

@classmethod
def crash_costs(cls, metric: Metric | Iterable[Metric]) -> dict[str, float]:
    """Get the crash cost of each metric for SMAC.

    The crash cost of a metric is `metric.normalized_loss(metric.worst)`.

    Args:
        metric: A single metric or an iterable of metrics.

    Returns:
        A mapping from each metric's name to its crash cost.

    Raises:
        TypeError: If `metric` is neither a `Metric` nor an iterable.
    """
    if isinstance(metric, Metric):
        return {metric.name: metric.normalized_loss(metric.worst)}

    if isinstance(metric, Iterable):
        return {m.name: m.normalized_loss(m.worst) for m in metric}

    raise TypeError(
        f"Expected a Metric, Mapping, or Iterable of Metrics. Got {metric}",
    )

create `classmethod` #

create(
    *,
    space: ConfigurationSpace | Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | str | Path | None = None,
    time_profile: str | None = None,
    deterministic: bool = True,
    seed: Seed | None = None,
    fidelities: Mapping[str, FidT] | None = None,
    continue_from_last_run: bool = False,
    logging_level: (
        int | Path | Literal[False] | None
    ) = False
) -> Self

Create a new SMAC optimizer using either the HPO facade or a multi-fidelity facade.

PARAMETER	DESCRIPTION
`space`	The config space to optimize. TYPE: `ConfigurationSpace \| Node`
`metrics`	The metrics to optimize. TYPE: `Metric \| Sequence[Metric]`
`bucket`	The bucket given to trials generated by this optimizer. TYPE: `PathBucket \| str \| Path \| None` DEFAULT: `None`
`time_profile`	The profile to use to get time information to the optimizer. Must use `trial.profile(time_profile)` in your target function then. TYPE: `str \| None` DEFAULT: `None`
`deterministic`	Whether the function you're optimizing is deterministic, given a seed and config. TYPE: `bool` DEFAULT: `True`
`seed`	The seed to use for the optimizer. TYPE: `Seed \| None` DEFAULT: `None`
`fidelities`	The fidelities to use, if any. TYPE: `Mapping[str, FidT] \| None` DEFAULT: `None`
`continue_from_last_run`	Whether to continue from a previous run. TYPE: `bool` DEFAULT: `False`
`logging_level`	The logging level to use. This argument is passed forward to SMAC, use False to disable SMAC's handling of logging. TYPE: `int \| Path \| Literal[False] \| None` DEFAULT: `False`

Source code in src/amltk/optimization/optimizers/smac.py

@override
@classmethod
def create(
    cls,
    *,
    space: ConfigurationSpace | Node,
    metrics: Metric | Sequence[Metric],
    bucket: PathBucket | str | Path | None = None,
    time_profile: str | None = None,
    deterministic: bool = True,
    seed: Seed | None = None,
    fidelities: Mapping[str, FidT] | None = None,
    continue_from_last_run: bool = False,
    logging_level: int | Path | Literal[False] | None = False,
) -> Self:
    """Create a new SMAC optimizer using either the HPO facade or
    a multi-fidelity facade.

    The multi-fidelity facade is used whenever `fidelities` is non-empty,
    otherwise the hyperparameter optimization facade is used.

    Args:
        space: The config space to optimize. A `Node` is converted with its
            `search_space()` using this optimizer's preferred parser.
        metrics: The metrics to optimize.
        bucket: The bucket given to trials generated by this optimizer.
            A `str` or `Path` is wrapped in a `PathBucket`; `None` creates a
            bucket named after the class and the current time.
        time_profile: The profile from which timing information is taken
            for the optimizer. The target function must then use
            `trial.profile(time_profile)`.
        deterministic: Whether the function you're optimizing is deterministic,
            given a seed and config.
        seed: The seed to use for the optimizer.
        fidelities: The fidelities to use, if any. With exactly one fidelity
            its value is unpacked as `(min_budget, max_budget)`; with several,
            the budget range is fixed to `(1.0, 100.0)`.
        continue_from_last_run: Whether to continue from a previous run.
        logging_level: The logging level to use.
            This argument is passed forward to SMAC, use False to disable
            SMAC's handling of logging.

    Returns:
        The optimizer wrapping the constructed facade.
    """
    seed = as_int(seed)

    # Normalize `bucket` to a PathBucket; an existing bucket is used as given.
    if bucket is None:
        bucket = PathBucket(f"{cls.__name__}-{datetime.now().isoformat()}")
    elif isinstance(bucket, (str, Path)):
        bucket = PathBucket(bucket)

    # NOTE SMAC always minimizes! Hence we make it a minimization problem
    metric_names: str | list[str] = (
        [m.name for m in metrics] if isinstance(metrics, Sequence) else metrics.name
    )

    if isinstance(space, Node):
        space = space.search_space(parser=cls.preferred_parser())

    # Only the facade class and a few scenario arguments differ between the
    # multi-fidelity and the plain HPO setup.
    facade_cls: type[AbstractFacade]
    scenario_extras: dict[str, Any]
    if fidelities:
        if len(fidelities) == 1:
            min_budget, max_budget = next(iter(fidelities.values()))
        else:
            min_budget, max_budget = 1.0, 100.0

        # NOTE(review): `deterministic` is not forwarded to the multi-fidelity
        # scenario -- confirm this is intended.
        scenario_extras = {"min_budget": min_budget, "max_budget": max_budget}
        facade_cls = MultiFidelityFacade
    else:
        scenario_extras = {"deterministic": deterministic}
        facade_cls = HyperparameterOptimizationFacade

    scenario = Scenario(
        configspace=space,
        objectives=metric_names,
        output_directory=bucket.path / "smac3_output",
        seed=seed,
        crash_cost=list(cls.crash_costs(metrics).values()),
        **scenario_extras,
    )

    multi_objective_algorithm = facade_cls.get_multi_objective_algorithm(
        scenario=scenario,
    )
    facade = facade_cls(
        scenario=scenario,
        target_function="dummy",  # NOTE: https://github.com/automl/SMAC3/issues/946
        overwrite=not continue_from_last_run,
        logging_level=logging_level,
        multi_objective_algorithm=multi_objective_algorithm,
    )
    return cls(
        facade=facade,
        fidelities=fidelities,
        bucket=bucket,
        metrics=metrics,
        time_profile=time_profile,
    )

preferred_parser `classmethod` #

preferred_parser() -> Literal['configspace']

The preferred parser for this optimizer.

Source code in src/amltk/optimization/optimizers/smac.py

@override
@classmethod
def preferred_parser(cls) -> Literal["configspace"]:
    """The preferred parser for this optimizer.

    Returns:
        The parser name `"configspace"`.
    """
    return "configspace"

tell #

tell(report: Report[TrialInfo]) -> None

Tell the optimizer the result of the sampled config.

PARAMETER	DESCRIPTION
`report`	The report of the trial. TYPE: `Report[TrialInfo]`

Source code in src/amltk/optimization/optimizers/smac.py

@override
def tell(self, report: Trial.Report[SMACTrialInfo]) -> None:
    """Tell the optimizer the result of the sampled config.

    Args:
        report: The report of the trial.
    """
    assert report.trial.info is not None

    costs: dict[str, float] = {}
    for name, metric in self.metrics.items():
        value = report.values.get(metric.name)
        if value is None:
            if report.status == Trial.Status.SUCCESS:
                raise ValueError(
                    f"Could not find metric '{metric.name}' in report values."
                    " Make sure you use `trial.success()` in your target function."
                    " So that we can report the metric value to SMAC.",
                )
            value = metric.worst

        costs[name] = metric.normalized_loss(value)

    logger.debug(f"Reporting for trial {report.trial.name} with costs: {costs}")

    cost = next(iter(costs.values())) if len(costs) == 1 else list(costs.values())

    # If we're successful, get the cost and times and report them
    params: dict[str, Any]
    match report.status:
        case Trial.Status.SUCCESS | Trial.Status.FAIL:
            smac_status = (
                StatusType.SUCCESS
                if report.status == Trial.Status.SUCCESS
                else StatusType.CRASHED
            )
            params = {"cost": cost, "status": smac_status}
        case Trial.Status.CRASHED | Trial.Status.UNKNOWN:
            params = {"cost": cost, "status": StatusType.CRASHED}

    if self.time_profile:
        profile = report.trial.profiles.get(self.time_profile)
        match profile:
            # If it was a success, we kind of expect there to have been this
            # timing. Otherwise, for failure we don't necessarily expect it.
            case None if report.status in Trial.Status.SUCCESS:
                raise ValueError(
                    f"Could not find profile '{self.time_profile}' in trial"
                    " as specified by `time_profile` during construction."
                    " Make sure you use `with trial.profile(time_profile):`"
                    " in your target function. So that we can report the"
                    " timing information to SMAC.",
                )
            case Profile.Interval(time=timer):
                params.update(
                    {
                        "time": timer.duration,
                        "starttime": timer.start,
                        "endtime": timer.end,
                    },
                )
            case None:
                pass

    match report.exception:
        case None:
            pass
        case MemoryLimitException():
            params["status"] = StatusType.MEMORYOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case TimeoutException():
            params["status"] = StatusType.TIMEOUT
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }
        case _:
            params["additional_info"] = {
                "exception": str(report.exception),
                "traceback": report.traceback,
            }

    self.facade.tell(report.trial.info, value=SMACTrialValue(**params), save=True)

Smac

amltk.optimization.optimizers.smac #

SMACOptimizer #

bucket instance-attribute #

metrics instance-attribute #

CreateSignature #

__call__ #

ask #

crash_costs classmethod #

create classmethod #

preferred_parser classmethod #

tell #

bucket `instance-attribute` #

metrics `instance-attribute` #

call #

crash_costs `classmethod` #

create `classmethod` #

preferred_parser `classmethod` #