Smac
amltk.optimization.optimizers.smac
#
The SMACOptimizer
is a wrapper around the SMAC optimizer.
Requirements
This requires smac, which can be installed with `pip install amltk[smac]` (or directly with `pip install smac`).
This uses ConfigSpace as its search_space() to
optimize.
Users should report results using
trial.success().
Visit SMAC's documentation for what you can pass to
SMACOptimizer.create().
The below example shows how you can use SMAC to optimize an sklearn pipeline.
from __future__ import annotations
import logging
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component, Node
# Emit INFO-level (and higher) log records while the example runs.
logging.basicConfig(level=logging.INFO)
def target_function(trial: Trial, pipeline: Node) -> Trial.Report:
    """Fit and score one sampled configuration on the iris dataset.

    Args:
        trial: The trial whose ``config`` is applied to the pipeline.
        pipeline: The pipeline to configure with ``trial.config`` and build
            with the ``"sklearn"`` builder.

    Returns:
        ``trial.success(accuracy=...)`` when fitting and predicting work,
        otherwise ``trial.fail(...)`` carrying the raised exception.
    """
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = pipeline.configure(trial.config).build("sklearn")

    # Everything inside this block is recorded under the "trial" profile,
    # which is what shows up as the `profile:trial:*` columns in the history.
    with trial.profile("trial"):
        try:
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            return trial.success(accuracy=accuracy)
        except Exception as e:
            # Report the failure instead of letting the worker crash.
            return trial.fail(e)

    # Fallback so that every code path returns a report, even if the
    # profiling block is left without either branch above returning.
    return trial.fail()
# The search space: a random forest with two tunable hyperparameters.
pipeline = Component(
    RandomForestClassifier,
    space={
        "n_estimators": (10, 100),
        "max_samples": (0.1, 0.9),
    },
)

# Accuracy is to be maximised and is bounded to [0, 1].
metric = Metric("accuracy", minimize=False, bounds=(0, 1))

optimizer = SMACOptimizer.create(
    space=pipeline,
    metrics=metric,
    bucket="smac-doc-example",
)

# One process per worker; `task` wraps `target_function` for submission.
N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
task = scheduler.task(target_function)

# Collects every report produced during the run.
history = History()
@scheduler.on_start(repeat=N_WORKERS)
def on_start() -> None:
    """Ask the optimizer for a trial and submit it when the scheduler starts.

    Registered with ``repeat=N_WORKERS`` -- presumably so that one initial
    trial is launched per worker (confirm against the Scheduler docs).
    """
    trial = optimizer.ask()
    task.submit(trial, pipeline)
@task.on_result
def tell_and_launch_trial(_, report: Trial.Report) -> None:
    """Feed a finished report back to the optimizer and submit a new trial.

    Nothing happens once the scheduler is no longer running, so no new
    work is submitted after it has stopped.
    """
    if scheduler.running():
        optimizer.tell(report)
        trial = optimizer.ask()
        task.submit(trial, pipeline)
@task.on_result
def add_to_history(_, report: Trial.Report) -> None:
    """Record every finished report in the shared ``history``."""
    history.add(report)
# Run the optimisation loop with a timeout of 3 (presumably seconds -- confirm
# against the Scheduler docs), then print the collected reports as a table.
scheduler.run(
    timeout=3,
    wait=False,
)
print(history.df())
status ... profile:trial:time:unit
name ...
config_id=1_seed=1947329455_budget=None_instanc... success ... seconds
config_id=3_seed=1947329455_budget=None_instanc... success ... seconds
config_id=2_seed=1947329455_budget=None_instanc... success ... seconds
config_id=4_seed=1947329455_budget=None_instanc... success ... seconds
config_id=5_seed=1947329455_budget=None_instanc... success ... seconds
config_id=7_seed=1947329455_budget=None_instanc... success ... seconds
config_id=6_seed=1947329455_budget=None_instanc... success ... seconds
config_id=9_seed=1947329455_budget=None_instanc... success ... seconds
config_id=8_seed=1947329455_budget=None_instanc... success ... seconds
config_id=11_seed=1947329455_budget=None_instan... success ... seconds
config_id=10_seed=1947329455_budget=None_instan... success ... seconds
config_id=13_seed=1947329455_budget=None_instan... success ... seconds
config_id=12_seed=1947329455_budget=None_instan... success ... seconds
config_id=15_seed=1947329455_budget=None_instan... success ... seconds
config_id=14_seed=1947329455_budget=None_instan... success ... seconds
config_id=17_seed=1947329455_budget=None_instan... success ... seconds
config_id=16_seed=1947329455_budget=None_instan... success ... seconds
config_id=19_seed=1947329455_budget=None_instan... success ... seconds
config_id=18_seed=1947329455_budget=None_instan... success ... seconds
config_id=20_seed=1947329455_budget=None_instan... success ... seconds
config_id=21_seed=1947329455_budget=None_instan... success ... seconds
config_id=22_seed=1947329455_budget=None_instan... success ... seconds
config_id=23_seed=1947329455_budget=None_instan... success ... seconds
config_id=24_seed=1947329455_budget=None_instan... success ... seconds
config_id=26_seed=1947329455_budget=None_instan... success ... seconds
config_id=25_seed=1947329455_budget=None_instan... success ... seconds
config_id=27_seed=1947329455_budget=None_instan... success ... seconds
config_id=28_seed=1947329455_budget=None_instan... success ... seconds
config_id=29_seed=1947329455_budget=None_instan... success ... seconds
config_id=30_seed=1947329455_budget=None_instan... success ... seconds
config_id=31_seed=1947329455_budget=None_instan... success ... seconds
config_id=32_seed=1947329455_budget=None_instan... success ... seconds
config_id=33_seed=1947329455_budget=None_instan... success ... seconds
config_id=34_seed=1947329455_budget=None_instan... success ... seconds
config_id=35_seed=1947329455_budget=None_instan... success ... seconds
config_id=36_seed=1947329455_budget=None_instan... success ... seconds
config_id=37_seed=1947329455_budget=None_instan... success ... seconds
config_id=38_seed=1947329455_budget=None_instan... success ... seconds
config_id=40_seed=1947329455_budget=None_instan... success ... seconds
config_id=39_seed=1947329455_budget=None_instan... success ... seconds
config_id=41_seed=1947329455_budget=None_instan... success ... seconds
config_id=42_seed=1947329455_budget=None_instan... success ... seconds
config_id=43_seed=1947329455_budget=None_instan... success ... seconds
config_id=44_seed=1947329455_budget=None_instan... success ... seconds
[44 rows x 22 columns]
SMACOptimizer
#
SMACOptimizer(
*,
facade: AbstractFacade,
bucket: PathBucket | None = None,
metrics: Metric | Sequence[Metric],
fidelities: Mapping[str, FidT] | None = None,
time_profile: str | None = None
)
Bases: Optimizer[TrialInfo]
An optimizer that uses SMAC to optimize a config space.
| PARAMETER | DESCRIPTION |
|---|---|
facade |
The SMAC facade to use.
TYPE:
|
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
metrics |
The metrics to optimize. |
fidelities |
The fidelities to use, if any. |
time_profile |
The name of the profile from which time information is passed to the
optimizer. Your target function must then wrap the timed work in `trial.profile(time_profile)`.
TYPE:
|
Source code in src/amltk/optimization/optimizers/smac.py
bucket
instance-attribute
#
bucket: PathBucket = (
bucket
if bucket is not None
else PathBucket(f"{__name__}-{isoformat()}")
)
The bucket to give to trials generated by this optimizer.
metrics
instance-attribute
#
metrics: MetricCollection = from_collection(metrics)
The metrics to optimize.
CreateSignature
#
Bases: Protocol
A Protocol which defines the keywords required to create an optimizer with deterministic behavior at a desired location.
This protocol matches the Optimizer.create classmethod, however we also
allow any function which accepts the keyword arguments to create an
Optimizer.
__call__
#
__call__(
*,
space: Node,
metrics: Metric | Sequence[Metric],
bucket: PathBucket | None = None,
seed: Seed | None = None
) -> Optimizer
A function which creates an optimizer for node.optimize should accept the following keyword arguments.
| PARAMETER | DESCRIPTION |
|---|---|
space |
The node to optimize
TYPE:
|
metrics |
The metrics to optimize |
bucket |
The bucket to store the results in
TYPE:
|
seed |
The seed to use for the optimization
TYPE:
|
Source code in src/amltk/optimization/optimizer.py
ask
#
Ask the optimizer for a new config.
| PARAMETER | DESCRIPTION |
|---|---|
n |
The number of configs to ask for. If None, a single trial is returned; otherwise an iterable of n trials is returned.
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
Trial[TrialInfo] | Iterable[Trial[TrialInfo]]
|
The trial info for the new config. |
Source code in src/amltk/optimization/optimizers/smac.py
crash_costs
classmethod
#
Get the crash cost for a metric for SMAC.
Source code in src/amltk/optimization/optimizers/smac.py
create
classmethod
#
create(
*,
space: ConfigurationSpace | Node,
metrics: Metric | Sequence[Metric],
bucket: PathBucket | str | Path | None = None,
time_profile: str | None = None,
deterministic: bool = True,
seed: Seed | None = None,
fidelities: Mapping[str, FidT] | None = None,
continue_from_last_run: bool = False,
logging_level: (
int | Path | Literal[False] | None
) = False
) -> Self
Create a new SMAC optimizer using either the HPO facade or a multi-fidelity facade.
| PARAMETER | DESCRIPTION |
|---|---|
space |
The config space to optimize.
TYPE:
|
metrics |
The metrics to optimize. |
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
time_profile |
The name of the profile from which time information is passed to the
optimizer. Your target function must then wrap the timed work in `trial.profile(time_profile)`.
TYPE:
|
deterministic |
Whether the function you're optimizing is deterministic, given a seed and config.
TYPE:
|
seed |
The seed to use for the optimizer.
TYPE:
|
fidelities |
The fidelities to use, if any. |
continue_from_last_run |
Whether to continue from a previous run.
TYPE:
|
logging_level |
The logging level to use. This argument is passed forward to SMAC, use False to disable SMAC's handling of logging. |
Source code in src/amltk/optimization/optimizers/smac.py
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | |
tell
#
tell(report: Report[TrialInfo]) -> None
Tell the optimizer the result of the sampled config.
| PARAMETER | DESCRIPTION |
|---|---|
report |
The report of the trial.
TYPE:
|
Source code in src/amltk/optimization/optimizers/smac.py
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 | |