Source code for smac.scenario

from __future__ import annotations

from typing import Any

import copy
import hashlib
import json
import random
from dataclasses import dataclass
from pathlib import Path

import numpy as np
from ConfigSpace import ConfigurationSpace
from ConfigSpace.read_and_write import json as cs_json

from smac.utils.logging import get_logger

logger = get_logger(__name__)


@dataclass(frozen=True)
class Scenario:
    """
    The scenario manages environment variables and therefore gives context in which frame the optimization is
    performed.

    Parameters
    ----------
    configspace : ConfigurationSpace
        The configuration space from which to sample the configurations.
    name : str | None, defaults to None
        The name of the run. If no name is passed, SMAC generates a hash from the meta data.
        Specify this argument to identify your run easily.
    output_directory : Path, defaults to Path("smac3_output")
        The directory in which to save the output. The files are saved in `./output_directory/name/seed`.
    deterministic : bool, defaults to False
        If deterministic is set to true, only one seed is passed to the target function.
        Otherwise, multiple seeds (if n_seeds of the intensifier is greater than 1) are passed
        to the target function to ensure generalization.
    objectives : str | list[str], defaults to "cost"
        The objective(s) to optimize. This argument is required for multi-objective optimization.
    crash_cost : float | list[float], defaults to np.inf
        Defines the cost for a failed trial. In case of multi-objective, each objective can be associated with
        a different cost.
    termination_cost_threshold : float | list[float], defaults to np.inf
        Defines a cost threshold when the optimization should stop. In case of multi-objective, each objective *must*
        be associated with a cost. The optimization stops when all objectives crossed the threshold.
    walltime_limit : float, defaults to np.inf
        The maximum time in seconds that SMAC is allowed to run.
    cputime_limit : float, defaults to np.inf
        The maximum CPU time in seconds that SMAC is allowed to run.
    trial_walltime_limit : float | None, defaults to None
        The maximum time in seconds that a trial is allowed to run. If not specified,
        no constraints are enforced. Otherwise, the process will be spawned by pynisher.
    trial_memory_limit : int | None, defaults to None
        The maximum memory in MB that a trial is allowed to use. If not specified,
        no constraints are enforced. Otherwise, the process will be spawned by pynisher.
    n_trials : int, defaults to 100
        The maximum number of trials (combination of configuration, seed, budget, and instance, depending on the task)
        to run.
    use_default_config : bool, defaults to False
        If True, the configspace's default configuration is evaluated in the initial design.
        For historic benchmark reasons, this is False by default.
        Notice, that this will result in n_configs + 1 for the initial design. Respecting n_trials,
        this will result in one fewer evaluated configuration in the optimization.
    instances : list[str] | None, defaults to None
        Names of the instances to use. If None, no instances are used. Instances could be dataset names, seeds,
        subsets, etc.
    instance_features : dict[str, list[float]] | None, defaults to None
        Instances can be associated with features. For example, meta data of the dataset (mean, var, ...) can be
        incorporated which are then further used to expand the training data of the surrogate model.
    min_budget : float | int | None, defaults to None
        The minimum budget (epochs, subset size, number of instances, ...) that is used for the optimization.
        Use this argument if you use multi-fidelity or instance optimization.
    max_budget : float | int | None, defaults to None
        The maximum budget (epochs, subset size, number of instances, ...) that is used for the optimization.
        Use this argument if you use multi-fidelity or instance optimization.
    seed : int, defaults to 0
        The seed is used to make results reproducible. If seed is -1, SMAC will generate a random seed.
    n_workers : int, defaults to 1
        The number of workers to use for parallelization. If `n_workers` is greater than 1, SMAC will use
        Dask to parallelize the optimization.
    """

    # General
    configspace: ConfigurationSpace
    name: str | None = None
    output_directory: Path = Path("smac3_output")
    deterministic: bool = False

    # Objectives
    objectives: str | list[str] = "cost"
    crash_cost: float | list[float] = np.inf
    termination_cost_threshold: float | list[float] = np.inf

    # Limitations
    walltime_limit: float = np.inf
    cputime_limit: float = np.inf
    trial_walltime_limit: float | None = None
    trial_memory_limit: int | None = None
    n_trials: int = 100
    use_default_config: bool = False

    # Algorithm Configuration
    instances: list[str] | None = None
    instance_features: dict[str, list[float]] | None = None

    # Budgets
    min_budget: float | int | None = None
    max_budget: float | int | None = None

    # Others
    seed: int = 0
    n_workers: int = 1

    def __post_init__(self) -> None:
        """Checks whether the config is valid."""
        # The dataclass is frozen, so every normalisation below has to go through
        # `object.__setattr__` instead of a plain attribute assignment.

        # Use random seed if seed is -1
        if self.seed == -1:
            seed = random.randint(0, 999999)
            object.__setattr__(self, "seed", seed)

        # Transform instances to string if they are not
        if self.instances is not None:
            instances = [str(instance) for instance in self.instances]
            object.__setattr__(self, "instances", instances)

        # Transform instance features to string if they are not
        if self.instance_features is not None:
            instance_features = {str(instance): features for instance, features in self.instance_features.items()}
            object.__setattr__(self, "instance_features", instance_features)

        # Change directory wrt name and seed
        self._change_output_directory()

        # Set empty meta
        object.__setattr__(self, "_meta", {})

    def __eq__(self, other: object) -> bool:
        """Compares two scenarios by their JSON-serializable representation.

        Raises
        ------
        RuntimeError
            If `other` is not a `Scenario`.
        """
        if isinstance(other, Scenario):
            # When using __dict__, we make sure to include the meta data.
            # However, tuples are saved as lists in json. Therefore, we compare the json string
            # to make sure we have the same conversion.
            return Scenario.make_serializable(self) == Scenario.make_serializable(other)

        # NOTE(review): returning `NotImplemented` would be the conventional choice, but the
        # exception is kept because callers may rely on it.
        raise RuntimeError("Can only compare scenario objects.")

    @property
    def meta(self) -> dict[str, Any]:
        """Returns the meta data of the SMAC run.

        Note
        ----
        Meta data are set when the facade is initialized.
        """
        return self._meta  # type: ignore

    def count_objectives(self) -> int:
        """Counts the number of objectives."""
        if isinstance(self.objectives, list):
            return len(self.objectives)

        return 1

    def count_instance_features(self) -> int:
        """Counts the number of instance features.

        Returns
        -------
        int
            The number of features per instance, or 0 if no instance features are set.

        Raises
        ------
        RuntimeError
            If a key of `instance_features` is not listed in `instances`, or if the instances do
            not all have the same number of features.
        """
        if self.instance_features is None:
            return 0

        # Build the lookup once instead of scanning the list for every feature key.
        known_instances = set(self.instances) if self.instances is not None else set()

        # `None` marks "no instance seen yet"; using 0 for that would let an instance with an empty
        # feature list hide a length mismatch with the instances that follow it.
        n_features: int | None = None
        for instance, features in self.instance_features.items():
            # Check whether key of instance features exist
            if instance not in known_instances:
                raise RuntimeError(f"Instance {instance} is not specified in instances.")

            if n_features is None:
                n_features = len(features)
            elif len(features) != n_features:
                raise RuntimeError("Instances must have the same number of features.")

        return n_features or 0

    def save(self) -> None:
        """Saves internal variables and the configuration space to a file.

        Writes `scenario.json` and `configspace.json` into `output_directory`, creating the
        directory if necessary.

        Raises
        ------
        RuntimeError
            If the scenario has no name.
        """
        if self.meta == {}:
            logger.warning("Scenario will saved without meta data. Please call the facade first to set meta data.")

        if self.name is None:
            raise RuntimeError(
                "Please specify meta data for generating a name. Alternatively, you can specify a name manually."
            )

        self.output_directory.mkdir(parents=True, exist_ok=True)

        # The configuration space is stored in its own file and the path has to be converted to a
        # string, so both are left out here; `output_directory` is appended last.
        data: dict[str, Any] = {
            k: v for k, v in self.__dict__.items() if k not in ("configspace", "output_directory")
        }

        # Convert `output_directory`
        data["output_directory"] = str(self.output_directory)

        # Save everything
        filename = self.output_directory / "scenario.json"
        with open(filename, "w", encoding="utf-8") as fh:
            json.dump(data, fh, indent=4)

        # Save configspace on its own
        configspace_filename = self.output_directory / "configspace.json"
        with open(configspace_filename, "w", encoding="utf-8") as f:
            f.write(cs_json.write(self.configspace))

    @staticmethod
    def load(path: Path) -> Scenario:
        """Loads a scenario and the configuration space from a file.

        Parameters
        ----------
        path : Path
            Directory containing `scenario.json` and `configspace.json`.

        Returns
        -------
        Scenario
            The restored scenario with its meta data set.
        """
        # Accept plain strings as well as path objects.
        path = Path(path)

        filename = path / "scenario.json"
        with open(filename, "r", encoding="utf-8") as fh:
            data = json.load(fh)

        # Convert `output_directory` to path object again
        data["output_directory"] = Path(data["output_directory"])

        # Meta data is not a constructor argument; it is re-attached after construction.
        meta = data.pop("_meta")

        # Read configspace
        configspace_filename = path / "configspace.json"
        with open(configspace_filename, "r", encoding="utf-8") as f:
            data["configspace"] = cs_json.read(f.read())

        scenario = Scenario(**data)
        scenario._set_meta(meta)

        return scenario

    @staticmethod
    def make_serializable(scenario: Scenario) -> dict[str, Any]:
        """Makes the scenario serializable.

        Returns a dictionary of the scenario's attributes without the configuration space, with
        `output_directory` converted to a string and all values passed through a JSON round trip.
        """
        # The JSON round trip below already yields fresh objects, so the configuration space is
        # simply left out instead of being deep-copied and deleted afterwards.
        s = {k: v for k, v in scenario.__dict__.items() if k != "configspace"}
        s["output_directory"] = str(s["output_directory"])

        return json.loads(json.dumps(s))

    def _change_output_directory(self) -> None:
        """Appends `name/seed` to the output directory unless it already ends with it."""
        if self.name is None:
            return

        suffix = Path(self.name) / str(self.seed)
        current = Path(self.output_directory)

        # Compare whole path components: a plain string suffix test would treat e.g. `out/myrun/0`
        # as already ending in `run/0`.
        if current.parts[-len(suffix.parts) :] != suffix.parts:
            object.__setattr__(self, "output_directory", current / suffix)

    def _set_meta(self, meta: dict[str, Any]) -> None:
        """Sets the meta data of the SMAC run."""
        object.__setattr__(self, "_meta", meta)

        # We overwrite name with the hash of the meta (if no name is passed)
        if self.name is None:
            digest = hashlib.md5(str(self.__dict__).encode("utf-8")).hexdigest()
            object.__setattr__(self, "name", digest)
            self._change_output_directory()