Skip to content

benchmark

class YAHPOBenchmark(task_id, *, datadir=None, seed=None, prior=None, perturb_prior=None, session=None) #

Bases: Benchmark[C, R, F]

PARAMETER DESCRIPTION
task_id

The task id to choose.

TYPE: str

seed

The seed to use

TYPE: int | None DEFAULT: None

datadir

The path to where mfpbench stores its data. If left to None, will use the _default_download_dir = ./data/yahpo-gym-data.

TYPE: str | Path | None DEFAULT: None

seed

The seed for the benchmark instance

TYPE: int | None DEFAULT: None

prior

The prior to use for the benchmark. If None, no prior is used. If a str, will check the local location first for a prior specific for this benchmark, otherwise assumes it to be a Path. If a Path, will load the prior from the path. If a Mapping, will be used directly.

TYPE: str | Path | C | Mapping[str, Any] | None DEFAULT: None

perturb_prior

If given, will perturb the prior by this amount. Only used if prior= is given as a config.

TYPE: float | None DEFAULT: None

session

The onnxruntime session to use. If None, will create a new one.

Not for the faint-hearted

This is only a backdoor for onnx compatibility issues with YahpoGym. You are advised not to use this unless you know what you are doing.

TYPE: InferenceSession | None DEFAULT: None

Source code in src/mfpbench/yahpo/benchmark.py
def __init__(  # noqa: C901, PLR0912
    self,
    task_id: str,
    *,
    datadir: str | Path | None = None,
    seed: int | None = None,
    prior: str | Path | C | Mapping[str, Any] | None = None,
    perturb_prior: float | None = None,
    session: onnxruntime.InferenceSession | None = None,
):
    """Initialize a Yahpo Benchmark.

    Args:
        task_id: The task id to choose.
        datadir: The path to where mfpbench stores its data. If left to `None`,
            the location given by `YAHPOSource.default_location()` is used.
        seed: The seed for the benchmark instance.
        prior: The prior to use for the benchmark. If None, no prior is used.
            If a str, will check the local location first for a prior
            specific for this benchmark, otherwise assumes it to be a Path.
            If a Path, will load the prior from the path.
            If a Mapping, will be used directly.
        perturb_prior: If given, will perturb the prior by this amount. Only used if
            `prior=` is given as a config.
        session: The onnxruntime session to use. If None, will create a new one.

            !!! warning "Not for the faint-hearted"

                This is only a backdoor for onnx compatibility issues with YahpoGym.
                You are advised not to use this unless you know what you are doing.

    Raises:
        NotImplementedError: If the benchmark class declares conditionals together
            with either forced-removed hyperparameters or a task id hyperparameter.
        ValueError: If `task_id` is inconsistent with the `yahpo_instances`
            declared on the class.
        FileNotFoundError: If the resolved `datadir` does not exist.
    """
    cls = self.__class__

    # Built from class-level data and the argument only, so it is safe to use in
    # error messages raised before `super().__init__` has run.
    name = f"{cls.yahpo_base_benchmark_name}-{task_id}"

    # --- Validation -------------------------------------------------------
    # These errors are maintainers errors, not user errors
    if cls.yahpo_forced_remove_hps is not None and cls.has_conditionals:
        # A single message string: passing two positional arguments would make
        # the exception render as a tuple.
        raise NotImplementedError(
            "Error setting up a YAHPO Benchmark with conditionals"
            " and forced hps",
        )

    if cls.yahpo_task_id_name is not None and cls.has_conditionals:
        # Use the local `name` rather than `self.name`; the instance has not
        # been initialised by the base class at this point.
        raise NotImplementedError(
            f"{name} has conditionals, can't remove task_id from space",
        )

    instances = cls.yahpo_instances
    if task_id is None and instances is not None:
        raise ValueError(f"{cls} requires a task in {instances}")
    if task_id is not None and instances is None:
        raise ValueError(f"{cls} has no instances, you passed {task_id}")
    if task_id is not None and instances and task_id not in instances:
        raise ValueError(f"{cls} requires a task in {instances}")

    # --- Data directory ---------------------------------------------------
    # Normalise once to a `Path`, whether the caller passed nothing, a str or
    # a Path.
    datadir = Path(YAHPOSource.default_location() if datadir is None else datadir)
    if not datadir.exists():
        raise FileNotFoundError(
            f"Can't find folder at {datadir}, have you run\n"
            f"`python -m mfpbench download --status --data-dir {datadir.parent}`",
        )
    _ensure_yahpo_config_set(datadir)

    # Imported here, after the yahpo config has been pointed at `datadir`.
    import yahpo_gym

    # --- Underlying yahpo benchmark ----------------------------------------
    if session is None:
        dummy_bench = yahpo_gym.BenchmarkSet(
            cls.yahpo_base_benchmark_name,
            instance=task_id,
            multithread=False,
            # HACK: Used to fix onnxruntime session issue with 1.16.0 where
            # `providers` is required. By setting these options, we prevent
            # the benchmark from automatically creating a session.
            # We will manually do so and set it later.
            active_session=False,
            session=None,
        )
        session = _yahpo_create_session(benchmark=dummy_bench)

    bench = yahpo_gym.BenchmarkSet(
        cls.yahpo_base_benchmark_name,
        instance=task_id,
        multithread=False,
        session=session,
    )

    # --- Search space -------------------------------------------------------
    # These can have one or two fidelities
    # NOTE: seed is allowed to be int | None
    space = bench.get_opt_space(
        drop_fidelity_params=True,
        seed=seed,  # type: ignore
    )

    # The task id is fixed by `task_id`, so it is not part of the search space.
    if cls.yahpo_task_id_name is not None:
        space = remove_hyperparameter(cls.yahpo_task_id_name, space)

    if cls.yahpo_forced_remove_hps is not None:
        # Only remove hyperparameters that are actually present in the space.
        names = space.get_hyperparameter_names()
        for key in cls.yahpo_forced_remove_hps:
            if key in names:
                space = remove_hyperparameter(key, space)

    # Set instance state before delegating to the base class initializer.
    self._bench = bench
    self.datadir = datadir
    self.task_id = task_id
    super().__init__(
        name=name,
        seed=seed,
        space=space,
        prior=prior,
        perturb_prior=perturb_prior,
    )

yahpo_base_benchmark_name: str
classvar
#

Base name of the yahpo benchmark.

yahpo_instances: tuple[str, ...] | None
attr
#

The instances available for this benchmark, if any.

yahpo_task_id_name: str | None
classvar
#

Name of hp used to indicate task.

yahpo_forced_remove_hps: Mapping[str, int | float | str] | None
attr
#

Any hyperparameters that should be forcefully deleted from the space but have default values filled in.

yahpo_replacements_hps: Sequence[tuple[str, str]] | None
attr
#

Any replacements that need to be done in hyperparameters [(dataclass_version, dict_version)]

datadir: Path
attr
#

The path to where the data is stored.

task_id: str
attr
#

The task id for this benchmark.

bench: yahpo_gym.BenchmarkSet
prop
#

The underlying yahpo gym benchmark.

def load() #

Load the benchmark into memory.

Source code in src/mfpbench/yahpo/benchmark.py
def load(self) -> None:
    """Load the benchmark into memory.

    Reads the `bench` attribute of this instance and discards the value;
    nothing is returned.
    """
    # Only the attribute access matters here, the result is deliberately unused.
    self.bench  # noqa: B018