Skip to content


class TabularBenchmark(name, table, *, config_name, fidelity_name, result_keys, config_keys, remove_constants=False, space=None, seed=None, prior=None, perturb_prior=None) #

Bases: Benchmark[CTabular, R, F]


The name of this benchmark.

TYPE: str


The table to use for the benchmark.

TYPE: DataFrame


The column in the table that contains the config id

TYPE: str


The column in the table that contains the fidelity

TYPE: str


The columns in the table that contain the results

TYPE: Sequence[str]


The columns in the table that contain the config values

TYPE: Sequence[str]


Remove constant config columns from the data or not.

TYPE: bool DEFAULT: False


The configuration space to use for the benchmark. If None, will just be an empty space.

TYPE: ConfigurationSpace | None DEFAULT: None


The prior to use for the benchmark. If None, no prior is used. If a string, will be treated as a prior specific for this benchmark if it can be found, otherwise assumes it to be a Path. If a Path, will load the prior from the path. If a dict or Configuration, will be used directly.

TYPE: str | Path | CTabular | Mapping[str, Any] | None DEFAULT: None


If not None, will perturb the prior by this amount. For numericals, this is interpreted as the standard deviation of a normal distribution, while for categoricals, this is interpreted as the probability of swapping the value for a random one.

TYPE: float | None DEFAULT: None


The seed to use for the benchmark.

TYPE: int | None DEFAULT: None

Source code in src/mfpbench/
def __init__(  # noqa: PLR0913, C901
    self,
    name: str,
    table: pd.DataFrame,
    *,
    config_name: str,
    fidelity_name: str,
    result_keys: Sequence[str],
    config_keys: Sequence[str],
    remove_constants: bool = False,
    space: ConfigurationSpace | None = None,
    seed: int | None = None,
    prior: str | Path | CTabular | Mapping[str, Any] | None = None,
    perturb_prior: float | None = None,
) -> None:
    """Initialize the benchmark.

    Args:
        name: The name of this benchmark.
        table: The table to use for the benchmark.
        config_name: The column in the table that contains the config id
        fidelity_name: The column in the table that contains the fidelity
        result_keys: The columns in the table that contain the results
        config_keys: The columns in the table that contain the config values
        remove_constants: Remove constant config columns from the data or not.
        space: The configuration space to use for the benchmark. If None, will
            just be an empty space.
        seed: The seed to use for the benchmark.
        prior: The prior to use for the benchmark. If None, no prior is used.
            If a string, will be treated as a prior specific for this benchmark
            if it can be found, otherwise assumes it to be a Path.
            If a Path, will load the prior from the path.
            If a dict or Configuration, will be used directly.
        perturb_prior: If not None, will perturb the prior by this amount.
            For numericals, this is interpreted as the standard deviation of a
            normal distribution while for categoricals, this is interpreted as
            the probability of swapping the value for a random one.

    Raises:
        ValueError: If `config_name`, `fidelity_name`, or any of `result_keys`
            / `config_keys` is missing from the table, if the table has a
            column called `"id"`, if the fidelity values do not all occur the
            same number of times, or if there are fewer than two distinct
            fidelity values to infer a step from.
    """
    cls = self.__class__

    # Work on lists so that tuples (or other sequences) can be used for
    # pandas label-based selection below.
    config_keys = list(config_keys)
    result_keys = list(result_keys)

    if remove_constants:

        def is_constant(_s: pd.Series) -> bool:
            _arr = _s.to_numpy()
            # An empty column has no first element to compare against.
            return len(_arr) > 0 and bool((_arr == _arr[0]).all())

        # Never drop the id/fidelity/result columns, even when constant:
        # they are required (and validated) further down.
        protected = {config_name, fidelity_name, *result_keys}
        constant_cols = [
            col
            for col in table.columns
            if col not in protected and is_constant(table[col])  # type: ignore
        ]
        table = table.drop(columns=constant_cols)  # type: ignore
        config_keys = [k for k in config_keys if k not in constant_cols]

    # If the table isn't indexed, index it
    index_cols = [config_name, fidelity_name]
    if table.index.names != index_cols:
        # Only drop the existing index if it's not relevant, i.e. it does not
        # hold one of the columns that are required below.
        # NOTE(review): the result/config keys are included here because they
        # are validated as columns further down - confirm against upstream.
        relevant_cols = {*index_cols, *result_keys, *config_keys}
        relevant = any(
            idx_name in relevant_cols for idx_name in table.index.names
        )
        table = table.reset_index(drop=not relevant)

        if config_name not in table.columns:
            raise ValueError(f"{config_name=} not in columns {table.columns}")
        if fidelity_name not in table.columns:
            raise ValueError(f"{fidelity_name=} not in columns {table.columns}")

        table = table.set_index(index_cols)
        table = table.sort_index()

    # Make sure all keys are in the table
    for key in chain(result_keys, config_keys):
        if key not in table.columns:
            raise ValueError(f"{key=} not in columns {table.columns}")

    # Make sure the keyword "id" is not in the columns as we use it to
    # identify configs
    if "id" in table.columns:
        raise ValueError(f"{table.columns=} contains 'id'. Please rename it")

    # Make sure every fidelity value occurs the same number of times, i.e.
    # all configs were evaluated at the same fidelities.
    fidelity_counts = table.index.get_level_values(fidelity_name).value_counts()
    if fidelity_counts.empty:
        raise ValueError(f"{fidelity_name=} has no values, the table is empty.")
    if not (fidelity_counts == fidelity_counts.iloc[0]).all():
        raise ValueError(f"{fidelity_name=} not  uniform. \n{fidelity_counts}")

    # We now have the following table
    # config_id fidelity | **metric, **config_values
    #     0         0    |
    #               1    |
    #               2    |
    #     1         0    |
    #               1    |
    #               2    |
    #   ...

    # Here we get all the unique configs
    # config_id fidelity | **metric, **config_values
    #     0         0    |
    #     1         0    |
    #   ...
    config_id_table = table.groupby(level=config_name).agg("first")
    configs = {
        str(config_id): cls.Config.from_dict(
            {
                **row[config_keys].to_dict(),  # type: ignore
                "id": str(config_id),
            },
        )
        for config_id, row in config_id_table.iterrows()
    }

    # We just assume equidistant fidelities, the step is inferred from the
    # first two distinct values.
    sorted_fids = sorted(table.index.get_level_values(fidelity_name).unique())
    if len(sorted_fids) < 2:  # noqa: PLR2004
        raise ValueError(
            f"{fidelity_name=} needs at least 2 unique values to infer a step,"
            f" got {sorted_fids}",
        )
    start = sorted_fids[0]
    end = sorted_fids[-1]
    step = sorted_fids[1] - sorted_fids[0]

    # Create the configuration space
    if space is None:
        space = ConfigurationSpace(name, seed=seed)

    self.table = table
    self.configs = configs
    self.fidelity_name = fidelity_name
    self.config_name = config_name
    self.config_keys = sorted(config_keys)
    self.result_keys = sorted(result_keys)
    self.fidelity_range = (start, end, step)  # type: ignore

    # NOTE(review): the base-class call is not visible in this listing; the
    # keyword names below mirror this constructor's own parameters - confirm
    # against `Benchmark.__init__`.
    super().__init__(
        name=name,
        seed=seed,
        space=space,
        prior=prior,
        perturb_prior=perturb_prior,
    )


table: pd.DataFrame

The table of results used for this benchmark

fidelity_name: str

The name of the fidelity used in this benchmark

config_name: str

The column in the table that contains the config id. Will be set to the index

config_keys: Sequence[str]

The keys in the table that contain the config

result_keys: Sequence[str]

The keys in the table that contain the results

def query(config, at=None, *, argmax=None, argmin=None) #

Submit a query and get a result.

Passing a raw config

If a mapping is passed (and not a Config object), we will attempt to look for id in the mapping, to know which config to lookup.

If this fails, we will try to match the config to one of the configs in the benchmark.

Prefer to pass the Config object directly if possible.


This function overrides the default query() to allow for this config matching


The query to use

TYPE: CTabular | Mapping[str, Any] | str


The fidelity at which to query, defaults to None which means maximum

TYPE: F | None DEFAULT: None


Whether to return the argmax up to the fidelity `at`. Will be slower as it has to get the entire trajectory. Uses the key from the Results.

TYPE: str | None DEFAULT: None


Whether to return the argmin up to the fidelity `at`. Will be slower as it has to get the entire trajectory. Uses the key from the Results.

TYPE: str | None DEFAULT: None


The result of the query

Source code in src/mfpbench/
def query(
    self,
    config: CTabular | Mapping[str, Any] | str,
    at: F | None = None,
    *,
    argmax: str | None = None,
    argmin: str | None = None,
) -> R:
    """Submit a query and get a result.

    !!! warning "Passing a raw config"

        If a mapping is passed (and **not** a [`Config`][mfpbench.Config] object),
        we will attempt to look for `id` in the mapping, to know which config to
        lookup.

        If this fails, we will try to match the config to one of the configs in
        the benchmark.

        Prefer to pass the [`Config`][mfpbench.Config] object directly if possible.

    ??? note "Override"

        This function overrides the default
        [`query()`][mfpbench.Benchmark.query] to allow for this
        config matching

    Args:
        config: The query to use
        at: The fidelity at which to query, defaults to None which means *maximum*
        argmax: Whether to return the argmax up to the point `at`. Will be slower as
            it has to get the entire trajectory. Uses the key from the Results.
        argmin: Whether to return the argmin up to the point `at`. Will be slower as
            it has to get the entire trajectory. Uses the key from the Results.

    Returns:
        The result of the query
    """
    # Resolve whatever was passed to one of the benchmark's own configs
    # before delegating to the default implementation.
    _config = self._find_config(config)
    return super().query(
        _config,  # type: ignore
        at=at,  # type: ignore
        argmax=argmax,
        argmin=argmin,
    )

def trajectory(config, *, frm=None, to=None, step=None) #

Submit a query and get a result.

Passing a raw config

If a mapping is passed (and not a Config object), we will attempt to look for id in the mapping, to know which config to lookup.

If this fails, we will try to match the config to one of the configs in the benchmark.

Prefer to pass the Config object directly if possible.


This function overrides the default trajectory() to allow for this config matching


The query to use

TYPE: CTabular | Mapping[str, Any] | str


Start of the curve, should default to the start

TYPE: F | None DEFAULT: None


End of the curve, should default to the total

TYPE: F | None DEFAULT: None


Step size, defaults to cls.default_step

TYPE: F | None DEFAULT: None


The result of the query

Source code in src/mfpbench/
def trajectory(
    self,
    config: CTabular | Mapping[str, Any] | str,
    *,
    frm: F | None = None,
    to: F | None = None,
    step: F | None = None,
) -> list[R]:
    """Submit a query and get the trajectory of results.

    !!! warning "Passing a raw config"

        If a mapping is passed (and **not** a [`Config`][mfpbench.Config] object),
        we will attempt to look for `id` in the mapping, to know which config to
        lookup.

        If this fails, we will try to match the config to one of the configs in
        the benchmark.

        Prefer to pass the [`Config`][mfpbench.Config] object directly if possible.

    ??? note "Override"

        This function overrides the default
        [`trajectory()`][mfpbench.Benchmark.trajectory] to allow for this
        config matching

    Args:
        config: The query to use
        frm: Start of the curve, should default to the start
        to: End of the curve, should default to the total
        step: Step size, defaults to ``cls.default_step``

    Returns:
        The results of the query, one per step of the curve
    """
    # Resolve whatever was passed to one of the benchmark's own configs
    # before delegating to the default implementation.
    _config = self._find_config(config)
    return super().trajectory(_config, frm=frm, to=to, step=step)  # type: ignore

def sample(n=None, *, seed=None) #

Sample a random possible config.


How many samples to take. None means just a single one, not in a list.

TYPE: int | None DEFAULT: None


The seed to use for the sampling.


This is different than any seed passed to the construction of the benchmark.

TYPE: int | RandomState | None DEFAULT: None

CTabular | list[CTabular]

Get back a possible Config to use

Source code in src/mfpbench/
def sample(
    self,
    n: int | None = None,
    *,
    seed: int | np.random.RandomState | None = None,
) -> CTabular | list[CTabular]:
    """Sample a random possible config.

    Configs are drawn without replacement from the configs of this benchmark.

    Args:
        n: How many samples to take, None means just a single one, not in a list
        seed: The seed to use for the sampling.

            !!! note "Seeding"

                This is different than any seed passed to the construction
                of the benchmark.

    Returns:
        Get back a possible Config to use

    Raises:
        ValueError: If more configs are requested than the benchmark contains.
    """
    _seed: int | None
    if isinstance(seed, np.random.RandomState):
        # `RandomState.random_integers` is deprecated. `randint` excludes the
        # upper bound, so `2**32` keeps the same inclusive [0, 2**32 - 1] range.
        _seed = int(seed.randint(0, 2**32, dtype=np.int64))
    else:
        _seed = seed

    rng = np.random.default_rng(seed=_seed)

    config_items: list[CTabular] = list(self.configs.values())
    n_configs = len(config_items)
    sample_amount = n if n is not None else 1

    if sample_amount > n_configs:
        raise ValueError(
            f"Can't sample {sample_amount} configs from {n_configs} configs",
        )

    # Sampling indices (rather than the configs themselves) avoids numpy
    # trying to coerce the config objects into an array.
    indices = rng.choice(n_configs, size=sample_amount, replace=False)
    if n is None:
        return config_items[int(indices[0])]

    return [config_items[int(i)] for i in indices]

class GenericTabularBenchmark(table, *, name=None, fidelity_name, config_name, result_keys, config_keys, result_mapping=None, remove_constants=False, space=None, seed=None, prior=None, perturb_prior=None) #

Bases: TabularBenchmark[GenericTabularConfig, GenericTabularResult[GenericTabularConfig, F], F]


The table to use for the benchmark

TYPE: DataFrame


The name of the benchmark. If None, will be set to unknown-{}

TYPE: str | None DEFAULT: None


The column in the table that contains the fidelity

TYPE: str


The column in the table that contains the config id

TYPE: str


The columns in the table that contain the results

TYPE: Sequence[str]


The columns in the table that contain the config values

TYPE: Sequence[str]


A mapping from the result keys to the table keys. If a string, will be used as the key in the table. If a callable, will be called with the table and the result will be used as the value.

TYPE: dict[str, str | Callable[[DataFrame], Any]] | None DEFAULT: None


Remove constant config columns from the data or not.

TYPE: bool DEFAULT: False


The configuration space to use for the benchmark. If None, will just be an empty space.

TYPE: ConfigurationSpace | None DEFAULT: None


The seed to use.

TYPE: int | None DEFAULT: None


The prior to use for the benchmark. If None, no prior is used. If a str, will check the local location first for a prior specific for this benchmark, otherwise assumes it to be a Path. If a Path, will load the prior from the path. If a Mapping, will be used directly.

TYPE: str | Path | GenericTabularConfig | Mapping[str, Any] | None DEFAULT: None


If not None, will perturb the prior by this amount. For numericals, this is interpreted as the standard deviation of a normal distribution while for categoricals, this is interpreted as the probability of swapping the value for a random one.

TYPE: float | None DEFAULT: None

Source code in src/mfpbench/
def __init__(  # noqa: PLR0913
    table: pd.DataFrame,
    name: str | None = None,
    fidelity_name: str,
    config_name: str,
    result_keys: Sequence[str],
    config_keys: Sequence[str],
    result_mapping: (dict[str, str | Callable[[pd.DataFrame], Any]] | None) = None,
    remove_constants: bool = False,
    space: ConfigurationSpace | None = None,
    seed: int | None = None,
    prior: str | Path | GenericTabularConfig | Mapping[str, Any] | None = None,
    perturb_prior: float | None = None,
    """Initialize the benchmark.

        table: The table to use for the benchmark
        name: The name of the benchmark. If None, will be set to

        fidelity_name: The column in the table that contains the fidelity
        config_name: The column in the table that contains the config id
        result_keys: The columns in the table that contain the results
        config_keys: The columns in the table that contain the config values
        result_mapping: A mapping from the result keys to the table keys.
            If a string, will be used as the key in the table. If a callable,
            will be called with the table and the result will be used as the value.
        remove_constants: Remove constant config columns from the data or not.
        space: The configuration space to use for the benchmark. If None, will
            just be an empty space.
        seed: The seed to use.
        prior: The prior to use for the benchmark. If None, no prior is used.
            If a str, will check the local location first for a prior
            specific for this benchmark, otherwise assumes it to be a Path.
            If a Path, will load the prior from the path.
            If a Mapping, will be used directly.
        perturb_prior: If not None, will perturb the prior by this amount.
            For numericals, this is interpreted as the standard deviation of a
            normal distribution while for categoricals, this is interpreted
            as the probability of swapping the value for a random one.
    if name is None:
        name = f"unknown-{}"

    _result_mapping: dict = result_mapping if result_mapping is not None else {}

    # Remap the result keys so it works with the generic result types
    if _result_mapping is not None:
        for k, v in _result_mapping.items():
            if isinstance(v, str):
                if v not in table.columns:
                    raise ValueError(f"{v} not in columns\n{table.columns}")

                table[k] = table[v]
            elif callable(v):
                table[k] = v(table)
                raise ValueError(f"Unknown result mapping {v} for {k}")

        result_keys=[*result_keys, *_result_mapping.keys()],