
benchmark

class LCBenchTabularResult (dataclass)

Bases: Result[LCBenchTabularConfig, int]

score: float (property)

The score of interest.

error: float (property)

The error of interest.

val_score: float (property)

The score on the validation set.

val_error: float (property)

The error on the validation set.

test_score: float (property)

The score on the test set.

test_error: float (property)

The error on the test set.

cost: float (property)

The time taken to train the configuration (assumed to be in seconds).
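
For orientation, here is a minimal sketch of reading these fields from a result. It assumes a benchmark instance `bench` has already been constructed (see the class below) and that configurations are sampled and queried via `sample()` and `query()`; those method names are taken from the surrounding mfpbench benchmark interface and should be treated as assumptions here, not as part of this class's documented API.

```python
# Minimal sketch: `bench` is assumed to be an LCBenchTabularBenchmark instance;
# `sample()` and `query()` are assumed from the mfpbench benchmark API.
config = bench.sample()
result = bench.query(config, at=10)  # query at epoch 10 of the fidelity range

print(result.score, result.error)            # metrics of interest
print(result.val_score, result.val_error)    # validation set
print(result.test_score, result.test_error)  # test set
print(result.cost)                           # training time (seconds)
```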

class LCBenchTabularBenchmark(task_id, datadir=None, *, remove_constants=False, seed=None, prior=None, perturb_prior=None)

Bases: TabularBenchmark

PARAMETERS

task_id (str)
    The task to benchmark on.

datadir (str | Path | None, default: None)
    The directory to look for the data in. If None, the default download directory is used.

remove_constants (bool, default: False)
    Whether to remove constant config columns from the data.

seed (int | None, default: None)
    The seed to use.

prior (str | Path | LCBenchTabularConfig | Mapping[str, Any] | None, default: None)
    The prior to use for the benchmark. If None, no prior is used. If a str, the local location is checked first for a prior specific to this benchmark; otherwise it is treated as a Path. If a Path, the prior is loaded from that path. If a Mapping, it is used directly.

perturb_prior (float | None, default: None)
    If not None, the prior is perturbed by this amount. For numerical hyperparameters this is interpreted as the standard deviation of a normal distribution; for categoricals, as the probability of swapping the value for a random one.
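
As a rough sketch of how `prior` and `perturb_prior` fit together, the example below passes the prior as a Mapping, which is used directly. The hyperparameter keys shown are illustrative placeholders, not necessarily the exact LCBench search-space names.

```python
# Sketch only: a Mapping prior is used directly; the keys here are
# placeholders, not necessarily the real LCBench hyperparameter names.
prior = {"learning_rate": 1e-3, "batch_size": 128}

bench = LCBenchTabularBenchmark(
    task_id="adult",
    prior=prior,
    perturb_prior=0.1,  # std. dev. for numericals, swap probability for categoricals
    seed=1,
)
```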

Source code in src/mfpbench/lcbench_tabular/benchmark.py
def __init__(
    self,
    task_id: str,
    datadir: str | Path | None = None,
    *,
    remove_constants: bool = False,
    seed: int | None = None,
    prior: str | Path | LCBenchTabularConfig | Mapping[str, Any] | None = None,
    perturb_prior: float | None = None,
) -> None:
    """Initialize the benchmark.

    Args:
        task_id: The task to benchmark on.
        datadir: The directory to look for the data in. If `None`, uses the default
            download directory.
        remove_constants: Whether to remove constant config columns from the data or
            not.
        seed: The seed to use.
        prior: The prior to use for the benchmark. If None, no prior is used.
            If a str, will check the local location first for a prior
            specific for this benchmark, otherwise assumes it to be a Path.
            If a Path, will load the prior from the path.
            If a Mapping, will be used directly.
        perturb_prior: If not None, will perturb the prior by this amount.
            For numericals, this is interpreted as the standard deviation of a
            normal distribution while for categoricals, this is interpreted
            as the probability of swapping the value for a random one.
    """
    cls = self.__class__
    if task_id not in cls.task_ids:
        raise ValueError(f"Unknown task {task_id}, must be one of {cls.task_ids}")

    if datadir is None:
        datadir = LCBenchTabularSource.default_location()

    table_path = Path(datadir) / f"{task_id}.parquet"
    if not table_path.exists():
        raise FileNotFoundError(
            f"Could not find table {table_path}. "
            f"`python -m mfpbench download --status --data-dir {datadir}`",
        )

    self.task_id = task_id
    self.datadir = Path(datadir) if isinstance(datadir, str) else datadir

    table = pd.read_parquet(table_path)

    # NOTE: Dropping of the 0'th epoch
    # As the 0'th epoch is a completely untrained model, it is different from
    # the 1st epoch, where the model has been trained and its score is somewhat
    # representative. This is a benchmarking library for HPO and we do not want
    # to include untrained models nor have them be part of the fidelity range.
    # For that reason, we drop the 0'th epoch.
    drop_epoch = 0
    table = table.drop(index=drop_epoch, level="epoch")

    benchmark_task_name = f"lcbench_tabular-{task_id}"
    space = _get_raw_lcbench_space(name=f"lcbench_tabular-{task_id}", seed=seed)

    super().__init__(
        table=table,  # type: ignore
        name=benchmark_task_name,
        config_name="id",
        fidelity_name=cls.fidelity_name,
        result_keys=LCBenchTabularResult.names(),
        config_keys=LCBenchTabularConfig.names(),
        remove_constants=remove_constants,
        space=space,
        seed=seed,
        prior=prior,
        perturb_prior=perturb_prior,
    )
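
Putting the constructor together, here is a usage sketch under the assumption that the LCBench tabular data has already been downloaded to `datadir` (the error message in the source above points to the `python -m mfpbench download` CLI for this); the path used below is purely hypothetical.

```python
from pathlib import Path

# Sketch only: assumes the per-task parquet tables already exist under `datadir`.
datadir = Path("~/mfpbench-data/lcbench-tabular").expanduser()  # hypothetical path

bench = LCBenchTabularBenchmark(
    task_id="adult",        # must be one of LCBenchTabularBenchmark.task_ids
    datadir=datadir,
    remove_constants=True,  # drop constant config columns from the table
    seed=0,
)
```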

task_ids: tuple[str, ...] (classvar)

('adult', 'airlines', 'albert', 'Amazon_employee_access', 'APSFailure', 'Australian', 'bank-marketing', 'blood-transfusion-service-center', 'car', 'christine', 'cnae-9', 'connect-4', 'covertype', 'credit-g', 'dionis', 'fabert', 'Fashion-MNIST', 'helena', 'higgs', 'jannis', 'jasmine', 'jungle_chess_2pcs_raw_endgame_complete', 'kc1', 'KDDCup09_appetency', 'kr-vs-kp', 'mfeat-factors', 'MiniBooNE', 'nomao', 'numerai28.6', 'phoneme', 'segment', 'shuttle', 'sylvine', 'vehicle', 'volkert')