Dyhpo

neps.optimizers.multi_fidelity.dyhpo #

MFEIBO #

MFEIBO(
    pipeline_space: SearchSpace,
    budget: int = None,
    step_size: int | float = 1,
    optimal_assignment: bool = False,
    use_priors: bool = False,
    sample_default_first: bool = False,
    sample_default_at_target: bool = False,
    loss_value_on_error: None | float = None,
    cost_value_on_error: None | float = None,
    patience: int = 100,
    ignore_errors: bool = False,
    logger=None,
    surrogate_model: str | Any = "deep_gp",
    surrogate_model_args: dict = None,
    domain_se_kernel: str = None,
    graph_kernels: list = None,
    hp_kernels: list = None,
    acquisition: str | BaseAcquisition = acquisition,
    acquisition_args: dict = None,
    acquisition_sampler: (
        str | AcquisitionSampler
    ) = "freeze-thaw",
    acquisition_sampler_args: dict = None,
    model_policy: Any = FreezeThawModel,
    initial_design_fraction: float = 0.75,
    initial_design_size: int = 10,
    initial_design_budget: int = None,
)

Bases: BaseOptimizer

Base class for MF-BO algorithms that use DyHPO-like acquisition and budgeting.

PARAMETER	DESCRIPTION
`pipeline_space`	Space in which to search TYPE: `SearchSpace`
`budget`	Maximum budget TYPE: `int` DEFAULT: `None`
`use_priors`	Allows random samples to be generated from a default. Samples are generated from a Gaussian centered around the default value TYPE: `bool` DEFAULT: `False`
`sampling_policy`	The type of sampling procedure to use
`promotion_policy`	The type of promotion procedure to use
`loss_value_on_error`	Setting this and cost_value_on_error to any float will suppress any error during Bayesian optimization and will use the given loss value instead. default: None TYPE: `None \| float` DEFAULT: `None`
`cost_value_on_error`	Setting this and loss_value_on_error to any float will suppress any error during Bayesian optimization and will use the given cost value instead. default: None TYPE: `None \| float` DEFAULT: `None`
`logger`	logger object, or None to use the neps logger DEFAULT: `None`
`sample_default_first`	Whether to sample the default configuration first TYPE: `bool` DEFAULT: `False`

Source code in neps/optimizers/multi_fidelity/dyhpo.py

def __init__(
    self,
    pipeline_space: SearchSpace,
    budget: int = None,
    step_size: int | float = 1,
    optimal_assignment: bool = False,
    use_priors: bool = False,
    sample_default_first: bool = False,
    sample_default_at_target: bool = False,
    loss_value_on_error: None | float = None,
    cost_value_on_error: None | float = None,
    patience: int = 100,
    ignore_errors: bool = False,
    logger=None,
    # arguments for model
    surrogate_model: str | Any = "deep_gp",
    surrogate_model_args: dict = None,
    domain_se_kernel: str = None,
    graph_kernels: list = None,
    hp_kernels: list = None,
    acquisition: str | BaseAcquisition = acquisition,
    acquisition_args: dict = None,
    acquisition_sampler: str | AcquisitionSampler = "freeze-thaw",
    acquisition_sampler_args: dict = None,
    model_policy: Any = FreezeThawModel,
    initial_design_fraction: float = 0.75,
    initial_design_size: int = 10,
    initial_design_budget: int = None,
):
    """Initialise the optimizer state, the surrogate model policy and the
    acquisition function/sampler.

    Args:
        pipeline_space: Space in which to search. Its ``fidelity.lower`` and
            ``fidelity.upper`` are stored as ``min_budget`` and ``max_budget``.
        budget: Maximum budget; forwarded to the base optimizer.
        step_size: Stored as ``self.step_size``; ``total_budget_spent``
            multiplies the summed budget levels by it.
        optimal_assignment: Forwarded to ``get_kernels``.
        use_priors: Allows random samples to be generated from a default.
            Samples are generated from a Gaussian centered around the default
            value.
        sample_default_first: Whether to sample the default configuration first
        sample_default_at_target: Stored on the instance; not used further in
            this constructor.
        loss_value_on_error: Setting this and cost_value_on_error to any float will
            suppress any error during Bayesian optimization and will use given loss
            value instead. default: None
        cost_value_on_error: Setting this and loss_value_on_error to any float will
            suppress any error during Bayesian optimization and will use given cost
            value instead. default: None
        patience: Forwarded to the base optimizer; ``self.patience`` is also
            handed to the acquisition sampler.
        ignore_errors: Forwarded to the base optimizer.
        logger: logger object, or None to use the neps logger
        surrogate_model: Name of the surrogate model. Must be ``"deep_gp"``,
            ``"gp"`` or ``"pfn"``.
        surrogate_model_args: Extra arguments for the surrogate model; an empty
            dict is used when None.
        domain_se_kernel: Forwarded to ``get_kernels``.
        graph_kernels: Forwarded to ``get_kernels``.
        hp_kernels: Forwarded to ``get_kernels``.
        acquisition: Acquisition function, resolved through
            ``instance_from_map`` with ``AcquisitionMapping``.
        acquisition_args: Keyword arguments for the acquisition function. When a
            dict is passed it is updated in place with ``pipeline_space`` and
            ``surrogate_model_name``.
        acquisition_sampler: Acquisition sampler, resolved through
            ``instance_from_map`` with ``AcquisitionSamplerMapping``.
        acquisition_sampler_args: Keyword arguments for the acquisition sampler.
            When a dict is passed it is updated in place with ``patience`` and
            ``pipeline_space``.
        model_policy: Currently ignored: the value is overwritten by the policy
            class selected from ``surrogate_model``.
        initial_design_fraction: Passed to ``_set_initial_design``.
        initial_design_size: Passed to ``_set_initial_design``.
        initial_design_budget: Passed to ``_set_initial_design``.

    Raises:
        ValueError: If ``surrogate_model`` is not one of ``"deep_gp"``, ``"gp"``
            or ``"pfn"``.
    """
    super().__init__(
        pipeline_space=pipeline_space,
        budget=budget,
        patience=patience,
        loss_value_on_error=loss_value_on_error,
        cost_value_on_error=cost_value_on_error,
        ignore_errors=ignore_errors,
        logger=logger,
    )
    self.raw_tabular_space = (
        None  # placeholder, can be populated using pre_load_hook
    )
    self._budget_list: list[int | float] = []
    self.step_size: int | float = step_size
    # the fidelity bounds of the search space define the budget range
    self.min_budget = self.pipeline_space.fidelity.lower
    # TODO: generalize this to work with real data (not benchmarks)
    self.max_budget = self.pipeline_space.fidelity.upper

    self._initial_design_fraction = initial_design_fraction
    # NOTE(review): called after min/max budget and step size are set;
    # presumably `_set_initial_design` reads them -- confirm before reordering
    (
        self._initial_design_size,
        self._initial_design_budget,
    ) = self._set_initial_design(
        initial_design_size, initial_design_budget, self._initial_design_fraction
    )
    # TODO: Write use cases for these parameters
    self._model_update_failed = False
    self.sample_default_first = sample_default_first
    self.sample_default_at_target = sample_default_at_target

    self.surrogate_model_name = surrogate_model

    self.use_priors = use_priors
    self.total_fevals: int = 0

    # empty observation table; `load_results` replaces it with a fresh one
    self.observed_configs = MFObservedData(
        columns=["config", "perf", "learning_curves"],
        index_names=["config_id", "budget_id"],
    )

    # Preparing model
    self.graph_kernels, self.hp_kernels = get_kernels(
        pipeline_space=pipeline_space,
        domain_se_kernel=domain_se_kernel,
        graph_kernels=graph_kernels,
        hp_kernels=hp_kernels,
        optimal_assignment=optimal_assignment,
    )
    # no copy is made: a dict passed by the caller is stored as-is
    self.surrogate_model_args = (
        {} if surrogate_model_args is None else surrogate_model_args
    )
    self._prep_model_args(self.hp_kernels, self.graph_kernels, pipeline_space)

    # TODO: Better solution than branching based on the surrogate name is needed
    # NOTE: this overwrites whatever was passed as `model_policy`
    if surrogate_model in ["deep_gp", "gp"]:
        model_policy = FreezeThawModel
    elif surrogate_model == "pfn":
        model_policy = PFNSurrogate
    else:
        raise ValueError("Invalid model option selected!")

    # The surrogate model is initialized here
    self.model_policy = model_policy(
        pipeline_space=pipeline_space,
        surrogate_model=surrogate_model,
        surrogate_model_args=self.surrogate_model_args,
    )
    # NOTE: `update` below mutates the caller's dict when one was passed in
    self.acquisition_args = {} if acquisition_args is None else acquisition_args
    self.acquisition_args.update(
        {
            "pipeline_space": self.pipeline_space,
            "surrogate_model_name": self.surrogate_model_name,
        }
    )
    self.acquisition = instance_from_map(
        AcquisitionMapping,
        acquisition,
        name="acquisition function",
        kwargs=self.acquisition_args,
    )
    # NOTE: as above, a caller-supplied dict is updated in place
    self.acquisition_sampler_args = (
        {} if acquisition_sampler_args is None else acquisition_sampler_args
    )
    self.acquisition_sampler_args.update(
        {"patience": self.patience, "pipeline_space": self.pipeline_space}
    )
    self.acquisition_sampler = instance_from_map(
        AcquisitionSamplerMapping,
        acquisition_sampler,
        name="acquisition sampler function",
        kwargs=self.acquisition_sampler_args,
    )
    # number of acquisition-driven calls made by `get_config_and_ids`
    self.count = 0

get_config_and_ids #

get_config_and_ids() -> tuple[SearchSpace, str, str | None]

...and this is the method that decides which point to query.

RETURNS	DESCRIPTION
`tuple[SearchSpace, str, str \| None]`

Source code in neps/optimizers/multi_fidelity/dyhpo.py

def get_config_and_ids(
    self,
) -> tuple[SearchSpace, str, str | None]:
    """Choose the next configuration to evaluate and derive its identifiers.

    Three regimes are tried in order:

    1. While ``is_init_phase(budget_based=False)`` holds, a new configuration
       is sampled from the pipeline space and placed at the minimum budget.
    2. While ``is_init_phase(budget_based=True)`` holds, or after a failed
       model update, ``_randomly_promote`` picks the configuration.
    3. Otherwise the acquisition function is evaluated on the candidates
       produced by the acquisition sampler and the top-scoring one is taken.

    Returns:
        A 3-tuple of ``config.hp_values()``, the new ID formatted as
        ``"<config id>_<budget level>"``, and the ID of the same configuration
        one budget level lower (``None`` if the configuration is not among
        the seen config IDs).
    """
    if self.is_init_phase(budget_based=False):
        # the initial design is not full yet: draw a brand-new configuration
        self.logger.info("sampling...")
        config = self.pipeline_space.sample(
            patience=self.patience, user_priors=True, ignore_fidelity=False
        )
        config.fidelity.value = self.min_budget
        chosen_id = self.observed_configs.next_config_id()
    elif self.is_init_phase(budget_based=True) or self._model_update_failed:
        # enough configurations exist, but the initial design budget is not
        # used up (or the model could not be updated): promote at random
        self.logger.info("promoting...")
        config, chosen_id = self._randomly_promote()
    else:
        first_acquisition = self.count == 0
        if first_acquisition:
            self.logger.info("\nPartial learning curves as initial design:\n")
            self.logger.info(f"{self.observed_configs.get_learning_curves()}\n")
        self.count += 1
        # the initial design is over: let the acquisition function decide
        self.logger.info("acquiring...")
        # candidate pool; fidelities are either the observed ones or the minimum
        candidates = self.acquisition_sampler.sample(
            set_new_sample_fidelity=self.pipeline_space.fidelity.lower
        )
        # score the pool; `eligible` carries the fidelities to evaluate at
        scores, eligible = self.acquisition.eval(  # type: ignore[attr-defined]
            x=candidates, asscalar=True
        )
        # position of the highest acquisition value
        best_position = np.argsort(scores)[-1]
        # map that position back to a config ID
        chosen_id = candidates.index[eligible.index.values[best_position]]
        # NOTE: `candidates` holds every configuration from the sampler
        # (partial + new) while `eligible` drops those that already reached
        # the maximum epochs allowed, so their lengths may differ. Both share
        # the same index values: stick to `.loc`, never `.iloc`.

        # Open question kept from the original author:
        # is this "config = eligible.loc[chosen_id]"?
        config = candidates.loc[chosen_id]
        config.fidelity.value = eligible.loc[chosen_id].fidelity.value

    # assemble the "<config id>_<budget level>" identifiers
    if chosen_id in self.observed_configs.seen_config_ids:
        level = self.get_budget_level(config)
        config_id = f"{chosen_id}_{level}"
        previous_config_id = f"{chosen_id}_{level - 1}"
    else:
        config_id = f"{self.observed_configs.next_config_id()}_{self.get_budget_level(config)}"
        previous_config_id = None

    return config.hp_values(), config_id, previous_config_id

get_cost #

get_cost(result: str | dict | float) -> float | Any

Calls result.utils.get_cost() and passes the error handling through. Please use self.get_cost() instead of get_cost() in all optimizer classes.

Source code in neps/optimizers/base_optimizer.py

def get_cost(self, result: str | dict | float) -> float | Any:
    """Extract the cost from ``result`` using this optimizer's error settings.

    Delegates to the module-level ``_get_cost`` helper, forwarding
    ``self.cost_value_on_error`` and ``self.ignore_errors``. Optimizer classes
    should call ``self.get_cost()`` rather than the bare helper."""
    error_handling = {
        "cost_value_on_error": self.cost_value_on_error,
        "ignore_errors": self.ignore_errors,
    }
    return _get_cost(result, **error_handling)

get_learning_curve #

get_learning_curve(
    result: str | dict | float,
) -> float | Any

Calls result.utils.get_learning_curve() and passes the error handling through. Please use self.get_learning_curve() instead of get_learning_curve() in all optimizer classes.

Source code in neps/optimizers/base_optimizer.py

def get_learning_curve(self, result: str | dict | float) -> float | Any:
    """Extract the learning curve from ``result`` using this optimizer's error settings.

    Delegates to the module-level ``_get_learning_curve`` helper, forwarding
    ``self.learning_curve_on_error`` and ``self.ignore_errors``. Optimizer
    classes should call ``self.get_learning_curve()`` rather than the bare
    helper."""
    error_handling = {
        "learning_curve_on_error": self.learning_curve_on_error,
        "ignore_errors": self.ignore_errors,
    }
    return _get_learning_curve(result, **error_handling)

get_loss #

get_loss(result: str | dict | float) -> float | Any

Calls result.utils.get_loss() and passes the error handling through. Please use self.get_loss() instead of get_loss() in all optimizer classes.

Source code in neps/optimizers/base_optimizer.py

def get_loss(self, result: str | dict | float) -> float | Any:
    """Extract the loss from ``result`` using this optimizer's error settings.

    Delegates to the module-level ``_get_loss`` helper, forwarding
    ``self.loss_value_on_error`` and ``self.ignore_errors``. Optimizer classes
    should call ``self.get_loss()`` rather than the bare helper."""
    error_handling = {
        "loss_value_on_error": self.loss_value_on_error,
        "ignore_errors": self.ignore_errors,
    }
    return _get_loss(result, **error_handling)

load_results #

load_results(
    previous_results: dict[str, ConfigResult],
    pending_evaluations: dict[str, SearchSpace],
) -> None

This is basically the fit method.

PARAMETER	DESCRIPTION
`previous_results`	[description] TYPE: `dict[str, ConfigResult]`
`pending_evaluations`	[description] TYPE: `dict[str, SearchSpace]`

Source code in neps/optimizers/multi_fidelity/dyhpo.py

def load_results(
    self,
    previous_results: dict[str, ConfigResult],
    pending_evaluations: dict[str, SearchSpace],
) -> None:
    """Rebuild the optimizer state from stored results; effectively the fit step.

    Args:
        previous_results (dict[str, ConfigResult]): finished evaluations,
            handed to ``_load_previous_observations``.
        pending_evaluations (dict[str, SearchSpace]): evaluations still in
            flight, handed to ``_handle_pending_evaluations``.
    """
    # start from an empty table so nothing from an earlier call survives
    fresh_table = MFObservedData(
        columns=["config", "perf", "learning_curves"],
        index_names=["config_id", "budget_id"],
    )
    self.observed_configs = fresh_table

    # replay what a previous optimization run has already produced
    self._load_previous_observations(previous_results)
    self.total_fevals = len(previous_results) + len(pending_evaluations)

    # then register the evaluations that have not finished yet
    self._handle_pending_evaluations(pending_evaluations)

    # sorting is cosmetic, not something later steps rely on
    observed_df = self.observed_configs.df
    observed_df.sort_index(level=observed_df.index.names, inplace=True)

    # TODO: can we do better than keeping a copy of the observed configs?
    # TODO: can we not hide this in load_results and have something that pops out
    #   more, like a set_state or policy_args
    self.model_policy.observed_configs = self.observed_configs

    # models/surrogates are only fitted once the initial design is complete
    if not self.is_init_phase():
        self._fit_models()

total_budget_spent #

total_budget_spent() -> int | float

Calculates the total budget spent so far.

This is calculated as a function of the fidelity range provided, that takes into account the minimum budget and the step size.

Source code in neps/optimizers/multi_fidelity/dyhpo.py

def total_budget_spent(self) -> int | float:
    """Calculates the toal budget spent so far.

    This is calculated as a function of the fidelity range provided, that takes into
    account the minimum budget and the step size.
    """
    if len(self.observed_configs.df) == 0:
        return 0

    n_configs = len(self.observed_configs.seen_config_ids)
    total_budget_level = sum(self.observed_configs.seen_budget_levels)
    total_initial_budget_spent = n_configs * self.pipeline_space.fidelity.lower
    total_budget_spent = (
        total_initial_budget_spent + total_budget_level * self.step_size
    )

    return total_budget_spent