Samples a new set and returns the total set of observed + new configs.
Source code in neps/optimizers/bayesian_optimization/acquisition_samplers/freeze_thaw_sampler.py
| def sample(
self,
acquisition_function=None,
n: int = None,
set_new_sample_fidelity: int | float = None,
) -> list():
"""Samples a new set and returns the total set of observed + new configs."""
partial_configs = self.observations.get_partial_configs_at_max_seen()
new_configs = self._sample_new(
index_from=self.observations.next_config_id(), n=n, ignore_fidelity=False
)
def __sample_single_new_tabular(index: int):
"""
A function to use in a list comprehension to slightly speed up
the sampling process when self.SAMPLE_TO_DRAW is large
"""
config = self.pipeline_space.sample(
patience=self.patience, user_priors=False, ignore_fidelity=False
)
config["id"].value = _new_configs[index]
config.fidelity.value = set_new_sample_fidelity
return config
if self.is_tabular:
_n = n if n is not None else self.SAMPLES_TO_DRAW
_partial_ids = {conf["id"].value for conf in partial_configs}
_all_ids = set(self.pipeline_space.custom_grid_table.index.values)
# accounting for unseen configs only, samples remaining table if flag is set
max_n = len(_all_ids) + 1 if self.sample_full_table else _n
_n = min(max_n, len(_all_ids - _partial_ids))
_new_configs = np.random.choice(
list(_all_ids - _partial_ids), size=_n, replace=False
)
new_configs = [__sample_single_new_tabular(i) for i in range(_n)]
new_configs = pd.Series(
new_configs,
index=np.arange(
len(partial_configs), len(partial_configs) + len(new_configs)
),
)
elif set_new_sample_fidelity is not None:
for config in new_configs:
config.fidelity.value = set_new_sample_fidelity
# Deep copy configs for fidelity updates
partial_configs_list = []
index_list = []
for idx, config in partial_configs.items():
_config = deepcopy(config)
partial_configs_list.append(_config)
index_list.append(idx)
# We build a new series of partial configs to avoid
# incrementing fidelities multiple times due to pass-by-reference
partial_configs = pd.Series(partial_configs_list, index=index_list)
configs = pd.concat([partial_configs, new_configs])
return configs
|