NePS random search

This module implements a simple random search optimizer for a NePS pipeline. It samples configurations randomly from the pipeline's domain and environment values.

NePSComplexRandomSearch `dataclass` #

NePSComplexRandomSearch(
    pipeline: PipelineSpace,
    ignore_fidelity: (
        bool | Literal["highest_fidelity"]
    ) = False,
)

A complex random search optimizer for a NePS pipeline. It samples configurations randomly from the pipeline's domain and environment values, and also performs mutations and crossovers based on previous successful trials.

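PARAMETER	DESCRIPTION
`pipeline`	The pipeline to optimize, which should be a `PipelineSpace` object. TYPE: `PipelineSpace`
`ignore_fidelity`	How fidelities defined by the pipeline are handled. With `False`, sampling raises a `ValueError` if the pipeline defines any fidelity; with `True`, fidelity values are sampled randomly between their lower and upper bounds; with `"highest_fidelity"`, every fidelity is set to its upper bound. TYPE: `bool \| Literal["highest_fidelity"]` DEFAULT: `False`

RAISES	DESCRIPTION
`ValueError`	If the pipeline is not a `PipelineSpace` object.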

Source code in neps/optimizers/neps_random_search.py

def __init__(
    self,
    pipeline: PipelineSpace,
    ignore_fidelity: bool | Literal["highest_fidelity"] = False,  # noqa: FBT002
):
    """Set up the optimizer state and the samplers it draws from.

    Args:
        pipeline: The pipeline space to optimize. It is stored as-is; no
            validation is performed here.
        ignore_fidelity: Stored on the instance and consulted by
            `sampled_fidelity_values`. `False` makes sampling raise when the
            pipeline defines fidelities, `True` samples fidelity values
            randomly within their bounds, and `"highest_fidelity"` pins every
            fidelity to its upper bound.
    """
    self._pipeline = pipeline
    self.ignore_fidelity = ignore_fidelity

    # One plain random sampler is shared: it is used directly and also serves
    # as the fallback of both prior-aware samplers below.
    base_sampler = RandomSampler(predefined_samplings={})
    self._random_sampler = base_sampler

    # Variant constructed with `always_use_prior=True`.
    self._try_always_priors_sampler = PriorOrFallbackSampler(
        fallback_sampler=base_sampler,
        always_use_prior=True,
    )
    # Variant left at the sampler's own default prior behaviour.
    self._sometimes_priors_sampler = PriorOrFallbackSampler(
        fallback_sampler=base_sampler,
    )

    # How many of the best successful trials are used as mutation/crossover
    # parents in `__call__`.
    self._n_top_trials = 5

__call__ #

__call__(
    trials: Mapping[str, Trial],
    budget_info: BudgetInfo | None,
    n: int | None = None,
) -> SampledConfig | list[SampledConfig]

Sample configurations randomly from the pipeline's domain and environment values, and also perform mutations and crossovers based on previous successful trials.

PARAMETER	DESCRIPTION
`trials`	A mapping of trial IDs to Trial objects, representing previous trials. TYPE: `Mapping[str, Trial]`
`budget_info`	The budget information for the optimization process. TYPE: `BudgetInfo \| None`
`n`	The number of configurations to sample. If None, a single configuration will be sampled. TYPE: `int \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`SampledConfig \| list[SampledConfig]`	A SampledConfig object or a list of SampledConfig objects, depending on the value of n.

RAISES	DESCRIPTION
`ValueError`	If the pipeline is not a Pipeline object or if the trials are not a valid mapping of trial IDs to Trial objects.

Source code in neps/optimizers/neps_random_search.py

def __call__(
    self,
    trials: Mapping[str, trial_state.Trial],
    budget_info: optimizer_state.BudgetInfo | None,
    n: int | None = None,
) -> optimizer.SampledConfig | list[optimizer.SampledConfig]:
    """Sample new configurations from a pool of random, prior-based, mutated
    and crossed-over candidates.

    A candidate pool is built from (a) purely random samples, (b) samples
    that sometimes use priors, and, if there are successful previous trials,
    (c) mutations of the best trials and (d) crossovers between pairs of
    them. `n` candidates are then drawn at random from that pool. On the
    very first call (no previous trials) the first chosen candidate is
    replaced by one sampled with the always-use-prior sampler.

    Args:
        trials: A mapping of trial IDs to Trial objects, representing previous
            trials.
        budget_info: The budget information for the optimization process.
            It is not used by this optimizer.
        n: The number of configurations to sample. If None, a single
            configuration will be sampled.

    Returns:
        The result of `_prepare_sampled_configs`: a single SampledConfig when
            `n` is None, otherwise a list of SampledConfig objects.

    Raises:
        ValueError: Propagated from `sampled_fidelity_values` when the
            pipeline defines fidelities and `ignore_fidelity` is falsy.
    """
    n_prev_trials = len(trials)
    n_requested = 1 if n is None else n
    return_single = n is None

    def _resolve_with(domain_sampler):
        # Resolve the pipeline once with the given sampler, drawing fresh
        # fidelity values for every candidate.
        return resolve(
            pipeline=self._pipeline,
            domain_sampler=domain_sampler,
            environment_values=self.sampled_fidelity_values(),
        )

    def _random_change_count(config) -> int:
        # Pick between 1 and half of the config's entries. The upper bound is
        # clamped to 1 because `random.randint(1, 0)` raises ValueError, which
        # previously happened for configs with fewer than two entries.
        return random.randint(1, max(1, len(config) // 2))

    def _objective_key(trial) -> float:
        # Rank by scalar objective; anything that is not a real number
        # (None, lists, ...) is pushed to the end. Integers are accepted so
        # that e.g. an objective of `0` is not ranked as the worst trial.
        value = trial.report.objective_to_minimize if trial.report else None
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        return float("inf")

    # Each candidate family contributes this many samples per parent/sampler.
    n_per_family = n_requested * 5

    random_pipelines = [
        _resolve_with(self._random_sampler) for _ in range(n_per_family)
    ]
    sometimes_priors_pipelines = [
        _resolve_with(self._sometimes_priors_sampler) for _ in range(n_per_family)
    ]

    mutated_incumbents = []
    crossed_over_incumbents = []

    successful_trials: list[Trial] = [
        trial
        for trial in trials.values()
        if trial.report is not None
        and trial.report.reported_as == trial.State.SUCCESS
    ]
    if successful_trials:
        # NOTE(review): this resets the attribute on every call, overriding
        # any value assigned after construction -- confirm that is intended.
        self._n_top_trials = 5
        top_trials = heapq.nsmallest(
            self._n_top_trials,
            successful_trials,
            key=_objective_key,
        )  # Will have up to `self._n_top_trials` items.

        # Do some mutations.
        for top_trial in top_trials:
            top_trial_config = top_trial.config

            # Mutate by resampling around some values of the original config:
            # first exactly one value, then a random number of values.
            mutated_incumbents += [
                _resolve_with(
                    MutatateUsingCentersSampler(
                        predefined_samplings=top_trial_config,
                        n_mutations=1,
                    )
                )
                for _ in range(n_per_family)
            ]
            mutated_incumbents += [
                _resolve_with(
                    MutatateUsingCentersSampler(
                        predefined_samplings=top_trial_config,
                        n_mutations=_random_change_count(top_trial_config),
                    )
                )
                for _ in range(n_per_family)
            ]

            # Mutate by completely forgetting some values of the original
            # config: again one value first, then a random number of values.
            mutated_incumbents += [
                _resolve_with(
                    MutateByForgettingSampler(
                        predefined_samplings=top_trial_config,
                        n_forgets=1,
                    )
                )
                for _ in range(n_per_family)
            ]
            mutated_incumbents += [
                _resolve_with(
                    MutateByForgettingSampler(
                        predefined_samplings=top_trial_config,
                        n_forgets=_random_change_count(top_trial_config),
                    )
                )
                for _ in range(n_per_family)
            ]

        # Do some crossovers.
        if len(top_trials) > 1:
            for _ in range(n_requested * 3):
                trial_1, trial_2 = random.sample(top_trials, k=2)

                # Attempt the crossover in both parent orders.
                for first, second in ((trial_1, trial_2), (trial_2, trial_1)):
                    try:
                        crossover_sampler = CrossoverByMixingSampler(
                            predefined_samplings_1=first.config,
                            predefined_samplings_2=second.config,
                            prefer_first_probability=0.5,
                        )
                    except CrossoverNotPossibleError:
                        # A crossover was not possible for them. Do nothing.
                        continue
                    crossed_over_incumbents.append(
                        _resolve_with(crossover_sampler)
                    )

    all_sampled_pipelines = [
        *random_pipelines,
        *sometimes_priors_pipelines,
        *mutated_incumbents,
        *crossed_over_incumbents,
    ]

    # Here we can have a model which picks from all the sampled pipelines.
    # Currently, we just pick randomly from them.
    chosen_pipelines = random.sample(all_sampled_pipelines, k=n_requested)

    if n_prev_trials == 0 and chosen_pipelines:
        # In this case, always include the prior pipeline. The emptiness
        # check avoids an IndexError when zero configurations were requested.
        chosen_pipelines[0] = _resolve_with(self._try_always_priors_sampler)

    return _prepare_sampled_configs(chosen_pipelines, n_prev_trials, return_single)

import_trials #

import_trials(
    external_evaluations: Sequence[
        tuple[Mapping[str, Any], UserResultDict]
    ],
    trials: Mapping[str, Trial],
) -> list[ImportedConfig]

Import external evaluations as trials.

PARAMETER	DESCRIPTION
`external_evaluations`	A sequence of tuples containing configuration dictionaries and their corresponding results. TYPE: `Sequence[tuple[Mapping[str, Any], UserResultDict]]`
`trials`	A mapping of trial IDs to Trial objects, representing previous trials. TYPE: `Mapping[str, Trial]`

RETURNS	DESCRIPTION
`list[ImportedConfig]`	A list of ImportedConfig objects representing the imported trials.

Source code in neps/optimizers/neps_random_search.py

def import_trials(
    self,
    external_evaluations: Sequence[tuple[Mapping[str, Any], UserResultDict]],
    trials: Mapping[str, Trial],
) -> list[optimizer.ImportedConfig]:
    """Wrap externally evaluated configurations as imported configs.

    Each evaluation receives a stringified numeric ID that continues counting
    after the number of existing trials: the first imported config gets
    `len(trials) + 1`, the next one `len(trials) + 2`, and so on.

    Args:
        external_evaluations: Pairs of (configuration mapping, result) to
            import, in the order in which IDs should be assigned.
        trials: The trials known so far; only their count is used.

    Returns:
        One ImportedConfig per external evaluation, in input order.
    """
    first_new_id = len(trials) + 1
    return [
        optimizer.ImportedConfig(
            config=evaluated_config,
            id=str(numeric_id),
            result=reported_result,
        )
        for numeric_id, (evaluated_config, reported_result) in enumerate(
            external_evaluations, start=first_new_id
        )
    ]

sampled_fidelity_values #

sampled_fidelity_values() -> dict[str, float | int]

Sample fidelity values based on the pipeline's fidelity attributes.

RETURNS	DESCRIPTION
`dict[str, float \| int]`	A dictionary mapping fidelity names to their sampled values.

Source code in neps/optimizers/neps_random_search.py

def sampled_fidelity_values(self) -> dict[str, float | int]:
    """Sample fidelity values based on the pipeline's fidelity attributes.

    Returns:
        A dictionary mapping fidelity names to their sampled values.
    """
    environment_values = {}
    fidelity_attrs = self._pipeline.fidelity_attrs
    for fidelity_name, fidelity_obj in fidelity_attrs.items():
        if self.ignore_fidelity == "highest_fidelity":
            environment_values[fidelity_name] = fidelity_obj.upper
        elif not self.ignore_fidelity:
            raise ValueError(
                "ComplexRandomSearch does not support fidelities by default."
                "Consider using a different optimizer or setting"
                " `ignore_fidelity=True` or `highest_fidelity`."
            )
        # Sample randomly from the fidelity bounds.
        elif isinstance(fidelity_obj.domain, Integer):
            assert isinstance(fidelity_obj.lower, int)
            assert isinstance(fidelity_obj.upper, int)
            environment_values[fidelity_name] = random.randint(
                fidelity_obj.lower, fidelity_obj.upper
            )
        elif isinstance(fidelity_obj.domain, Float):
            environment_values[fidelity_name] = random.uniform(
                fidelity_obj.lower, fidelity_obj.upper
            )
    return environment_values