Source code for smac.utils.pareto_front

from __future__ import annotations

import numpy as np
from ConfigSpace import Configuration

from smac.runhistory import RunHistory
from smac.runhistory.dataclasses import InstanceSeedBudgetKey


def _get_costs(
    runhistory: RunHistory,
    configs: list[Configuration],
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]],
) -> np.ndarray:
    """Returns the costs of the passed configurations.

    Parameters
    ----------
    runhistory : RunHistory
        The runhistory containing the passed configs.
    configs : list[Configuration]
        The configs for which the costs should be returned.
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]]
        The instance-seed budget keys for the configs for which the costs should be returned.

    Returns
    -------
    costs : np.ndarray[n_points, n_objectives]
        Costs of the given configs.
    """
    assert len(configs) == len(config_instance_seed_budget_keys)

    # Now we get the costs for the trials of the config
    average_costs = []

    for config, isb_keys in zip(configs, config_instance_seed_budget_keys):
        # Since we use multiple seeds, we have to average them to get only one cost value pair for each
        # configuration
        # However, we only want to consider the config trials
        # Average cost is a list of floats (one for each objective)
        average_cost = runhistory.average_cost(config, isb_keys, normalize=False)
        average_costs += [average_cost]

    # Let's work with a numpy array for efficiency
    return np.vstack(average_costs)



[docs]
def calculate_pareto_front(
    runhistory: RunHistory,
    configs: list[Configuration],
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]],
) -> list[Configuration]:
    """Compares the passed configurations and returns only the ones on the pareto front.

    Parameters
    ----------
    runhistory : RunHistory
        The runhistory containing the given configurations.
    configs : list[Configuration]
        The configurations from which the Pareto front should be computed.
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]]
        The instance-seed budget keys for the configurations on the basis of which the Pareto front should be computed.

    Returns
    -------
    pareto_front : list[Configuration]
        The pareto front computed from the given configurations.
    """
    costs = _get_costs(runhistory, configs, config_instance_seed_budget_keys)

    # The following code is an efficient pareto front implementation
    is_efficient = np.arange(costs.shape[0])
    next_point_index = 0  # Next index in the is_efficient array to search for
    while next_point_index < len(costs):
        nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1)
        nondominated_point_mask[next_point_index] = True
        is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
        costs = costs[nondominated_point_mask]
        next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1

    new_incumbents = [configs[i] for i in is_efficient]
    return new_incumbents




[docs]
def sort_by_crowding_distance(
    runhistory: RunHistory,
    configs: list[Configuration],
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]],
) -> list[Configuration]:
    """Sorts the passed configurations by their crowding distance. Taken from
    https://github.com/anyoptimization/pymoo/blob/20abef1ade71915352217400c11ece4c2f35163e/pymoo/algorithms/nsga2.py


    Parameters
    ----------
    runhistory : RunHistory
        The runhistory containing the given configurations.
    configs : list[Configuration]
        The configurations which should be sorted.
    config_instance_seed_budget_keys: list[list[InstanceSeedBudgetKey]]
        The instance-seed budget keys for the configurations which should be sorted.

    Returns
    -------
    sorted_list : list[Configuration]
        Configurations sorted by crowding distance.
    """
    F = _get_costs(runhistory, configs, config_instance_seed_budget_keys)
    infinity = 1e14

    n_points = F.shape[0]
    n_obj = F.shape[1]

    if n_points <= 2:
        # distances = np.full(n_points, infinity)
        return configs
    else:
        # Sort each column and get index
        I = np.argsort(F, axis=0, kind="mergesort")  # noqa

        # Now really sort the whole array
        F = F[I, np.arange(n_obj)]

        # get the distance to the last element in sorted list and replace zeros with actual values
        dist = np.concatenate([F, np.full((1, n_obj), np.inf)]) - np.concatenate([np.full((1, n_obj), -np.inf), F])

        index_dist_is_zero = np.where(dist == 0)

        dist_to_last = np.copy(dist)
        for i, j in zip(*index_dist_is_zero):
            dist_to_last[i, j] = dist_to_last[i - 1, j]

        dist_to_next = np.copy(dist)
        for i, j in reversed(list(zip(*index_dist_is_zero))):
            dist_to_next[i, j] = dist_to_next[i + 1, j]

        # Normalize all the distances
        norm = np.max(F, axis=0) - np.min(F, axis=0)
        norm[norm == 0] = np.nan
        dist_to_last, dist_to_next = dist_to_last[:-1] / norm, dist_to_next[1:] / norm

        # If we divided by zero because all values in one columns are equal replace by none
        dist_to_last[np.isnan(dist_to_last)] = 0.0
        dist_to_next[np.isnan(dist_to_next)] = 0.0

        # Sum up the distance to next and last and norm by objectives - also reorder from sorted list
        J = np.argsort(I, axis=0)
        crowding = np.sum(dist_to_last[J, np.arange(n_obj)] + dist_to_next[J, np.arange(n_obj)], axis=1) / n_obj

    # Replace infinity with a large number
    crowding[np.isinf(crowding)] = infinity
    config_with_crowding = [(config, v) for config, v in zip(configs, crowding)]
    config_with_crowding = sorted(config_with_crowding, key=lambda x: x[1], reverse=True)

    return [c for c, _ in config_with_crowding]
SMAC3 Documentation

Source code for smac.utils.pareto_front