Source code for dacbench.envs.cma_es

"""CMA ES Environment."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np
from modcma import ModularCMAES, Parameters

from dacbench import AbstractMADACEnv

if TYPE_CHECKING:
    from env_utils.toy_functions import AbstractFunction

BINARIES = {True: 1, False: 0}
STEP_SIZE_ADAPTATION = {
    "csa": 0,
    "tpa": 1,
    "msr": 2,
    "xnes": 3,
    "m-xnes": 4,
    "lp-xnes": 5,
    "psr": 6,
}
MIRRORED = {"None": 0, "mirrored": 1, "mirrored pairwise": 2}
BASE_SAMPLER = {"gaussian": 0, "sobol": 1, "halton": 2}
WEIGHTS_OPTION = {"default": 0, "equal": 1, "1/2^lambda": 2}
LOCAL_RESTART = {"None": 0, "IPOP": 1, "BIPOP": 2}
BOUND_CORRECTION = {
    "None": 0,
    "saturate": 1,
    "unif_resample": 2,
    "COTN": 3,
    "toroidal": 4,
    "mirror": 5,
}
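

# The lookup tables above flatten modcma's categorical hyperparameters into
# the integer encoding expected by ``Parameters.from_config_array``. A minimal
# sketch of the decoding step (illustrative only; an all-zero array picks the
# first entry of every table, e.g. csa, gaussian, no mirroring):
#
#     config = [0] * 11
#     params = Parameters.from_config_array(10, config)  # 10-dimensional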


@dataclass
class CMAESInstance:
    """CMA-ES Instance."""

    target_function: AbstractFunction
    dim: int
    fid: int
    iid: int
    active: bool
    elitist: bool
    orthogonal: bool
    sequential: bool
    threshold_convergence: bool
    step_size_adaptation: str
    mirrored: str
    base_sampler: str
    weights_option: str
    local_restart: str
    bound_correction: str
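

# A hypothetical construction sketch: ``CMAESInstance`` bundles the target
# function with one full modcma configuration. The string fields must be keys
# of the lookup tables above; ``sphere`` stands in for any AbstractFunction
# implementation and is not defined in this module:
#
#     instance = CMAESInstance(
#         target_function=sphere, dim=10, fid=1, iid=1,
#         active=False, elitist=False, orthogonal=False, sequential=False,
#         threshold_convergence=False, step_size_adaptation="csa",
#         mirrored="None", base_sampler="gaussian", weights_option="default",
#         local_restart="None", bound_correction="None",
#     )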
class CMAESEnv(AbstractMADACEnv):
    """The CMA-ES environment controls the step size on BBOB functions."""

    def __init__(self, config):
        """Initialize the environment."""
        super().__init__(config)
        self.es = None
        self.budget = config.budget
        self.total_budget = self.budget

        if not config.get("normalize_reward", False):
            self.get_reward = config.get("reward_function", self.get_default_reward)
        else:
            self.get_reward = config.get(
                "reward_function", self.get_normalized_reward
            )

        self.get_state = config.get("state_method", self.get_default_state)

    def _uniform_name(self, name):
        # Convert parameter names uniformly to lowercase,
        # separated with "_" and without leading numbers
        pattern = r"^\d+_"
        # Use re.sub to remove the leading number and underscore
        result = re.sub(pattern, "", name)
        return result.lower()
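
    # ``_uniform_name`` strips a numeric prefix such as "0_" and lowercases
    # the remainder, so hyperparameter names from differently numbered
    # configuration spaces map onto the same key (hypothetical inputs):
    #
    #     env._uniform_name("0_Active")     # -> "active"
    #     env._uniform_name("5_step_size")  # -> "step_size"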
    def reset(self, seed=None, options=None):
        """Reset the environment."""
        if options is None:
            options = {}
        super().reset_(seed)
        self.representation_dict = {
            "active": BINARIES[self.instance.active],
            "elitist": BINARIES[self.instance.elitist],
            "orthogonal": BINARIES[self.instance.orthogonal],
            "sequential": BINARIES[self.instance.sequential],
            "threshold_convergence": BINARIES[self.instance.threshold_convergence],
            "step_size_adaptation": STEP_SIZE_ADAPTATION[
                self.instance.step_size_adaptation
            ],
            "mirrored": MIRRORED[self.instance.mirrored],
            "base_sampler": BASE_SAMPLER[self.instance.base_sampler],
            "weights_option": WEIGHTS_OPTION[self.instance.weights_option],
            "local_restart": LOCAL_RESTART[self.instance.local_restart],
            "bound_correction": BOUND_CORRECTION[self.instance.bound_correction],
        }
        self.objective = self.instance.target_function
        self.es = ModularCMAES(
            self.objective,
            parameters=Parameters.from_config_array(
                self.instance.dim,
                np.array(list(self.representation_dict.values())).astype(int),
            ),
        )
        return self.get_state(self), {}
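
    # ``reset`` re-encodes the instance as an 11-entry integer array in the
    # fixed key order of ``representation_dict``. For example (values taken
    # from the lookup tables above), an instance with
    # step_size_adaptation="tpa" and mirrored="mirrored", everything else at
    # its default, encodes as:
    #
    #     [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0]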
    def step(self, action):
        """Make one step of the environment."""
        truncated = super().step_()

        # Get all action values and uniform names
        complete_action = {}
        if isinstance(action, dict):
            for hp in action:
                n_name = self._uniform_name(hp)
                if n_name == "step_size":
                    # Step size is set separately
                    self.es.parameters.sigma = action[hp][0]
                else:
                    # Save parameter values from actions
                    complete_action[n_name] = action[hp]

            # Complete the given action with defaults
            for default in self.representation_dict:
                if default == "step_size":
                    continue
                if default not in complete_action:
                    complete_action[default] = self.representation_dict[default]
            complete_action = complete_action.values()
        else:
            raise ValueError("Action must be a Dict")

        new_parameters = Parameters.from_config_array(
            self.instance.dim, complete_action
        )
        self.es.parameters.update(
            {m: getattr(new_parameters, m) for m in Parameters.__modules__}
        )

        terminated = not self.es.step()
        return self.get_state(self), self.get_reward(self), terminated, truncated, {}
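
    # Sketch of one controlled step (hypothetical action; "step_size" is
    # written to sigma directly, every other hyperparameter falls back to the
    # defaults recorded in ``representation_dict``):
    #
    #     action = {"step_size": [0.5]}
    #     state, reward, terminated, truncated, info = env.step(action)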
    def close(self):
        """Closes the environment."""
        return True
    def get_default_reward(self, *_):
        """The default reward function.

        Args:
            _ (_type_): Empty parameter, which can be used when overriding

        Returns:
            float: The calculated reward
        """
        return max(
            self.reward_range[0], min(self.reward_range[1], -self.es.parameters.fopt)
        )
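
    # The default reward is the negated best objective value found so far
    # (``-fopt``), clipped to ``self.reward_range``, so minimizing the
    # objective maximizes the reward.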
    def get_normalized_reward(self, *_):
        """Normalize each reward within domain bounds.

        Args:
            _ (_type_): Empty parameter, which can be used when overriding

        Returns:
            float: The calculated reward
        """
        obj_min, obj_max = self.objective.fmin, 0
        current_reward = -self.es.parameters.fopt
        norm_reward = (current_reward - obj_min) / (obj_max - obj_min)
        return max(self.reward_range[0], min(self.reward_range[1], norm_reward))
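
    # Worked example of the normalization (assumed values): with the known
    # optimum fmin = -10 and a best-so-far fopt = 5, the raw reward is -5 and
    # the normalized reward is (-5 - (-10)) / (0 - (-10)) = 0.5 before
    # clipping to ``self.reward_range``.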
    def get_default_state(self, *_):
        """Default state function.

        Args:
            _ (_type_): Empty parameter, which can be used when overriding

        Returns:
            np.array: The current state
        """
        return np.array(
            [
                self.es.parameters.lambda_,
                self.es.parameters.sigma,
                self.budget - self.es.parameters.used_budget,
                self.instance.fid,
                self.instance.iid,
            ]
        )
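
    # The default observation is a 5-element vector: population size lambda,
    # current step size sigma, remaining evaluation budget, and the BBOB
    # function and instance ids.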
    def render(self, mode="human"):
        """Render progress."""
        raise NotImplementedError("CMA-ES does not support rendering at this point")