Source code for dacbench.envs.function_approximation

"""Function Approximation Environment."""

from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np

from dacbench import AbstractMADACEnv

if TYPE_CHECKING:
    from dacbench.envs.env_utils.toy_functions import AbstractFunction


@dataclass
class FunctionApproximationInstance:
    """Function Approximation Instance."""

    functions: list[AbstractFunction]
    dimension_importances: list[float]
    discrete: list[bool]
    omit_instance_type: bool = False
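
# Illustrative sketch (not part of dacbench): one way an instance of the
# dataclass above might be built. ``ConstantTarget`` is a hypothetical
# stand-in for an ``AbstractFunction`` subclass; it only mirrors the
# interface this module actually uses (callable per step, plus an
# ``instance_description`` list whose first entry is a type identifier,
# which ``get_default_state`` drops when ``omit_instance_type`` is set).
def _example_instance():
    class ConstantTarget:
        """Hypothetical target function returning a fixed value."""

        def __init__(self, value):
            self.value = value
            # First entry: type identifier; rest: function parameters.
            self.instance_description = [0, value]

        def __call__(self, t):
            return self.value

    return FunctionApproximationInstance(
        functions=[ConstantTarget(0.3), ConstantTarget(0.7)],
        dimension_importances=[1.0, 0.5],
        discrete=[False, False],
        omit_instance_type=False,
    )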

class FunctionApproximationEnv(AbstractMADACEnv):
    """Function Approximation Environment."""

    def __init__(self, config):
        """Initialize Function Approximation Environment."""
        super().__init__(config)
        self.done = False
        self.get_reward = config.get("reward_function", self.get_default_reward)
        self.get_state = config.get("state_method", self.get_default_state)

    def reset(self, seed=None, options=None):
        """Reset environment."""
        if options is None:
            options = {}
        super().reset_(seed)
        self.functions = self.instance.functions
        self.discrete = self.instance.discrete
        self.omit_instance_type = self.instance.omit_instance_type
        self.last_action = None
        return self.get_state(self), {}

    def step(self, action):
        """Step function: check how close the action is to the target."""
        self.done = super().step_()

        # Apply action per dimension: measure how far the chosen value is
        # from the current target of each dimension's function.
        self.distances = []
        action_items = action.values() if isinstance(action, dict) else action
        for i, a in enumerate(action_items):
            target = self.functions[i](self.n_steps)
            value = a
            if self.discrete[i]:
                # Discrete dimensions: map the action index onto an
                # evenly spaced grid in [0, 1].
                value = np.linspace(0, 1, self.discrete[i])[int(a)]
            dim_distance = np.abs(target - value)
            if isinstance(dim_distance, np.ndarray):
                dim_distance = dim_distance.item()
            self.distances.append(dim_distance)

        # Remember the action for the state; unwrap one-element containers.
        self.last_action = list(action_items)
        for i in range(len(self.last_action)):
            if isinstance(self.last_action[i], list | np.ndarray):
                self.last_action[i] = self.last_action[i][0]

        # Scale per-dimension distances by their importance weights.
        self.weighted_distances = np.array(self.distances) * np.array(
            self.instance.dimension_importances
        )
        return (
            self.get_state(self),
            self.get_reward(self),
            False,
            self.done,
            {
                "raw_distances": self.distances,
                "weighted_distances": self.weighted_distances,
            },
        )

    def get_default_reward(self, _):
        """Get default reward: multiply weighted distances across dimensions."""
        r = np.prod(self.weighted_distances)
        return max(self.reward_range[0], min(self.reward_range[1], r))

    def get_sum_reward(self, _):
        """Get sum reward: negative sum of weighted distances."""
        r = -np.sum(self.weighted_distances)
        return max(self.reward_range[0], min(self.reward_range[1], r))

    def get_default_state(self, _):
        """Get default state representation."""
        remaining_budget = self.n_steps - self.c_step
        next_state = [int(remaining_budget)]
        for f in self.functions:
            instance_description = f.instance_description
            if self.omit_instance_type:
                # Drop the leading type identifier from the description.
                instance_description = instance_description[1:]
            next_state += instance_description
        if self.c_step == 0:
            # No action taken yet: pad with -1 per dimension.
            next_state += [-1 for _ in range(len(self.functions))]
        else:
            next_state = np.array(list(next_state) + list(self.last_action))
        return np.array(next_state).astype(float)

    def close(self):
        """Close environment."""
        del self.instance_set
        return True
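
# Illustrative usage sketch (assumptions: ``env`` is an already-constructed
# FunctionApproximationEnv, e.g. obtained from a dacbench benchmark, and it
# exposes a gymnasium-style ``action_space``). The loop mirrors the
# (state, reward, terminated, truncated, info) contract of ``step`` above;
# ``terminated`` is always False here, so only ``truncated`` ends the episode.
# To use the sum reward instead of the default product, one would pass
# ``"reward_function"`` in the config consumed by ``__init__``.
def _example_rollout(env: FunctionApproximationEnv):
    state, info = env.reset()
    truncated = False
    total_reward = 0.0
    while not truncated:
        action = env.action_space.sample()  # replace with a real policy
        state, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        # info carries per-dimension diagnostics:
        # info["raw_distances"], info["weighted_distances"]
    return total_reward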