"""Abstract Environment."""
from __future__ import annotations
import random
from abc import ABC, abstractmethod
import gymnasium as gym
import numpy as np
from gymnasium.utils import seeding
[docs]
class AbstractEnv(ABC, gym.Env):
    """Abstract template for environments.

    Handles the bookkeeping shared by all environments: the training and
    (optional) test instance sets, the step counter and cutoff, construction
    of observation and action spaces from the configuration, and seeding.
    Subclasses implement ``step`` and ``reset``.
    """

    def __init__(self, config):
        """Initialize environment.

        Parameters
        ----------
        config : dict
            Environment configuration. The keys ``instance_set``,
            ``benchmark_info``, ``cutoff`` and ``reward_range`` are required.
            The observation space is taken from ``observation_space`` or
            built from ``observation_space_class`` /
            ``observation_space_args`` / ``observation_space_type``.
            The action space is derived from ``config_space`` if present,
            else taken from ``action_space``, else built from
            ``action_space_class`` / ``action_space_args``.
            Optional keys: ``instance_update_func`` (default
            ``"round_robin"``), ``test_set``, ``seed`` and
            ``seed_action_space``.
        """
        super().__init__()
        self.config = config
        self.instance_updates = self.config.get("instance_update_func", "round_robin")

        # Training instances; the first ID in sorted order is active initially.
        self.instance_set = config["instance_set"]
        self.instance_id_list = sorted(self.instance_set.keys())
        self.instance_index = 0
        self.inst_id = self.instance_id_list[self.instance_index]
        self.instance = self.instance_set[self.inst_id]

        self.test = False
        if "test_set" in self.config:
            self.test_set = config["test_set"]
            self.test_instance_id_list = sorted(self.test_set.keys())
            self.test_instance_index = 0
            self.test_inst_id = self.test_instance_id_list[self.test_instance_index]
            self.test_instance = self.test_set[self.test_inst_id]
            # Remember the training state so it can be restored after testing.
            self.training_set = self.instance_set
            self.training_id_list = self.instance_id_list
            self.training_inst_id = self.inst_id
            self.training_instance = self.instance
        else:
            self.test_set = None

        self.benchmark_info = config["benchmark_info"]
        self.initial_seed = None
        self.np_random = None

        self.n_steps = config["cutoff"]
        self.c_step = 0

        self.reward_range = config["reward_range"]

        self.observation_space = self._build_observation_space(config)
        # TODO: use dicts by default for actions and observations
        # The config could change this for RL purposes
        self.action_space = self._build_action_space(config)

        # seeding the environment after initialising action space
        self.seed(config.get("seed", None), config.get("seed_action_space", False))

    @staticmethod
    def _build_observation_space(config):
        """Return the observation space described by ``config``."""
        if "observation_space" in config:
            return config["observation_space"]

        if config["observation_space_class"] != "Dict":
            try:
                return getattr(gym.spaces, config["observation_space_class"])(
                    *config["observation_space_args"],
                    dtype=config["observation_space_type"],
                )
            except KeyError as err:
                print(
                    "Either submit a predefined gym.space 'observation_space' or an "
                    "'observation_space_class' as well as a list of "
                    "'observation_space_args' and the 'observation_space_type' "
                    "in the configuration."
                )
                print("Tuple observation_spaces are currently not supported.")
                raise KeyError from err

        try:
            return getattr(gym.spaces, config["observation_space_class"])(
                *config["observation_space_args"]
            )
        except AssertionError as err:
            print(
                "To use a Dict observation space, the 'observation_space_args' in "
                "the configuration should be a list containing a Dict of gym.Spaces"
            )
            raise TypeError from err

    @classmethod
    def _build_action_space(cls, config):
        """Return the action space described by ``config``."""
        if "config_space" in config:
            return cls._action_space_from_config_space(config["config_space"])

        if "action_space" in config:
            return config["action_space"]

        try:
            return getattr(gym.spaces, config["action_space_class"])(
                *config["action_space_args"]
            )
        except KeyError as err:
            print(
                "Either submit a predefined gym.space 'action_space' or an "
                "'action_space_class' as well as a list of 'action_space_args'"
                " in the configuration"
            )
            raise KeyError from err
        except TypeError as err:
            print("Tuple and Dict action spaces are currently not supported")
            raise TypeError from err

    @classmethod
    def _action_space_from_config_space(cls, config_space):
        """Translate the hyperparameters of ``config_space`` into a gym space.

        Hyperparameters are classified by their class name: names containing
        ``Float`` become ``Box`` dimensions, names containing ``Integer`` or
        ``Categorical`` become discrete dimensions. If all hyperparameters
        share one class the result is a ``Box``, ``Discrete`` or
        ``MultiDiscrete`` space; otherwise a ``Dict`` keyed by hyperparameter
        name.
        """
        hyperparameters = list(config_space.values())
        type_names = [type(hp).__name__ for hp in hyperparameters]

        # Uniform action space
        if all(name == type_names[0] for name in type_names):
            kind = type_names[0]
            if "Float" in kind:
                low = np.array([hp.lower for hp in hyperparameters])
                high = np.array([hp.upper for hp in hyperparameters])
                return gym.spaces.Box(low=low, high=high)
            if "Integer" in kind or "Categorical" in kind:
                sizes = [cls._discrete_size(hp) for hp in hyperparameters]
                if len(sizes) == 1:
                    return gym.spaces.Discrete(sizes[0])
                return gym.spaces.MultiDiscrete(np.array(sizes))
            raise ValueError(
                "Only float, integer and categorical hyperparameters "
                "are supported as of now"
            )

        # Mixed action space
        subspaces = {}
        for name, hp in zip(type_names, hyperparameters):
            if "Float" in name:
                subspaces[hp.name] = gym.spaces.Box(
                    low=hp.lower, high=hp.upper, dtype=np.float32
                )
            elif "Integer" in name or "Categorical" in name:
                subspaces[hp.name] = gym.spaces.Discrete(cls._discrete_size(hp))
            else:
                raise ValueError(
                    "Only float, integer and categorical hyperparameters "
                    "are supported as of now"
                )
        return gym.spaces.Dict(subspaces)

    @staticmethod
    def _discrete_size(hyperparameter):
        """Return the number of discrete actions for one hyperparameter.

        Uses ``upper - lower`` when the hyperparameter has numeric bounds and
        falls back to the number of ``choices`` otherwise.
        """
        # NOTE(review): if both bounds are inclusive this excludes one value;
        # kept as-is because environments may rely on the current sizes.
        try:
            return hyperparameter.upper - hyperparameter.lower
        except (AttributeError, TypeError):
            # No usable numeric bounds: treat as a categorical hyperparameter.
            return len(hyperparameter.choices)

    def step_(self):
        """Pre-step function for step count and cutoff.

        Returns
        -------
        bool
            True once the step counter has reached the cutoff ``n_steps``
        """
        self.c_step += 1
        truncated = False
        if self.c_step >= self.n_steps:
            truncated = True
        return truncated

    def reset_(
        self, seed=0, options=None, instance=None, instance_id=None, scheme=None
    ):
        """Pre-reset function for progressing through the instance set.

        Will either use round robin, random or no progression scheme.

        Parameters
        ----------
        seed
            Seed for the environment; reseeding is skipped only if this is
            None. NOTE(review): the default of 0 reseeds on every call that
            does not pass a seed - confirm this is intended.
        options : dict, optional
            May provide ``scheme``, ``instance`` and ``instance_id``; each is
            only used when the matching argument is None
        instance
            Instance to use directly
        instance_id
            ID of the instance to switch to
        scheme
            Update scheme for this reset, defaults to ``instance_updates``
        """
        if options is None:
            options = {}
        if seed is not None:
            self.seed(seed, self.config.get("seed_action_space", False))
        self.c_step = 0

        # Explicit arguments take precedence over entries in ``options``.
        if scheme is None:
            scheme = options.get("scheme", self.instance_updates)
        if instance is None:
            instance = options.get("instance", None)
        if instance_id is None:
            instance_id = options.get("instance_id", None)
        self.use_next_instance(instance, instance_id, scheme=scheme)

    def use_next_instance(self, instance=None, instance_id=None, scheme=None):
        """Change instance according to the chosen instance progression.

        ``instance`` takes precedence over ``instance_id``, which takes
        precedence over ``scheme``. Any other scheme value keeps the current
        instance.

        Parameters
        ----------
        instance
            Instance specification for potential new instances
        instance_id
            ID of the instance to switch to
        scheme
            Update scheme for this progression step
            (either round robin, random or no progression)
        """
        if instance is not None:
            self.instance = instance
        elif instance_id is not None:
            self.inst_id = instance_id
            self.instance = self.instance_set[self.inst_id]
        elif scheme == "round_robin":
            self.instance_index = (self.instance_index + 1) % len(self.instance_id_list)
            self.inst_id = self.instance_id_list[self.instance_index]
            self.instance = self.instance_set[self.inst_id]
        elif scheme == "random":
            # NOTE(review): this generator is freshly created and unseeded, so
            # the choice is independent of the environment seed.
            rng = np.random.default_rng()
            self.inst_id = rng.choice(self.instance_id_list)
            self.instance = self.instance_set[self.inst_id]

    @abstractmethod
    def step(self, action):
        """Execute environment step.

        Parameters
        ----------
        action
            Action to take

        Returns
        -------
        state
            Environment state
        reward
            Environment reward
        terminated : bool
            Run finished flag
        truncated : bool
            Run timed out flag
        info : dict
            Additional metainfo
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self, seed: int | None = None):
        """Reset environment.

        Parameters
        ----------
        seed
            Seed for the environment

        Returns
        -------
        state
            Environment state
        info : dict
            Additional metainfo
        """
        raise NotImplementedError

    def get_inst_id(self):
        """Return instance ID.

        Returns
        -------
        ID of the current instance
        """
        return self.inst_id

    def get_instance_set(self):
        """Return instance set.

        Returns
        -------
        The instance set currently in use
        """
        return self.instance_set

    def get_instance(self):
        """Return current instance.

        Returns
        -------
        type flexible: Currently used instance
        """
        return self.instance

    def set_inst_id(self, inst_id):
        """Change current instance ID.

        Also moves the round robin position to that ID. The current instance
        itself is not changed.

        Parameters
        ----------
        inst_id
            New instance ID; must be contained in the current ID list
        """
        self.inst_id = inst_id
        self.instance_index = self.instance_id_list.index(self.inst_id)

    def set_instance_set(self, inst_set):
        """Change instance set.

        Parameters
        ----------
        inst_set
            New instance set, a mapping from instance ID to instance
        """
        self.instance_set = inst_set
        self.instance_id_list = sorted(self.instance_set.keys())

    def set_instance(self, instance):
        """Change currently used instance.

        Parameters
        ----------
        instance
            New instance
        """
        self.instance = instance

    def seed(self, seed=None, seed_action_space=False):
        """Set rng seed.

        Seeds Python's ``random`` module and the environment's ``np_random``
        generator, and optionally the action space.

        Parameters
        ----------
        seed
            seed for rng; drawn at random if None
        seed_action_space : bool, default False
            if to seed the action space as well

        Returns
        -------
        list
            Single-element list with the seed reported by
            ``seeding.np_random``
        """
        if seed is None:
            seed = np.random.default_rng().integers(0, 2**32 - 1)
        self.initial_seed = seed

        # maybe one should use the seed generated by seeding.np_random(seed)
        # but it can be to large see issue https://github.com/openai/gym/issues/2210
        random.seed(int(seed))
        self.np_random, seed = seeding.np_random(int(seed))
        # uses the uncorrelated seed from seeding but makes sure that no
        # randomness is introduced.
        if seed_action_space:
            self.action_space.seed(seed)
        return [seed]

    def use_test_set(self):
        """Change to test instance set.

        Raises
        ------
        ValueError
            If no test set was provided in the configuration
        """
        if self.test_set is None:
            raise ValueError(
                "No test set was provided, please check your benchmark config."
            )
        if self.test:
            # Already on the test set: swapping again would store the test
            # set as the training set and lose the real training instances.
            return

        self.test = True
        self.training_set = self.instance_set
        self.training_id_list = self.instance_id_list
        self.training_inst_id = self.inst_id
        self.training_instance = self.instance

        self.instance_set = self.test_set
        self.instance_id_list = self.test_instance_id_list
        self.inst_id = self.test_inst_id
        self.instance = self.test_instance

    def use_training_set(self):
        """Change to training instance set."""
        if not self.test:
            # Already on the training set: swapping again would store the
            # training set as the test set (and no training state has been
            # saved if no test set was configured).
            return

        self.test = False
        self.test_set = self.instance_set
        self.test_instance_id_list = self.instance_id_list
        self.test_inst_id = self.inst_id
        self.test_instance = self.instance

        self.instance_set = self.training_set
        self.instance_id_list = self.training_id_list
        self.inst_id = self.training_inst_id
        self.instance = self.training_instance
[docs]
class AbstractMADACEnv(AbstractEnv):
    """Multi-Agent version of DAC environment.

    When ``multi_agent`` is enabled in the configuration, every dimension of
    the action space is controlled by its own agent. Agents act one after
    another; the underlying environment is only stepped once the last
    registered agent has submitted its action.
    """

    def __init__(self, config):
        """Initialize environment.

        Parameters
        ----------
        config : dict
            Environment configuration. In addition to the keys used by
            ``AbstractEnv``, ``multi_agent`` switches on multi-agent mode and
            ``default_action`` provides the initial joint action (a random
            sample of the action space is used otherwise).

        Raises
        ------
        TypeError
            If multi-agent mode is requested for an action space that is not
            a Box, Dict or MultiDiscrete space
        """
        super().__init__(config)
        self.multi_agent = False
        if "multi_agent" in config:
            # Item access so that plain dicts work as well.
            self.multi_agent = config["multi_agent"]

        if not self.multi_agent:
            return

        space_class = type(self.action_space)
        if space_class is gym.spaces.Box:
            num_hps = len(self.action_space.low)
        elif space_class is gym.spaces.MultiDiscrete:
            num_hps = len(self.action_space.nvec)
        elif space_class is gym.spaces.Dict:
            num_hps = len(self.action_space.spaces)
        else:
            print(
                "The MultiAgent environment currently only supports "
                "action spaces of types Box, Dict and MultiDiscrete"
            )
            raise TypeError(
                "The MultiAgent environment currently only supports "
                "action spaces of types Box, Dict and MultiDiscrete"
            )

        # One potential agent per action dimension.
        self.possible_agents = np.arange(num_hps)
        self.hp_names = []
        if "config_space" in self.config:
            # Same mapping interface the base class uses to build the action
            # space, so names line up with the action dimensions.
            self.hp_names = list(self.config["config_space"].keys())
        self.max_num_agent = len(self.possible_agents)

        # Keep the single-agent API reachable and expose the multi-agent
        # versions under the usual names.
        self.env_step = self.step
        self.env_reset = self.reset
        self.step = self.multi_agent_step
        self.reset = self.multi_agent_reset

        self.agents = []
        self.current_agent = None
        self.observation = None
        self.reward = None
        self.termination = False
        self.truncation = False
        self.info = None

        # TODO: this should be set to a reasonable default, ideally
        # Else playing with less than the full number of agents will be really hard
        if "default_action" in self.config:
            self.action = self.config["default_action"]
        else:
            self.action = self.action_space.sample()

        # Every agent observes the full environment state.
        self.observation_spaces = {
            agent: self.observation_space for agent in self.possible_agents
        }

        self.action_spaces = {}
        if space_class is gym.spaces.Box:
            lowers = self.action_space.low
            uppers = self.action_space.high
            for agent in self.possible_agents:
                self.action_spaces[agent] = gym.spaces.Box(
                    low=np.array([lowers[agent]]), high=np.array([uppers[agent]])
                )
        elif space_class is gym.spaces.Dict:
            # Without a config space there are no hyperparameter names, so
            # fall back to the keys of the action space itself.
            names = self.hp_names or list(self.action_space.spaces.keys())
            for agent in self.possible_agents:
                self.action_spaces[agent] = self.action_space.spaces[names[agent]]
        else:
            num_options = list(self.action_space.nvec)
            for agent in self.possible_agents:
                self.action_spaces[agent] = gym.spaces.Discrete(num_options[agent])

    def multi_agent_reset(self, seed: int | None = None):
        """Reset env, but don't return observations.

        The observation and info are stored and can be read via ``last``.

        Parameters
        ----------
        seed : int
            seed to use
        """
        self.observation, self.info = self.env_reset(seed)

    def last(self):
        """Get current step data.

        Returns
        -------
        tuple
            Stored observation, reward, termination, truncation and info
        """
        return (
            self.observation,
            self.reward,
            self.termination,
            self.truncation,
            self.info,
        )

    def multi_agent_step(self, action):
        """Step for a single hyperparameter.

        Stores the action of the current agent in the joint action and hands
        the turn to the next registered agent. Once the last registered agent
        has acted, the environment is stepped with the joint action and the
        turn returns to the first agent.

        Parameters
        ----------
        action
            the action in the current agent's dimension; a dict is read as a
            single ``{key: value}`` entry of the joint action
        """
        if isinstance(action, dict):
            key, value = next(iter(action.items()))
            self.action[key] = value
        elif isinstance(self.action, dict):
            self.action[list(self.action.keys())[self.current_agent]] = action
        else:
            self.action[self.current_agent] = action

        # ``current_agent`` holds an agent id, so advance by position in the
        # list of registered agents and translate back to an id.
        next_position = self.agents.index(self.current_agent) + 1
        if next_position >= len(self.agents):
            (
                self.observation,
                self.reward,
                self.termination,
                self.truncation,
                self.info,
            ) = self.env_step(self.action)
            self.current_agent = self.agents[0]
        else:
            self.current_agent = self.agents[next_position]

    def register_agent(self, agent_id):
        """Add agent.

        Parameters
        ----------
        agent_id : int or str
            id of the agent to add. A string longer than one character is
            looked up as a hyperparameter name, a single character is
            converted with ``int``.

        Raises
        ------
        ValueError
            If a multi-character name is not a known hyperparameter name
        """
        if isinstance(agent_id, str):
            if len(agent_id) > 1:
                if agent_id in self.hp_names:
                    agent_id = self.hp_names.index(agent_id)
                else:
                    raise ValueError(
                        """Unknown agent name - please register an ID
                        or a name within the config space."""
                    )
            else:
                agent_id = int(agent_id)

        assert agent_id not in self.agents, "Agent is already registered"
        assert agent_id in self.possible_agents, "Agent ID is not a possible agent"

        self.agents.append(agent_id)
        # The first registered agent starts the turn order.
        if self.current_agent is None:
            self.current_agent = agent_id

    def remove_agent(self, agent_id):
        """Remove agent.

        If the removed agent currently holds the turn, the turn passes to the
        agent that followed it (or to the first agent, or to nobody if no
        agents remain), so that the next step does not look up a removed id.

        Parameters
        ----------
        agent_id : int
            id of the agent to remove
        """
        if agent_id not in self.agents:
            return

        position = self.agents.index(agent_id)
        had_turn = self.current_agent == agent_id
        self.agents.remove(agent_id)

        if had_turn:
            if not self.agents:
                self.current_agent = None
            elif position < len(self.agents):
                self.current_agent = self.agents[position]
            else:
                self.current_agent = self.agents[0]

    @property
    def num_agents(self):
        """Current number of agents."""
        return len(self.agents)

    @property
    def agent_selection(self):
        """Current agent."""
        return self.current_agent

    @property
    def infos(self):
        """Current infos per agent."""
        return {agent: self.info for agent in self.agents}

    @property
    def rewards(self):
        """Current rewards values per agent."""
        # Every agent receives the shared environment reward.
        return {agent: self.reward for agent in self.agents}

    @property
    def terminations(self):
        """Current termination values per agent."""
        return {agent: self.termination for agent in self.agents}

    @property
    def truncations(self):
        """Current truncation values per agent."""
        return {agent: self.truncation for agent in self.agents}