# Source code for carl.envs.gymnasium.classic_control.carl_acrobot

from __future__ import annotations

from typing import Optional

import numpy as np

from carl.context.context_space import ContextFeature, UniformFloatContextFeature
from carl.envs.gymnasium.carl_gymnasium_env import CARLGymnasiumEnv


class CARLAcrobot(CARLGymnasiumEnv):
    """Contextual variant of the Gymnasium ``Acrobot-v1`` environment.

    The class declares which context features exist for the environment
    (see :meth:`get_context_features`) and overrides :meth:`reset` so that
    the initial joint angles and velocities are drawn from context-defined
    intervals.
    """

    env_name: str = "Acrobot-v1"
    metadata = {"render.modes": ["human", "rgb_array"]}

    @staticmethod
    def get_context_features() -> dict[str, ContextFeature]:
        """Return the context features defined for this environment.

        Returns
        -------
        dict[str, ContextFeature]
            Mapping from feature name to a ``UniformFloatContextFeature``
            carrying that feature's lower bound, upper bound and default
            value. A new dict with new feature objects is built on every
            call.

        Notes
        -----
        The ``LINK_*``, ``MAX_VEL_*`` and ``torque_noise_max`` names
        presumably mirror attributes of the underlying Gymnasium Acrobot
        environment (TODO confirm against the base class). The
        ``INITIAL_*`` features are the sampling bounds read by
        :meth:`reset`.
        """
        return {
            "LINK_LENGTH_1": UniformFloatContextFeature(
                "LINK_LENGTH_1", lower=0.1, upper=10, default_value=1
            ),
            "LINK_LENGTH_2": UniformFloatContextFeature(
                "LINK_LENGTH_2", lower=0.1, upper=10, default_value=1
            ),
            "LINK_MASS_1": UniformFloatContextFeature(
                "LINK_MASS_1", lower=0.1, upper=10, default_value=1
            ),
            "LINK_MASS_2": UniformFloatContextFeature(
                "LINK_MASS_2", lower=0.1, upper=10, default_value=1
            ),
            "LINK_COM_POS_1": UniformFloatContextFeature(
                "LINK_COM_POS_1", lower=0, upper=1, default_value=0.5
            ),
            "LINK_COM_POS_2": UniformFloatContextFeature(
                "LINK_COM_POS_2", lower=0, upper=1, default_value=0.5
            ),
            "LINK_MOI": UniformFloatContextFeature(
                "LINK_MOI", lower=0.1, upper=10, default_value=1
            ),
            "MAX_VEL_1": UniformFloatContextFeature(
                "MAX_VEL_1",
                lower=0.4 * np.pi,
                upper=40 * np.pi,
                default_value=4 * np.pi,
            ),
            "MAX_VEL_2": UniformFloatContextFeature(
                "MAX_VEL_2",
                lower=0.9 * np.pi,
                upper=90 * np.pi,
                default_value=9 * np.pi,
            ),
            "torque_noise_max": UniformFloatContextFeature(
                "torque_noise_max", lower=-1, upper=1, default_value=0
            ),
            "INITIAL_ANGLE_LOWER": UniformFloatContextFeature(
                "INITIAL_ANGLE_LOWER",
                lower=-np.inf,
                upper=np.inf,
                default_value=-0.1,
            ),
            "INITIAL_ANGLE_UPPER": UniformFloatContextFeature(
                "INITIAL_ANGLE_UPPER",
                lower=-np.inf,
                upper=np.inf,
                default_value=0.1,
            ),
            "INITIAL_VELOCITY_LOWER": UniformFloatContextFeature(
                "INITIAL_VELOCITY_LOWER",
                lower=-np.inf,
                upper=np.inf,
                default_value=-0.1,
            ),
            "INITIAL_VELOCITY_UPPER": UniformFloatContextFeature(
                "INITIAL_VELOCITY_UPPER",
                lower=-np.inf,
                upper=np.inf,
                default_value=0.1,
            ),
        }

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Reset the environment and re-sample the initial state from context.

        After delegating to the parent ``reset``, two joint angles and two
        joint velocities are drawn uniformly from the intervals given by
        the ``INITIAL_ANGLE_*`` and ``INITIAL_VELOCITY_*`` context values.
        A bound missing from ``self.context`` falls back to the default of
        the matching context feature. The sampled values overwrite the
        ``state`` attribute of the unwrapped environment.

        Parameters
        ----------
        seed : Optional[int]
            Forwarded unchanged to the parent ``reset``.
        options : Optional[dict]
            Forwarded unchanged to the parent ``reset``.

        Returns
        -------
        tuple
            ``(state, info)``. ``state`` is the float32 observation
            ``[cos(a1), sin(a1), cos(a2), sin(a2), v1, v2]`` after being
            passed through ``self._add_context_to_state``. ``info`` is a
            dict holding only ``"context_id"``.
        """
        # The parent's return value is intentionally discarded: the state
        # and info returned below are rebuilt from the re-sampled values.
        # NOTE(review): presumably the parent call selects the context and
        # resets the wrapped env -- confirm in CARLGymnasiumEnv.reset.
        super().reset(seed=seed, options=options)

        # Build the feature table once; it constructs every feature object
        # on each call, so per-bound lookups would repeat that work.
        features = self.get_context_features()

        def context_or_default(name: str):
            """Return the context value for *name*, else its default."""
            return self.context.get(name, features[name].default_value)

        rng = self.env.np_random
        # Draw order (angles, then velocities) is kept so that seeded runs
        # consume the generator in the same sequence.
        angles = rng.uniform(
            low=context_or_default("INITIAL_ANGLE_LOWER"),
            high=context_or_default("INITIAL_ANGLE_UPPER"),
            size=(2,),
        )
        velocities = rng.uniform(
            low=context_or_default("INITIAL_VELOCITY_LOWER"),
            high=context_or_default("INITIAL_VELOCITY_UPPER"),
            size=(2,),
        )

        unwrapped = self.env.unwrapped
        unwrapped.state = np.concatenate([angles, velocities])

        # Read the values back from the env so the observation reflects
        # exactly what was stored on it.
        angle_1, angle_2, velocity_1, velocity_2 = unwrapped.state
        state = np.array(
            [
                np.cos(angle_1),
                np.sin(angle_1),
                np.cos(angle_2),
                np.sin(angle_2),
                velocity_1,
                velocity_2,
            ],
            dtype=np.float32,
        )

        state = self._add_context_to_state(state)
        info = {"context_id": self.context_id}
        return state, info