Source code for carl.envs.classic_control.carl_acrobot

from typing import Dict, List, Optional, Union

import numpy as np
from gymnasium.envs.classic_control import AcrobotEnv

from carl.context.selection import AbstractSelector
from carl.envs.carl_env import CARLEnv
from carl.utils.trial_logger import TrialLogger
from carl.utils.types import Context, Contexts

DEFAULT_CONTEXT = {
    "link_length_1": 1,  # should be seen as 100% default and scaled
    "link_length_2": 1,  # should be seen as 100% default and scaled
    "link_mass_1": 1,  # should be seen as 100% default and scaled
    "link_mass_2": 1,  # should be seen as 100% default and scaled
    "link_com_1": 0.5,  # Percentage of the length of link one
    "link_com_2": 0.5,  # Percentage of the length of link one
    "link_moi": 1,  # should be seen as 100% default and scaled
    "max_velocity_1": 4 * np.pi,
    "max_velocity_2": 9 * np.pi,
    "torque_noise_max": 0.0,  # optional noise on torque, sampled uniformly from [-torque_noise_max, torque_noise_max]
    "initial_angle_lower": -0.1,  # lower bound of initial angle distribution (uniform)
    "initial_angle_upper": 0.1,  # upper bound of initial angle distribution (uniform)
    "initial_velocity_lower": -0.1,  # lower bound of initial velocity distribution (uniform)
    "initial_velocity_upper": 0.1,  # upper bound of initial velocity distribution (uniform)
}

CONTEXT_BOUNDS = {
    "link_length_1": (
        0.1,
        10,
        float,
    ),  # Links can be shrunken and grown by a factor of 10
    "link_length_2": (0.1, 10, float),
    "link_mass_1": (
        0.1,
        10,
        float,
    ),  # Link mass can be shrunken and grown by a factor of 10
    "link_mass_2": (0.1, 10, float),
    "link_com_1": (0, 1, float),  # Center of mass can move from one end to the other
    "link_com_2": (0, 1, float),
    "link_moi": (
        0.1,
        10,
        float,
    ),  # Moments on inertia can be shrunken and grown by a factor of 10
    "max_velocity_1": (
        0.4 * np.pi,
        40 * np.pi,
        float,
    ),  # Velocity can vary by a factor of 10 in either direction
    "max_velocity_2": (0.9 * np.pi, 90 * np.pi, float),
    "torque_noise_max": (
        -1.0,
        1.0,
        float,
    ),  # torque is either {-1., 0., 1}. Applying noise of 1. would be quite extreme
    "initial_angle_lower": (-np.inf, np.inf, float),
    "initial_angle_upper": (-np.inf, np.inf, float),
    "initial_velocity_lower": (-np.inf, np.inf, float),
    "initial_velocity_upper": (-np.inf, np.inf, float),
}


[docs]class CustomAcrobotEnv(AcrobotEnv):
    INITIAL_ANGLE_LOWER: float = -0.1
    INITIAL_ANGLE_UPPER: float = 0.1
    INITIAL_VELOCITY_LOWER: float = -0.1
    INITIAL_VELOCITY_UPPER: float = 0.1

[docs]    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> Union[np.ndarray, tuple[np.ndarray, dict]]:
        super().reset(seed=seed)
        low = (
            self.INITIAL_ANGLE_LOWER,
            self.INITIAL_ANGLE_LOWER,
            self.INITIAL_VELOCITY_LOWER,
            self.INITIAL_VELOCITY_LOWER,
        )
        high = (
            self.INITIAL_ANGLE_UPPER,
            self.INITIAL_ANGLE_UPPER,
            self.INITIAL_VELOCITY_UPPER,
            self.INITIAL_VELOCITY_UPPER,
        )
        self.state = self.np_random.uniform(low=low, high=high).astype(np.float32)
        return self._get_ob(), {}


[docs]class CARLAcrobotEnv(CARLEnv):
    def __init__(
        self,
        env: CustomAcrobotEnv = CustomAcrobotEnv(),
        contexts: Contexts = {},
        hide_context: bool = True,
        add_gaussian_noise_to_context: bool = False,
        gaussian_noise_std_percentage: float = 0.01,
        logger: Optional[TrialLogger] = None,
        scale_context_features: str = "no",
        default_context: Optional[Context] = DEFAULT_CONTEXT,
        max_episode_length: int = 500,  # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py
        state_context_features: Optional[List[str]] = None,
        context_mask: Optional[List[str]] = None,
        dict_observation_space: bool = False,
        context_selector: Optional[
            Union[AbstractSelector, type[AbstractSelector]]
        ] = None,
        context_selector_kwargs: Optional[Dict] = None,
    ):
        if not contexts:
            contexts = {0: DEFAULT_CONTEXT}
        super().__init__(
            env=env,
            contexts=contexts,
            hide_context=hide_context,
            add_gaussian_noise_to_context=add_gaussian_noise_to_context,
            gaussian_noise_std_percentage=gaussian_noise_std_percentage,
            logger=logger,
            scale_context_features=scale_context_features,
            default_context=default_context,
            max_episode_length=max_episode_length,
            state_context_features=state_context_features,
            dict_observation_space=dict_observation_space,
            context_selector=context_selector,
            context_selector_kwargs=context_selector_kwargs,
            context_mask=context_mask,
        )
        self.whitelist_gaussian_noise = list(
            DEFAULT_CONTEXT.keys()
        )  # allow to augment all values

    def _update_context(self) -> None:
        self.env: CustomAcrobotEnv
        self.env.LINK_LENGTH_1 = self.context["link_length_1"]
        self.env.LINK_LENGTH_2 = self.context["link_length_2"]
        self.env.LINK_MASS_1 = self.context["link_mass_1"]
        self.env.LINK_MASS_2 = self.context["link_mass_2"]
        self.env.LINK_COM_POS_1 = self.context["link_com_1"]
        self.env.LINK_COM_POS_2 = self.context["link_com_2"]
        self.env.LINK_MOI = self.context["link_moi"]
        self.env.MAX_VEL_1 = self.context["max_velocity_1"]
        self.env.MAX_VEL_2 = self.context["max_velocity_2"]
        self.env.torque_noise_max = self.context["torque_noise_max"]
        self.env.INITIAL_ANGLE_LOWER = self.context["initial_angle_lower"]
        self.env.INITIAL_ANGLE_UPPER = self.context["initial_angle_upper"]
        self.env.INITIAL_VELOCITY_LOWER = self.context["initial_velocity_lower"]
        self.env.INITIAL_VELOCITY_UPPER = self.context["initial_velocity_upper"]

        high = np.array(
            [1.0, 1.0, 1.0, 1.0, self.env.MAX_VEL_1, self.env.MAX_VEL_2],
            dtype=np.float32,
        )
        low = -high
        self.build_observation_space(low, high, CONTEXT_BOUNDS)
CARL Documentation

Source code for carl.envs.classic_control.carl_acrobot