# Source code for carl.envs.classic_control.carl_mountaincar

from typing import Dict, List, Optional, Tuple, Union

import gymnasium.envs.classic_control as gccenvs
import numpy as np

from carl.context.selection import AbstractSelector
from carl.envs.carl_env import CARLEnv
from carl.utils.trial_logger import TrialLogger
from carl.utils.types import Context, Contexts

# Default value of every context feature. All values are floats so that they
# agree with the ``float`` type declared for each feature in CONTEXT_BOUNDS.
DEFAULT_CONTEXT = {
    "min_position": -1.2,  # unit?
    "max_position": 0.6,  # unit?
    "max_speed": 0.07,  # unit?
    "goal_position": 0.5,  # unit?
    "goal_velocity": 0.0,  # unit?
    "force": 0.001,  # unit?
    "gravity": 0.0025,  # unit?
    # Range from which the initial position is sampled on reset.
    "min_position_start": -0.6,
    "max_position_start": -0.4,
    # Range from which the initial velocity is sampled on reset.
    "min_velocity_start": 0.0,
    "max_velocity_start": 0.0,
}

# Maps each context feature to a (lower bound, upper bound, type) triple.
# "max_speed" and "gravity" have a lower bound of 0; every other feature is
# unbounded in both directions.
CONTEXT_BOUNDS = {
    feature: (
        0 if feature in ("max_speed", "gravity") else -np.inf,
        np.inf,
        float,
    )
    for feature in (
        "min_position",
        "max_position",
        "max_speed",
        "goal_position",
        "goal_velocity",
        "force",
        "gravity",
        "min_position_start",
        "max_position_start",
        "min_velocity_start",
        "max_velocity_start",
    )
}


class CustomMountainCarEnv(gccenvs.mountain_car.MountainCarEnv):
    """Mountain car environment with a configurable initial-state range.

    On every reset the start position and start velocity are drawn uniformly
    from ``[min_position_start, max_position_start]`` and
    ``[min_velocity_start, max_velocity_start]`` respectively.
    """

    def __init__(self, goal_velocity: float = 0.0):
        """Create the environment.

        Parameters
        ----------
        goal_velocity: float, optional
            Forwarded to the parent ``MountainCarEnv`` constructor.
        """
        super().__init__(goal_velocity=goal_velocity)
        self.min_position_start = -0.6
        self.max_position_start = -0.4
        self.min_velocity_start = 0.0
        self.max_velocity_start = 0.0
        self.state: np.ndarray  # type: ignore [assignment]

    def sample_initial_state(self) -> np.ndarray:
        """Return a freshly sampled ``[position, velocity]`` start state."""
        position = self.np_random.uniform(
            low=self.min_position_start, high=self.max_position_start
        )
        velocity = self.np_random.uniform(
            low=self.min_velocity_start, high=self.max_velocity_start
        )
        return np.array([position, velocity])

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ) -> Tuple[np.ndarray, dict]:
        """Reset the environment and sample a new start state.

        Parameters
        ----------
        seed: int, optional
            Forwarded to the parent ``reset``.
        options: dict, optional
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        Tuple[np.ndarray, dict]
            The initial state as a ``float32`` array and an empty info dict.
        """
        super().reset(seed=seed)
        # Replace whatever state the parent reset chose with one drawn from
        # the configurable start ranges.
        self.state = self.sample_initial_state()
        return np.array(self.state, dtype=np.float32), {}

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """Advance the environment by one step.

        Returns
        -------
        Tuple[np.ndarray, float, bool, bool, Dict]
            ``(state, reward, terminated, truncated, info)`` as returned by
            the parent ``step``, with the state squeezed.
        """
        # The Gymnasium step API returns five values; unpacking only four
        # raises ``ValueError`` on every call.
        state, reward, terminated, truncated, info = super().step(action)
        # TODO something weird is happening such that the state gets shape
        # (2,1) instead of (2,)
        return state.squeeze(), reward, terminated, truncated, info
class CARLMountainCarEnv(CARLEnv):
    """CARL environment that applies context features to a mountain car env."""

    def __init__(
        self,
        env: Optional[CustomMountainCarEnv] = None,
        contexts: Optional[Contexts] = None,
        hide_context: bool = True,
        add_gaussian_noise_to_context: bool = False,
        gaussian_noise_std_percentage: float = 0.01,
        logger: Optional[TrialLogger] = None,
        scale_context_features: str = "no",
        default_context: Optional[Context] = DEFAULT_CONTEXT,
        max_episode_length: int = 200,  # from https://github.com/openai/gym/blob/master/gym/envs/__init__.py
        state_context_features: Optional[List[str]] = None,
        context_mask: Optional[List[str]] = None,
        dict_observation_space: bool = False,
        context_selector: Optional[
            Union[AbstractSelector, type[AbstractSelector]]
        ] = None,
        context_selector_kwargs: Optional[Dict] = None,
    ):
        """
        Parameters
        ----------
        env: CustomMountainCarEnv, optional
            Environment to wrap. A new ``CustomMountainCarEnv`` is created
            when omitted.
        contexts: Contexts, optional
            Different contexts / different environment parameter settings.
            When empty or omitted, ``{0: DEFAULT_CONTEXT}`` is used.

        All remaining parameters are forwarded unchanged to
        ``CARLEnv.__init__``.
        """
        # Build the default env per instance. A default instance in the
        # signature is created once at definition time and would be shared
        # (and mutated by ``_update_context``) across all wrappers.
        if env is None:
            env = CustomMountainCarEnv()
        if not contexts:
            contexts = {0: DEFAULT_CONTEXT}
        super().__init__(
            env=env,
            contexts=contexts,
            hide_context=hide_context,
            add_gaussian_noise_to_context=add_gaussian_noise_to_context,
            gaussian_noise_std_percentage=gaussian_noise_std_percentage,
            logger=logger,
            scale_context_features=scale_context_features,
            default_context=default_context,
            max_episode_length=max_episode_length,
            state_context_features=state_context_features,
            dict_observation_space=dict_observation_space,
            context_selector=context_selector,
            context_selector_kwargs=context_selector_kwargs,
            context_mask=context_mask,
        )
        self.whitelist_gaussian_noise = list(
            DEFAULT_CONTEXT.keys()
        )  # allow to augment all values

    def _update_context(self) -> None:
        """Copy ``self.context`` onto the wrapped env and rebuild the
        observation space from the new position and speed limits."""
        self.env: CustomMountainCarEnv
        self.env.min_position = self.context["min_position"]
        self.env.max_position = self.context["max_position"]
        self.env.max_speed = self.context["max_speed"]
        self.env.goal_position = self.context["goal_position"]
        self.env.goal_velocity = self.context["goal_velocity"]
        self.env.min_position_start = self.context["min_position_start"]
        self.env.max_position_start = self.context["max_position_start"]
        self.env.min_velocity_start = self.context["min_velocity_start"]
        self.env.max_velocity_start = self.context["max_velocity_start"]
        self.env.force = self.context["force"]
        self.env.gravity = self.context["gravity"]

        # Observation bounds are [position, velocity]; velocity is limited
        # to +/- max_speed.
        self.low = np.array(
            [self.env.min_position, -self.env.max_speed], dtype=np.float32
        ).squeeze()
        self.high = np.array(
            [self.env.max_position, self.env.max_speed], dtype=np.float32
        ).squeeze()

        self.build_observation_space(self.low, self.high, CONTEXT_BOUNDS)