Rnd

mighty.mighty_meta.rnd #

Internal reward via RND.

NovelD #

NovelD(
    rnd_output_dim: int = 512,
    rnd_network_config: dict | None = None,
    internal_reward_weight: float = 0.1,
    rnd_lr: float = 0.001,
    rnd_eps: float = 1e-05,
    rnd_weight_decay: float = 0.01,
    update_proportion: float = 0.5,
)

Bases: RND

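:param rnd_output_dim: Output dimension for the RND network :param rnd_network_config: Optional configuration dict for the RND network :param internal_reward_weight: Factor applied to the RND error before it is added to the reward :param rnd_lr: Learning rate setting for the RND network :param rnd_eps: Epsilon setting for the RND network :param rnd_weight_decay: Weight decay setting for the RND network :param update_proportion: Update proportion setting for the RND network :return: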

Source code in mighty/mighty_meta/rnd.py

def __init__(
    self,
    rnd_output_dim: int = 512,
    rnd_network_config: dict | None = None,
    internal_reward_weight: float = 0.1,
    rnd_lr: float = 0.001,
    rnd_eps: float = 1e-5,
    rnd_weight_decay: float = 0.01,
    update_proportion: float = 0.5,
) -> None:
    """NovelD initialization.

    All arguments are forwarded unchanged to the parent initializer.

    :param rnd_output_dim: Output dimension for the RND network
    :param rnd_network_config: Optional configuration dict for the RND network
    :param internal_reward_weight: Weight of the intrinsic reward
    :param rnd_lr: Learning rate setting for the RND network
    :param rnd_eps: Epsilon setting for the RND network
    :param rnd_weight_decay: Weight decay setting for the RND network
    :param update_proportion: Update proportion setting for the RND network
    :return:
    """
    super().__init__(
        rnd_output_dim=rnd_output_dim,
        rnd_network_config=rnd_network_config,
        internal_reward_weight=internal_reward_weight,
        rnd_lr=rnd_lr,
        rnd_eps=rnd_eps,
        rnd_weight_decay=rnd_weight_decay,
        update_proportion=update_proportion,
    )
    # Starts at zero; presumably holds the previous RND error for the
    # novelty difference -- its use is not visible here (TODO confirm).
    self.last_error = 0

post_episode #

post_episode(metrics)

Execute methods at the end of an episode.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_episode(self, metrics):
    """Run every registered end-of-episode hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.post_episode_methods
    for hook in hooks:
        hook(metrics)

post_step #

post_step(metrics)

Execute methods after a step.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_step(self, metrics):
    """Run every registered after-step hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.post_step_methods
    for hook in hooks:
        hook(metrics)

post_update #

post_update(metrics)

Execute methods after the update.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_update(self, metrics):
    """Run every registered after-update hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.post_update_methods
    for hook in hooks:
        hook(metrics)

pre_episode #

pre_episode(metrics)

Execute methods before an episode.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_episode(self, metrics):
    """Run every registered before-episode hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.pre_episode_methods
    for hook in hooks:
        hook(metrics)

pre_step #

pre_step(metrics)

Execute methods before a step.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_step(self, metrics):
    """Run every registered before-step hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.pre_step_methods
    for hook in hooks:
        hook(metrics)

pre_update #

pre_update(metrics)

Execute methods before the update.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_update(self, metrics):
    """Run every registered before-update hook in order.

    :param metrics: Current metrics dict, handed unchanged to each hook
    :return:
    """
    hooks = self.pre_update_methods
    for hook in hooks:
        hook(metrics)

RND #

RND(
    rnd_output_dim: int = 512,
    rnd_network_config: dict | None = None,
    internal_reward_weight: float = 0.1,
    rnd_lr: float = 0.001,
    rnd_eps: float = 1e-05,
    rnd_weight_decay: float = 0.01,
    update_proportion: float = 0.5,
)

Bases: MightyMetaComponent

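Random Network Distillation (RND) meta component that adds a weighted intrinsic reward to each transition.

:param rnd_output_dim: Output dimension for the RND network :param rnd_network_config: Optional configuration dict for the RND network (an empty dict is used if None) :param internal_reward_weight: Factor applied to the RND error before it is added to the reward :param rnd_lr: Learning rate setting for the RND network :param rnd_eps: Epsilon setting for the RND network :param rnd_weight_decay: Weight decay setting for the RND network :param update_proportion: Update proportion setting for the RND network :return: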

Source code in mighty/mighty_meta/rnd.py

def __init__(
    self,
    rnd_output_dim: int = 512,
    rnd_network_config: dict | None = None,
    internal_reward_weight: float = 0.1,
    rnd_lr: float = 0.001,
    rnd_eps: float = 1e-5,
    rnd_weight_decay: float = 0.01,
    update_proportion: float = 0.5,
) -> None:
    """RND initialization.

    Stores the hyperparameters and registers ``get_reward`` as the only
    post-step hook and ``update_predictor`` as the only post-update hook.
    The RND network itself is not built here; ``rnd_net`` stays ``None``
    until ``get_reward`` is first called.

    :param rnd_output_dim: Output dimension for the RND network
    :param rnd_network_config: Optional configuration dict for the RND
        network; ``None`` is replaced by an empty dict
    :param internal_reward_weight: Factor applied to the RND error before
        it is added to the reward
    :param rnd_lr: Learning rate setting, stored for later use (presumably
        by the predictor's optimizer -- not visible here)
    :param rnd_eps: Epsilon setting, stored for later use (presumably by
        the predictor's optimizer -- not visible here)
    :param rnd_weight_decay: Weight decay setting, stored for later use
    :param update_proportion: Stored for later use; presumably consumed by
        ``update_predictor`` (TODO confirm)
    :return:
    """
    if rnd_network_config is None:
        rnd_network_config = {}
    super().__init__()

    # Hyperparameters, kept verbatim for the lazily built network.
    self.rnd_output_dim = rnd_output_dim
    self.rnd_network_config = rnd_network_config
    self.internal_reward_weight = internal_reward_weight
    self.rnd_lr = rnd_lr
    self.rnd_eps = rnd_eps
    self.rnd_weight_decay = rnd_weight_decay
    self.update_proportion = update_proportion

    # Created on the first get_reward call, once a state shape is known.
    self.rnd_net = None

    # Hook registration (each list is assigned exactly once).
    self.post_step_methods = [self.get_reward]
    self.post_update_methods = [self.update_predictor]

get_reward #

get_reward(metrics)

Add the weighted RND error of the next state to the transition reward as an intrinsic reward.

:param metrics: Dict of current metrics :return:

Source code in mighty/mighty_meta/rnd.py

def get_reward(self, metrics):
    """Add the RND-based intrinsic reward to the current transition.

    On the first call the RND networks are initialized from the shape of
    ``next_state`` with its leading dimension dropped. The RND error of
    ``next_state`` is scaled by ``internal_reward_weight``, stored under
    ``transition["intrinsic_reward"]`` and added to ``transition["reward"]``.

    :param metrics: Dict of current metrics; must contain a ``transition``
        dict with ``next_state`` and ``reward`` entries
    :return: The same metrics dict, updated in place
    """
    transition = metrics["transition"]
    next_state = transition["next_state"]

    if self.rnd_net is None:
        # Leading dimension is skipped -- presumably the batch axis
        # (TODO confirm against the caller).
        self.initialize_networks(next_state.shape[1:])

    # Compute the weighted error once and reuse it for both entries.
    intrinsic_reward = self.internal_reward_weight * self.rnd_net.get_error(
        next_state
    )
    transition["intrinsic_reward"] = intrinsic_reward
    transition["reward"] = transition["reward"] + intrinsic_reward
    return metrics

post_episode #

post_episode(metrics)

Execute methods at the end of an episode.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_episode(self, metrics):
    """Call each end-of-episode callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.post_episode_methods
    for callback in callbacks:
        callback(metrics)

post_step #

post_step(metrics)

Execute methods after a step.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_step(self, metrics):
    """Call each after-step callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.post_step_methods
    for callback in callbacks:
        callback(metrics)

post_update #

post_update(metrics)

Execute methods after the update.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def post_update(self, metrics):
    """Call each after-update callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.post_update_methods
    for callback in callbacks:
        callback(metrics)

pre_episode #

pre_episode(metrics)

Execute methods before an episode.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_episode(self, metrics):
    """Call each before-episode callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.pre_episode_methods
    for callback in callbacks:
        callback(metrics)

pre_step #

pre_step(metrics)

Execute methods before a step.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_step(self, metrics):
    """Call each before-step callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.pre_step_methods
    for callback in callbacks:
        callback(metrics)

pre_update #

pre_update(metrics)

Execute methods before the update.

:param metrics: Current metrics dict :return:

Source code in mighty/mighty_meta/mighty_component.py

def pre_update(self, metrics):
    """Call each before-update callback with the metrics.

    :param metrics: Current metrics dict, passed as-is to every callback
    :return:
    """
    callbacks = self.pre_update_methods
    for callback in callbacks:
        callback(metrics)