Ez greedy

mighty.mighty_exploration.ez_greedy #

Epsilon Greedy Exploration.

EZGreedy #

EZGreedy(
    algo: str,
    model: Module,
    epsilon: float = 0.1,
    zipf_param: int = 2,
)

Bases: EpsilonGreedy

Epsilon Greedy Exploration.

Parameters:

- algo: algorithm name
- model: model
- epsilon: exploration epsilon
- zipf_param: parametrizes the Zipf distribution for skipping

Source code in mighty/mighty_exploration/ez_greedy.py

def __init__(
    self,
    algo: str,
    model: torch.nn.Module,
    epsilon: float = 0.1,
    zipf_param: int = 2,
):
    """Set up the EZ Greedy exploration state.

    :param algo: algorithm name
    :param model: model
    :param epsilon: exploration epsilon
    :param zipf_param: parametrizes the Zipf distribution for skipping
    """
    # Only algo and model are forwarded to the parent; epsilon is stored here.
    super().__init__(algo, model)
    self.epsilon = epsilon
    self.zipf_param = zipf_param

    # Draw the initial skip length from a Zipf distribution using a fresh
    # NumPy generator, then clamp it so it is never below 1.
    generator = np.random.default_rng()
    drawn_skip = generator.zipf(self.zipf_param)
    self.skip = max(1, drawn_skip)

    # Both start unset; nothing in this constructor populates them.
    self.skipped = None
    self.frozen_actions = None

__call__ #

__call__(
    s, return_logp=False, metrics=None, evaluate=False
)

Get action.

Parameters:

- s: state
- return_logp: return logprobs
- metrics: current metric dict
- evaluate: eval mode

Returns: action or (action, logprobs)

Source code in mighty/mighty_exploration/mighty_exploration_policy.py

def __call__(self, s, return_logp=False, metrics=None, evaluate=False):
    """Get action.

    In evaluation mode the action is taken from ``self.sample_action`` and
    converted to a NumPy array; otherwise the call is delegated to
    ``self.explore``.

    :param s: state
    :param return_logp: return logprobs
    :param metrics: current metric dict; an empty dict is used when ``None``
    :param evaluate: eval mode
    :return: action or (action, logprobs)
    """
    if metrics is None:
        metrics = {}

    # Training path: let the exploration strategy pick the action.
    if not evaluate:
        return self.explore(s, return_logp, metrics)

    action, logprobs = self.sample_action(s)
    # Copy to host memory before converting: Tensor.numpy() raises for tensors
    # that live on a non-CPU device, while .cpu() is a no-op for CPU tensors.
    action = action.detach().cpu().numpy()
    return (action, logprobs) if return_logp else action

explore #

explore(s, return_logp, metrics=None)

Explore.

Parameters:

- s: state
- return_logp: return logprobs
- metrics: not used

Returns: action or (action, logprobs)