Skip to content

Epsilon greedy

mighty.mighty_exploration.epsilon_greedy #

Epsilon Greedy Exploration.

EpsilonGreedy #

EpsilonGreedy(algo, model, epsilon=0.1)

Bases: MightyExplorationPolicy

Epsilon Greedy Exploration.

:param algo: algorithm name :param model: policy model (forwarded to the base exploration policy) :param epsilon: exploration epsilon :return:

Source code in mighty/mighty_exploration/epsilon_greedy.py
def __init__(
    self,
    algo,
    model,
    epsilon=0.1,
):
    """Initialize Epsilon Greedy.

    :param algo: algorithm name
    :param model: model forwarded unchanged to the base class initializer
        (presumably the policy network/function -- confirm against the
        base exploration policy)
    :param epsilon: exploration epsilon, stored on the instance as
        ``self.epsilon``; defaults to 0.1
    :return:
    """
    # The base class receives algo and model; only epsilon is handled here.
    super().__init__(algo, model)
    self.epsilon = epsilon

__call__ #

__call__(
    s, return_logp=False, metrics=None, evaluate=False
)

Get action.

:param s: state :param return_logp: return logprobs :param metrics: current metric dict :param evaluate: eval mode :return: action or (action, logprobs)

Source code in mighty/mighty_exploration/mighty_exploration_policy.py
def __call__(self, s, return_logp=False, metrics=None, evaluate=False):
    """Get action.

    In evaluation mode the action is taken from ``self.sample_action`` and
    converted to a NumPy array; otherwise the call is delegated to
    ``self.explore``.

    :param s: state
    :param return_logp: return logprobs
    :param metrics: current metric dict; an empty dict is used when None
    :param evaluate: eval mode (skip exploration)
    :return: action or (action, logprobs)
    """
    if metrics is None:
        metrics = {}

    if not evaluate:
        return self.explore(s, return_logp, metrics)

    action, logprobs = self.sample_action(s)
    # Copy to host memory first: ``numpy()`` fails for tensors that live on
    # an accelerator, while ``cpu()`` is a no-op for tensors already on CPU.
    action = action.detach().cpu().numpy()
    return (action, logprobs) if return_logp else action

explore #

explore(s, return_logp, _)

Explore.

:param s: state :param return_logp: return logprobs :param _: not used :return: action or (action, logprobs)

Source code in mighty/mighty_exploration/mighty_exploration_policy.py
def explore(self, s, return_logp, _):
    """Pick an exploratory action for the given state.

    :param s: state handed to ``self.explore_func``
    :param return_logp: when truthy, the logprobs are returned with the action
    :param _: not used
    :return: action or (action, logprobs)
    """
    chosen_action, log_probabilities = self.explore_func(s)
    if return_logp:
        return chosen_action, log_probabilities
    return chosen_action