Source code for catalyst.rl.exploration.greedy

import numpy as np

from catalyst.rl.core import ExplorationStrategy


class Greedy(ExplorationStrategy):
    """
    For discrete environments only.
    Selects the greedy action, i.e. argmax_a Q(s, a).
    """

    def get_action(self, q_values):
        # exploit: always take the action with the highest Q-value
        action = np.argmax(q_values)
        return action


class EpsilonGreedy(ExplorationStrategy):
    """
    For discrete environments only.
    Selects a random action with probability eps and the greedy action
    (argmax_a Q(s, a)) with probability 1 - eps.
    The exploration probability eps is usually annealed from 1 down to
    0.01-0.05 over the course of training.
    """

    def __init__(self, eps_init, eps_final, annealing_steps, eps_min=0.01):
        super().__init__()
        self.eps_init = max(eps_init, eps_min)
        self.eps_final = max(eps_final, eps_min)
        self.num_steps = annealing_steps
        # linear schedule: eps decreases by delta_eps on every action
        self.delta_eps = (self.eps_init - self.eps_final) / self.num_steps
        self.eps = self.eps_init  # start from the clamped value, not the raw eps_init
        self.eps_min = eps_min

    def set_power(self, value):
        # rescale the whole schedule by the sampler power,
        # keeping the final eps above eps_min
        super().set_power(value)
        self.eps_init *= self._power
        self.eps_final *= self._power
        self.eps_final = max(self.eps_final, self.eps_min)
        self.delta_eps = (self.eps_init - self.eps_final) / self.num_steps
        self.eps = self.eps_init

    def get_action(self, q_values):
        if np.random.random() < self.eps:
            # explore: pick a uniformly random action
            action = np.random.randint(len(q_values))
        else:
            # exploit: pick the greedy action
            action = np.argmax(q_values)
        # anneal eps, never dropping below eps_final
        self.eps = max(self.eps_final, self.eps - self.delta_eps)
        return action


__all__ = ["Greedy", "EpsilonGreedy"]