# Source code for catalyst.rl.exploration.gauss

import numpy as np

from catalyst.rl.core import ExplorationStrategy


class NoExploration(ExplorationStrategy):
    """
    For continuous environments only.
    Returns action produced by the actor network without changes.
    """

    def get_action(self, action):
        """
        Return ``action`` exactly as it was received.

        Args:
            action: action produced by the actor network

        Returns:
            the very same ``action`` object, unmodified
        """
        return action


class GaussNoise(ExplorationStrategy):
    """
    For continuous environments only.
    Adds spherical Gaussian noise to the action produced by actor.
    """

    def __init__(self, sigma):
        """
        Args:
            sigma: standard deviation of the noise before any power
                scaling is applied
        """
        super().__init__()

        # Keep the unscaled value: ``set_power`` always rescales from it, so
        # repeated calls do not compound and a power of 0 is reversible.
        self._base_sigma = sigma
        self.sigma = sigma

    def set_power(self, value):
        """
        Set the exploration power and rescale ``sigma`` accordingly.

        After the call ``sigma == base_sigma * power``, regardless of how
        many times the power was set before.

        Args:
            value: new exploration power; forwarded to the base class,
                which is expected to store it as ``self._power``
        """
        super().set_power(value)
        self.sigma = self._base_sigma * self._power

    def get_action(self, action):
        """
        Sample a noisy action from a normal distribution centered at
        ``action`` with standard deviation ``self.sigma``.

        Args:
            action: action produced by the actor

        Returns:
            sample drawn by ``np.random.normal(action, self.sigma)``
        """
        noisy_action = np.random.normal(action, self.sigma)
        return noisy_action


class OrnsteinUhlenbeckProcess(ExplorationStrategy):
    """
    For continuous environments only.
    Adds temporally correlated Gaussian noise generated with
    Ornstein-Uhlenbeck process.
    Paper: https://arxiv.org/abs/1509.02971
    """

    def __init__(self, sigma, theta, dt=1e-2):
        """
        Args:
            sigma: noise scale before any power scaling is applied
            theta: rate at which the noise state is pulled back to zero
            dt: time step used to discretize the process
        """
        super().__init__()

        # Keep the unscaled value: ``set_power`` always rescales from it, so
        # repeated calls do not compound and a power of 0 is reversible.
        self._base_sigma = sigma
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        # Noise state; created by ``reset_state`` or lazily on first use.
        self.x_prev = None

    def set_power(self, value):
        """
        Set the exploration power and rescale ``sigma`` accordingly.

        After the call ``sigma == base_sigma * power``, regardless of how
        many times the power was set before.

        Args:
            value: new exploration power; forwarded to the base class,
                which is expected to store it as ``self._power``
        """
        super().set_power(value)
        self.sigma = self._base_sigma * self._power

    def reset_state(self, action_size):
        """
        Reset the noise state to zeros.

        Args:
            action_size: shape argument passed to ``np.zeros``
        """
        self.x_prev = np.zeros(action_size)

    def get_action(self, action):
        """
        Advance the noise state by one step and add it to ``action``.

        The new state is drawn from a normal distribution with mean
        ``x_prev * (1 - theta * dt)`` and standard deviation
        ``sigma * sqrt(dt)``.

        Args:
            action: action produced by the actor

        Returns:
            ``action`` plus the freshly sampled noise state
        """
        if self.x_prev is None:
            # ``reset_state`` has not been called yet: start from a zero
            # state shaped like the action instead of failing.
            self.x_prev = np.zeros(np.shape(action))

        mu = self.x_prev * (1 - self.theta * self.dt)
        sigma = self.sigma * np.sqrt(self.dt)
        x = np.random.normal(mu, sigma)
        noisy_action = action + x
        # Remember the sample so the next step is correlated with this one.
        self.x_prev = x
        return noisy_action


# Public API of this module.
__all__ = [
    "NoExploration",
    "GaussNoise",
    "OrnsteinUhlenbeckProcess",
]