Source code for catalyst.rl.exploration.boltzman

import numpy as np

from catalyst import utils
from catalyst.rl.core import ExplorationStrategy

# Small positive constant referenced by Boltzmann.get_action.
EPS = 1e-6


class Boltzmann(ExplorationStrategy):
    """
    Boltzmann (softmax) exploration. For discrete environments only.

    Selects soft maximum action (softmax_a [Q(s,a)/t]).
    Temperature parameter t usually decreases during the course of training.
    Importantly, the effective range of t depends on the magnitudes of
    environment rewards.
    """

    def __init__(self, temp_init, temp_final, annealing_steps, temp_min=0.01):
        """
        Args:
            temp_init: temperature at the start of annealing;
                clamped from below by ``temp_min``
            temp_final: temperature at the end of annealing;
                clamped from below by ``temp_min``
            annealing_steps: number of ``get_action`` calls over which the
                temperature moves linearly from ``temp_init``
                to ``temp_final``
            temp_min: lower bound applied to both temperatures
        """
        super().__init__()
        self.temp_min = temp_min
        self.temp_init = max(temp_init, temp_min)
        self.temp_final = max(temp_final, temp_min)
        self.num_steps = annealing_steps
        self.delta_temp = (self.temp_init - self.temp_final) / self.num_steps
        # Start from the clamped value (as ``set_power`` does) so the very
        # first actions never use a temperature below ``temp_min``.
        self.temperature = self.temp_init

    def set_power(self, value):
        """
        Rescale the temperature schedule and restart annealing.

        Both ``temp_init`` and ``temp_final`` are multiplied by
        ``self._power`` (presumably stored by the base class ``set_power``
        -- confirm against ``ExplorationStrategy``), clamped from below by
        ``temp_min``, and the current temperature is reset to ``temp_init``.

        Args:
            value: power value forwarded to the base class
        """
        super().set_power(value)
        self.temp_init = max(self.temp_init * self._power, self.temp_min)
        self.temp_final = max(self.temp_final * self._power, self.temp_min)
        self.delta_temp = (self.temp_init - self.temp_final) / self.num_steps
        self.temperature = self.temp_init

    def get_action(self, q_values):
        """
        Sample an action from softmax(Q / t) and anneal the temperature.

        Args:
            q_values: one-dimensional sequence of action values,
                one entry per discrete action

        Returns:
            index of the sampled action
        """
        # Q-values must be *divided* by the temperature: adding a scalar to
        # every logit leaves the softmax unchanged, which would make the
        # temperature schedule a no-op. EPS only guards the divisor against
        # a zero temperature (possible when ``temp_min`` is set to 0).
        temperature = max(self.temperature, EPS)
        probs = utils.np_softmax(np.asarray(q_values) / temperature)
        action = np.random.choice(np.arange(len(probs)), p=probs)
        # Linear annealing step, never going below the final temperature.
        self.temperature = max(
            self.temp_final, self.temperature - self.delta_temp
        )
        return action
# Names exported by ``from <this module> import *``.
__all__ = ["Boltzmann"]