Source code for catalyst.contrib.dl.runner.wandb

from typing import Dict, List  # isort:skip
import os
from pathlib import Path
import shutil

import wandb

from catalyst.dl import utils
from catalyst.dl.core import Experiment, Runner
from catalyst.dl.experiment import ConfigExperiment
from catalyst.dl.runner import SupervisedRunner


class WandbRunner(Runner):
    """
    Runner wrapper with wandb integration hooks.

    Metrics are sent to wandb after each batch and/or each epoch, and the
    contents of the experiment logdir are copied into the wandb run
    directory once the experiment has finished.
    """

    @staticmethod
    def _log_metrics(metrics: Dict, mode: str, suffix: str = ""):
        """
        Send ``metrics`` to wandb under ``"<name>/<mode><suffix>"`` keys.

        Args:
            metrics: mapping of metric name to metric value
            mode: string placed after the slash in every key
                (this class passes the loader name)
            suffix: string appended right after ``mode``
        """
        def key_locate(key: str):
            """
            Wandb uses first symbol _ for its service purposes
            because of that fact, we can not send original metric names

            Args:
                key: metric name
            Returns:
                formatted metric name
            """
            if key.startswith("_"):
                return key[1:]
            return key

        metrics = {
            f"{key_locate(key)}/{mode}{suffix}": value
            for key, value in metrics.items()
        }
        wandb.log(metrics)

    def _init(
        self,
        log_on_batch_end: bool = False,
        log_on_epoch_end: bool = True,
        checkpoints_glob: List = None,
    ):
        """
        Store the logging settings and derive the metric key suffixes.

        Args:
            log_on_batch_end: send batch metrics to wandb after every batch
            log_on_epoch_end: send epoch metrics to wandb after every epoch
            checkpoints_glob: glob patterns (relative to
                ``<logdir>/checkpoints``) selecting the checkpoint files
                copied to the wandb run directory; ``None`` means no
                checkpoints are copied, a bare string is treated as a
                single pattern
        """
        self.log_on_batch_end = log_on_batch_end
        self.log_on_epoch_end = log_on_epoch_end

        # Normalise so that ``_post_experiment_hook`` can always iterate:
        # ``None`` (the default) used to raise ``TypeError`` there, and a
        # bare string would have been iterated character by character.
        if checkpoints_glob is None:
            checkpoints_glob = []
        elif isinstance(checkpoints_glob, str):
            checkpoints_glob = [checkpoints_glob]
        self.checkpoints_glob = checkpoints_glob

        # When exactly one of the two logging modes is active the metric
        # keys cannot collide, so no suffix is used. Otherwise batch and
        # epoch values get distinct suffixes.
        if bool(self.log_on_batch_end) != bool(self.log_on_epoch_end):
            self.batch_log_suffix = ""
            self.epoch_log_suffix = ""
        else:
            self.batch_log_suffix = "_batch"
            self.epoch_log_suffix = "_epoch"

    def _pre_experiment_hook(self, experiment: Experiment):
        """
        Configure the runner from ``experiment.monitoring_params`` and
        initialise the wandb run.

        The keys ``log_on_batch_end``, ``log_on_epoch_end`` and
        ``checkpoints_glob`` are consumed by the runner; every remaining
        key is forwarded to ``wandb.init`` together with ``dir`` set to the
        absolute experiment logdir.

        Args:
            experiment: experiment that is about to be run
        """
        # Work on a shallow copy: popping from / writing into the
        # experiment's own dict would drop the settings for any later run
        # of the same experiment object.
        monitoring_params = dict(experiment.monitoring_params or {})
        monitoring_params["dir"] = str(Path(experiment.logdir).absolute())

        log_on_batch_end: bool = \
            monitoring_params.pop("log_on_batch_end", False)
        log_on_epoch_end: bool = \
            monitoring_params.pop("log_on_epoch_end", True)
        checkpoints_glob: List[str] = \
            monitoring_params.pop("checkpoints_glob", [])
        self._init(
            log_on_batch_end=log_on_batch_end,
            log_on_epoch_end=log_on_epoch_end,
            checkpoints_glob=checkpoints_glob,
        )

        if isinstance(experiment, ConfigExperiment):
            # Config-driven experiments also publish their flattened
            # stages config as the wandb run config.
            exp_config = utils.flatten_dict(experiment.stages_config)
            wandb.init(**monitoring_params, config=exp_config)
        else:
            wandb.init(**monitoring_params)

    def _post_experiment_hook(self, experiment: Experiment):
        """
        Copy the experiment logdir into the wandb run directory.

        Top-level entries whose name contains ``"wandb"`` or
        ``"checkpoints"`` are skipped; checkpoints are then copied
        selectively according to ``self.checkpoints_glob``.

        Args:
            experiment: experiment that has just finished
        """
        logdir_src = Path(experiment.logdir)
        logdir_dst = Path(wandb.run.dir)

        exclude = ("wandb", "checkpoints")
        # Match against the entry *name* only. Matching against the whole
        # path would skip every entry whenever a parent directory happens
        # to contain one of the excluded substrings.
        entries = [
            entry
            for entry in logdir_src.glob("*")
            if not any(token in entry.name for token in exclude)
        ]

        for entry in entries:
            source = str(entry.absolute())
            target = logdir_dst / entry.name
            if entry.is_dir():
                # ``copytree`` requires a non-existing destination, so a
                # stale copy from an earlier call is removed first.
                if target.is_dir():
                    shutil.rmtree(str(target))
                shutil.copytree(source, str(target))
            else:
                shutil.copy2(source, str(target))

        checkpoints_src = logdir_src / "checkpoints"
        checkpoints_dst = logdir_dst / "checkpoints"
        os.makedirs(checkpoints_dst, exist_ok=True)

        # A set removes files matched by more than one pattern.
        checkpoint_paths = set()
        for pattern in self.checkpoints_glob:
            checkpoint_paths.update(checkpoints_src.glob(pattern))

        for checkpoint_path in sorted(checkpoint_paths):
            shutil.copy2(
                str(checkpoint_path.absolute()),
                str(checkpoints_dst / checkpoint_path.name),
            )

    def _run_batch(self, batch):
        """
        Run the parent batch step, then optionally log batch metrics.

        Args:
            batch: batch forwarded unchanged to ``Runner._run_batch``
        """
        super()._run_batch(batch=batch)
        if self.log_on_batch_end:
            mode = self.state.loader_name
            metrics = self.state.metric_manager.batch_values
            self._log_metrics(
                metrics=metrics, mode=mode, suffix=self.batch_log_suffix
            )

    def _run_epoch(self, stage: str, epoch: int):
        """
        Run the parent epoch step, then optionally log epoch metrics.

        Args:
            stage: stage name forwarded to ``Runner._run_epoch``
            epoch: epoch index forwarded to ``Runner._run_epoch``
        """
        super()._run_epoch(stage=stage, epoch=epoch)
        if self.log_on_epoch_end:
            epoch_values = self.state.metric_manager.epoch_values
            for mode, metrics in epoch_values.items():
                self._log_metrics(
                    metrics=metrics, mode=mode, suffix=self.epoch_log_suffix
                )

    def run_experiment(
        self,
        experiment: Experiment,
        check: bool = False
    ):
        """
        Run ``experiment`` wrapped by the wandb pre- and post-hooks.

        Args:
            experiment: experiment to run
            check: forwarded unchanged to ``Runner.run_experiment``
        """
        self._pre_experiment_hook(experiment=experiment)
        super().run_experiment(experiment=experiment, check=check)
        self._post_experiment_hook(experiment=experiment)
class SupervisedWandbRunner(WandbRunner, SupervisedRunner):
    """
    Runner combining ``WandbRunner`` and ``SupervisedRunner`` through
    multiple inheritance; it defines no members of its own.
    """
# Public names exported by a star-import of this module.
__all__ = [
    "WandbRunner",
    "SupervisedWandbRunner",
]