Source code for catalyst.contrib.dl.runner.wandb

from typing import Dict, List  # isort:skip
import os
from pathlib import Path
import shutil

import wandb

from catalyst.dl import utils
from catalyst.dl.core import Experiment, Runner
from catalyst.dl.experiment import ConfigExperiment
from catalyst.dl.runner import SupervisedRunner


[docs]class WandbRunner(Runner):
    """
    Runner wrapper with wandb integration hooks.
    """
    @staticmethod
    def _log_metrics(metrics: Dict, mode: str, suffix: str = ""):
        def key_locate(key: str):
            """
            Wandb uses first symbol _ for it service purposes
            because of that fact, we can not send original metric names

            Args:
                key: metric name
            Returns:
                formatted metric name
            """
            if key.startswith("_"):
                return key[1:]
            return key

        metrics = {
            f"{key_locate(key)}/{mode}{suffix}": value
            for key, value in metrics.items()
        }
        wandb.log(metrics)

    def _init(
        self,
        log_on_batch_end: bool = False,
        log_on_epoch_end: bool = True,
        checkpoints_glob: List = None,
    ):
        self.log_on_batch_end = log_on_batch_end
        self.log_on_epoch_end = log_on_epoch_end
        self.checkpoints_glob = checkpoints_glob

        if (self.log_on_batch_end and not self.log_on_epoch_end) \
                or (not self.log_on_batch_end and self.log_on_epoch_end):
            self.batch_log_suffix = ""
            self.epoch_log_suffix = ""
        else:
            self.batch_log_suffix = "_batch"
            self.epoch_log_suffix = "_epoch"

    def _pre_experiment_hook(self, experiment: Experiment):
        monitoring_params = experiment.monitoring_params
        monitoring_params["dir"] = str(Path(experiment.logdir).absolute())

        log_on_batch_end: bool = \
            monitoring_params.pop("log_on_batch_end", False)
        log_on_epoch_end: bool = \
            monitoring_params.pop("log_on_epoch_end", True)
        checkpoints_glob: List[str] = \
            monitoring_params.pop("checkpoints_glob", [])
        self._init(
            log_on_batch_end=log_on_batch_end,
            log_on_epoch_end=log_on_epoch_end,
            checkpoints_glob=checkpoints_glob,
        )
        if isinstance(experiment, ConfigExperiment):
            exp_config = utils.flatten_dict(experiment.stages_config)
            wandb.init(**monitoring_params, config=exp_config)
        else:
            wandb.init(**monitoring_params)

    def _post_experiment_hook(self, experiment: Experiment):
        logdir_src = Path(experiment.logdir)
        logdir_dst = wandb.run.dir

        exclude = ["wandb", "checkpoints"]
        logdir_files = list(logdir_src.glob("*"))
        logdir_files = list(filter(
            lambda x: all(z not in str(x) for z in exclude),
            logdir_files))

        for subdir in logdir_files:
            if subdir.is_dir():
                os.makedirs(f"{logdir_dst}/{subdir.name}", exist_ok=True)
                shutil.rmtree(f"{logdir_dst}/{subdir.name}")
                shutil.copytree(
                    f"{str(subdir.absolute())}",
                    f"{logdir_dst}/{subdir.name}")
            else:
                shutil.copy2(
                    f"{str(subdir.absolute())}",
                    f"{logdir_dst}/{subdir.name}")

        checkpoints_src = logdir_src.joinpath("checkpoints")
        checkpoints_dst = Path(wandb.run.dir).joinpath("checkpoints")
        os.makedirs(checkpoints_dst, exist_ok=True)

        checkpoint_paths = []
        for glob in self.checkpoints_glob:
            checkpoint_paths.extend(list(checkpoints_src.glob(glob)))
        checkpoint_paths = list(set(checkpoint_paths))
        for checkpoint_path in checkpoint_paths:
            shutil.copy2(
                f"{str(checkpoint_path.absolute())}",
                f"{checkpoints_dst}/{checkpoint_path.name}")

    def _run_batch(self, batch):
        super()._run_batch(batch=batch)
        if self.log_on_batch_end:
            mode = self.state.loader_name
            metrics = self.state.metric_manager.batch_values
            self._log_metrics(
                metrics=metrics,
                mode=mode,
                suffix=self.batch_log_suffix
            )

    def _run_epoch(self, stage: str, epoch: int):
        super()._run_epoch(stage=stage, epoch=epoch)
        if self.log_on_epoch_end:
            for mode, metrics in \
                    self.state.metric_manager.epoch_values.items():
                self._log_metrics(
                    metrics=metrics,
                    mode=mode,
                    suffix=self.epoch_log_suffix
                )

[docs]    def run_experiment(
        self,
        experiment: Experiment,
        check: bool = False
    ):
        self._pre_experiment_hook(experiment=experiment)
        super().run_experiment(experiment=experiment, check=check)
        self._post_experiment_hook(experiment=experiment)


[docs]class SupervisedWandbRunner(WandbRunner, SupervisedRunner):
    pass


__all__ = ["WandbRunner", "SupervisedWandbRunner"]
Source code for catalyst.contrib.dl.runner.wandb

Catalyst

Navigation

Related Topics