Engines

AMP

AMPEngine

class catalyst.engines.amp.AMPEngine(device: str = 'cuda', scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

PyTorch AMP single training device engine.

Parameters

device – device to use, default is “cuda”.

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.AMPEngine("cuda:1"),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.AMPEngine("cuda:1")
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: AMPEngine
    device: cuda:1

stages:
    ...
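
The examples above only set the device; scaler_kwargs is forwarded to torch.cuda.amp.GradScaler, so the loss-scaling behavior can be tuned through the engine as well. A minimal sketch (the GradScaler values below are illustrative, not recommendations):

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # init_scale and growth_interval are standard torch.cuda.amp.GradScaler arguments
        return dl.AMPEngine(
            "cuda:0",
            scaler_kwargs={"init_scale": 2.0 ** 14, "growth_interval": 1000},
        )
    # ...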

DataParallelAMPEngine

class catalyst.engines.amp.DataParallelAMPEngine(scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.amp.AMPEngine

AMP multi-GPU training device engine.

Parameters

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DataParallelAMPEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelAMPEngine()
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DataParallelAMPEngine

stages:
    ...
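
DataParallelAMPEngine takes no device argument; like torch.nn.DataParallel, it works across the GPUs visible to the process. A common way to restrict which GPUs participate is the CUDA_VISIBLE_DEVICES environment variable, set before CUDA is initialized; a sketch of this (not engine-specific) pattern:

import os

# restrict the run to the first two visible GPUs; must be set before CUDA is initialized
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelAMPEngine()
    # ...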

DistributedDataParallelAMPEngine

class catalyst.engines.amp.DistributedDataParallelAMPEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None, scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DistributedDataParallelEngine

Distributed AMP multi-GPU training device engine.

Parameters

address – master process address for the distributed setup, passed to the process group initialization.

port – master process port for the distributed setup, passed to the process group initialization.

ddp_kwargs – parameters for torch.nn.parallel.DistributedDataParallel. Possible parameters: https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html

process_group_kwargs – parameters for torch.distributed.init_process_group. Possible parameters: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelAMPEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelAMPEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"port": 12345},
            scaler_kwargs={"growth_factor": 1.5}
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelAMPEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        port: 12345
    scaler_kwargs:
        growth_factor: 1.5

stages:
    ...
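
Assuming process_group_kwargs is forwarded to torch.distributed.init_process_group as described above, standard arguments such as backend or timeout can be passed through it; a sketch (the timeout value is illustrative):

from datetime import timedelta

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # backend and timeout are standard torch.distributed.init_process_group arguments
        return dl.DistributedDataParallelAMPEngine(
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"backend": "nccl", "timeout": timedelta(minutes=30)},
        )
    # ...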

Apex

APEXEngine

class catalyst.engines.apex.APEXEngine(device: str = 'cuda', apex_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Apex single training device engine.

Parameters

device – device to use, default is “cuda”.

apex_kwargs – parameters for apex.amp.initialize except models and optimizers (they will be forwarded automatically). Docs for apex.amp.initialize: https://nvidia.github.io/apex/amp.html#apex.amp.initialize

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.APEXEngine(apex_kwargs=dict(opt_level="O1", keep_batchnorm_fp32=False)),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.APEXEngine(apex_kwargs=dict(opt_level="O1", keep_batchnorm_fp32=False))
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: APEXEngine
    apex_kwargs:
        opt_level: O1
        keep_batchnorm_fp32: false

stages:
    ...
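
apex_kwargs accepts any apex.amp.initialize option, not only opt_level (O0 = pure FP32, O1 = patch-based mixed precision, O2 = “almost FP16”, O3 = pure FP16); options such as loss_scale or keep_batchnorm_fp32 are passed the same way. A sketch with O2 and dynamic loss scaling (illustrative, not a recommendation):

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # O2 casts model weights to FP16 while keeping batchnorm and master weights in FP32;
        # loss_scale="dynamic" is a standard apex.amp.initialize option
        return dl.APEXEngine(apex_kwargs=dict(opt_level="O2", loss_scale="dynamic"))
    # ...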

DataParallelApexEngine

class catalyst.engines.apex.DataParallelApexEngine(apex_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.apex.APEXEngine

Apex multi-GPU training device engine.

Parameters

apex_kwargs – parameters for apex.amp.initialize except models and optimizers (they will be forwarded automatically). Docs for apex.amp.initialize: https://nvidia.github.io/apex/amp.html#apex.amp.initialize

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DataParallelApexEngine(apex_kwargs=dict(opt_level="O1")),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelApexEngine(apex_kwargs=dict(opt_level="O1"))
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DataParallelApexEngine
    apex_kwargs:
        opt_level: O1

stages:
    ...

DistributedDataParallelApexEngine

class catalyst.engines.apex.DistributedDataParallelApexEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None, apex_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DistributedDataParallelEngine

Distributed Apex multi-GPU training device engine.

Parameters

address – master process address for the distributed setup, passed to the process group initialization.

port – master process port for the distributed setup, passed to the process group initialization.

ddp_kwargs – parameters for apex.parallel.DistributedDataParallel. Possible parameters: https://nvidia.github.io/apex/parallel.html

process_group_kwargs – parameters for torch.distributed.init_process_group. Possible parameters: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group

apex_kwargs – parameters for apex.amp.initialize except models and optimizers (they will be forwarded automatically). Docs for apex.amp.initialize: https://nvidia.github.io/apex/amp.html#apex.amp.initialize

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelApexEngine(
        ddp_kwargs={"allreduce_always_fp32": True},
        process_group_kwargs={"backend": "nccl"},
        apex_kwargs={"opt_level": "O1"},
    ),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelApexEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"allreduce_always_fp32": True},
            process_group_kwargs={"backend": "nccl"},
            apex_kwargs={"opt_level": "O1"},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelApexEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        allreduce_always_fp32: true
    process_group_kwargs:
        backend: nccl
    apex_kwargs:
        opt_level: O1

stages:
    ...
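
ddp_kwargs here is forwarded to apex.parallel.DistributedDataParallel (the example above already uses its allreduce_always_fp32 flag); delay_allreduce is another flag of that wrapper, which postpones all gradient allreduces until the end of the backward pass. A sketch:

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # delay_allreduce and allreduce_always_fp32 are apex.parallel.DistributedDataParallel flags
        return dl.DistributedDataParallelApexEngine(
            ddp_kwargs={"delay_allreduce": True, "allreduce_always_fp32": True},
            apex_kwargs={"opt_level": "O1"},
        )
    # ...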

Torch

DeviceEngine

class catalyst.engines.torch.DeviceEngine(device: Optional[str] = None)[source]

Bases: catalyst.core.engine.IEngine

Single training device engine.

Parameters

device – device to use, default is “cpu”.

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DeviceEngine("cuda:1"),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DeviceEngine("cuda:1")
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DeviceEngine
    device: cuda:1

stages:
    ...
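
Since the engine only needs a device string, a common pattern is to choose the device at runtime; a minimal sketch:

import torch

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # fall back to CPU when no GPU is available (e.g. for local debugging)
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        return dl.DeviceEngine(device)
    # ...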

DataParallelEngine

class catalyst.engines.torch.DataParallelEngine[source]

Bases: catalyst.engines.torch.DeviceEngine

Multi-GPU training device engine.

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DataParallelEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelEngine()
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DataParallelEngine

stages:
    ...

DistributedDataParallelEngine

class catalyst.engines.torch.DistributedDataParallelEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Distributed multi-GPU training device engine.

Parameters

address – master process address for the distributed setup, passed to the process group initialization.

port – master process port for the distributed setup, passed to the process group initialization.

ddp_kwargs – parameters for torch.nn.parallel.DistributedDataParallel. Possible parameters: https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html

process_group_kwargs – parameters for torch.distributed.init_process_group. Possible parameters: https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"backend": "nccl"},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        backend: nccl

stages:
    ...
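
The example sets find_unused_parameters to false, which matches the torch.nn.parallel.DistributedDataParallel default and is the cheaper option when every parameter receives a gradient on every step. If parts of the model are skipped on some forward passes (conditional branches, multi-task heads), DDP needs find_unused_parameters=True instead; a sketch:

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        # needed when some parameters do not take part in every forward pass
        return dl.DistributedDataParallelEngine(
            ddp_kwargs={"find_unused_parameters": True},
            process_group_kwargs={"backend": "nccl"},
        )
    # ...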