Engines

An overview of the available engines can be found in the examples/engines section.
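
For orientation, here is a minimal end-to-end sketch of how an engine plugs into a training run. The toy model, data, and hyperparameters are illustrative assumptions, not part of the engines API; only the engine argument is the point.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl

# toy data and model, purely for illustration
X, y = torch.randn(64, 16), torch.randint(0, 2, (64,))
loaders = {"train": DataLoader(TensorDataset(X, y), batch_size=8)}
model = nn.Linear(16, 2)

runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam(model.parameters()),
    loaders=loaders,
    engine=dl.DeviceEngine("cpu"),  # swap in any engine from this page
    num_epochs=1,
)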

AMP

AMPEngine

class catalyst.engines.amp.AMPEngine(device: str = 'cuda', scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

PyTorch AMP single training device engine.

Parameters

device – device to use, default is “cuda”.

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.AMPEngine("cuda:1"),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.AMPEngine("cuda:1")
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: AMPEngine
    device: cuda:1

stages:
    ...
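
If you need to tune gradient scaling, scaler_kwargs are forwarded to torch.cuda.amp.GradScaler. A short sketch, where the particular values are illustrative assumptions rather than recommendations:

from catalyst import dl

# illustrative GradScaler settings, not tuned recommendations
engine = dl.AMPEngine(
    "cuda:0",
    scaler_kwargs={"init_scale": 2 ** 12, "growth_interval": 1000},
)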

DataParallelAMPEngine

class catalyst.engines.amp.DataParallelAMPEngine(scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.amp.AMPEngine

AMP multi-GPU training device engine.

Parameters

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DataParallelAMPEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelAMPEngine()
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DataParallelAMPEngine

stages:
    ...

DistributedDataParallelAMPEngine

class catalyst.engines.amp.DistributedDataParallelAMPEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None, scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DistributedDataParallelEngine

Distributed AMP multi-GPU training device engine.

Parameters

address – master node address used to initialize distributed training.

port – master node port used to initialize distributed training.

ddp_kwargs – parameters for torch.nn.parallel.DistributedDataParallel.

process_group_kwargs – parameters for torch.distributed.init_process_group.

scaler_kwargs – parameters for torch.cuda.amp.GradScaler. Possible parameters: https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelAMPEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelAMPEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"port": 12345},
            scaler_kwargs={"growth_factor": 1.5}
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelAMPEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        port: 12345
    scaler_kwargs:
        growth_factor: 1.5

stages:
    ...

Apex

APEXEngine

class catalyst.engines.apex.APEXEngine(device: str = 'cuda', apex_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Apex single training device engine.

Parameters

device – device to use, default is “cuda”.

apex_kwargs – parameters for apex.amp.initialize (for example, opt_level or keep_batchnorm_fp32). Possible parameters: https://nvidia.github.io/apex/amp.html

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.APEXEngine(apex_kwargs=dict(opt_level="O1", keep_batchnorm_fp32=False)),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.APEXEngine(apex_kwargs=dict(opt_level="O1", keep_batchnorm_fp32=False))
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: APEXEngine
    apex_kwargs:
        opt_level: O1
        keep_batchnorm_fp32: false

stages:
    ...
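
The apex_kwargs above are forwarded to apex.amp.initialize, so any of its arguments (opt_level, keep_batchnorm_fp32, loss_scale, and so on) can be used. A short sketch with a different, purely illustrative optimization level:

from catalyst import dl

# assumes apex_kwargs are passed through to apex.amp.initialize
engine = dl.APEXEngine(
    apex_kwargs=dict(opt_level="O2", loss_scale="dynamic"),
)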

DataParallelApexEngine

catalyst.engines.apex.DataParallelApexEngine

alias of catalyst.engines.apex.DataParallelAPEXEngine

DistributedDataParallelApexEngine

catalyst.engines.apex.DistributedDataParallelApexEngine

alias of catalyst.engines.apex.DistributedDataParallelAPEXEngine

DeepSpeed

DistributedDataParallelDeepSpeedEngine

class catalyst.engines.deepspeed.DistributedDataParallelDeepSpeedEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None, deepspeed_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Distributed DeepSpeed multi-GPU training device engine.

Parameters

address – master node address used to initialize distributed training.

port – master node port used to initialize distributed training.

process_group_kwargs – parameters for torch.distributed.init_process_group.

deepspeed_kwargs – parameters for DeepSpeed initialization; the config key takes a regular DeepSpeed configuration dictionary (see the examples below).

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelDeepSpeedEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelDeepSpeedEngine(
            address="0.0.0.0",
            port=23234,
            process_group_kwargs={"port": 12345},
            deepspeed_kwargs={"config": {"train_batch_size": 64}}
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelDeepSpeedEngine
    address: 0.0.0.0
    port: 23234
    process_group_kwargs:
        port: 12345
    deepspeed_kwargs:
        config:
            train_batch_size: 64

stages:
    ...
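
The config entry inside deepspeed_kwargs is a regular DeepSpeed configuration dictionary, so other DeepSpeed options can be set alongside train_batch_size. A short sketch with a slightly richer, purely illustrative config:

from catalyst import dl

engine = dl.DistributedDataParallelDeepSpeedEngine(
    deepspeed_kwargs={
        "config": {
            "train_batch_size": 64,
            "fp16": {"enabled": True},          # DeepSpeed mixed precision
            "zero_optimization": {"stage": 1},  # ZeRO optimizer state sharding
        }
    },
)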

FairScale

PipelineParallelFairScaleEngine

class catalyst.engines.fairscale.PipelineParallelFairScaleEngine(pipe_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

FairScale pipeline-parallel multi-GPU training device engine.

Parameters

pipe_kwargs – parameters for fairscale.nn.Pipe. Docs for fairscale.nn.Pipe: https://fairscale.readthedocs.io/en/latest/api/nn/pipe.html

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.PipelineParallelFairScaleEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.PipelineParallelFairScaleEngine(
            pipe_kwargs={"balance": [3, 1]},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: PipelineParallelFairScaleEngine
    pipe_kwargs:
        balance: [3, 1]

stages:
    ...
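
fairscale.nn.Pipe expects the model to be an nn.Sequential, and balance lists how many consecutive sub-modules go to each device. A short sketch of a toy model that matches balance: [3, 1]; the layer sizes are illustrative:

from torch import nn
from catalyst import dl

# four sub-modules: with balance=[3, 1] the first three stay on the first GPU
# and the last one is placed on the second GPU
model = nn.Sequential(
    nn.Linear(16, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.Linear(64, 2),
)
engine = dl.PipelineParallelFairScaleEngine(pipe_kwargs={"balance": [3, 1]})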

SharedDataParallelFairScaleEngine

class catalyst.engines.fairscale.SharedDataParallelFairScaleEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Distributed FairScale multi-GPU training device engine.

Parameters

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.SharedDataParallelFairScaleEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.SharedDataParallelFairScaleEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"port": 12345},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: SharedDataParallelFairScaleEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        port: 12345

stages:
    ...

SharedDataParallelFairScaleAMPEngine

class catalyst.engines.fairscale.SharedDataParallelFairScaleAMPEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None, scaler_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.fairscale.SharedDataParallelFairScaleEngine

Distributed FairScale multi-GPU training device engine with AMP support.

Parameters

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.SharedDataParallelFairScaleAMPEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.SharedDataParallelFairScaleAMPEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"port": 12345},
            scaler_kwargs={"growth_factor": 1.5}
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: SharedDataParallelFairScaleAMPEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        port: 12345
    scaler_kwargs:
        growth_factor: 1.5

stages:
    ...

FullySharedDataParallelFairScaleEngine

class catalyst.engines.fairscale.FullySharedDataParallelFairScaleEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.fairscale.SharedDataParallelFairScaleEngine

Distributed FairScale multi-GPU training device engine with fully sharded data parallel support.

Parameters

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.FullySharedDataParallelFairScaleEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.FullySharedDataParallelFairScaleEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"port": 12345},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: FullySharedDataParallelFairScaleEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        port: 12345

stages:
    ...

Torch

DeviceEngine

class catalyst.engines.torch.DeviceEngine(device: Optional[str] = None)[source]

Bases: catalyst.core.engine.IEngine

Single training device engine.

Parameters

device – device to use, default is “cpu”.

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DeviceEngine("cuda:1"),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DeviceEngine("cuda:1")
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DeviceEngine
    device: cuda:1

stages:
    ...
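
Because the default device is “cpu”, the same engine covers both quick CPU-only debugging runs and training on a specific GPU:

from catalyst import dl

cpu_engine = dl.DeviceEngine()          # defaults to "cpu"
gpu_engine = dl.DeviceEngine("cuda:0")  # a specific GPU, if one is available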

DataParallelEngine

class catalyst.engines.torch.DataParallelEngine[source]

Bases: catalyst.engines.torch.DeviceEngine

Multi-GPU training device engine.

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DataParallelEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DataParallelEngine()
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DataParallelEngine

stages:
    ...

DistributedDataParallelEngine

class catalyst.engines.torch.DistributedDataParallelEngine(address: Optional[str] = None, port: Optional[Union[str, int]] = None, ddp_kwargs: Optional[Dict[str, Any]] = None, process_group_kwargs: Optional[Dict[str, Any]] = None)[source]

Bases: catalyst.engines.torch.DeviceEngine

Distributed multi-GPU training device engine.

Parameters

address – master node address used to initialize distributed training.

port – master node port used to initialize distributed training.

ddp_kwargs – parameters for torch.nn.parallel.DistributedDataParallel.

process_group_kwargs – parameters for torch.distributed.init_process_group.

Examples:

from catalyst import dl

runner = dl.SupervisedRunner()
runner.train(
    engine=dl.DistributedDataParallelEngine(),
    ...
)

from catalyst import dl

class MyRunner(dl.IRunner):
    # ...
    def get_engine(self):
        return dl.DistributedDataParallelEngine(
            address="0.0.0.0",
            port=23234,
            ddp_kwargs={"find_unused_parameters": False},
            process_group_kwargs={"backend": "nccl"},
        )
    # ...

args:
    logs: ...

model:
    _target_: ...
    ...

engine:
    _target_: DistributedDataParallelEngine
    address: 0.0.0.0
    port: 23234
    ddp_kwargs:
        find_unused_parameters: false
    process_group_kwargs:
        backend: nccl

stages:
    ...
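
The process_group_kwargs above are the usual torch.distributed.init_process_group arguments, so the backend can be switched to match the hardware. A short sketch for a CPU-only (gloo) debugging setup; the address and port values are illustrative:

from catalyst import dl

engine = dl.DistributedDataParallelEngine(
    address="127.0.0.1",
    port=12345,
    process_group_kwargs={"backend": "gloo"},  # "nccl" is the usual choice for GPUs
)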