Multiple input and output keys
Catalyst supports models with multiple input arguments and multiple outputs.
Suppose that we need to train a siamese network. First, we need to create a dataset that yields pairs of images together with a same-class indicator, which can later be used in a contrastive loss.
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
class SiameseDataset(Dataset):
    """Dataset that yields pairs of images with a same-class indicator.

    Every item is a dict with three keys:

    * ``"first"`` - the image selected by the requested index,
    * ``"second"`` - an image paired with it,
    * ``"labels"`` - float tensor of shape ``[1]`` holding ``1.0`` when the
      pair was drawn from the same class and ``0.0`` otherwise.

    The image loading itself is left as ``...`` placeholders and has to be
    filled in for a concrete data source.
    """

    def __init__(self, images, labels):
        """Store the image collection and the matching class labels.

        Args:
            images: collection of images (or image locations); only its
                length is used by the code shown here.
            labels: class labels, presumably aligned with ``images`` -
                verify against the actual loading code.
        """
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Build one (image, paired image, same-class flag) sample.

        Args:
            idx: index of the first image of the pair.

        Returns:
            dict with keys ``"first"``, ``"second"`` and ``"labels"``.
        """
        original_image = ...  # load image using `idx`

        # Decide by a fair coin flip whether the pair shares a class.
        # Converted to a plain Python bool so that the tensor construction
        # below does not depend on how torch treats NumPy bool scalars.
        is_same = bool(np.random.uniform() >= 0.5)
        if is_same:
            pair_image = ...  # load image from the same class and with index != `idx`
        else:
            pair_image = ...  # load image from another class

        # Explicit float32 tensor of shape [1] (1.0 - same class, 0.0 - different).
        label = torch.tensor([float(is_same)], dtype=torch.float32)
        return {"first": original_image, "second": pair_image, "labels": label}
Next, define the contrastive loss:
import torch.nn as nn
class ContrastiveLoss(nn.Module):
    """Margin-based contrastive loss over precomputed pair distances.

    A label of ``1`` marks a pair from the same class and ``0`` a pair from
    different classes. Same-class pairs are penalised by their squared
    distance; different-class pairs are penalised only while they are closer
    than ``margin``.
    """

    def __init__(self, margin=1.0):
        """
        Args:
            margin: distance beyond which a different-class pair
                no longer contributes to the loss.
        """
        super().__init__()
        self.margin = margin

    def forward(self, l2_distance, labels, **kwargs):
        """Compute the mean contrastive loss for a batch of pairs.

        Args:
            l2_distance (torch.Tensor): distances between paired embeddings,
                any shape with ``B`` elements (e.g. ``[B,]``).
            labels (torch.Tensor): same-class indicators (1 - same class,
                0 - different class), any shape with ``B`` elements
                (e.g. ``[B,]`` or ``[B, 1]``).
            **kwargs: ignored; accepted so extra batch keys can be passed.

        Returns:
            torch.Tensor: scalar loss value.
        """
        # Flatten both inputs: labels of shape [B, 1] would otherwise
        # broadcast against distances of shape [B] into a [B, B] matrix.
        distance = l2_distance.reshape(-1)
        is_same = labels.reshape(-1).to(distance.dtype)

        # Different-class pairs are pushed apart only while closer than margin.
        hinge = (self.margin - distance).clamp(min=0.0)

        per_pair = is_same * distance.pow(2) + (1.0 - is_same) * hinge.pow(2)
        loss = 0.5 * per_pair.mean()
        return loss
Suppose you have a model that accepts two tensors, first and second, and returns the embeddings of both input batches together with the distance between them:
import torch.nn as nn
class SiameseNet(nn.Module):
    """Siamese network: one shared encoder applied to both inputs.

    The encoder is a two-layer perceptron, so inputs are expected as feature
    vectors of size ``in_features`` (e.g. flattened images).
    """

    def __init__(self, in_features, out_features):
        """
        Args:
            in_features: size of each input feature vector.
            out_features: size of the produced embedding.
        """
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features, in_features * 2),
            nn.ReLU(),
            nn.Linear(in_features * 2, out_features),
        )

    def get_embeddings(self, batch):
        """Generate embeddings for a given batch.

        Args:
            batch (torch.Tensor): batch of feature vectors,
                expected shapes - [B, in_features].

        Returns:
            embeddings (torch.Tensor) for a given batch,
                output shapes - [B, out_features].
        """
        return self.layers(batch)

    def forward(self, first, second):
        """Forward pass.

        Args:
            first (torch.Tensor): first batch of feature vectors,
                expected shapes - [B, in_features].
            second (torch.Tensor): second batch of feature vectors,
                expected shapes - [B, in_features].

        Returns:
            embeddings (torch.Tensor) for the first batch,
                output shapes - [B, out_features]
            embeddings (torch.Tensor) for the second batch,
                output shapes - [B, out_features]
            l2 distance (torch.Tensor) between first and second embeddings,
                output shapes - [B,]
        """
        first = self.get_embeddings(first)
        second = self.get_embeddings(second)
        # Euclidean distance per pair: reduce over the embedding dimension.
        distance = (first - second).pow(2).sum(dim=1).sqrt()
        return first, second, distance
Then, using the Python API:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from catalyst import dl
# Data: one loader is reused for both the "train" and the "valid" stage.
dataset = SiameseDataset(...)
loader = DataLoader(dataset=dataset, batch_size=32, num_workers=1)
loaders = {
    "train": loader,
    "valid": loader,
}

# Model, loss and optimizer.
model = SiameseNet(...)
criterion = ContrastiveLoss(margin=1.1)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# The runner maps batch keys to model arguments and names the model outputs.
runner = dl.SupervisedRunner(
    # names of the model inputs; must match the arguments of `forward`
    input_key=["first", "second"],
    # names given to the model outputs; some of them are fed to the loss
    output_key=["first_emb", "second_emb", "l2_distance"],
    # key produced by the dataset
    target_key=["labels"],
    # key under which loss values are stored
    loss_key="loss",
)

# The loss is computed from the predicted distance and the dataset labels.
loss_callback = dl.CriterionCallback(
    input_key="l2_distance",
    target_key="labels",
    metric_key="loss",
)

runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    num_epochs=3,
    callbacks=[loss_callback],
    logdir="./siamese_logs",
    valid_loader="valid",
    valid_metric="loss",
    minimize_valid_metric=True,
    verbose=True,
    load_best_on_end=True,
)