Source code for catalyst.contrib.nn.criterion.triplet

import torch
from torch import nn

from .functional import triplet_loss


class TripletLoss(nn.Module):
    """Triplet loss with hard positive/negative mining.

    Reference:
        Code imported from https://github.com/NegatioN/OnlineMiningTripletLoss
    """

    def __init__(self, margin: float = 0.3):
        """
        Args:
            margin (float): margin for triplet
        """
        super().__init__()
        self.margin = margin
        self.ranking_loss = nn.MarginRankingLoss(margin=margin)

    def _pairwise_distances(self, embeddings, squared=False):
        """Compute the 2D matrix of distances between all the embeddings.

        Args:
            embeddings: tensor of shape (batch_size, embed_dim)
            squared (bool): if true, output is the pairwise squared
                euclidean distance matrix; if false, output is the
                pairwise euclidean distance matrix

        Returns:
            torch.Tensor: pairwise matrix of size (batch_size, batch_size)
        """
        # Get the dot product between all embeddings;
        # its diagonal holds the squared L2 norm of each embedding.
        # This also provides more numerical stability
        # (the diagonal of the distance matrix will be exactly 0).
        square = torch.mm(embeddings, embeddings.t())
        # shape (batch_size,)
        diag = torch.diag(square)

        # Compute the pairwise distance matrix as we have:
        # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2
        # shape (batch_size, batch_size)
        distances = diag.view(-1, 1) - 2.0 * square + diag.view(1, -1)

        # Because of computation errors, some distances
        # might be negative, so we clamp everything to >= 0.0
        distances[distances < 0] = 0

        if not squared:
            # Because the gradient of sqrt is infinite
            # when distances == 0.0 (e.g. on the diagonal),
            # we add a small epsilon where distances == 0.0
            mask = distances.eq(0).float()
            distances = distances + mask * 1e-16
            distances = (1.0 - mask) * torch.sqrt(distances)

        return distances

    def _get_anchor_positive_triplet_mask(self, labels):
        """Return a 2D mask where mask[a, p] is True
        if a and p are distinct and have the same label.

        Args:
            labels: int `Tensor` with shape [batch_size]

        Returns:
            mask: bool `Tensor` with shape [batch_size, batch_size]
        """
        # the mask and the labels should be on
        # the same device, otherwise - exception
        indices_equal = torch.eye(labels.size(0)).bool().to(labels.device)

        # Check that i and j are distinct
        indices_not_equal = ~indices_equal

        # Check if labels[i] == labels[j]
        # Uses broadcasting where the 1st argument
        # has shape (1, batch_size) and the 2nd (batch_size, 1)
        labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1)

        return labels_equal & indices_not_equal

    def _get_anchor_negative_triplet_mask(self, labels):
        """Return a 2D mask where mask[a, n] is True
        if a and n have distinct labels.

        Args:
            labels: int `Tensor` with shape [batch_size]

        Returns:
            mask: bool `Tensor` with shape [batch_size, batch_size]
        """
        # Check if labels[i] != labels[k]
        # Uses broadcasting where the 1st argument
        # has shape (1, batch_size) and the 2nd (batch_size, 1)
        return ~(labels.unsqueeze(0) == labels.unsqueeze(1))

    def _batch_hard_triplet_loss(
        self, embeddings, labels, margin, squared=True,
    ):
        """Build the triplet loss over a batch of embeddings.

        For each anchor, we take the hardest positive
        and the hardest negative to form a triplet.

        Args:
            embeddings: tensor of shape (batch_size, embed_dim)
            labels: labels of the batch, of size (batch_size,)
            margin: margin for triplet loss
            squared (bool): if true, use the pairwise squared
                euclidean distance matrix; if false, use the
                pairwise euclidean distance matrix

        Returns:
            triplet_loss: scalar tensor containing the triplet loss
        """
        # Get the pairwise distance matrix
        pairwise_dist = self._pairwise_distances(embeddings, squared=squared)

        # For each anchor, get the hardest positive.
        # First, we need a mask for every valid positive
        # (a != p and label(a) == label(p))
        mask_anchor_positive = self._get_anchor_positive_triplet_mask(
            labels
        ).float()

        # Zero out any element where (a, p) is not a valid positive
        anchor_positive_dist = mask_anchor_positive * pairwise_dist

        # shape (batch_size, 1)
        hardest_positive_dist, _ = anchor_positive_dist.max(1, keepdim=True)

        # For each anchor, get the hardest negative.
        # First, we need a mask for every valid negative
        # (label(a) != label(n))
        mask_anchor_negative = self._get_anchor_negative_triplet_mask(
            labels
        ).float()

        # Add the maximum value in each row to the invalid
        # negatives (label(a) == label(n)) so they are never the minimum
        max_anchor_negative_dist, _ = pairwise_dist.max(1, keepdim=True)
        anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (
            1.0 - mask_anchor_negative
        )

        # shape (batch_size, 1)
        hardest_negative_dist, _ = anchor_negative_dist.min(1, keepdim=True)

        # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
        tl = hardest_positive_dist - hardest_negative_dist + margin
        tl[tl < 0] = 0
        triplet_loss = tl.mean()

        return triplet_loss

    def forward(self, embeddings, targets):
        """Forward propagation method for the triplet loss.

        Args:
            embeddings: tensor of shape (batch_size, embed_dim)
            targets: labels of the batch, of size (batch_size,)

        Returns:
            triplet_loss: scalar tensor containing the triplet loss
        """
        return self._batch_hard_triplet_loss(embeddings, targets, self.margin)
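
# Usage sketch: a minimal smoke test of TripletLoss on random data.
# The batch size, embedding size, and number of classes below are
# illustrative assumptions, not values prescribed by the module.
#
#     criterion = TripletLoss(margin=0.3)
#     embeddings = torch.rand(8, 128, requires_grad=True)
#     targets = torch.randint(0, 4, (8,))  # integer class labels
#     loss = criterion(embeddings, targets)  # scalar tensor
#     loss.backward()
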

class TripletLossV2(nn.Module):
    """@TODO: Docs. Contribution is welcome."""

    def __init__(self, margin=0.3):
        """
        Args:
            margin (float): margin for triplet.
        """
        super().__init__()
        self.margin = margin

    def forward(self, embeddings, targets):
        """@TODO: Docs. Contribution is welcome."""
        return triplet_loss(embeddings, targets, margin=self.margin)
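
# Usage sketch: TripletLossV2 delegates to the functional `triplet_loss`
# helper imported above, so it is called the same way as TripletLoss;
# the tensors are the same illustrative assumptions as in the sketch above.
#
#     criterion = TripletLossV2(margin=0.3)
#     loss = criterion(embeddings, targets)
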

class TripletPairwiseEmbeddingLoss(nn.Module):
    """TripletPairwiseEmbeddingLoss – proof of concept criterion.

    Still work in progress.

    @TODO: Docs. Contribution is welcome.
    """

    def __init__(self, margin: float = 0.3, reduction: str = "mean"):
        """
        Args:
            margin (float): margin parameter
            reduction (str): criterion reduction type
        """
        super().__init__()
        self.margin = margin
        self.reduction = reduction or "none"

    def forward(self, embeddings_pred, embeddings_true):
        """Work in progress.

        Args:
            embeddings_pred: predicted embeddings
                with shape [batch_size, embedding_size]
            embeddings_true: true embeddings
                with shape [batch_size, embedding_size]

        Returns:
            torch.Tensor: loss
        """
        device = embeddings_pred.device
        # s - state space
        # e - embeddings space
        # a - action space
        # [batch_size, embedding_size] x [batch_size, embedding_size]
        # -> [batch_size, batch_size]
        pairwise_similarity = torch.einsum(
            "se,ae->sa", embeddings_pred, embeddings_true
        )
        bs = embeddings_pred.shape[0]
        batch_idx = torch.arange(bs, device=device)
        # Push the diagonal far down so a sample is never
        # selected as its own hard negative
        negative_similarity = pairwise_similarity + torch.diag(
            torch.full([bs], -(10 ** 9), device=device)
        )
        # TODO: argsort, take k worst
        hard_negative_ids = negative_similarity.argmax(dim=-1)

        negative_similarities = pairwise_similarity[
            batch_idx, hard_negative_ids
        ]
        positive_similarities = pairwise_similarity[batch_idx, batch_idx]
        loss = torch.relu(
            self.margin - positive_similarities + negative_similarities
        )
        if self.reduction == "mean":
            loss = torch.sum(loss) / bs
        elif self.reduction == "sum":
            loss = torch.sum(loss)
        return loss
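
# Usage sketch: this criterion compares two aligned batches, where row i of
# `embeddings_pred` is the positive match for row i of `embeddings_true`;
# the shapes below are illustrative assumptions.
#
#     criterion = TripletPairwiseEmbeddingLoss(margin=0.3, reduction="mean")
#     embeddings_pred = torch.rand(8, 128, requires_grad=True)
#     embeddings_true = torch.rand(8, 128)
#     loss = criterion(embeddings_pred, embeddings_true)  # scalar tensor
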

__all__ = ["TripletLoss", "TripletLossV2", "TripletPairwiseEmbeddingLoss"]