# Source code for catalyst.metrics.functional._ndcg

from typing import List

import torch

from catalyst.metrics.functional._misc import process_recsys_components


def dcg(outputs: torch.Tensor, targets: torch.Tensor, gain_function="exp_rank") -> torch.Tensor:
    """
    Computes the per-position discounted gains used for
    Discounted cumulative gain (DCG).

    Graded relevance is used as a measure of usefulness,
    or gain, from examining a set of items.
    Gain is reduced at lower ranks.
    The returned tensor is NOT summed over the slate: sum it
    (optionally after slicing the first `k` columns) to obtain DCG@k.
    Reference:
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain

    Args:
        outputs: model outputs, logits
            with shape [batch_size; slate_length]
        targets: ground truth, labels
            with shape [batch_size; slate_length]
        gain_function:
            String indicates the gain function for the ground truth labels.
            Two options available:
            - `exp_rank`: gain is ``torch.pow(2, x) - 1``,
              discount at 0-based position ``i`` is ``1 / log2(i + 2)``
            - `linear_rank`: gain is ``x``,
              discount is ``1`` at position 0 and ``1 / log2(i + 1)`` afterwards
            On the default, `exp_rank` is used
            to emphasize on retrieving the relevant documents.

    Returns:
        dcg_score (torch.Tensor):
            The discounted gains tensor, one value per slate position

    Raises:
        ValueError: gain function can be either `exp_rank` or `linear_rank`

    Examples:

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        )
        # tensor([[2.0000, 2.0000, 0.6309, 0.0000]])

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="linear_rank",
        ).sum()
        # tensor(4.6309)

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        )
        # tensor([[3.0000, 1.8928, 0.5000, 0.0000]])

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.dcg(
            outputs = torch.tensor([
                [3, 2, 1, 0],
            ]),
            targets = torch.Tensor([
                [2.0, 2.0, 1.0, 0.0],
            ]),
            gain_function="exp_rank",
        ).sum()
        # tensor(5.3928)
    """
    targets_sort_by_outputs = process_recsys_components(outputs, targets)

    if gain_function not in ("exp_rank", "linear_rank"):
        raise ValueError("gain function can be either exp_rank or linear_rank")

    # 0-based rank positions along the slate dimension, on the targets' device.
    positions = torch.arange(
        targets_sort_by_outputs.shape[1],
        dtype=torch.float,
        device=targets_sort_by_outputs.device,
    )

    if gain_function == "exp_rank":
        gains = torch.pow(2, targets_sort_by_outputs) - 1
        discounts = 1.0 / torch.log2(positions + 2.0)
    else:  # "linear_rank"
        gains = targets_sort_by_outputs
        discounts = 1.0 / torch.log2(positions + 1.0)
        # log2(1) == 0 makes the first discount infinite; the top position is
        # not discounted. Slice assignment keeps an empty slate from raising.
        discounts[:1] = 1.0

    return gains * discounts


def ndcg(
    outputs: torch.Tensor, targets: torch.Tensor, topk: List[int], gain_function="exp_rank",
) -> List[torch.Tensor]:
    """
    Computes nDCG@topk for the specified values of `topk`.

    For every `k` the DCG of the predicted ordering, truncated to the first
    `k` positions, is divided by the DCG of the ideal ordering (targets
    ranked by themselves). Rows whose ideal DCG is zero get a score of 0.
    The per-row scores are then averaged.

    Args:
        outputs (torch.Tensor): model outputs, logits
            with shape [batch_size; slate_size]
        targets (torch.Tensor): ground truth, labels
            with shape [batch_size; slate_size]
        gain_function:
            string, name of the gain function for the ground truth labels.
            Two options available:
            - `exp_rank`: torch.pow(2, x) - 1
            - `linear_rank`: x
            On the default, `exp_rank` is used
            to emphasize on retrieving the relevant documents.
        topk (List[int]):
            Parameter for evaluation on top-k items

    Returns:
        results (List[torch.Tensor]):
            list with computed ndcg@topk, one scalar tensor per value in `topk`

    Examples:

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.ndcg(
            outputs = torch.tensor([
                [0.5, 0.2, 0.1],
                [0.5, 0.2, 0.1],
            ]),
            targets = torch.Tensor([
                [1.0, 0.0, 1.0],
                [1.0, 0.0, 1.0],
            ]),
            topk=[2],
            gain_function="exp_rank",
        )
        # [tensor(0.6131)]

    .. code-block:: python

        import torch
        from catalyst import metrics
        metrics.ndcg(
            outputs = torch.tensor([
                [0.5, 0.2, 0.1],
                [0.5, 0.2, 0.1],
            ]),
            targets = torch.Tensor([
                [1.0, 0.0, 1.0],
                [1.0, 0.0, 1.0],
            ]),
            topk=[2],
            gain_function="linear_rank",
        )
        # [tensor(0.5000)]
    """
    # The discounted gains do not depend on k, so compute them once and only
    # slice inside the loop.
    ideal_gains = dcg(targets, targets, gain_function)
    predicted_gains = dcg(outputs, targets, gain_function)

    results = []
    for k in topk:
        ideal_score = torch.sum(ideal_gains[:, :k], dim=1)
        predicted_score = torch.sum(predicted_gains[:, :k], dim=1)
        ndcg_score = predicted_score / ideal_score
        # A zero ideal DCG makes the ratio undefined (division by zero);
        # such rows are scored as 0.
        ndcg_score[ideal_score == 0] = 0.0
        results.append(torch.mean(ndcg_score))
    return results


# Names exported by `from <this module> import *`.
__all__ = ["dcg", "ndcg"]