Shortcuts

Source code for catalyst.metrics._ndcg

from typing import Any, Dict, List

import torch

from catalyst.metrics._additive import AdditiveValueMetric
from catalyst.metrics._metric import ICallbackBatchMetric
from catalyst.metrics.functional._ndcg import ndcg


[docs]class NDCGMetric(ICallbackBatchMetric): """ Calculates the Normalized discounted cumulative gain (NDCG) score given model outputs and targets The precision metric summarizes the fraction of relevant items Computes mean value of NDCG and it's approximate std value Args: topk_args: list of `topk` for ndcg@topk computing compute_on_call: if True, computes and returns metric value during metric call prefix: metric prefix suffix: metric suffix Examples: .. code-block:: python import torch from catalyst import metrics outputs = torch.Tensor([ [0.5, 0.2, 0.1], [0.5, 0.2, 0.1], ]) targets = torch.tensor([ [1.0, 0.0, 1.0], [1.0, 0.0, 1.0], ]) metric = metrics.NDCGMetric(topk_args=[1, 2]) metric.reset() metric.update(outputs, targets) metric.compute() # ( # (1.0, 0.6131471991539001), # mean for @01, @02 # (0.0, 0.0) # std for @01, @02 # ) metric.compute_key_value() # { # 'ndcg01': 1.0, # 'ndcg02': 0.6131471991539001, # 'ndcg': 1.0, # 'ndcg01/std': 0.0, # 'ndcg02/std': 0.0, # 'ndcg/std': 0.0 # } metric.reset() metric(outputs, targets) # ( # (1.0, 0.6131471991539001), # mean for @01, @02 # (0.0, 0.0) # std for @01, @02 # ) # ((0.5, 0.75), (0.0, 0.0)) # mean, std for @01, @03 .. code-block:: python import torch from torch.utils.data import DataLoader, TensorDataset from catalyst import dl # sample data num_users, num_features, num_items = int(1e4), int(1e1), 10 X = torch.rand(num_users, num_features) y = (torch.rand(num_users, num_items) > 0.5).to(torch.float32) # pytorch loaders dataset = TensorDataset(X, y) loader = DataLoader(dataset, batch_size=32, num_workers=1) loaders = {"train": loader, "valid": loader} # model, criterion, optimizer, scheduler model = torch.nn.Linear(num_features, num_items) criterion = torch.nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters()) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2]) # model training runner = dl.SupervisedRunner( input_key="features", output_key="logits", target_key="targets", loss_key="loss" ) runner.train( model=model, criterion=criterion, optimizer=optimizer, scheduler=scheduler, loaders=loaders, num_epochs=3, verbose=True, callbacks=[ dl.BatchTransformCallback( transform=torch.sigmoid, scope="on_batch_end", input_key="logits", output_key="scores" ), dl.CriterionCallback( input_key="logits", target_key="targets", metric_key="loss" ), dl.AUCCallback(input_key="scores", target_key="targets"), dl.HitrateCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)), dl.HitrateCallback( input_key="scores", target_key="targets", topk_args=(1, 3, 5) ), dl.MAPCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)), dl.NDCGCallback(input_key="scores", target_key="targets", topk_args=(1, 3, 5)), dl.OptimizerCallback(metric_key="loss"), dl.SchedulerCallback(), dl.CheckpointCallback( logdir="./logs", loader_key="valid", metric_key="loss", minimize=True ), ] ) .. note:: Please follow the `minimal examples`_ sections for more use cases. .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples """ def __init__( self, topk_args: List[int] = None, compute_on_call: bool = True, prefix: str = None, suffix: str = None, ): """Init NDCGMetric""" super().__init__(compute_on_call=compute_on_call, prefix=prefix, suffix=suffix) self.metric_name_mean = f"{self.prefix}ndcg{self.suffix}" self.metric_name_std = f"{self.prefix}ndcg{self.suffix}/std" self.topk_args: List[int] = topk_args or [1] self.additive_metrics: List[AdditiveValueMetric] = [ AdditiveValueMetric() for _ in range(len(self.topk_args)) ] def reset(self) -> None: """Reset all fields""" for metric in self.additive_metrics: metric.reset() def update(self, logits: torch.Tensor, targets: torch.Tensor) -> List[float]: """ Update metric value with ndcg for new data and return intermediate metrics values. Args: logits (torch.Tensor): tensor of logits targets (torch.Tensor): tensor of targets Returns: list of ndcg@k values """ values = ndcg(logits, targets, topk=self.topk_args) values = [v.item() for v in values] for value, metric in zip(values, self.additive_metrics): metric.update(value, len(targets)) return values def update_key_value(self, logits: torch.Tensor, targets: torch.Tensor) -> Dict[str, float]: """ Update metric value with ndcg for new data and return intermediate metrics values in key-value format. Args: logits (torch.Tensor): tensor of logits targets (torch.Tensor): tensor of targets Returns: dict of ndcg@k values """ values = self.update(logits=logits, targets=targets) output = { f"{self.prefix}ndcg{key:02d}{self.suffix}": value for key, value in zip(self.topk_args, values) } output[self.metric_name_mean] = output[f"{self.prefix}ndcg01{self.suffix}"] return output def compute(self) -> Any: """ Compute ndcg for all data Returns: list of mean values, list of std values """ means, stds = zip(*(metric.compute() for metric in self.additive_metrics)) return means, stds def compute_key_value(self) -> Dict[str, float]: """ Compute ndcg for all data and return results in key-value format Returns: dict of metrics """ means, stds = self.compute() output_mean = { f"{self.prefix}ndcg{key:02d}{self.suffix}": value for key, value in zip(self.topk_args, means) } output_std = { f"{self.prefix}ndcg{key:02d}{self.suffix}/std": value for key, value in zip(self.topk_args, stds) } output_mean[self.metric_name_mean] = output_mean[f"{self.prefix}ndcg01{self.suffix}"] output_std[self.metric_name_std] = output_std[f"{self.prefix}ndcg01{self.suffix}/std"] return {**output_mean, **output_std}
__all__ = ["NDCGMetric"]