Source code for catalyst.contrib.data.cv.dataset

from typing import Callable, Dict, Mapping, Optional
import glob
from pathlib import Path

from catalyst.contrib.data.cv.reader import ImageReader
from catalyst.contrib.data.reader import ReaderCompose, ScalarReader
from catalyst.contrib.utils.cv.image import has_image_extension
from catalyst.data.dataset.torch import PathsDataset


# @TODO: change arch to primitives-based
# contrib/data/... datasets/transforms/mixins/... cv/nlp
# it's much easier to handle dependencies
class ImageFolderDataset(PathsDataset):
    """
    Dataset class that derives targets from samples filesystem paths.
    Dataset structure should be the following:

    .. code-block:: bash

        rootpath/
        |-- class1/  # folder of N images
        |   |-- image11
        |   |-- image12
        |   ...
        |   `-- image1N
        ...
        `-- classM/  # folder of K images
            |-- imageM1
            |-- imageM2
            ...
            `-- imageMK

    """
    def __init__(
        self,
        rootpath: str,
        target_key: str = "targets",
        dir2class: Optional[Mapping[str, int]] = None,
        dict_transform: Optional[Callable[[Dict], Dict]] = None,
    ) -> None:
        """Constructor method for the :class:`ImageFolderDataset` class.

        Args:
            rootpath: root directory of the dataset
            target_key: key to use to store the target label
            dir2class (Mapping[str, int], optional): mapping
                from folder name to class index
            dict_transform (Callable[[Dict], Dict], optional):
                transforms to use on dict
        """
        # ``**`` without ``recursive=True`` matches exactly one directory
        # level, which fits the ``rootpath/class/image`` layout above
        files = glob.iglob(f"{rootpath}/**/*")
        images = sorted(filter(has_image_extension, files))

        if dir2class is None:
            # assign class indices alphabetically by folder name
            dirs = sorted({Path(f).parent.name for f in images})
            dir2class = {dirname: index for index, dirname in enumerate(dirs)}

        super().__init__(
            filenames=images,
            open_fn=ReaderCompose(
                [
                    ImageReader(input_key="image", rootpath=rootpath),
                    ScalarReader(
                        input_key=target_key,
                        output_key=target_key,
                        dtype=int,
                        default_value=-1,
                    ),
                ]
            ),
            # the target is derived from the sample's parent folder name
            label_fn=lambda fn: dir2class[Path(fn).parent.name],
            features_key="image",
            target_key=target_key,
            dict_transform=dict_transform,
        )
__all__ = ["ImageFolderDataset"]
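A minimal usage sketch for the class above. The ``./data`` path and the ``cat``/``dog`` folder names are hypothetical placeholders; the layout is assumed to match the class docstring. In practice a ``dict_transform`` that resizes and normalizes the images is usually needed before batching, since the default collation requires samples of equal shape.

.. code-block:: python

    from torch.utils.data import DataLoader

    from catalyst.contrib.data.cv.dataset import ImageFolderDataset

    # assumes ./data/cat/*.jpg and ./data/dog/*.jpg
    dataset = ImageFolderDataset(rootpath="./data")

    # each sample is a dict: {"image": <image>, "targets": <class index>}
    sample = dataset[0]
    print(sample["targets"])

    # class indices default to alphabetical folder order;
    # pass an explicit mapping to pin them:
    dataset = ImageFolderDataset(
        rootpath="./data",
        dir2class={"cat": 0, "dog": 1},
    )

    loader = DataLoader(dataset, batch_size=32, shuffle=True)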