
Source code for catalyst.data.cv.dataset

from typing import Callable, Dict, Mapping, Optional
import glob
from pathlib import Path

from catalyst.contrib.utils.cv.image import has_image_extension
from catalyst.data.cv.reader import ImageReader
from catalyst.data.dataset.torch import PathsDataset
from catalyst.data.reader import ReaderCompose, ScalarReader


class ImageFolderDataset(PathsDataset):
    """Dataset class that derives targets from samples' filesystem paths.

    Dataset structure should be the following:

    .. code-block:: bash

        rootpath/
        |-- class1/  # folder of N images
        |   |-- image11
        |   |-- image12
        |   ...
        |   `-- image1N
        ...
        `-- classM/  # folder of K images
            |-- imageM1
            |-- imageM2
            ...
            `-- imageMK
    """
    def __init__(
        self,
        rootpath: str,
        target_key: str = "targets",
        dir2class: Optional[Mapping[str, int]] = None,
        dict_transform: Optional[Callable[[Dict], Dict]] = None,
    ) -> None:
        """Constructor method for the :class:`ImageFolderDataset` class.

        Args:
            rootpath: root directory of the dataset
            target_key: key to use to store the target label
            dir2class (Mapping[str, int], optional): mapping
                from folder name to class index
            dict_transform (Callable[[Dict], Dict], optional): transforms
                to use on dict
        """
        # Collect candidate files one level below ``rootpath`` (the class
        # folders) and keep only those with a known image extension.
        files = glob.iglob(f"{rootpath}/**/*")
        images = sorted(filter(has_image_extension, files))

        # Infer the folder-name -> class-index mapping if it was not provided.
        if dir2class is None:
            dirs = sorted({Path(f).parent.name for f in images})
            dir2class = {dirname: index for index, dirname in enumerate(dirs)}

        # Each sample is read as an image plus an integer target derived
        # from the name of its parent folder.
        super().__init__(
            filenames=images,
            open_fn=ReaderCompose(
                [
                    ImageReader(input_key="image", rootpath=rootpath),
                    ScalarReader(
                        input_key=target_key,
                        output_key=target_key,
                        dtype=int,
                        default_value=-1,
                    ),
                ]
            ),
            label_fn=lambda fn: dir2class[Path(fn).parent.name],
            features_key="image",
            target_key=target_key,
            dict_transform=dict_transform,
        )
__all__ = ["ImageFolderDataset"]
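A minimal usage sketch follows; the dataset path and folder names are hypothetical, and the returned dict keys assume the ``features_key``/``target_key`` defaults above:

    from catalyst.data.cv.dataset import ImageFolderDataset

    # Hypothetical layout: /data/pets/cat/*.jpg and /data/pets/dog/*.jpg
    dataset = ImageFolderDataset(rootpath="/data/pets")

    sample = dataset[0]        # dict with "image" and "targets" entries
    image = sample["image"]    # image array produced by ImageReader
    label = sample["targets"]  # class index derived from the parent folder name

    # The inferred folder-name -> index mapping can be overridden explicitly
    dataset = ImageFolderDataset(
        rootpath="/data/pets",
        dir2class={"cat": 0, "dog": 1},
    )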