Shortcuts

Source code for catalyst.contrib.datasets.cv.fastai

from typing import Iterable, Tuple
import os

from catalyst.contrib.datasets.functional import download_and_extract_archive
from catalyst.data.cv import ImageFolderDataset


class ImageClassificationDataset(ImageFolderDataset):
    """
    Base class for datasets with the following structure:

    .. code-block:: bash

        path/to/dataset/
        |-- train/
        |   |-- class1/  # folder of N images
        |   |   |-- train_image11
        |   |   |-- train_image12
        |   |   ...
        |   |   `-- train_image1N
        |   ...
        |   `-- classM/  # folder of K images
        |       |-- train_imageM1
        |       |-- train_imageM2
        |       ...
        |       `-- train_imageMK
        `-- val/
            |-- class1/  # folder of P images
            |   |-- val_image11
            |   |-- val_image12
            |   ...
            |   `-- val_image1P
            ...
            `-- classM/  # folder of T images
                |-- val_imageM1
                |-- val_imageM2
                ...
                `-- val_imageMT

    Subclasses are expected to set ``name`` and, if they want to support
    ``download=True``, ``resources``.
    """

    # name of dataset folder (joined onto ``root`` to locate the dataset)
    name: str

    # list of (url, md5 hash) tuples representing files to download;
    # ``None`` means the dataset declares nothing to download
    resources: Iterable[Tuple[str, str]] = None

    def __init__(
        self, root: str, train: bool = True, download: bool = False, **kwargs
    ):
        """Constructor method for the ``ImageClassificationDataset`` class.

        Args:
            root: root directory of dataset
            train: if ``True``, creates dataset from ``train/`` subfolder,
                otherwise from ``val/``
            download: if ``True``, downloads the dataset from
                the internet and puts it in root directory. If dataset
                is already downloaded (``root/name`` exists), it is not
                downloaded again
            **kwargs: extra keyword arguments forwarded unchanged to the
                parent class constructor

        Raises:
            TypeError: if a download is required (``download=True`` and
                ``root/name`` does not exist) but ``resources`` is ``None``
        """
        dataset_dir = os.path.join(root, self.name)

        # download dataset if needed
        if download and not os.path.exists(dataset_dir):
            # Fail early, before touching the filesystem, with a message
            # that names the actual problem instead of the opaque
            # "'NoneType' object is not iterable" from the loop below.
            if self.resources is None:
                raise TypeError(
                    f"{type(self).__name__}.resources is None: "
                    "nothing to download; define `resources` as an iterable "
                    "of (url, md5) tuples or pass download=False"
                )

            os.makedirs(root, exist_ok=True)

            # download files
            for url, md5 in self.resources:
                # archive file name is the last path component of the url
                filename = url.rpartition("/")[2]
                download_and_extract_archive(
                    url, download_root=root, filename=filename, md5=md5
                )

        rootpath = os.path.join(dataset_dir, "train" if train else "val")
        super().__init__(rootpath=rootpath, **kwargs)