Source code for ssds.dataset.dataset_factory

import torch
import torch.utils.data as data
import numpy as np
import os
from glob import glob

from ssds import dataset


def detection_collate(batch):
    """Custom collate fn for dealing with batches of images that have a
    different number of associated object annotations (bounding boxes).

    Arguments:
        batch: (tuple) A tuple of tensor images and lists of annotations

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (tensor) annotations for a given image stacked on their 0 dim
    """
    targets = []
    imgs = []
    num_detections = []
    for img, target in batch:
        imgs.append(img)
        targets.append(target)
        num_detections.append(target.shape[0])

    # Pad every image's annotations with -1 rows so they can be stacked
    # into a single [batch, max_dets, 5] tensor.
    torch_targets = -1 * torch.ones(
        [len(targets), max(max(num_detections), 1), 5], dtype=torch.float, device="cpu"
    )
    for i, target in enumerate(targets):
        num_dets = target.shape[0]
        torch_targets[i, :num_dets] = torch.from_numpy(target).float()
    return torch.stack(imgs, 0), torch_targets
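As a rough usage sketch (not part of the module), the collate function can be exercised on a hand-built batch; the image tensors and annotation arrays below are made-up placeholders, and each annotation row is assumed to be [x1, y1, x2, y2, class_id]:

import numpy as np
import torch

# Two fake samples: 3x300x300 images with 2 and 1 ground-truth boxes respectively.
batch = [
    (torch.zeros(3, 300, 300),
     np.array([[10, 10, 50, 50, 1], [20, 20, 80, 80, 2]], dtype=np.float32)),
    (torch.zeros(3, 300, 300),
     np.array([[5, 5, 40, 40, 3]], dtype=np.float32)),
]

images, targets = detection_collate(batch)
print(images.shape)   # torch.Size([2, 3, 300, 300])
print(targets.shape)  # torch.Size([2, 2, 5]); the second sample is padded with -1 rows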
def load_data(cfg, phase):
    r"""Create the dataloader based on the config file.

    * If phase == "train", it returns a dataloader built from cfg.DATASET.TRAIN_SETS that fetches samples randomly;
    * If phase == "test", it returns a dataloader built from cfg.DATASET.TEST_SETS that fetches samples sequentially.

    Args:
        cfg: the configs defined by cfg.DATASET
        phase (str): "train" or "test"

    Returns:
        dataloader
    """
    training = phase == "train"
    image_sets = cfg.TRAIN_SETS if training else cfg.TEST_SETS
    batch_size = cfg.TRAIN_BATCH_SIZE if training else cfg.TEST_BATCH_SIZE

    if "Dali" in cfg.DATASET:
        # Dali-based dataset classes build their own pipeline and act as the loader directly.
        data_loader = getattr(dataset, cfg.DATASET)(
            cfg=cfg,
            dataset_dir=cfg.DATASET_DIR,
            image_sets=image_sets,
            batch_size=batch_size,
            training=training,
        )
    else:
        _dataset = getattr(dataset, cfg.DATASET)(
            cfg=cfg,
            dataset_dir=cfg.DATASET_DIR,
            image_sets=image_sets,
            training=training,
        )
        data_loader = data.DataLoader(
            _dataset,
            batch_size,
            num_workers=cfg.NUM_WORKERS,
            shuffle=training,
            collate_fn=detection_collate,
            pin_memory=True,
        )
    return data_loader
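A minimal sketch of how load_data is typically driven from a config object; the config loader (ssds.core.config.cfg_from_file) and the YAML path below are assumptions and may differ from the actual project layout:

from ssds.core import config  # assumed config helper; the real module path may differ
from ssds.dataset.dataset_factory import load_data

cfg = config.cfg_from_file("experiments/cfgs/ssd_voc.yml")  # hypothetical config file
train_loader = load_data(cfg.DATASET, "train")  # shuffled, uses TRAIN_SETS / TRAIN_BATCH_SIZE
test_loader = load_data(cfg.DATASET, "test")    # sequential, uses TEST_SETS / TEST_BATCH_SIZE

for images, targets in train_loader:
    # images: [batch, C, H, W]; targets: [batch, max_dets, 5], padded with -1
    break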