Source code for ssds.dataset.dali_coco

import torch
import os
import math
import ctypes
from contextlib import redirect_stdout
from pycocotools.coco import COCO

import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline

from .dali_dataiterator import DaliDataset, DaliPipeline

class COCOPipeline(Pipeline, DaliPipeline):
    """Dali pipeline for COCO.

    Reads images and annotations through DALI's ``COCOReader`` and passes
    them to ``predefined_graph`` (not defined in this class; presumably
    provided by ``DaliPipeline`` -- confirm in ``dali_dataiterator``).

    Args:
        image_dir: root directory of the images (``file_root`` of the reader).
        annotations_file: path to the COCO annotation JSON file.
        cache_path: directory into which the reader dumps its meta files.
        batch_size: batch size of the DALI pipeline.
        target_size: forwarded to ``DaliPipeline.__init__``.
        preproc_param: forwarded to ``DaliPipeline.__init__``.
        num_threads: number of DALI worker threads; also used as the
            prefetch queue depth.
        num_shards: number of shards the dataset is split into.
        device_ids: passed as ``device_id`` to the DALI ``Pipeline``.
            NOTE(review): despite the plural name this looks like it is
            expected to be a single device id -- confirm against caller.
        training: when true, the reader is created with
            ``shuffle_after_epoch`` enabled; also forwarded to
            ``DaliPipeline.__init__``.
        shard_id: index of the shard this pipeline reads. Defaults to ``0``,
            which is the value that used to be hard-coded.
    """

    def __init__(
        self,
        image_dir,
        annotations_file,
        cache_path,
        batch_size,
        target_size,
        preproc_param,
        num_threads,
        num_shards,
        device_ids,
        training=False,
        shard_id=0,
    ):
        # The two bases take different arguments, so each one is
        # initialised explicitly rather than through super().
        Pipeline.__init__(
            self,
            batch_size=batch_size,
            num_threads=num_threads,
            device_id=device_ids,
            prefetch_queue_depth=num_threads,
            seed=42,  # fixed seed
        )
        DaliPipeline.__init__(
            self,
            target_size=target_size,
            preproc_param=preproc_param,
            training=training,
        )

        self.reader = ops.COCOReader(
            annotations_file=annotations_file,
            file_root=image_dir,
            num_shards=num_shards,
            shard_id=shard_id,
            ltrb=True,
            ratio=True,
            shuffle_after_epoch=training,
            save_img_ids=True,
            dump_meta_files=True,
            dump_meta_files_path=cache_path,
        )

    def define_graph(self):
        """Build the DALI graph: read one batch and apply ``predefined_graph``.

        The image ids produced by the reader are not forwarded.
        """
        images, bboxes, labels, _img_ids = self.reader()
        return self.predefined_graph(images, bboxes, labels)
class DaliCOCO(DaliDataset):
    """Data loader for data parallel using Dali for COCO-format datasets.

    The dataset directory is expected to be laid out as::

        <dataset_dir>/images/<image_set>/...
        <dataset_dir>/annotations/instances_<image_set>.json

    A ``cache`` directory is created under ``dataset_dir`` and handed to the
    pipeline as the location for the reader's meta files.

    Args:
        cfg: forwarded to ``DaliDataset.__init__``.
        dataset_dir: root directory of the dataset.
        image_sets: sequence containing exactly one image-set name.
        batch_size: forwarded to ``DaliDataset.__init__``.
        training: forwarded to ``DaliDataset.__init__``.

    Raises:
        ValueError: if ``image_sets`` does not contain exactly one entry.
    """

    def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False):
        super(DaliCOCO, self).__init__(cfg, dataset_dir, image_sets, batch_size, training)

        if len(image_sets) != 1:
            raise ValueError("For DaliCOCO dataset, the number of image_set has to be 1, currently it is {}".format(image_sets))

        image_set = image_sets[0]
        self.image_dir = os.path.join(dataset_dir, "images", image_set)
        self.annotations_file = os.path.join(
            dataset_dir, "annotations", "instances_{}.json".format(image_set)
        )
        self.cache_path = os.path.join(dataset_dir, "cache")
        # exist_ok avoids the check-then-create race when several workers
        # start at the same time and all try to create the cache directory.
        os.makedirs(self.cache_path, exist_ok=True)

        self.pipe = self._build_coco_pipeline()

        # pycocotools prints progress messages while loading; silence them.
        with redirect_stdout(None):
            self.coco = COCO(self.annotations_file)
        self.ids = list(self.coco.imgs.keys())

    def _build_coco_pipeline(self):
        """Create and build a ``COCOPipeline`` from the current settings.

        Uses ``self.pipeline_args``, which is not assigned in this class
        (presumably set by ``DaliDataset.__init__`` -- confirm).
        """
        pipe = COCOPipeline(
            image_dir=self.image_dir,
            annotations_file=self.annotations_file,
            cache_path=self.cache_path,
            **self.pipeline_args
        )
        pipe.build()
        return pipe

    def __len__(self):
        """Return the number of batches per shard.

        The image count is floor-divided by ``num_shards`` first, then the
        per-shard count is divided by ``batch_size`` and rounded up.
        """
        return math.ceil(len(self.ids) // self.num_shards / self.batch_size)

    def reset_size(self, batch_size, target_size):
        """Rebuild the pipeline with a new batch size and target size.

        Args:
            batch_size: new batch size.
            target_size: new target size.
        """
        self.batch_size = batch_size
        self.target_size = target_size
        self.pipeline_args["batch_size"] = batch_size
        self.pipeline_args["target_size"] = target_size

        # Drop the reference to the old pipeline before creating the new one.
        del self.pipe
        self.pipe = self._build_coco_pipeline()