import torch
import math
import ctypes
import nvidia.dali.ops as ops
import nvidia.dali.types as types
class DaliPipeline(object):
    r"""Data-augmentation graph for a DALI-backed detection dataset.

    All DALI operators are instantiated once in ``__init__`` and wired
    together in :meth:`predefined_graph`.  In training mode the graph
    performs an SSD-style random crop (fused into the decoder as an ROI
    decode), color jitter, a synchronized horizontal flip of image and
    boxes, and a random expand ("paste").  In inference mode it only
    decodes.  Both modes finish with a cubic resize to ``target_size``
    and mean/std normalization.

    Args:
        target_size: ``(width, height)`` the images are resized to.
        preproc_param: config node supplying ``MEAN``/``STD`` plus the
            augmentation hyper-parameters (``BRI_DELTA``, ``HUE_DELTA``,
            ``MAX_EXPAND_RATIO``, ``CONTRAST_RANGE``,
            ``SATURATION_RANGE``, ``CROP_ASPECT_RATIO``, ``CROP_SCALE``,
            ``CROP_ATTEMPTS``).
        training: when ``True``, build the full augmentation graph.
    """

    def __init__(self, target_size, preproc_param, training=False):
        self.training = training
        mean = preproc_param.MEAN
        std = preproc_param.STD
        bri_delta = preproc_param.BRI_DELTA
        hue_delta = preproc_param.HUE_DELTA
        max_expand_ratio = preproc_param.MAX_EXPAND_RATIO
        contrast_range = preproc_param.CONTRAST_RANGE
        saturation_range = preproc_param.SATURATION_RANGE
        crop_aspect_ratio = preproc_param.CROP_ASPECT_RATIO
        crop_scale = preproc_param.CROP_SCALE
        crop_attempts = preproc_param.CROP_ATTEMPTS
        # Decoders: the training decoder crops while decoding (ROI decode),
        # driven by the anchor/shape outputs of RandomBBoxCrop below.
        self.decode_train = ops.ImageDecoderSlice(device="mixed", output_type=types.RGB)
        self.decode_infer = ops.ImageDecoder(device="mixed", output_type=types.RGB)
        # SSD-style random crop on normalized (left, top, right, bottom) boxes.
        self.bbox_crop = ops.RandomBBoxCrop(
            device="cpu",
            bbox_layout="xyXY",
            scaling=crop_scale,
            aspect_ratio=crop_aspect_ratio,
            allow_no_crop=True,
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            num_attempts=crop_attempts,
        )
        # Color twist: per-sample uniform random factors for contrast,
        # brightness (expressed as a multiplier around 1.0), saturation
        # and hue.
        self.uniform_con = ops.Uniform(range=contrast_range)
        self.uniform_bri = ops.Uniform(
            range=(1.0 - bri_delta / 256.0, 1.0 + bri_delta / 256.0)
        )
        self.uniform_sat = ops.Uniform(range=saturation_range)
        self.uniform_hue = ops.Uniform(range=(-hue_delta, hue_delta))
        self.hsv = ops.Hsv(device="gpu")
        self.contrast = ops.BrightnessContrast(device="gpu")
        # Horizontal flip: one coin toss drives both image and box flips.
        self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True)
        self.img_flip = ops.Flip(device="gpu")
        self.coin_flip = ops.CoinFlip(probability=0.5)
        # Random expand ("paste"): place the image at a random position on a
        # larger canvas filled with the dataset mean.
        self.paste_pos = ops.Uniform(range=(0, 1))
        self.paste_ratio = ops.Uniform(range=(1, max_expand_ratio))
        self.paste = ops.Paste(device="gpu", fill_value=mean)
        self.bbox_paste = ops.BBoxPaste(device="cpu", ltrb=True)
        # Final resize (cubic) and mean/std normalization.
        self.resize = ops.Resize(
            device="gpu",
            interp_type=types.DALIInterpType.INTERP_CUBIC,
            resize_x=target_size[0],
            resize_y=target_size[1],
            save_attrs=True,
        )
        self.normalize = ops.CropMirrorNormalize(device="gpu", mean=mean, std=std)

    def predefined_graph(self, images, bboxes, labels):
        r"""Assemble the DALI graph for one batch.

        Args:
            images: encoded image DataNode (JPEG bytes).
            bboxes: normalized ltrb boxes DataNode.
            labels: per-box label DataNode.

        Returns:
            ``(images, bboxes, labels)`` DataNodes; images are resized and
            normalized on GPU, boxes stay normalized on CPU.
        """
        if self.training:
            # Random crop: boxes/labels are filtered on CPU, then the image
            # is decoded only inside the chosen crop window.
            crop_begin, crop_size, bboxes, labels = self.bbox_crop(bboxes, labels)
            images = self.decode_train(images, crop_begin, crop_size)
            # Color twist (hue/saturation, then brightness/contrast).
            images = self.hsv(
                images, hue=self.uniform_hue(), saturation=self.uniform_sat()
            )
            images = self.contrast(
                images, brightness=self.uniform_bri(), contrast=self.uniform_con()
            )
            # Horizontal flip, synchronized between image and boxes.
            flip = self.coin_flip()
            bboxes = self.bbox_flip(bboxes, horizontal=flip)
            images = self.img_flip(images, horizontal=flip)
            # Random expand ("paste") with matching box remapping.
            ratio = self.paste_ratio()
            px = self.paste_pos()
            py = self.paste_pos()
            images = self.paste(images.gpu(), paste_x=px, paste_y=py, ratio=ratio)
            bboxes = self.bbox_paste(bboxes, paste_x=px, paste_y=py, ratio=ratio)
        else:
            images = self.decode_infer(images)
        # save_attrs=True makes Resize also emit the pre-resize shapes;
        # they are not consumed here.
        images, attrs = self.resize(images)
        images = self.normalize(images)
        return images, bboxes, labels
class DaliDataset(object):
    r"""Data loader for data-parallel training backed by a DALI pipeline.

    Iterating yields ``(images, targets)`` per batch, where ``images`` is a
    float CUDA tensor stacked along dim 0 and ``targets`` is a
    ``(batch, max_dets, 5)`` tensor of ``(x, y, w, h, label)`` rows, padded
    with ``-1`` for samples with fewer detections.

    NOTE(review): ``self.pipe`` is consumed by ``__len__``/``__iter__`` but
    is never assigned in the code shown here — presumably a subclass or
    caller builds the DALI pipeline from ``self.pipeline_args``; verify.
    """

    def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False):
        self.training = training
        self.batch_size = batch_size
        self.target_size = cfg.IMAGE_SIZE
        self.preproc_param = cfg.PREPROC
        # NOTE(review): when exactly one device id is configured that id is
        # used directly, otherwise the current CUDA device is used — confirm
        # this matches the multi-GPU sharding scheme.
        self.device_ids = (
            torch.cuda.current_device() if len(cfg.DEVICE_ID) != 1 else cfg.DEVICE_ID[0]
        )
        self.num_shards = max(len(cfg.DEVICE_ID), 1)
        self.num_threads = cfg.NUM_WORKERS
        # Kwargs for building the actual DALI pipeline (done elsewhere).
        self.pipeline_args = {
            "target_size": self.target_size,
            "num_threads": self.num_threads,
            "num_shards": self.num_shards,
            "batch_size": self.batch_size,
            "training": self.training,
            "device_ids": self.device_ids,
            "preproc_param": self.preproc_param,
        }

    def __repr__(self):
        # Three separate list entries so the join actually produces one
        # attribute per line (the originals were adjacent string literals
        # that fused into a single element, losing the newlines).
        return "\n".join(
            [
                " loader: dali",
                " length: {}".format(self.__len__()),
                " target_size: {}".format(self.target_size),
            ]
        )

    def __len__(self):
        # Number of batches in one shard's epoch.
        return math.ceil(len(self.pipe) // self.num_shards / self.batch_size)

    def __iter__(self):
        r"""Run the pipeline and convert DALI outputs to PyTorch tensors.

        Yields:
            ``(data, torch_targets)`` — a stacked CUDA image tensor and a
            ``-1``-padded ``(batch, max_dets, 5)`` CUDA target tensor with
            boxes rescaled to pixels in ``(x, y, w, h)`` and labels shifted
            to the ``[0, n-1]`` range.
        """
        for _ in range(self.__len__()):
            data, num_detections = [], []
            dali_data, dali_boxes, dali_labels = self.pipe.run()
            for sample in range(len(dali_boxes)):
                num_detections.append(dali_boxes.at(sample).shape[0])
            # Pad with -1 so samples with fewer boxes are recognizable.
            torch_targets = -1 * torch.ones(
                [len(dali_boxes), max(max(num_detections), 1), 5]
            )
            for batch in range(self.batch_size):
                # Copy the DALI GPU tensor into a freshly allocated torch
                # CUDA tensor via its raw device pointer.
                dali_tensor = dali_data[batch]
                datum = torch.zeros(
                    dali_tensor.shape(), dtype=torch.float, device=torch.device("cuda")
                )
                c_type_pointer = ctypes.c_void_p(datum.data_ptr())
                dali_tensor.copy_to_external(c_type_pointer)
                # Rescale normalized boxes to pixel coordinates and arrange
                # labels in the target tensor.  (The original compared with
                # `is not 0`, an identity test against an int literal; use
                # a value comparison instead.)
                b_arr = dali_boxes.at(batch)
                num_dets = b_arr.shape[0]
                if num_dets != 0:
                    torch_bbox = torch.from_numpy(b_arr).float()
                    torch_bbox[:, ::2] *= self.target_size[0]
                    torch_bbox[:, 1::2] *= self.target_size[1]
                    # (l,t,r,b) -> (x,y,w,h) == (l,t, r-l, b-t)
                    torch_bbox[:, 2] -= torch_bbox[:, 0]
                    torch_bbox[:, 3] -= torch_bbox[:, 1]
                    torch_targets[batch, :num_dets, :4] = torch_bbox
                    l_arr = dali_labels.at(batch)
                    torch_label = torch.from_numpy(l_arr).float()
                    torch_label -= 1  # Rescale labels to [0,n-1] instead of [1,n]
                    torch_targets[batch, :num_dets, 4] = torch_label.squeeze()
                data.append(datum.unsqueeze(0))
            data = torch.cat(data, dim=0)
            torch_targets = torch_targets.cuda(non_blocking=True)
            yield data, torch_targets

    def reset_size(self, batch_size, target_size):
        r"""Not supported for the DALI loader.

        :meta private:
        """
        raise NotImplementedError()