Source code for ssds.dataset.coco

import os
import sys
import numpy as np
import pickle
from pycocotools.coco import COCO

from .detection_dataset import DetectionDataset

class COCODataset(object):
    r"""COCO Dataset, used to extract the data from annotation file only.

    For the dataset defined in the cfg.DATASET.DATASET, please refer to
    :class:`.COCODetection`.

    The image paths and their annotations are collected into
    ``self.img_paths`` and ``self.anno`` (parallel lists, one entry per image).
    Parsed results are cached as pickle files under ``<dataset_dir>/cache``.

    Arguments:
        dataset_dir (str): the directory of coco dataset
        image_sets (list): list of image-set names. For each name the
            annotations are read from
            ``<dataset_dir>/annotations/instances_<name>.json`` and the images
            are expected under ``<dataset_dir>/images/<name>/``.
    """

    def __init__(self, dataset_dir, image_sets):
        self.dataset_dir = dataset_dir
        self.cache_path = os.path.join(dataset_dir, 'cache')
        self.image_sets = image_sets
        self.img_paths = []
        self.anno = []
        self.classes_names = []

        # exist_ok avoids the check-then-create race when several processes
        # construct the dataset at the same time.
        os.makedirs(self.cache_path, exist_ok=True)
        self._load_anno_files(dataset_dir, image_sets)

    def _load_anno_files(self, dataset_dir, image_sets):
        """Parse every annotation file and accumulate image paths and boxes.

        The class tables (``classes_names``, ``num_classes``,
        ``_class_to_ind``, ``_class_to_coco_cat_id``) are overwritten on each
        iteration, so after the loop they describe the last image set.
        """
        for coco_name in image_sets:
            annofile = os.path.join(
                dataset_dir, 'annotations', 'instances_' + coco_name + '.json')
            _COCO = COCO(annofile)
            cat_ids = _COCO.getCatIds()
            cats = _COCO.loadCats(cat_ids)
            indexes = _COCO.getImgIds()

            self.classes_names = tuple(c['name'] for c in cats)
            self.num_classes = len(self.classes_names)
            self._class_to_ind = dict(
                zip(self.classes_names, range(self.num_classes)))
            self._class_to_coco_cat_id = dict(zip(self.classes_names, cat_ids))

            self.img_paths.extend(self._load_coco_img_path(coco_name, indexes))
            self.anno.extend(
                self._load_coco_annotations(coco_name, indexes, _COCO))

    def _load_cached(self, cache_file, coco_name, label, build):
        """Return the pickled content of ``cache_file``, building it if absent.

        Arguments:
            cache_file (str): path of the pickle cache file
            coco_name (str): image-set name, used only in the log messages
            label (str): short description of the data for the log messages
            build (callable): zero-argument function producing the data when
                no cache file exists; its result is pickled to ``cache_file``
        """
        if os.path.exists(cache_file):
            # WARNING: unpickling can execute arbitrary code. Only point
            # dataset_dir at a cache directory you trust.
            with open(cache_file, 'rb') as fid:
                data = pickle.load(fid)
            print('{} {} loaded from {}'.format(coco_name, label, cache_file))
            return data

        print('parsing {} for {}'.format(label, coco_name))
        data = build()
        with open(cache_file, 'wb') as fid:
            pickle.dump(data, fid, pickle.HIGHEST_PROTOCOL)
        print('wrote {} to {}'.format(label, cache_file))
        return data

    def _load_coco_img_path(self, coco_name, indexes):
        """Return the image path of every id in ``indexes`` (cached on disk)."""
        cache_file = os.path.join(self.cache_path, coco_name + '_img_path.pkl')
        return self._load_cached(
            cache_file, coco_name, 'img path',
            lambda: [self.image_path_from_index(coco_name, index)
                     for index in indexes])

    def _load_coco_annotations(self, coco_name, indexes, _COCO):
        """Return the box array of every id in ``indexes`` (cached on disk)."""
        cache_file = os.path.join(self.cache_path, coco_name + '_gt_db.pkl')
        return self._load_cached(
            cache_file, coco_name, 'gt',
            lambda: [self.annotation_from_index(index, _COCO)
                     for index in indexes])

    def image_path_from_index(self, name, index):
        """Construct an image path from the image's "index" identifier.

        The file name is the index zero-padded to 12 digits plus ``.jpg``.
        Example image path for name='train2017', index=119993:
        ``<dataset_dir>/images/train2017/000000119993.jpg``

        Raises:
            AssertionError: if the constructed path does not exist.
        """
        file_name = str(index).zfill(12) + '.jpg'
        image_path = os.path.join(self.dataset_dir, 'images', name, file_name)
        assert os.path.exists(image_path), \
            'Path does not exist: {}'.format(image_path)
        return image_path

    def annotation_from_index(self, index, _COCO, toPercent=True):
        """Load the COCO bounding-box annotations of one image.

        Boxes are converted from COCO ``[x, y, w, h]`` to ``[x1, y1, x2, y2]``
        and clipped to the image; objects with non-positive area or an
        inverted box are dropped. Annotation ids are requested with
        ``iscrowd=None`` (presumably no filtering on the crowd flag; confirm
        against pycocotools).

        Arguments:
            index: COCO image id
            _COCO: loaded ``pycocotools.coco.COCO`` object for the image set
            toPercent (bool): when true, x coordinates are divided by the
                image width and y coordinates by the image height

        Returns:
            ``float32`` array of shape ``(num_valid_objs, 5)`` whose columns
            are ``x1, y1, x2, y2, class_index``.
        """
        im_ann = _COCO.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = _COCO.getAnnIds(imgIds=index, iscrowd=None)
        objs = _COCO.loadAnns(annIds)

        # Sanitize bboxes -- some are invalid
        valid_objs = []
        for obj in objs:
            x1 = np.max((0, obj['bbox'][0]))
            y1 = np.max((0, obj['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2, y2]
                valid_objs.append(obj)

        # Lookup table to map from COCO category ids to our internal class
        # indices
        coco_cat_id_to_class_ind = {
            self._class_to_coco_cat_id[name]: self._class_to_ind[name]
            for name in self.classes_names
        }

        res = np.zeros((len(valid_objs), 5), dtype=np.float32)
        for ix, obj in enumerate(valid_objs):
            res[ix, 0:4] = obj['clean_bbox']
            res[ix, 4] = coco_cat_id_to_class_ind[obj['category_id']]

        if toPercent:
            res[:, 0:4:2] /= width   # x1, x2
            res[:, 1:4:2] /= height  # y1, y2
        return res
class COCODetection(COCODataset, DetectionDataset):
    r"""COCO Object Detection Dataset

    The derivative class for COCODataset and DetectionDataset.

    Loads the image paths and their annotations through
    :class:`.COCODataset` and saves them to the annotation database
    ``self.db``. The data is then fetched by the data pipeline in
    :class:`ssds.dataset.detection_dataset.DetectionDataset`.

    Arguments:
        cfg: configuration object forwarded to ``DetectionDataset.__init__``
        dataset_dir (str): the directory of coco dataset
        image_sets (list): list of image-set names, forwarded to
            :class:`.COCODataset`
        training (bool): forwarded to ``DetectionDataset.__init__``
        transform: forwarded to ``DetectionDataset.__init__``
    """

    def __init__(self, cfg, dataset_dir, image_sets, training=False, transform=None):
        DetectionDataset.__init__(self, cfg, training, transform)
        COCODataset.__init__(self, dataset_dir, image_sets)
        self.db = self._get_db()

        # loading img db to boost up the speed
        # NOTE(review): using_pickle, saving_pickle and loading_pickle are not
        # defined in this file; presumably provided by DetectionDataset.
        if self.using_pickle:
            # The cache file name is derived from all image-set names.
            pickle_path = os.path.join(
                dataset_dir, 'pickle',
                'img_db_' + '_'.join(image_sets) + '.pickle')
            os.makedirs(os.path.dirname(pickle_path), exist_ok=True)
            if not os.path.exists(pickle_path):
                self.saving_pickle(pickle_path)
            self.img_db = self.loading_pickle(pickle_path)

    def _get_db(self):
        """Build the annotation database, one dict per image.

        Each entry holds the image path under ``'image'``, the first four
        annotation columns under ``'boxes'`` and the fifth column under
        ``'labels'``.
        """
        return [
            {
                'image': img_path,
                'boxes': anno[:, :4],
                'labels': anno[:, 4],
            }
            for img_path, anno in zip(self.img_paths, self.anno)
        ]