123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- import cv2
- import numpy as np
- from pycocotools.coco import COCO
- import os
- from ..dataloading import get_yolox_datadir
- from .datasets_wrapper import Dataset
class MOTDataset(Dataset):
    """
    MOT (multi-object tracking) dataset backed by COCO-format annotations.

    Annotations are loaded through the COCO API. In addition to the usual
    COCO fields, every image record is expected to carry ``frame_id`` and
    ``video_id`` and every object annotation a ``track_id``.
    """

    def __init__(
        self,
        data_dir=None,
        json_file="train_half.json",
        name="train",
        img_size=(608, 1088),
        preproc=None,
    ):
        """
        MOT dataset initialization. Annotation data are read into memory by COCO API.
        Args:
            data_dir (str): dataset root directory. When None, the "mot"
                sub-directory of ``get_yolox_datadir()`` is used.
            json_file (str): COCO json file name, looked up in
                ``<data_dir>/annotations``
            name (str): sub-directory of ``data_dir`` that contains the images
            img_size (tuple): target image size after pre-processing
            preproc: data augmentation strategy; called as
                ``preproc(img, target, self.input_dim)``
        """
        super().__init__(img_size)
        if data_dir is None:
            data_dir = os.path.join(get_yolox_datadir(), "mot")
        self.data_dir = data_dir
        self.json_file = json_file

        self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file))
        self.ids = self.coco.getImgIds()
        self.class_ids = sorted(self.coco.getCatIds())
        # Load the categories in the same (sorted) order as ``class_ids`` so
        # that ``self._classes[i]`` names the class whose label index is ``i``.
        cats = self.coco.loadCats(self.class_ids)
        self._classes = tuple(c["name"] for c in cats)
        self.annotations = self._load_coco_annotations()
        self.name = name
        self.img_size = img_size
        self.preproc = preproc

    def __len__(self):
        """Return the number of images in the annotation file."""
        return len(self.ids)

    def _load_coco_annotations(self):
        """Build the per-image annotation cache, in the order of ``self.ids``."""
        return [self.load_anno_from_ids(_ids) for _ids in self.ids]

    def load_anno_from_ids(self, id_):
        """
        Collect the labels and image metadata for one COCO image id.
        Args:
            id_ (int): COCO image id
        Returns:
            res (numpy.ndarray): array of shape :math:`[num_objs, 6]`, one row
                per kept object: [x1, y1, x2, y2, class index, track id].
            img_info (tuple): (height, width, frame_id, video_id, file_name)
            file_name (str): image file name from the annotation, or the
                zero-padded image id plus ".jpg" when none is recorded.
        """
        im_ann = self.coco.loadImgs(id_)[0]
        width = im_ann["width"]
        height = im_ann["height"]
        frame_id = im_ann["frame_id"]
        video_id = im_ann["video_id"]
        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)

        objs = []
        for obj in annotations:
            # COCO boxes are [x, y, w, h]; convert to corner format.
            x1 = obj["bbox"][0]
            y1 = obj["bbox"][1]
            x2 = x1 + obj["bbox"][2]
            y2 = y1 + obj["bbox"][3]
            # Keep only boxes with positive area and non-negative extent.
            if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
                obj["clean_bbox"] = [x1, y1, x2, y2]
                objs.append(obj)

        num_objs = len(objs)
        res = np.zeros((num_objs, 6))
        for ix, obj in enumerate(objs):
            # The class label is the position of the category id in the
            # sorted list of category ids, not the raw COCO category id.
            cls = self.class_ids.index(obj["category_id"])
            res[ix, 0:4] = obj["clean_bbox"]
            res[ix, 4] = cls
            res[ix, 5] = obj["track_id"]

        file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg"
        img_info = (height, width, frame_id, video_id, file_name)

        return (res, img_info, file_name)

    def load_anno(self, index):
        """Return the cached label array for the image at ``index``."""
        return self.annotations[index][0]

    def pull_item(self, index):
        """
        Read the image at ``index`` from disk and return it with its labels.
        Args:
            index (int): data index
        Returns:
            img (numpy.ndarray): image as returned by ``cv2.imread``
            res (numpy.ndarray): copy of the cached label array
            img_info (tuple): (height, width, frame_id, video_id, file_name)
            img_id (numpy.ndarray): one-element array holding the COCO image id
        """
        id_ = self.ids[index]

        res, img_info, file_name = self.annotations[index]
        # load image and preprocess
        img_file = os.path.join(self.data_dir, self.name, file_name)
        img = cv2.imread(img_file)
        # cv2.imread signals an unreadable file by returning None instead of
        # raising, so name the offending path in the failure message.
        assert img is not None, "failed to read image: {}".format(img_file)

        # Hand out a copy so callers cannot modify the cached annotations.
        return img, res.copy(), img_info, np.array([id_])

    @Dataset.resize_getitem
    def __getitem__(self, index):
        """
        One image / label pair for the given index is picked up and pre-processed.
        Args:
            index (int): data index
        Returns:
            img (numpy.ndarray): the loaded image, or the image returned by
                ``preproc`` when one is set.
            target: label data. Without ``preproc`` this is a numpy.ndarray of
                shape :math:`[num_objs, 6]` whose rows are
                [x1, y1, x2, y2, class index, track id]; with ``preproc`` it is
                whatever ``preproc`` returns.
            img_info (tuple): (height, width, frame_id, video_id, file_name)
                taken from the annotation file.
            img_id (numpy.ndarray): one-element array holding the COCO image
                id. Used for evaluation.
        """
        img, target, img_info, img_id = self.pull_item(index)

        if self.preproc is not None:
            # ``input_dim`` is not defined in this class; it is presumably
            # provided by the ``Dataset`` base class.
            img, target = self.preproc(img, target, self.input_dim)
        return img, target, img_info, img_id
|