data_augment.py
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
"""
Data augmentation functionality. Passed as callable transformations to
Dataset classes.

The data augmentation procedures were interpreted from @weiliu89's SSD paper:
http://arxiv.org/abs/1512.02325
"""
import math
import random

import cv2
import numpy as np

from yolox.utils import xyxy2cxcywh

def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge(
        (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))
    ).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # writes into img; no return needed
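
# A minimal usage sketch (illustrative only; the file name is made up).
# `augment_hsv` mutates its input in place, so pass a writable uint8 BGR array:
#
#     img = cv2.imread("frame.jpg")  # uint8 BGR, as returned by cv2.imread
#     augment_hsv(img)               # img is now HSV-jittered in place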

def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2):
    # box1(4,n): boxes before augmentation; box2(4,n): boxes after augmentation.
    # A box survives if it passes the following thresholds:
    # wh_thr (pixels), ar_thr (aspect ratio), area_thr (area ratio)
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))  # aspect ratio
    return (
        (w2 > wh_thr)
        & (h2 > wh_thr)
        & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr)
        & (ar < ar_thr)
    )  # candidates
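
# A worked example (illustrative): a box shrunk from 100x100 to 10x10 by the
# warp fails the 20% area-ratio test and is dropped:
#
#     before = np.array([[0.0], [0.0], [100.0], [100.0]])  # (4, n) xyxy columns
#     after = np.array([[0.0], [0.0], [10.0], [10.0]])
#     box_candidates(before, after)  # -> array([False])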

def random_perspective(
    img,
    targets=(),
    degrees=10,
    translate=0.1,
    scale=(0.9, 1.1),  # (min, max) scale range; a scalar here would break the indexing below
    shear=10,
    perspective=0.0,
    border=(0, 0),
):
    # targets: (n, >=4) array whose first four columns are xyxy boxes
    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(scale[0], scale[1])
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * width
    )  # x translation (pixels)
    T[1, 2] = (
        random.uniform(0.5 - translate, 0.5 + translate) * height
    )  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ C  # order of operations (right to left) is IMPORTANT

    ###########################
    # For Aug out of Mosaic
    # s = 1.
    # M = np.eye(3)
    ###########################

    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(
                img, M, dsize=(width, height), borderValue=(114, 114, 114)
            )
        else:  # affine
            img = cv2.warpAffine(
                img, M[:2], dsize=(width, height), borderValue=(114, 114, 114)
            )

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp points
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
            n * 4, 2
        )  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 8)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip boxes
        # xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        # xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, :4] = xy[i]

        # drop boxes that fell entirely outside the output image
        targets = targets[targets[:, 0] < width]
        targets = targets[targets[:, 2] > 0]
        targets = targets[targets[:, 1] < height]
        targets = targets[targets[:, 3] > 0]

    return img, targets
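
# A minimal sketch of how `random_perspective` is typically driven (illustrative
# only; the file name and target values are made up). `targets` carries one row
# per box, with xyxy coordinates in the first four columns:
#
#     img = cv2.imread("mosaic.jpg")
#     targets = np.array([[40.0, 60.0, 120.0, 200.0, 0.0]])  # x1, y1, x2, y2, cls
#     img, targets = random_perspective(img, targets, scale=(0.5, 1.5))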

def _distort(image):
    """Random photometric distortion: brightness, contrast, hue, and saturation."""

    def _convert(image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    image = image.copy()

    if random.randrange(2):  # random brightness
        _convert(image, beta=random.uniform(-32, 32))

    if random.randrange(2):  # random contrast
        _convert(image, alpha=random.uniform(0.5, 1.5))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    if random.randrange(2):  # random hue shift (OpenCV hue range is [0, 180))
        tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
        tmp %= 180
        image[:, :, 0] = tmp

    if random.randrange(2):  # random saturation
        _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

    image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
    return image

def _mirror(image, boxes):
    _, width, _ = image.shape
    if random.randrange(2):  # horizontal flip with probability 0.5
        image = image[:, ::-1]
        boxes = boxes.copy()
        # flip xyxy boxes: new x1 = width - old x2, new x2 = width - old x1
        boxes[:, 0::2] = width - boxes[:, 2::-2]
    return image, boxes
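
# A minimal sketch (illustrative): `_distort` and `_mirror` are the cheap
# photometric/geometric pair applied inside `TrainTransform.__call__`:
#
#     img_t = _distort(img)                  # jitter brightness/contrast/hue/sat
#     img_t, boxes = _mirror(img_t, boxes)   # maybe flip image and xyxy boxes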

def preproc(img, input_size, mean, std, swap=(2, 0, 1)):
    if len(img.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0

    # resize so the longer side fits input_size, keeping the aspect ratio
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

    padded_img = padded_img[:, :, ::-1]  # BGR -> RGB
    padded_img /= 255.0
    # mean/std default to None in the transforms below, so guard before applying
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = padded_img.transpose(swap)  # HWC -> CHW
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r
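
# A minimal sketch of what `preproc` produces (illustrative; the input shape is
# made up): a 640x640 CHW float32 tensor letterboxed with 114s, plus the resize
# ratio needed to map predictions back to the original image:
#
#     img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
#     tensor, r = preproc(img, (640, 640), mean=None, std=None)
#     assert tensor.shape == (3, 640, 640)
#     # boxes predicted on `tensor` map back to the original image via xyxy / r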

class TrainTransform:
    def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100):
        self.means = rgb_means
        self.std = std
        self.p = p
        self.max_labels = max_labels

    def __call__(self, image, targets, input_dim):
        boxes = targets[:, :4].copy()
        labels = targets[:, 4].copy()
        ids = targets[:, 5].copy()
        if len(boxes) == 0:
            targets = np.zeros((self.max_labels, 6), dtype=np.float32)
            image, r_o = preproc(image, input_dim, self.means, self.std)
            image = np.ascontiguousarray(image, dtype=np.float32)
            return image, targets

        image_o = image.copy()
        targets_o = targets.copy()
        height_o, width_o, _ = image_o.shape
        boxes_o = targets_o[:, :4]
        labels_o = targets_o[:, 4]
        ids_o = targets_o[:, 5]
        # bbox_o: [xyxy] to [c_x, c_y, w, h]
        boxes_o = xyxy2cxcywh(boxes_o)

        image_t = _distort(image)
        image_t, boxes = _mirror(image_t, boxes)
        height, width, _ = image_t.shape
        image_t, r_ = preproc(image_t, input_dim, self.means, self.std)
        # boxes [xyxy] to [cx, cy, w, h]
        boxes = xyxy2cxcywh(boxes)
        boxes *= r_

        # keep only boxes that are still at least 1px wide and tall after resizing
        mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1
        boxes_t = boxes[mask_b]
        labels_t = labels[mask_b]
        ids_t = ids[mask_b]

        if len(boxes_t) == 0:
            # every box was filtered out; fall back to the undistorted image
            image_t, r_o = preproc(image_o, input_dim, self.means, self.std)
            boxes_o *= r_o
            boxes_t = boxes_o
            labels_t = labels_o
            ids_t = ids_o

        labels_t = np.expand_dims(labels_t, 1)
        ids_t = np.expand_dims(ids_t, 1)

        targets_t = np.hstack((labels_t, boxes_t, ids_t))
        padded_labels = np.zeros((self.max_labels, 6))
        padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[
            : self.max_labels
        ]
        padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32)
        image_t = np.ascontiguousarray(image_t, dtype=np.float32)
        return image_t, padded_labels
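
# A minimal sketch of how `TrainTransform` is invoked (illustrative; the target
# values are made up). Input targets are (n, 6) [x1, y1, x2, y2, class, track_id]
# rows; the output is a fixed-size (max_labels, 6) [class, cx, cy, w, h, track_id]
# array, zero-padded so batches can be stacked:
#
#     transform = TrainTransform(max_labels=100)
#     img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
#     targets = np.array([[40.0, 60.0, 120.0, 200.0, 0.0, 7.0]])
#     img_t, label_t = transform(img, targets, (640, 640))
#     assert label_t.shape == (100, 6)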

class ValTransform:
    """
    Defines the transformations that should be applied to a test image
    before input into the network:

        resize/pad -> normalize -> channel swap

    Arguments:
        rgb_means ((int, int, int)): average RGB of the dataset
            (e.g. (104, 117, 123))
        std ((int, int, int)): per-channel standard deviation of the dataset
        swap ((int, int, int)): final order of channels

    Returns:
        transform (callable): transform to be applied to test/val data
    """

    def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)):
        self.means = rgb_means
        self.swap = swap
        self.std = std

    # assume input is a cv2 (BGR, HWC) image for now
    def __call__(self, img, res, input_size):
        img, _ = preproc(img, input_size, self.means, self.std, self.swap)
        return img, np.zeros((1, 5))
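
# A minimal usage sketch (illustrative only): at eval time the second return
# value is a dummy label placeholder, so only the image is consumed:
#
#     val_transform = ValTransform()
#     img = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
#     img_t, _ = val_transform(img, None, (640, 640))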