test_yolov3_loss.py

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division

import unittest
import paddle
from paddle import fluid

# add python path of PaddleDetection to sys.path
import os
import sys
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 4)))
if parent_path not in sys.path:
    sys.path.append(parent_path)

from ppdet.modeling.losses import YOLOv3Loss
from ppdet.data.transform.op_helper import jaccard_overlap
import numpy as np


def _split_ioup(output, an_num, num_classes):
    """
    Split the output feature map into the predicted IoU and the remaining
    output along the channel dimension.
    """
    ioup = fluid.layers.slice(output, axes=[1], starts=[0], ends=[an_num])
    ioup = fluid.layers.sigmoid(ioup)

    oriout = fluid.layers.slice(
        output, axes=[1], starts=[an_num], ends=[an_num * (num_classes + 6)])

    return (ioup, oriout)
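

# Illustrative note (inferred from the slices above, not part of the original
# file): _split_ioup assumes an IoU-aware head layout in which the first
# an_num channels hold one predicted IoU per anchor and the following
# an_num * (num_classes + 5) channels hold the usual YOLOv3 outputs; it is
# not exercised by the tests below.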


def _split_output(output, an_num, num_classes):
    """
    Split the output feature map into x, y, w, h, objectness and
    classification along the channel dimension.
    """
    x = fluid.layers.strided_slice(
        output,
        axes=[1],
        starts=[0],
        ends=[output.shape[1]],
        strides=[5 + num_classes])
    y = fluid.layers.strided_slice(
        output,
        axes=[1],
        starts=[1],
        ends=[output.shape[1]],
        strides=[5 + num_classes])
    w = fluid.layers.strided_slice(
        output,
        axes=[1],
        starts=[2],
        ends=[output.shape[1]],
        strides=[5 + num_classes])
    h = fluid.layers.strided_slice(
        output,
        axes=[1],
        starts=[3],
        ends=[output.shape[1]],
        strides=[5 + num_classes])
    obj = fluid.layers.strided_slice(
        output,
        axes=[1],
        starts=[4],
        ends=[output.shape[1]],
        strides=[5 + num_classes])
    clss = []
    stride = output.shape[1] // an_num
    for m in range(an_num):
        clss.append(
            fluid.layers.slice(
                output,
                axes=[1],
                starts=[stride * m + 5],
                ends=[stride * m + 5 + num_classes]))
    cls = fluid.layers.transpose(
        fluid.layers.stack(
            clss, axis=1), perm=[0, 1, 3, 4, 2])

    return (x, y, w, h, obj, cls)
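

# Illustrative note (not part of the original test): with an_num = 3 and
# num_classes = 80 the head output has 3 * (5 + 80) = 255 channels, laid out
# per anchor as [x, y, w, h, obj, 80 class logits]. The strided slices above
# therefore pick x from channels 0 / 85 / 170, y from 1 / 86 / 171, and so on,
# while the per-anchor class slices cover channels 5-84, 90-169 and 175-254.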


def _split_target(target):
    """
    Split the target into x, y, w, h, objectness and classification
    along dimension 2.
    target is in shape [N, an_num, 6 + class_num, H, W]
    """
    tx = target[:, :, 0, :, :]
    ty = target[:, :, 1, :, :]
    tw = target[:, :, 2, :, :]
    th = target[:, :, 3, :, :]
    tscale = target[:, :, 4, :, :]
    tobj = target[:, :, 5, :, :]
    tcls = fluid.layers.transpose(target[:, :, 6:, :, :], perm=[0, 1, 3, 4, 2])
    tcls.stop_gradient = True

    return (tx, ty, tw, th, tscale, tobj, tcls)
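

# Illustrative note (layout implied by gt2yolotarget below): along dimension 2
# the target stacks [tx, ty, tw, th, tscale, tobj] followed by class_num
# one-hot class entries, so indices 0-5 are the box/score channels and 6:
# are the classification targets, which _split_target transposes to
# channel-last order.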


def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
                   downsample, ignore_thresh, scale_x_y):
    # If a prediction bbox overlaps any gt_bbox over ignore_thresh, its
    # objectness loss will be ignored; process as follows:

    # 1. get pred bbox, which is the same as in YOLOv3 infer mode, use yolo_box here
    # NOTE: img_size is set to 1.0 to get normalized pred bbox
    bbox, prob = fluid.layers.yolo_box(
        x=output,
        img_size=fluid.layers.ones(
            shape=[batch_size, 2], dtype="int32"),
        anchors=anchors,
        class_num=num_classes,
        conf_thresh=0.,
        downsample_ratio=downsample,
        clip_bbox=False,
        scale_x_y=scale_x_y)

    # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
    #    and gt bbox in each sample
    if batch_size > 1:
        preds = fluid.layers.split(bbox, batch_size, dim=0)
        gts = fluid.layers.split(gt_box, batch_size, dim=0)
    else:
        preds = [bbox]
        gts = [gt_box]
        probs = [prob]
    ious = []
    for pred, gt in zip(preds, gts):

        def box_xywh2xyxy(box):
            x = box[:, 0]
            y = box[:, 1]
            w = box[:, 2]
            h = box[:, 3]
            return fluid.layers.stack(
                [
                    x - w / 2.,
                    y - h / 2.,
                    x + w / 2.,
                    y + h / 2.,
                ], axis=1)

        pred = fluid.layers.squeeze(pred, axes=[0])
        gt = box_xywh2xyxy(fluid.layers.squeeze(gt, axes=[0]))
        ious.append(fluid.layers.iou_similarity(pred, gt))
    iou = fluid.layers.stack(ious, axis=0)

    # 3. Get iou_mask from the IoU between gt bbox and prediction bbox,
    #    get obj_mask from tobj (which holds gt_score), calculate objectness loss
    max_iou = fluid.layers.reduce_max(iou, dim=-1)
    iou_mask = fluid.layers.cast(max_iou <= ignore_thresh, dtype="float32")
    output_shape = fluid.layers.shape(output)
    an_num = len(anchors) // 2
    iou_mask = fluid.layers.reshape(iou_mask, (-1, an_num, output_shape[2],
                                               output_shape[3]))
    iou_mask.stop_gradient = True

    # NOTE: tobj holds gt_score, obj_mask holds object existence mask
    obj_mask = fluid.layers.cast(tobj > 0., dtype="float32")
    obj_mask.stop_gradient = True

    # For positive objectness grids, objectness loss is always calculated;
    # for negative objectness grids, objectness loss is calculated only
    # where iou_mask == 1.0
    loss_obj = fluid.layers.sigmoid_cross_entropy_with_logits(obj, obj_mask)
    loss_obj_pos = fluid.layers.reduce_sum(loss_obj * tobj, dim=[1, 2, 3])
    loss_obj_neg = fluid.layers.reduce_sum(
        loss_obj * (1.0 - obj_mask) * iou_mask, dim=[1, 2, 3])

    return loss_obj_pos, loss_obj_neg
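

# Illustrative note (a reading of the masks above, not original code): with
# ignore_thresh = 0.7, a prediction whose best IoU with any gt box is, say,
# 0.8 gets iou_mask = 0 and so contributes no negative objectness loss even
# though no gt is assigned to it; a prediction with best IoU <= 0.7 and
# tobj == 0 is penalized as background, while assigned cells are weighted by
# the gt_score stored in tobj.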


def fine_grained_loss(output,
                      target,
                      gt_box,
                      batch_size,
                      num_classes,
                      anchors,
                      ignore_thresh,
                      downsample,
                      scale_x_y=1.,
                      eps=1e-10):
    an_num = len(anchors) // 2
    x, y, w, h, obj, cls = _split_output(output, an_num, num_classes)
    tx, ty, tw, th, tscale, tobj, tcls = _split_target(target)

    tscale_tobj = tscale * tobj

    if (abs(scale_x_y - 1.0) < eps):
        loss_x = fluid.layers.sigmoid_cross_entropy_with_logits(
            x, tx) * tscale_tobj
        loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3])
        loss_y = fluid.layers.sigmoid_cross_entropy_with_logits(
            y, ty) * tscale_tobj
        loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3])
    else:
        dx = scale_x_y * fluid.layers.sigmoid(x) - 0.5 * (scale_x_y - 1.0)
        dy = scale_x_y * fluid.layers.sigmoid(y) - 0.5 * (scale_x_y - 1.0)
        loss_x = fluid.layers.abs(dx - tx) * tscale_tobj
        loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3])
        loss_y = fluid.layers.abs(dy - ty) * tscale_tobj
        loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3])

    # NOTE: we refined loss function of (w, h) as L1Loss
    loss_w = fluid.layers.abs(w - tw) * tscale_tobj
    loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
    loss_h = fluid.layers.abs(h - th) * tscale_tobj
    loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])

    loss_obj_pos, loss_obj_neg = _calc_obj_loss(
        output, obj, tobj, gt_box, batch_size, anchors, num_classes,
        downsample, ignore_thresh, scale_x_y)

    loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(cls, tcls)
    loss_cls = fluid.layers.elementwise_mul(loss_cls, tobj, axis=0)
    loss_cls = fluid.layers.reduce_sum(loss_cls, dim=[1, 2, 3, 4])

    loss_xys = fluid.layers.reduce_mean(loss_x + loss_y)
    loss_whs = fluid.layers.reduce_mean(loss_w + loss_h)
    loss_objs = fluid.layers.reduce_mean(loss_obj_pos + loss_obj_neg)
    loss_clss = fluid.layers.reduce_mean(loss_cls)

    losses_all = {
        "loss_xy": fluid.layers.sum(loss_xys),
        "loss_wh": fluid.layers.sum(loss_whs),
        "loss_loc": fluid.layers.sum(loss_xys) + fluid.layers.sum(loss_whs),
        "loss_obj": fluid.layers.sum(loss_objs),
        "loss_cls": fluid.layers.sum(loss_clss),
    }
    return losses_all, x, y, tx, ty
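

# Illustrative note (assumed reading of the two branches above): when
# scale_x_y == 1.0 the x/y terms use sigmoid cross-entropy against the
# cell-relative targets, while for e.g. scale_x_y = 1.2 the decoded offsets
# dx = 1.2 * sigmoid(x) - 0.1 are compared to tx with an L1 loss, mirroring
# the scale_x_y passed to yolo_box in _calc_obj_loss.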


def gt2yolotarget(gt_bbox, gt_class, gt_score, anchors, mask, num_classes, size,
                  stride):
    grid_h, grid_w = size
    h, w = grid_h * stride, grid_w * stride
    an_hw = np.array(anchors) / np.array([[w, h]])
    target = np.zeros(
        (len(mask), 6 + num_classes, grid_h, grid_w), dtype=np.float32)
    for b in range(gt_bbox.shape[0]):
        gx, gy, gw, gh = gt_bbox[b, :]
        cls = gt_class[b]
        score = gt_score[b]
        if gw <= 0. or gh <= 0. or score <= 0.:
            continue

        # find best match anchor index
        best_iou = 0.
        best_idx = -1
        for an_idx in range(an_hw.shape[0]):
            iou = jaccard_overlap(
                [0., 0., gw, gh],
                [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
            if iou > best_iou:
                best_iou = iou
                best_idx = an_idx

        gi = int(gx * grid_w)
        gj = int(gy * grid_h)

        # gt box should be regressed in this layer if the best match
        # anchor index is in the anchor mask of this layer
        if best_idx in mask:
            best_n = mask.index(best_idx)

            # x, y, w, h, scale
            target[best_n, 0, gj, gi] = gx * grid_w - gi
            target[best_n, 1, gj, gi] = gy * grid_h - gj
            target[best_n, 2, gj, gi] = np.log(gw * w / anchors[best_idx][0])
            target[best_n, 3, gj, gi] = np.log(gh * h / anchors[best_idx][1])
            target[best_n, 4, gj, gi] = 2.0 - gw * gh

            # objectness records gt_score
            # if target[best_n, 5, gj, gi] > 0:
            #     print('find 1 duplicate')
            target[best_n, 5, gj, gi] = score

            # classification
            target[best_n, 6 + cls, gj, gi] = 1.

    return target
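

# Worked example (numbers chosen for illustration only): on a 19 x 19 grid
# with stride 32 (i.e. a 608 x 608 input), a normalized gt box centered at
# (0.47, 0.52) falls into cell gi = int(0.47 * 19) = 8, gj = int(0.52 * 19) = 9,
# so target[best_n, 0, 9, 8] = 0.47 * 19 - 8 = 0.93 and
# target[best_n, 1, 9, 8] = 0.52 * 19 - 9 = 0.88; the w/h targets are
# log-ratios against the matched anchor and tscale = 2.0 - gw * gh
# down-weights large boxes.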


class TestYolov3LossOp(unittest.TestCase):
    def setUp(self):
        self.initTestCase()
        x = np.random.uniform(0, 1, self.x_shape).astype('float64')
        gtbox = np.random.random(size=self.gtbox_shape).astype('float64')
        gtlabel = np.random.randint(0, self.class_num, self.gtbox_shape[:2])
        gtmask = np.random.randint(0, 2, self.gtbox_shape[:2])
        gtbox = gtbox * gtmask[:, :, np.newaxis]
        gtlabel = gtlabel * gtmask
        gtscore = np.ones(self.gtbox_shape[:2]).astype('float64')
        if self.gtscore:
            gtscore = np.random.random(self.gtbox_shape[:2]).astype('float64')
        target = []
        for box, label, score in zip(gtbox, gtlabel, gtscore):
            target.append(
                gt2yolotarget(box, label, score, self.anchors,
                              self.anchor_mask, self.class_num,
                              (self.h, self.w), self.downsample_ratio))
        self.target = np.array(target).astype('float64')

        self.mask_anchors = []
        for i in self.anchor_mask:
            self.mask_anchors.extend(self.anchors[i])
        self.x = x
        self.gtbox = gtbox
        self.gtlabel = gtlabel
        self.gtscore = gtscore

    def initTestCase(self):
        self.b = 8
        self.h = 19
        self.w = 19
        self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                        [59, 119], [116, 90], [156, 198], [373, 326]]
        self.anchor_mask = [6, 7, 8]
        self.na = len(self.anchor_mask)
        self.class_num = 80
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
                        self.h, self.w)
        self.gtbox_shape = (self.b, 40, 4)
        self.gtscore = True
        self.use_label_smooth = False
        self.scale_x_y = 1.

    def test_loss(self):
        x, gtbox, gtlabel, gtscore, target = self.x, self.gtbox, self.gtlabel, self.gtscore, self.target
        yolo_loss = YOLOv3Loss(
            ignore_thresh=self.ignore_thresh,
            label_smooth=self.use_label_smooth,
            num_classes=self.class_num,
            downsample=self.downsample_ratio,
            scale_x_y=self.scale_x_y)
        x = paddle.to_tensor(x.astype(np.float32))
        gtbox = paddle.to_tensor(gtbox.astype(np.float32))
        gtlabel = paddle.to_tensor(gtlabel.astype(np.float32))
        gtscore = paddle.to_tensor(gtscore.astype(np.float32))
        t = paddle.to_tensor(target.astype(np.float32))
        anchor = [self.anchors[i] for i in self.anchor_mask]

        (yolo_loss1, px, py, tx, ty) = fine_grained_loss(
            output=x,
            target=t,
            gt_box=gtbox,
            batch_size=self.b,
            num_classes=self.class_num,
            anchors=self.mask_anchors,
            ignore_thresh=self.ignore_thresh,
            downsample=self.downsample_ratio,
            scale_x_y=self.scale_x_y)

        yolo_loss2 = yolo_loss.yolov3_loss(
            x, t, gtbox, anchor, self.downsample_ratio, self.scale_x_y)
        for k in yolo_loss2:
            self.assertAlmostEqual(
                yolo_loss1[k].numpy()[0],
                yolo_loss2[k].numpy()[0],
                delta=1e-2,
                msg=k)


class TestYolov3LossNoGTScore(TestYolov3LossOp):
    def initTestCase(self):
        self.b = 1
        self.h = 76
        self.w = 76
        self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                        [59, 119], [116, 90], [156, 198], [373, 326]]
        self.anchor_mask = [0, 1, 2]
        self.na = len(self.anchor_mask)
        self.class_num = 80
        self.ignore_thresh = 0.7
        self.downsample_ratio = 8
        self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
                        self.h, self.w)
        self.gtbox_shape = (self.b, 40, 4)
        self.gtscore = False
        self.use_label_smooth = False
        self.scale_x_y = 1.


class TestYolov3LossWithScaleXY(TestYolov3LossOp):
    def initTestCase(self):
        self.b = 5
        self.h = 38
        self.w = 38
        self.anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                        [59, 119], [116, 90], [156, 198], [373, 326]]
        self.anchor_mask = [3, 4, 5]
        self.na = len(self.anchor_mask)
        self.class_num = 80
        self.ignore_thresh = 0.7
        self.downsample_ratio = 16
        self.x_shape = (self.b, len(self.anchor_mask) * (5 + self.class_num),
                        self.h, self.w)
        self.gtbox_shape = (self.b, 40, 4)
        self.gtscore = True
        self.use_label_smooth = False
        self.scale_x_y = 1.2


if __name__ == "__main__":
    unittest.main()