yolo_loss.py

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid
from ppdet.core.workspace import register

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence

import logging
logger = logging.getLogger(__name__)

__all__ = ['YOLOv3Loss']


@register
class YOLOv3Loss(object):
    """
    Combined loss for YOLOv3 network

    Args:
        train_batch_size (int): training batch size
        ignore_thresh (float): threshold to ignore confidence loss
        label_smooth (bool): whether to use label smoothing
        use_fine_grained_loss (bool): whether to use the fine grained YOLOv3
                                      loss instead of fluid.layers.yolov3_loss
    """
    __inject__ = ['iou_loss', 'iou_aware_loss']
    __shared__ = ['use_fine_grained_loss', 'train_batch_size']

    def __init__(
            self,
            train_batch_size=8,
            batch_size=-1,  # stub kept for backward compatibility
            ignore_thresh=0.7,
            label_smooth=True,
            use_fine_grained_loss=False,
            iou_loss=None,
            iou_aware_loss=None,
            downsample=[32, 16, 8],
            scale_x_y=1.,
            match_score=False):
        self._train_batch_size = train_batch_size
        self._ignore_thresh = ignore_thresh
        self._label_smooth = label_smooth
        self._use_fine_grained_loss = use_fine_grained_loss
        self._iou_loss = iou_loss
        self._iou_aware_loss = iou_aware_loss
        self.downsample = downsample
        self.scale_x_y = scale_x_y
        self.match_score = match_score

        if batch_size != -1:
            logger.warning(
                "config YOLOv3Loss.batch_size is deprecated, "
                "training batch size should be set by TrainReader.batch_size")

    def __call__(self, outputs, gt_box, gt_label, gt_score, targets, anchors,
                 anchor_masks, mask_anchors, num_classes, prefix_name):
        if self._use_fine_grained_loss:
            return self._get_fine_grained_loss(
                outputs, targets, gt_box, self._train_batch_size, num_classes,
                mask_anchors, self._ignore_thresh)
        else:
            losses = []
            for i, output in enumerate(outputs):
                scale_x_y = self.scale_x_y if not isinstance(
                    self.scale_x_y, Sequence) else self.scale_x_y[i]
                anchor_mask = anchor_masks[i]
                loss = fluid.layers.yolov3_loss(
                    x=output,
                    gt_box=gt_box,
                    gt_label=gt_label,
                    gt_score=gt_score,
                    anchors=anchors,
                    anchor_mask=anchor_mask,
                    class_num=num_classes,
                    ignore_thresh=self._ignore_thresh,
                    downsample_ratio=self.downsample[i],
                    use_label_smooth=self._label_smooth,
                    scale_x_y=scale_x_y,
                    name=prefix_name + "yolo_loss" + str(i))
                losses.append(fluid.layers.reduce_mean(loss))
            return {'loss': sum(losses)}
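
    # NOTE: the default branch reduces each scale's yolov3_loss output to its
    # batch mean and sums the per-scale means into a single 'loss' entry; the
    # fine grained branch instead returns a dict with one entry per loss
    # component (xy, wh, obj, cls, and optionally iou / iou_aware).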

    def _get_fine_grained_loss(self,
                               outputs,
                               targets,
                               gt_box,
                               train_batch_size,
                               num_classes,
                               mask_anchors,
                               ignore_thresh,
                               eps=1.e-10):
        """
        Calculate fine grained YOLOv3 loss

        Args:
            outputs ([Variables]): List of Variables, output of backbone stages
            targets ([Variables]): List of Variables, the targets for YOLO
                                   loss calculation.
            gt_box (Variable): The ground-truth bounding boxes.
            train_batch_size (int): The training batch size
            num_classes (int): number of classes in the dataset
            mask_anchors ([[float]]): list of anchors in each output layer
            ignore_thresh (float): if a predicted bbox overlaps any gt_box
                                   with IoU greater than ignore_thresh, its
                                   objectness loss will be ignored.

        Returns:
            Type: dict
                xy_loss (Variable): YOLOv3 (x, y) coordinates loss
                wh_loss (Variable): YOLOv3 (w, h) coordinates loss
                obj_loss (Variable): YOLOv3 objectness score loss
                cls_loss (Variable): YOLOv3 classification loss
        """
        assert len(outputs) == len(targets), \
            "YOLOv3 output layer number not equal target number"

        loss_xys, loss_whs, loss_objs, loss_clss = [], [], [], []
        if self._iou_loss is not None:
            loss_ious = []
        if self._iou_aware_loss is not None:
            loss_iou_awares = []
        for i, (output, target,
                anchors) in enumerate(zip(outputs, targets, mask_anchors)):
            downsample = self.downsample[i]
            an_num = len(anchors) // 2
            if self._iou_aware_loss is not None:
                ioup, output = self._split_ioup(output, an_num, num_classes)
            x, y, w, h, obj, cls = self._split_output(output, an_num,
                                                      num_classes)
            tx, ty, tw, th, tscale, tobj, tcls = self._split_target(target)

            tscale_tobj = tscale * tobj

            scale_x_y = self.scale_x_y if not isinstance(
                self.scale_x_y, Sequence) else self.scale_x_y[i]

            if (abs(scale_x_y - 1.0) < eps):
                loss_x = fluid.layers.sigmoid_cross_entropy_with_logits(
                    x, tx) * tscale_tobj
                loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3])
                loss_y = fluid.layers.sigmoid_cross_entropy_with_logits(
                    y, ty) * tscale_tobj
                loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3])
            else:
                dx = scale_x_y * fluid.layers.sigmoid(x) - 0.5 * (scale_x_y -
                                                                  1.0)
                dy = scale_x_y * fluid.layers.sigmoid(y) - 0.5 * (scale_x_y -
                                                                  1.0)
                loss_x = fluid.layers.abs(dx - tx) * tscale_tobj
                loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3])
                loss_y = fluid.layers.abs(dy - ty) * tscale_tobj
                loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3])
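            # NOTE: with grid sensitive decoding (scale_x_y != 1.0) the center
            # offset is dx = scale_x_y * sigmoid(x) - 0.5 * (scale_x_y - 1);
            # e.g. scale_x_y = 1.05 stretches sigmoid's (0, 1) range to
            # (-0.025, 1.025) so predictions can reach the cell borders, and
            # the (x, y) loss above switches from sigmoid cross entropy on the
            # logits to L1 on the decoded offsets.
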
            # NOTE: we refined loss function of (w, h) as L1Loss
            loss_w = fluid.layers.abs(w - tw) * tscale_tobj
            loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3])
            loss_h = fluid.layers.abs(h - th) * tscale_tobj
            loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3])

            if self._iou_loss is not None:
                loss_iou = self._iou_loss(
                    x,
                    y,
                    w,
                    h,
                    tx,
                    ty,
                    tw,
                    th,
                    anchors,
                    downsample,
                    self._train_batch_size,
                    scale_x_y=scale_x_y)
                loss_iou = loss_iou * tscale_tobj
                loss_iou = fluid.layers.reduce_sum(loss_iou, dim=[1, 2, 3])
                loss_ious.append(fluid.layers.reduce_mean(loss_iou))

            if self._iou_aware_loss is not None:
                loss_iou_aware = self._iou_aware_loss(
                    ioup, x, y, w, h, tx, ty, tw, th, anchors, downsample,
                    self._train_batch_size, scale_x_y)
                loss_iou_aware = loss_iou_aware * tobj
                loss_iou_aware = fluid.layers.reduce_sum(
                    loss_iou_aware, dim=[1, 2, 3])
                loss_iou_awares.append(
                    fluid.layers.reduce_mean(loss_iou_aware))

            loss_obj_pos, loss_obj_neg = self._calc_obj_loss(
                output, obj, tobj, gt_box, self._train_batch_size, anchors,
                num_classes, downsample, self._ignore_thresh, scale_x_y)

            loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(cls,
                                                                      tcls)
            loss_cls = fluid.layers.elementwise_mul(loss_cls, tobj, axis=0)
            loss_cls = fluid.layers.reduce_sum(loss_cls, dim=[1, 2, 3, 4])

            loss_xys.append(fluid.layers.reduce_mean(loss_x + loss_y))
            loss_whs.append(fluid.layers.reduce_mean(loss_w + loss_h))
            loss_objs.append(
                fluid.layers.reduce_mean(loss_obj_pos + loss_obj_neg))
            loss_clss.append(fluid.layers.reduce_mean(loss_cls))

        losses_all = {
            "loss_xy": fluid.layers.sum(loss_xys),
            "loss_wh": fluid.layers.sum(loss_whs),
            "loss_obj": fluid.layers.sum(loss_objs),
            "loss_cls": fluid.layers.sum(loss_clss),
        }
        if self._iou_loss is not None:
            losses_all["loss_iou"] = fluid.layers.sum(loss_ious)
        if self._iou_aware_loss is not None:
            losses_all["loss_iou_aware"] = fluid.layers.sum(loss_iou_awares)
        return losses_all
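
    # NOTE: downstream, the trainer typically sums every entry of losses_all
    # into the total training loss; each entry above is already a batch mean
    # summed over the output scales.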

    def _split_ioup(self, output, an_num, num_classes):
        """
        Split output feature map to output, predicted iou
        along channel dimension
        """
        ioup = fluid.layers.slice(output, axes=[1], starts=[0], ends=[an_num])
        oriout = fluid.layers.slice(
            output,
            axes=[1],
            starts=[an_num],
            ends=[an_num * (num_classes + 6)])
        return (ioup, oriout)
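
    # For example, with an_num = 3 anchors and num_classes = 80, the IoU
    # aware head outputs 3 + 3 * 85 = 258 channels: channels [0, 3) are the
    # predicted IoU logits and channels [3, 258) are the regular YOLOv3
    # output handled by _split_output below.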

    def _split_output(self, output, an_num, num_classes):
        """
        Split output feature map to x, y, w, h, objectness, classification
        along channel dimension
        """
        x = fluid.layers.strided_slice(
            output,
            axes=[1],
            starts=[0],
            ends=[output.shape[1]],
            strides=[5 + num_classes])
        y = fluid.layers.strided_slice(
            output,
            axes=[1],
            starts=[1],
            ends=[output.shape[1]],
            strides=[5 + num_classes])
        w = fluid.layers.strided_slice(
            output,
            axes=[1],
            starts=[2],
            ends=[output.shape[1]],
            strides=[5 + num_classes])
        h = fluid.layers.strided_slice(
            output,
            axes=[1],
            starts=[3],
            ends=[output.shape[1]],
            strides=[5 + num_classes])
        obj = fluid.layers.strided_slice(
            output,
            axes=[1],
            starts=[4],
            ends=[output.shape[1]],
            strides=[5 + num_classes])
        clss = []
        stride = output.shape[1] // an_num
        for m in range(an_num):
            clss.append(
                fluid.layers.slice(
                    output,
                    axes=[1],
                    starts=[stride * m + 5],
                    ends=[stride * m + 5 + num_classes]))
        cls = fluid.layers.transpose(
            fluid.layers.stack(
                clss, axis=1), perm=[0, 1, 3, 4, 2])
        return (x, y, w, h, obj, cls)
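
    # Each anchor owns a contiguous block of 5 + num_classes channels laid
    # out as [x, y, w, h, obj, cls_0, ..., cls_{num_classes-1}], so
    # strided_slice with stride 5 + num_classes gathers e.g. the x channel of
    # every anchor (channels 0, 85, 170 when num_classes = 80); cls is sliced
    # per anchor, stacked, and transposed to [N, an_num, H, W, num_classes].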

    def _split_target(self, target):
        """
        split target to x, y, w, h, objectness, classification
        along dimension 2

        target is in shape [N, an_num, 6 + class_num, H, W]
        """
        tx = target[:, :, 0, :, :]
        ty = target[:, :, 1, :, :]
        tw = target[:, :, 2, :, :]
        th = target[:, :, 3, :, :]
        tscale = target[:, :, 4, :, :]
        tobj = target[:, :, 5, :, :]
        tcls = fluid.layers.transpose(
            target[:, :, 6:, :, :], perm=[0, 1, 3, 4, 2])
        tcls.stop_gradient = True
        return (tx, ty, tw, th, tscale, tobj, tcls)
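
    # NOTE: tscale is the box size weighting produced by the target assigner
    # (typically 2.0 - gw * gh with gw, gh normalized to the input size) and
    # tobj holds the gt_score, so tscale * tobj up-weights small boxes in the
    # coordinate losses while preserving per-box label weights.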

    def _calc_obj_loss(self, output, obj, tobj, gt_box, batch_size, anchors,
                       num_classes, downsample, ignore_thresh, scale_x_y):
        # If a predicted bbox overlaps any gt bbox with IoU over
        # ignore_thresh, its objectness loss will be ignored. Process as
        # follows:

        # 1. get pred bbox, the same as in YOLOv3 infer mode, using yolo_box
        # NOTE: img_size is set to 1 to get normalized pred bbox
        bbox, prob = fluid.layers.yolo_box(
            x=output,
            img_size=fluid.layers.ones(
                shape=[batch_size, 2], dtype="int32"),
            anchors=anchors,
            class_num=num_classes,
            conf_thresh=0.,
            downsample_ratio=downsample,
            clip_bbox=False,
            scale_x_y=scale_x_y)

        # 2. split pred bbox and gt bbox by sample, calculate IoU between
        #    pred bbox and gt bbox in each sample
        if batch_size > 1:
            preds = fluid.layers.split(bbox, batch_size, dim=0)
            gts = fluid.layers.split(gt_box, batch_size, dim=0)
        else:
            preds = [bbox]
            gts = [gt_box]
            probs = [prob]
        ious = []
        for pred, gt in zip(preds, gts):

            def box_xywh2xyxy(box):
                x = box[:, 0]
                y = box[:, 1]
                w = box[:, 2]
                h = box[:, 3]
                return fluid.layers.stack(
                    [
                        x - w / 2.,
                        y - h / 2.,
                        x + w / 2.,
                        y + h / 2.,
                    ], axis=1)

            pred = fluid.layers.squeeze(pred, axes=[0])
            gt = box_xywh2xyxy(fluid.layers.squeeze(gt, axes=[0]))
            ious.append(fluid.layers.iou_similarity(pred, gt))
        iou = fluid.layers.stack(ious, axis=0)

        # 3. Get iou_mask from the IoU between gt bbox and prediction bbox,
        #    get obj_mask from tobj (which holds gt_score), then calculate
        #    the objectness loss
        max_iou = fluid.layers.reduce_max(iou, dim=-1)
        iou_mask = fluid.layers.cast(max_iou <= ignore_thresh, dtype="float32")
        if self.match_score:
            max_prob = fluid.layers.reduce_max(prob, dim=-1)
            iou_mask = iou_mask * fluid.layers.cast(
                max_prob <= 0.25, dtype="float32")
        output_shape = fluid.layers.shape(output)
        an_num = len(anchors) // 2
        iou_mask = fluid.layers.reshape(iou_mask, (-1, an_num, output_shape[2],
                                                   output_shape[3]))
        iou_mask.stop_gradient = True

        # NOTE: tobj holds gt_score, obj_mask holds the object existence mask
        obj_mask = fluid.layers.cast(tobj > 0., dtype="float32")
        obj_mask.stop_gradient = True

        # For positive objectness grids, objectness loss is always
        # calculated; for negative objectness grids, it is calculated only
        # where iou_mask == 1.0
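        # For example, with ignore_thresh = 0.7 a grid cell whose decoded box
        # overlaps some gt with IoU 0.8 contributes nothing to the negative
        # term (and to the positive term only if it is an assigned positive,
        # i.e. tobj > 0), while non-positive cells whose best IoU stays at or
        # below 0.7 are penalized as background.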
        loss_obj = fluid.layers.sigmoid_cross_entropy_with_logits(obj,
                                                                  obj_mask)
        loss_obj_pos = fluid.layers.reduce_sum(loss_obj * tobj, dim=[1, 2, 3])
        loss_obj_neg = fluid.layers.reduce_sum(
            loss_obj * (1.0 - obj_mask) * iou_mask, dim=[1, 2, 3])
        return loss_obj_pos, loss_obj_neg