ttf_head.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant, Normal
from paddle.regularizer import L2Decay

from ppdet.core.workspace import register
from ppdet.modeling.layers import DeformableConvV2, LiteConv


@register
class HMHead(nn.Layer):
    """
    Args:
        ch_in (int): The channel number of the input Tensor.
        ch_out (int): The channel number of the output Tensor.
        num_classes (int): The number of classes.
        conv_num (int): The number of convolution layers in hm_feat.
        dcn_head (bool): Whether to use deformable convolution (DCN) in the
            head. False by default.
        lite_head (bool): Whether to use the lite version of the head.
            False by default.
        norm_type (str): The norm type; 'sync_bn', 'bn' and 'gn' are
            optional. 'bn' by default.

    Return:
        Heatmap head output.
    """
    __shared__ = ['num_classes', 'norm_type']

    def __init__(self,
                 ch_in,
                 ch_out=128,
                 num_classes=80,
                 conv_num=2,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn'):
        super(HMHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                lite_name = 'hm.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(
                                initializer=Normal(0, 0.01))))
                else:
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                # The explicit ReLU is only added on the non-lite branch;
                # LiteConv already ends in an activation.
                head_conv.add_sublayer(name + '.act', nn.ReLU())
        self.feat = head_conv
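        # The final 1x1 conv bias is initialized to -log((1 - 0.01) / 0.01),
        # so that the initial sigmoid heatmap activation is ~0.01, the usual
        # focal-loss prior: sigmoid(-log((1 - 0.01) / 0.01)) == 0.01.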
        bias_init = float(-np.log((1 - 0.01) / 0.01))
        weight_attr = None if lite_head else ParamAttr(
            initializer=Normal(0, 0.01))
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=num_classes,
            kernel_size=1,
            weight_attr=weight_attr,
            bias_attr=ParamAttr(
                learning_rate=2.,
                regularizer=L2Decay(0.),
                initializer=Constant(bias_init)))

    def forward(self, feat):
        out = self.feat(feat)
        out = self.head(out)
        return out
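

# Note: HMHead emits raw class logits of shape [N, num_classes, H, W];
# TTFHead.get_loss below applies the sigmoid (clipped to [1e-4, 1 - 1e-4])
# before computing the focal loss.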


@register
class WHHead(nn.Layer):
    """
    Args:
        ch_in (int): The channel number of the input Tensor.
        ch_out (int): The channel number of the output Tensor.
        conv_num (int): The number of convolution layers in wh_feat.
        dcn_head (bool): Whether to use deformable convolution (DCN) in the
            head. False by default.
        lite_head (bool): Whether to use the lite version of the head.
            False by default.
        norm_type (str): The norm type; 'sync_bn', 'bn' and 'gn' are
            optional. 'bn' by default.

    Return:
        Width & height head output.
    """
    __shared__ = ['norm_type']

    def __init__(self,
                 ch_in,
                 ch_out=64,
                 conv_num=2,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn'):
        super(WHHead, self).__init__()
        head_conv = nn.Sequential()
        for i in range(conv_num):
            name = 'conv.{}'.format(i)
            if lite_head:
                lite_name = 'wh.' + name
                head_conv.add_sublayer(
                    lite_name,
                    LiteConv(
                        in_channels=ch_in if i == 0 else ch_out,
                        out_channels=ch_out,
                        norm_type=norm_type))
            else:
                if dcn_head:
                    head_conv.add_sublayer(
                        name,
                        DeformableConvV2(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            weight_attr=ParamAttr(
                                initializer=Normal(0, 0.01))))
                else:
                    head_conv.add_sublayer(
                        name,
                        nn.Conv2D(
                            in_channels=ch_in if i == 0 else ch_out,
                            out_channels=ch_out,
                            kernel_size=3,
                            padding=1,
                            weight_attr=ParamAttr(
                                initializer=Normal(0, 0.01)),
                            bias_attr=ParamAttr(
                                learning_rate=2., regularizer=L2Decay(0.))))
                head_conv.add_sublayer(name + '.act', nn.ReLU())

        weight_attr = None if lite_head else ParamAttr(
            initializer=Normal(0, 0.01))
        self.feat = head_conv
        self.head = nn.Conv2D(
            in_channels=ch_out,
            out_channels=4,
            kernel_size=1,
            weight_attr=weight_attr,
            bias_attr=ParamAttr(
                learning_rate=2., regularizer=L2Decay(0.)))

    def forward(self, feat):
        out = self.feat(feat)
        out = self.head(out)
        # Distances to the four box sides must be nonnegative.
        out = F.relu(out)
        return out
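

# Note: WHHead predicts nonnegative per-pixel (left, top, right, bottom)
# distances; TTFHead.forward below scales them by `wh_offset_base` before
# they are decoded into boxes in `get_loss`.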


@register
class TTFHead(nn.Layer):
    """
    TTFHead

    Args:
        in_channels (int): The channel number of the input to TTFHead.
        num_classes (int): The number of classes, 80 by default.
        hm_head_planes (int): The channel number of the heatmap head,
            128 by default.
        wh_head_planes (int): The channel number of the width & height head,
            64 by default.
        hm_head_conv_num (int): The number of convolution layers in the
            heatmap head, 2 by default.
        wh_head_conv_num (int): The number of convolution layers in the
            width & height head, 2 by default.
        hm_loss (object): Instance of 'CTFocalLoss'.
        wh_loss (object): Instance of 'GIoULoss'.
        wh_offset_base (float): The base offset of width and height,
            16.0 by default.
        down_ratio (int): The actual down_ratio is calculated from
            base_down_ratio (default 16) and the number of upsample layers.
        dcn_head (bool): Whether to use deformable convolution (DCN) in the
            head. False by default.
        lite_head (bool): Whether to use the lite version of the head.
            False by default.
        norm_type (str): The norm type; 'sync_bn', 'bn' and 'gn' are
            optional. 'bn' by default.
        ags_module (bool): Whether to use the AGS module to reweight the
            localization feature. False by default.
    """
    __shared__ = ['num_classes', 'down_ratio', 'norm_type']
    __inject__ = ['hm_loss', 'wh_loss']

    def __init__(self,
                 in_channels,
                 num_classes=80,
                 hm_head_planes=128,
                 wh_head_planes=64,
                 hm_head_conv_num=2,
                 wh_head_conv_num=2,
                 hm_loss='CTFocalLoss',
                 wh_loss='GIoULoss',
                 wh_offset_base=16.,
                 down_ratio=4,
                 dcn_head=False,
                 lite_head=False,
                 norm_type='bn',
                 ags_module=False):
        super(TTFHead, self).__init__()
        self.in_channels = in_channels
        self.hm_head = HMHead(in_channels, hm_head_planes, num_classes,
                              hm_head_conv_num, dcn_head, lite_head,
                              norm_type)
        self.wh_head = WHHead(in_channels, wh_head_planes, wh_head_conv_num,
                              dcn_head, lite_head, norm_type)
        self.hm_loss = hm_loss
        self.wh_loss = wh_loss
        self.wh_offset_base = wh_offset_base
        self.down_ratio = down_ratio
        self.ags_module = ags_module

    @classmethod
    def from_config(cls, cfg, input_shape):
        # Let the config system infer `in_channels` from the output shape
        # of the upstream neck instead of requiring it in the YAML config.
        if isinstance(input_shape, (list, tuple)):
            input_shape = input_shape[0]
        return {'in_channels': input_shape.channels, }

    def forward(self, feats):
        hm = self.hm_head(feats)
        # Scale the predicted side distances by the base offset
        # (16 by default).
        wh = self.wh_head(feats) * self.wh_offset_base
        return hm, wh
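
    # For a neck output `feats` of shape [N, in_channels, H, W], `hm` has
    # shape [N, num_classes, H, W] and `wh` has shape [N, 4, H, W].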

    def filter_box_by_weight(self, pred, target, weight):
        """
        Filter out boxes where ttf_reg_weight is 0; only keep positive
        samples.
        """
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        weight = paddle.gather_nd(weight, index)
        pred = paddle.gather_nd(pred, index)
        target = paddle.gather_nd(target, index)
        return pred, target, weight

    def filter_loc_by_weight(self, score, weight):
        index = paddle.nonzero(weight > 0)
        index.stop_gradient = True
        score = paddle.gather_nd(score, index)
        return score

    def get_loss(self, pred_hm, pred_wh, target_hm, box_target, target_weight):
        pred_hm = paddle.clip(F.sigmoid(pred_hm), 1e-4, 1 - 1e-4)
        hm_loss = self.hm_loss(pred_hm, target_hm)
        H, W = target_hm.shape[2:]
        mask = paddle.reshape(target_weight, [-1, H, W])
        avg_factor = paddle.sum(mask) + 1e-4

        # Build the (x, y) coordinate of every feature-map cell, measured
        # in input-image pixels.
        base_step = self.down_ratio
        shifts_x = paddle.arange(0, W * base_step, base_step, dtype='int32')
        shifts_y = paddle.arange(0, H * base_step, base_step, dtype='int32')
        shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
        base_loc = paddle.stack([shift_x, shift_y], axis=0)
        base_loc.stop_gradient = True

        # Decode per-pixel (left, top, right, bottom) distances into
        # (x1, y1, x2, y2) boxes.
        pred_boxes = paddle.concat(
            [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
            axis=1)
        pred_boxes = paddle.transpose(pred_boxes, [0, 2, 3, 1])
        boxes = paddle.transpose(box_target, [0, 2, 3, 1])
        boxes.stop_gradient = True

        if self.ags_module:
            pred_hm_max = paddle.max(pred_hm, axis=1, keepdim=True)
            pred_hm_max_softmax = F.softmax(pred_hm_max, axis=1)
            pred_hm_max_softmax = paddle.transpose(pred_hm_max_softmax,
                                                   [0, 2, 3, 1])
            pred_hm_max_softmax = self.filter_loc_by_weight(
                pred_hm_max_softmax, mask)
        else:
            pred_hm_max_softmax = None

        pred_boxes, boxes, mask = self.filter_box_by_weight(pred_boxes,
                                                            boxes, mask)
        mask.stop_gradient = True
        wh_loss = self.wh_loss(
            pred_boxes,
            boxes,
            iou_weight=mask.unsqueeze(1),
            loc_reweight=pred_hm_max_softmax)
        wh_loss = wh_loss / avg_factor

        ttf_loss = {'hm_loss': hm_loss, 'wh_loss': wh_loss}
        return ttf_loss
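

# A minimal, self-contained sketch (not part of the original head) of how the
# box decoding in `get_loss` above works. `H`, `W`, `down_ratio` and the
# constant `pred_wh` below are toy values chosen for illustration only.
if __name__ == '__main__':
    H, W, down_ratio = 4, 4, 4
    shifts_x = paddle.arange(0, W * down_ratio, down_ratio, dtype='int32')
    shifts_y = paddle.arange(0, H * down_ratio, down_ratio, dtype='int32')
    shift_y, shift_x = paddle.tensor.meshgrid([shifts_y, shifts_x])
    # base_loc[0] holds the x coordinate and base_loc[1] the y coordinate of
    # each feature-map cell in input-image pixels; cast to float so the toy
    # arithmetic below stays single-dtype.
    base_loc = paddle.cast(paddle.stack([shift_x, shift_y], axis=0), 'float32')

    # Pretend every cell predicts a distance of 8 pixels to each box side.
    pred_wh = paddle.ones([1, 4, H, W]) * 8.
    pred_boxes = paddle.concat(
        [0 - pred_wh[:, 0:2, :, :] + base_loc, pred_wh[:, 2:4] + base_loc],
        axis=1)
    # The cell at (0, 0) decodes to the box (x1, y1, x2, y2) = (-8, -8, 8, 8).
    print(pred_boxes[0, :, 0, 0].numpy())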