# acfpn.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. import copy
  19. from paddle import fluid
  20. from paddle.fluid.param_attr import ParamAttr
  21. from paddle.fluid.initializer import Xavier
  22. from paddle.fluid.regularizer import L2Decay
  23. from ppdet.core.workspace import register
  24. from ppdet.modeling.ops import ConvNorm
  25. __all__ = ['ACFPN']
  26. @register
  27. class ACFPN(object):
  28. """
  29. Attention-guided Context Feature Pyramid Network for Object Detection,
  30. see https://arxiv.org/abs/2005.11475
  31. Args:
  32. num_chan (int): number of feature channels
  33. min_level (int): lowest level of the backbone feature map to use
  34. max_level (int): highest level of the backbone feature map to use
  35. spatial_scale (list): feature map scaling factor
  36. has_extra_convs (bool): whether has extral convolutions in higher levels
  37. norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel'
  38. use_c5 (bool): whether to use C5 as the feature map.
  39. norm_groups (int): group number of group norm.
  40. """
  41. __shared__ = ['norm_type', 'freeze_norm']
  42. def __init__(self,
  43. num_chan=256,
  44. min_level=2,
  45. max_level=6,
  46. spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.],
  47. has_extra_convs=False,
  48. norm_type=None,
  49. freeze_norm=False,
  50. use_c5=True,
  51. norm_groups=32):
  52. self.freeze_norm = freeze_norm
  53. self.num_chan = num_chan
  54. self.min_level = min_level
  55. self.max_level = max_level
  56. self.spatial_scale = spatial_scale
  57. self.has_extra_convs = has_extra_convs
  58. self.norm_type = norm_type
  59. self.use_c5 = use_c5
  60. self.norm_groups = norm_groups
  61. def _add_topdown_lateral(self, body_name, body_input, upper_output):
  62. lateral_name = 'fpn_inner_' + body_name + '_lateral'
  63. topdown_name = 'fpn_topdown_' + body_name
  64. fan = body_input.shape[1]
  65. if self.norm_type:
  66. initializer = Xavier(fan_out=fan)
  67. lateral = ConvNorm(
  68. body_input,
  69. self.num_chan,
  70. 1,
  71. initializer=initializer,
  72. norm_type=self.norm_type,
  73. freeze_norm=self.freeze_norm,
  74. name=lateral_name,
  75. norm_name=lateral_name)
  76. else:
  77. lateral = fluid.layers.conv2d(
  78. body_input,
  79. self.num_chan,
  80. 1,
  81. param_attr=ParamAttr(
  82. name=lateral_name + "_w", initializer=Xavier(fan_out=fan)),
  83. bias_attr=ParamAttr(
  84. name=lateral_name + "_b",
  85. learning_rate=2.,
  86. regularizer=L2Decay(0.)),
  87. name=lateral_name)
  88. topdown = fluid.layers.resize_nearest(
  89. upper_output, scale=2., name=topdown_name)
  90. return lateral + topdown
  91. def dense_aspp_block(self, input, num_filters1, num_filters2, dilation_rate,
  92. dropout_prob, name):
  93. conv = ConvNorm(
  94. input,
  95. num_filters=num_filters1,
  96. filter_size=1,
  97. stride=1,
  98. groups=1,
  99. norm_decay=0.,
  100. norm_type='gn',
  101. norm_groups=self.norm_groups,
  102. dilation=dilation_rate,
  103. lr_scale=1,
  104. freeze_norm=False,
  105. act="relu",
  106. norm_name=name + "_gn",
  107. initializer=None,
  108. bias_attr=False,
  109. name=name + "_gn")
  110. conv = fluid.layers.conv2d(
  111. conv,
  112. num_filters2,
  113. filter_size=3,
  114. padding=dilation_rate,
  115. dilation=dilation_rate,
  116. act="relu",
  117. param_attr=ParamAttr(name=name + "_conv_w"),
  118. bias_attr=ParamAttr(name=name + "_conv_b"), )
  119. if dropout_prob > 0:
  120. conv = fluid.layers.dropout(conv, dropout_prob=dropout_prob)
  121. return conv
  122. def dense_aspp(self, input, name=None):
  123. dropout0 = 0.1
  124. d_feature0 = 512
  125. d_feature1 = 256
  126. aspp3 = self.dense_aspp_block(
  127. input,
  128. num_filters1=d_feature0,
  129. num_filters2=d_feature1,
  130. dropout_prob=dropout0,
  131. name=name + '_aspp3',
  132. dilation_rate=3)
  133. conv = fluid.layers.concat([aspp3, input], axis=1)
  134. aspp6 = self.dense_aspp_block(
  135. conv,
  136. num_filters1=d_feature0,
  137. num_filters2=d_feature1,
  138. dropout_prob=dropout0,
  139. name=name + '_aspp6',
  140. dilation_rate=6)
  141. conv = fluid.layers.concat([aspp6, conv], axis=1)
  142. aspp12 = self.dense_aspp_block(
  143. conv,
  144. num_filters1=d_feature0,
  145. num_filters2=d_feature1,
  146. dropout_prob=dropout0,
  147. name=name + '_aspp12',
  148. dilation_rate=12)
  149. conv = fluid.layers.concat([aspp12, conv], axis=1)
  150. aspp18 = self.dense_aspp_block(
  151. conv,
  152. num_filters1=d_feature0,
  153. num_filters2=d_feature1,
  154. dropout_prob=dropout0,
  155. name=name + '_aspp18',
  156. dilation_rate=18)
  157. conv = fluid.layers.concat([aspp18, conv], axis=1)
  158. aspp24 = self.dense_aspp_block(
  159. conv,
  160. num_filters1=d_feature0,
  161. num_filters2=d_feature1,
  162. dropout_prob=dropout0,
  163. name=name + '_aspp24',
  164. dilation_rate=24)
  165. conv = fluid.layers.concat(
  166. [aspp3, aspp6, aspp12, aspp18, aspp24], axis=1)
  167. conv = ConvNorm(
  168. conv,
  169. num_filters=self.num_chan,
  170. filter_size=1,
  171. stride=1,
  172. groups=1,
  173. norm_decay=0.,
  174. norm_type='gn',
  175. norm_groups=self.norm_groups,
  176. dilation=1,
  177. lr_scale=1,
  178. freeze_norm=False,
  179. act="relu",
  180. norm_name=name + "_dense_aspp_reduce_gn",
  181. initializer=None,
  182. bias_attr=False,
  183. name=name + "_dense_aspp_reduce_gn")
  184. return conv
  185. def get_output(self, body_dict):
  186. """
  187. Add FPN onto backbone.
  188. Args:
  189. body_dict(OrderedDict): Dictionary of variables and each element is the
  190. output of backbone.
  191. Return:
  192. fpn_dict(OrderedDict): A dictionary represents the output of FPN with
  193. their name.
  194. spatial_scale(list): A list of multiplicative spatial scale factor.
  195. """
  196. spatial_scale = copy.deepcopy(self.spatial_scale)
  197. body_name_list = list(body_dict.keys())[::-1]
  198. num_backbone_stages = len(body_name_list)
  199. self.fpn_inner_output = [[] for _ in range(num_backbone_stages)]
  200. fpn_inner_name = 'fpn_inner_' + body_name_list[0]
  201. body_input = body_dict[body_name_list[0]]
  202. fan = body_input.shape[1]
  203. if self.norm_type:
  204. initializer = Xavier(fan_out=fan)
  205. self.fpn_inner_output[0] = ConvNorm(
  206. body_input,
  207. self.num_chan,
  208. 1,
  209. initializer=initializer,
  210. norm_type=self.norm_type,
  211. freeze_norm=self.freeze_norm,
  212. name=fpn_inner_name,
  213. norm_name=fpn_inner_name)
  214. else:
  215. self.fpn_inner_output[0] = fluid.layers.conv2d(
  216. body_input,
  217. self.num_chan,
  218. 1,
  219. param_attr=ParamAttr(
  220. name=fpn_inner_name + "_w",
  221. initializer=Xavier(fan_out=fan)),
  222. bias_attr=ParamAttr(
  223. name=fpn_inner_name + "_b",
  224. learning_rate=2.,
  225. regularizer=L2Decay(0.)),
  226. name=fpn_inner_name)
  227. self.fpn_inner_output[0] += self.dense_aspp(
  228. self.fpn_inner_output[0], name="acfpn")
  229. for i in range(1, num_backbone_stages):
  230. body_name = body_name_list[i]
  231. body_input = body_dict[body_name]
  232. top_output = self.fpn_inner_output[i - 1]
  233. fpn_inner_single = self._add_topdown_lateral(body_name, body_input,
  234. top_output)
  235. self.fpn_inner_output[i] = fpn_inner_single
  236. fpn_dict = {}
  237. fpn_name_list = []
  238. for i in range(num_backbone_stages):
  239. fpn_name = 'fpn_' + body_name_list[i]
  240. fan = self.fpn_inner_output[i].shape[1] * 3 * 3
  241. if self.norm_type:
  242. initializer = Xavier(fan_out=fan)
  243. fpn_output = ConvNorm(
  244. self.fpn_inner_output[i],
  245. self.num_chan,
  246. 3,
  247. initializer=initializer,
  248. norm_type=self.norm_type,
  249. freeze_norm=self.freeze_norm,
  250. name=fpn_name,
  251. norm_name=fpn_name)
  252. else:
  253. fpn_output = fluid.layers.conv2d(
  254. self.fpn_inner_output[i],
  255. self.num_chan,
  256. filter_size=3,
  257. padding=1,
  258. param_attr=ParamAttr(
  259. name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
  260. bias_attr=ParamAttr(
  261. name=fpn_name + "_b",
  262. learning_rate=2.,
  263. regularizer=L2Decay(0.)),
  264. name=fpn_name)
  265. fpn_dict[fpn_name] = fpn_output
  266. fpn_name_list.append(fpn_name)
  267. if not self.has_extra_convs and self.max_level - self.min_level == len(
  268. spatial_scale):
  269. body_top_name = fpn_name_list[0]
  270. body_top_extension = fluid.layers.pool2d(
  271. fpn_dict[body_top_name],
  272. 1,
  273. 'max',
  274. pool_stride=2,
  275. name=body_top_name + '_subsampled_2x')
  276. fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension
  277. fpn_name_list.insert(0, body_top_name + '_subsampled_2x')
  278. spatial_scale.insert(0, spatial_scale[0] * 0.5)
  279. # Coarser FPN levels introduced for RetinaNet
  280. highest_backbone_level = self.min_level + len(spatial_scale) - 1
  281. if self.has_extra_convs and self.max_level > highest_backbone_level:
  282. if self.use_c5:
  283. fpn_blob = body_dict[body_name_list[0]]
  284. else:
  285. fpn_blob = fpn_dict[fpn_name_list[0]]
  286. for i in range(highest_backbone_level + 1, self.max_level + 1):
  287. fpn_blob_in = fpn_blob
  288. fpn_name = 'fpn_' + str(i)
  289. if i > highest_backbone_level + 1:
  290. fpn_blob_in = fluid.layers.relu(fpn_blob)
  291. fan = fpn_blob_in.shape[1] * 3 * 3
  292. fpn_blob = fluid.layers.conv2d(
  293. input=fpn_blob_in,
  294. num_filters=self.num_chan,
  295. filter_size=3,
  296. stride=2,
  297. padding=1,
  298. param_attr=ParamAttr(
  299. name=fpn_name + "_w", initializer=Xavier(fan_out=fan)),
  300. bias_attr=ParamAttr(
  301. name=fpn_name + "_b",
  302. learning_rate=2.,
  303. regularizer=L2Decay(0.)),
  304. name=fpn_name)
  305. fpn_dict[fpn_name] = fpn_blob
  306. fpn_name_list.insert(0, fpn_name)
  307. spatial_scale.insert(0, spatial_scale[0] * 0.5)
  308. res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
  309. return res_dict, spatial_scale