# File: bfp.py
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. from paddle import fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from ppdet.core.workspace import register
  21. from .nonlocal_helper import add_space_nonlocal
  22. from .fpn import FPN
  23. __all__ = ['BFP']
  24. @register
  25. class BFP(object):
  26. """
  27. Libra R-CNN, see https://arxiv.org/abs/1904.02701
  28. Args:
  29. base_neck (dict): basic neck before balanced feature pyramid (bfp)
  30. refine_level (int): index of integration and refine level of bfp
  31. refine_type (str): refine type, None, conv or nonlocal
  32. nonlocal_reduction (float): channel reduction level if refine_type is nonlocal
  33. with_bias (bool): whether the nonlocal module contains bias
  34. with_scale (bool): whether to scale feature in nonlocal module or not
  35. """
  36. __inject__ = ['base_neck']
  37. def __init__(self,
  38. base_neck=FPN().__dict__,
  39. refine_level=2,
  40. refine_type="nonlocal",
  41. nonlocal_reduction=1,
  42. with_bias=True,
  43. with_scale=False):
  44. if isinstance(base_neck, dict):
  45. self.base_neck = FPN(**base_neck)
  46. self.refine_level = refine_level
  47. self.refine_type = refine_type
  48. self.nonlocal_reduction = nonlocal_reduction
  49. self.with_bias = with_bias
  50. self.with_scale = with_scale
  51. def get_output(self, body_dict):
  52. # top-down order
  53. res_dict, spatial_scale = self.base_neck.get_output(body_dict)
  54. res_dict = self.get_output_bfp(res_dict)
  55. return res_dict, spatial_scale
  56. def get_output_bfp(self, body_dict):
  57. body_name_list = list(body_dict.keys())
  58. num_backbone_stages = len(body_name_list)
  59. self.num_levels = len(body_dict)
  60. # step 1: gather multi-level features by resize and average
  61. feats = []
  62. refine_level_name = body_name_list[self.refine_level]
  63. for i in range(self.num_levels):
  64. curr_fpn_name = body_name_list[i]
  65. pool_stride = 2**(i - self.refine_level)
  66. pool_size = [
  67. body_dict[refine_level_name].shape[2],
  68. body_dict[refine_level_name].shape[3]
  69. ]
  70. if i > self.refine_level:
  71. gathered = fluid.layers.pool2d(
  72. input=body_dict[curr_fpn_name],
  73. pool_type='max',
  74. pool_size=pool_stride,
  75. pool_stride=pool_stride,
  76. ceil_mode=True, )
  77. else:
  78. gathered = self._resize_input_tensor(
  79. body_dict[curr_fpn_name], body_dict[refine_level_name],
  80. 1.0 / pool_stride)
  81. feats.append(gathered)
  82. bsf = sum(feats) / len(feats)
  83. # step 2: refine gathered features
  84. if self.refine_type == "conv":
  85. bsf = fluid.layers.conv2d(
  86. bsf,
  87. bsf.shape[1],
  88. filter_size=3,
  89. padding=1,
  90. param_attr=ParamAttr(name="bsf_w"),
  91. bias_attr=ParamAttr(name="bsf_b"),
  92. name="bsf")
  93. elif self.refine_type == "nonlocal":
  94. dim_in = bsf.shape[1]
  95. nonlocal_name = "nonlocal_bsf"
  96. bsf = add_space_nonlocal(
  97. bsf,
  98. bsf.shape[1],
  99. bsf.shape[1],
  100. nonlocal_name,
  101. int(bsf.shape[1] / self.nonlocal_reduction),
  102. with_bias=self.with_bias,
  103. with_scale=self.with_scale)
  104. # step 3: scatter refined features to multi-levels by a residual path
  105. fpn_dict = {}
  106. fpn_name_list = []
  107. for i in range(self.num_levels):
  108. curr_fpn_name = body_name_list[i]
  109. pool_stride = 2**(self.refine_level - i)
  110. if i >= self.refine_level:
  111. residual = self._resize_input_tensor(
  112. bsf, body_dict[curr_fpn_name], 1.0 / pool_stride)
  113. else:
  114. residual = fluid.layers.pool2d(
  115. input=bsf,
  116. pool_type='max',
  117. pool_size=pool_stride,
  118. pool_stride=pool_stride,
  119. ceil_mode=True, )
  120. fpn_dict[curr_fpn_name] = residual + body_dict[curr_fpn_name]
  121. fpn_name_list.append(curr_fpn_name)
  122. res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
  123. return res_dict
  124. def _resize_input_tensor(self, body_input, ref_output, scale):
  125. shape = fluid.layers.shape(ref_output)
  126. shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
  127. out_shape_ = shape_hw
  128. out_shape = fluid.layers.cast(out_shape_, dtype='int32')
  129. out_shape.stop_gradient = True
  130. body_output = fluid.layers.resize_nearest(
  131. body_input, scale=scale, out_shape=out_shape)
  132. return body_output