123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- from collections import OrderedDict
- from paddle import fluid
- from paddle.fluid.param_attr import ParamAttr
- from ppdet.core.workspace import register
- from .nonlocal_helper import add_space_nonlocal
- from .fpn import FPN
- __all__ = ['BFP']
@register
class BFP(object):
    """
    Balanced Feature Pyramid (BFP) neck from Libra R-CNN,
    see https://arxiv.org/abs/1904.02701

    The neck runs a base neck first, then (1) gathers all pyramid levels at
    the resolution of one chosen level and averages them, (2) optionally
    refines the averaged feature, and (3) scatters the refined feature back
    to every level through a residual addition.

    Args:
        base_neck (dict|object): basic neck before balanced feature pyramid
            (bfp). A dict is treated as keyword arguments for ``FPN``; any
            other value is used directly as the neck and must provide
            ``get_output(body_dict)``.
        refine_level (int): index of integration and refine level of bfp
        refine_type (str): refine type, None, conv or nonlocal
        nonlocal_reduction (float): channel reduction level if refine_type
            is nonlocal
        with_bias (bool): whether the nonlocal module contains bias
        with_scale (bool): whether to scale feature in nonlocal module or not
    """
    __inject__ = ['base_neck']

    def __init__(self,
                 base_neck=FPN().__dict__,
                 refine_level=2,
                 refine_type="nonlocal",
                 nonlocal_reduction=1,
                 with_bias=True,
                 with_scale=False):
        if isinstance(base_neck, dict):
            self.base_neck = FPN(**base_neck)
        else:
            # ``base_neck`` is listed in ``__inject__``, so it may arrive as
            # an already-built neck object. Keep it as-is instead of leaving
            # the attribute unset, which would make ``get_output`` fail with
            # an AttributeError.
            self.base_neck = base_neck
        self.refine_level = refine_level
        self.refine_type = refine_type
        self.nonlocal_reduction = nonlocal_reduction
        self.with_bias = with_bias
        self.with_scale = with_scale

    def get_output(self, body_dict):
        """
        Run the base neck on ``body_dict`` and balance its output features.

        Args:
            body_dict: backbone features, forwarded unchanged to
                ``self.base_neck.get_output``.

        Returns:
            tuple: ``(res_dict, spatial_scale)`` where ``res_dict`` is the
            balanced feature dict produced by ``get_output_bfp`` and
            ``spatial_scale`` is passed through from the base neck.
        """
        # top-down order
        res_dict, spatial_scale = self.base_neck.get_output(body_dict)
        res_dict = self.get_output_bfp(res_dict)
        return res_dict, spatial_scale

    def get_output_bfp(self, body_dict):
        """
        Apply gather / refine / scatter to a dict of pyramid features.

        Args:
            body_dict (dict): feature name -> feature variable. Iteration
                order defines the level index; levels with an index greater
                than ``refine_level`` are max-pooled down, so they are
                presumably the finer ones -- confirm against the base neck.

        Returns:
            OrderedDict: same keys, in the same order as ``body_dict``, each
            mapped to ``residual + original feature``.
        """
        body_name_list = list(body_dict.keys())
        self.num_levels = len(body_dict)
        refine_level_name = body_name_list[self.refine_level]
        refine_feat = body_dict[refine_level_name]

        # step 1: gather multi-level features by resize and average
        feats = []
        for i, curr_fpn_name in enumerate(body_name_list):
            pool_stride = 2**(i - self.refine_level)
            if i > self.refine_level:
                # Levels after the refine level are shrunk by max pooling.
                gathered = fluid.layers.pool2d(
                    input=body_dict[curr_fpn_name],
                    pool_type='max',
                    pool_size=pool_stride,
                    pool_stride=pool_stride,
                    ceil_mode=True)
            else:
                # Remaining levels (including the refine level itself) are
                # resized to the refine level's height/width.
                gathered = self._resize_input_tensor(
                    body_dict[curr_fpn_name], refine_feat,
                    1.0 / pool_stride)
            feats.append(gathered)
        bsf = sum(feats) / len(feats)

        # step 2: refine gathered features
        # Any other refine_type (e.g. None) leaves the averaged feature as-is.
        num_channels = bsf.shape[1]
        if self.refine_type == "conv":
            bsf = fluid.layers.conv2d(
                bsf,
                num_channels,
                filter_size=3,
                padding=1,
                param_attr=ParamAttr(name="bsf_w"),
                bias_attr=ParamAttr(name="bsf_b"),
                name="bsf")
        elif self.refine_type == "nonlocal":
            nonlocal_name = "nonlocal_bsf"
            bsf = add_space_nonlocal(
                bsf,
                num_channels,
                num_channels,
                nonlocal_name,
                int(num_channels / self.nonlocal_reduction),
                with_bias=self.with_bias,
                with_scale=self.with_scale)

        # step 3: scatter refined features to multi-levels by a residual path
        res_dict = OrderedDict()
        for i, curr_fpn_name in enumerate(body_name_list):
            pool_stride = 2**(self.refine_level - i)
            if i >= self.refine_level:
                # Mirror of step 1: resize back to this level's own size.
                residual = self._resize_input_tensor(
                    bsf, body_dict[curr_fpn_name], 1.0 / pool_stride)
            else:
                residual = fluid.layers.pool2d(
                    input=bsf,
                    pool_type='max',
                    pool_size=pool_stride,
                    pool_stride=pool_stride,
                    ceil_mode=True)
            res_dict[curr_fpn_name] = residual + body_dict[curr_fpn_name]
        return res_dict

    def _resize_input_tensor(self, body_input, ref_output, scale):
        """
        Nearest-neighbor resize ``body_input`` using ``ref_output`` as the
        size reference.

        Args:
            body_input: feature variable to resize.
            ref_output: variable whose runtime shape entries 2 and 3 are
                used as the target size (assumes NCHW layout -- TODO confirm).
            scale (float): scale factor forwarded to ``resize_nearest``
                together with ``out_shape``.
                NOTE(review): ``out_shape`` presumably takes priority over
                ``scale`` in ``resize_nearest`` -- confirm in the Paddle docs.

        Returns:
            The resized feature variable.
        """
        shape = fluid.layers.shape(ref_output)
        # Take elements [2, 4) of the runtime shape as the target size.
        shape_hw = fluid.layers.slice(shape, axes=[0], starts=[2], ends=[4])
        out_shape = fluid.layers.cast(shape_hw, dtype='int32')
        # The target size is not a trainable quantity.
        out_shape.stop_gradient = True
        body_output = fluid.layers.resize_nearest(
            body_input, scale=scale, out_shape=out_shape)
        return body_output
|