es_pan.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. import paddle.nn.functional as F
  17. from paddle import ParamAttr
  18. from paddle.regularizer import L2Decay
  19. from ppdet.core.workspace import register, serializable
  20. from ..shape_spec import ShapeSpec
  21. from ..backbones.esnet import SEModule
  22. from .csp_pan import ConvBNLayer, Channel_T, DPModule
  23. __all__ = ['ESPAN']
  24. class ES_Block(nn.Layer):
  25. def __init__(self,
  26. in_channels,
  27. mid_channels,
  28. out_channels,
  29. kernel_size=5,
  30. stride=1,
  31. act='leaky_relu'):
  32. super(ES_Block, self).__init__()
  33. self._residual = ConvBNLayer(
  34. in_channel=in_channels,
  35. out_channel=out_channels,
  36. kernel_size=1,
  37. stride=1,
  38. groups=1,
  39. act=act)
  40. self._conv_pw = ConvBNLayer(
  41. in_channel=in_channels,
  42. out_channel=mid_channels // 2,
  43. kernel_size=1,
  44. stride=1,
  45. groups=1,
  46. act=act)
  47. self._conv_dw = ConvBNLayer(
  48. in_channel=mid_channels // 2,
  49. out_channel=mid_channels // 2,
  50. kernel_size=kernel_size,
  51. stride=stride,
  52. groups=mid_channels // 2,
  53. act=None)
  54. self._se = SEModule(mid_channels)
  55. self._conv_linear = ConvBNLayer(
  56. in_channel=mid_channels,
  57. out_channel=out_channels,
  58. kernel_size=1,
  59. stride=1,
  60. groups=1,
  61. act=act)
  62. self._out_conv = ConvBNLayer(
  63. in_channel=out_channels * 2,
  64. out_channel=out_channels,
  65. kernel_size=1,
  66. stride=1,
  67. groups=1,
  68. act=act)
  69. def forward(self, inputs):
  70. x1 = self._residual(inputs)
  71. x2 = self._conv_pw(inputs)
  72. x3 = self._conv_dw(x2)
  73. x3 = paddle.concat([x2, x3], axis=1)
  74. x3 = self._se(x3)
  75. x3 = self._conv_linear(x3)
  76. out = paddle.concat([x1, x3], axis=1)
  77. out = self._out_conv(out)
  78. return out
  79. @register
  80. @serializable
  81. class ESPAN(nn.Layer):
  82. """Path Aggregation Network with ES module.
  83. Args:
  84. in_channels (List[int]): Number of input channels per scale.
  85. out_channels (int): Number of output channels (used at each scale)
  86. kernel_size (int): The conv2d kernel size of this Module.
  87. num_features (int): Number of output features of CSPPAN module.
  88. num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
  89. use_depthwise (bool): Whether to depthwise separable convolution in
  90. blocks. Default: True
  91. """
  92. def __init__(self,
  93. in_channels,
  94. out_channels,
  95. kernel_size=5,
  96. num_features=3,
  97. use_depthwise=True,
  98. act='hard_swish',
  99. spatial_scales=[0.125, 0.0625, 0.03125]):
  100. super(ESPAN, self).__init__()
  101. self.conv_t = Channel_T(in_channels, out_channels, act=act)
  102. in_channels = [out_channels] * len(spatial_scales)
  103. self.in_channels = in_channels
  104. self.out_channels = out_channels
  105. self.spatial_scales = spatial_scales
  106. self.num_features = num_features
  107. conv_func = DPModule if use_depthwise else ConvBNLayer
  108. if self.num_features == 4:
  109. self.first_top_conv = conv_func(
  110. in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
  111. self.second_top_conv = conv_func(
  112. in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
  113. self.spatial_scales.append(self.spatial_scales[-1] / 2)
  114. # build top-down blocks
  115. self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
  116. self.top_down_blocks = nn.LayerList()
  117. for idx in range(len(in_channels) - 1, 0, -1):
  118. self.top_down_blocks.append(
  119. ES_Block(
  120. in_channels[idx - 1] * 2,
  121. in_channels[idx - 1],
  122. in_channels[idx - 1],
  123. kernel_size=kernel_size,
  124. stride=1,
  125. act=act))
  126. # build bottom-up blocks
  127. self.downsamples = nn.LayerList()
  128. self.bottom_up_blocks = nn.LayerList()
  129. for idx in range(len(in_channels) - 1):
  130. self.downsamples.append(
  131. conv_func(
  132. in_channels[idx],
  133. in_channels[idx],
  134. kernel_size=kernel_size,
  135. stride=2,
  136. act=act))
  137. self.bottom_up_blocks.append(
  138. ES_Block(
  139. in_channels[idx] * 2,
  140. in_channels[idx + 1],
  141. in_channels[idx + 1],
  142. kernel_size=kernel_size,
  143. stride=1,
  144. act=act))
  145. def forward(self, inputs):
  146. """
  147. Args:
  148. inputs (tuple[Tensor]): input features.
  149. Returns:
  150. tuple[Tensor]: CSPPAN features.
  151. """
  152. assert len(inputs) == len(self.in_channels)
  153. inputs = self.conv_t(inputs)
  154. # top-down path
  155. inner_outs = [inputs[-1]]
  156. for idx in range(len(self.in_channels) - 1, 0, -1):
  157. feat_heigh = inner_outs[0]
  158. feat_low = inputs[idx - 1]
  159. upsample_feat = self.upsample(feat_heigh)
  160. inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
  161. paddle.concat([upsample_feat, feat_low], 1))
  162. inner_outs.insert(0, inner_out)
  163. # bottom-up path
  164. outs = [inner_outs[0]]
  165. for idx in range(len(self.in_channels) - 1):
  166. feat_low = outs[-1]
  167. feat_height = inner_outs[idx + 1]
  168. downsample_feat = self.downsamples[idx](feat_low)
  169. out = self.bottom_up_blocks[idx](paddle.concat(
  170. [downsample_feat, feat_height], 1))
  171. outs.append(out)
  172. top_features = None
  173. if self.num_features == 4:
  174. top_features = self.first_top_conv(inputs[-1])
  175. top_features = top_features + self.second_top_conv(outs[-1])
  176. outs.append(top_features)
  177. return tuple(outs)
  178. @property
  179. def out_shape(self):
  180. return [
  181. ShapeSpec(
  182. channels=self.out_channels, stride=1. / s)
  183. for s in self.spatial_scales
  184. ]
  185. @classmethod
  186. def from_config(cls, cfg, input_shape):
  187. return {'in_channels': [i.channels for i in input_shape], }