# csp_pan.py
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # The code is based on:
  15. # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/yolox_pafpn.py
  16. import paddle
  17. import paddle.nn as nn
  18. import paddle.nn.functional as F
  19. from paddle import ParamAttr
  20. from ppdet.core.workspace import register, serializable
  21. from ..shape_spec import ShapeSpec
  22. __all__ = ['CSPPAN']
  23. class ConvBNLayer(nn.Layer):
  24. def __init__(self,
  25. in_channel=96,
  26. out_channel=96,
  27. kernel_size=3,
  28. stride=1,
  29. groups=1,
  30. act='leaky_relu'):
  31. super(ConvBNLayer, self).__init__()
  32. initializer = nn.initializer.KaimingUniform()
  33. self.conv = nn.Conv2D(
  34. in_channels=in_channel,
  35. out_channels=out_channel,
  36. kernel_size=kernel_size,
  37. groups=groups,
  38. padding=(kernel_size - 1) // 2,
  39. stride=stride,
  40. weight_attr=ParamAttr(initializer=initializer),
  41. bias_attr=False)
  42. self.bn = nn.BatchNorm2D(out_channel)
  43. if act == "hard_swish":
  44. act = 'hardswish'
  45. self.act = act
  46. def forward(self, x):
  47. x = self.bn(self.conv(x))
  48. if self.act:
  49. x = getattr(F, self.act)(x)
  50. return x
  51. class DPModule(nn.Layer):
  52. """
  53. Depth-wise and point-wise module.
  54. Args:
  55. in_channel (int): The input channels of this Module.
  56. out_channel (int): The output channels of this Module.
  57. kernel_size (int): The conv2d kernel size of this Module.
  58. stride (int): The conv2d's stride of this Module.
  59. act (str): The activation function of this Module,
  60. Now support `leaky_relu` and `hard_swish`.
  61. """
  62. def __init__(self,
  63. in_channel=96,
  64. out_channel=96,
  65. kernel_size=3,
  66. stride=1,
  67. act='leaky_relu',
  68. use_act_in_out=True):
  69. super(DPModule, self).__init__()
  70. initializer = nn.initializer.KaimingUniform()
  71. self.use_act_in_out = use_act_in_out
  72. self.dwconv = nn.Conv2D(
  73. in_channels=in_channel,
  74. out_channels=out_channel,
  75. kernel_size=kernel_size,
  76. groups=out_channel,
  77. padding=(kernel_size - 1) // 2,
  78. stride=stride,
  79. weight_attr=ParamAttr(initializer=initializer),
  80. bias_attr=False)
  81. self.bn1 = nn.BatchNorm2D(out_channel)
  82. self.pwconv = nn.Conv2D(
  83. in_channels=out_channel,
  84. out_channels=out_channel,
  85. kernel_size=1,
  86. groups=1,
  87. padding=0,
  88. weight_attr=ParamAttr(initializer=initializer),
  89. bias_attr=False)
  90. self.bn2 = nn.BatchNorm2D(out_channel)
  91. if act == "hard_swish":
  92. act = 'hardswish'
  93. self.act = act
  94. def forward(self, x):
  95. x = self.bn1(self.dwconv(x))
  96. if self.act:
  97. x = getattr(F, self.act)(x)
  98. x = self.bn2(self.pwconv(x))
  99. if self.use_act_in_out and self.act:
  100. x = getattr(F, self.act)(x)
  101. return x
  102. class DarknetBottleneck(nn.Layer):
  103. """The basic bottleneck block used in Darknet.
  104. Each Block consists of two ConvModules and the input is added to the
  105. final output. Each ConvModule is composed of Conv, BN, and act.
  106. The first convLayer has filter size of 1x1 and the second one has the
  107. filter size of 3x3.
  108. Args:
  109. in_channels (int): The input channels of this Module.
  110. out_channels (int): The output channels of this Module.
  111. expansion (int): The kernel size of the convolution. Default: 0.5
  112. add_identity (bool): Whether to add identity to the out.
  113. Default: True
  114. use_depthwise (bool): Whether to use depthwise separable convolution.
  115. Default: False
  116. """
  117. def __init__(self,
  118. in_channels,
  119. out_channels,
  120. kernel_size=3,
  121. expansion=0.5,
  122. add_identity=True,
  123. use_depthwise=False,
  124. act="leaky_relu"):
  125. super(DarknetBottleneck, self).__init__()
  126. hidden_channels = int(out_channels * expansion)
  127. conv_func = DPModule if use_depthwise else ConvBNLayer
  128. self.conv1 = ConvBNLayer(
  129. in_channel=in_channels,
  130. out_channel=hidden_channels,
  131. kernel_size=1,
  132. act=act)
  133. self.conv2 = conv_func(
  134. in_channel=hidden_channels,
  135. out_channel=out_channels,
  136. kernel_size=kernel_size,
  137. stride=1,
  138. act=act)
  139. self.add_identity = \
  140. add_identity and in_channels == out_channels
  141. def forward(self, x):
  142. identity = x
  143. out = self.conv1(x)
  144. out = self.conv2(out)
  145. if self.add_identity:
  146. return out + identity
  147. else:
  148. return out
  149. class CSPLayer(nn.Layer):
  150. """Cross Stage Partial Layer.
  151. Args:
  152. in_channels (int): The input channels of the CSP layer.
  153. out_channels (int): The output channels of the CSP layer.
  154. expand_ratio (float): Ratio to adjust the number of channels of the
  155. hidden layer. Default: 0.5
  156. num_blocks (int): Number of blocks. Default: 1
  157. add_identity (bool): Whether to add identity in blocks.
  158. Default: True
  159. use_depthwise (bool): Whether to depthwise separable convolution in
  160. blocks. Default: False
  161. """
  162. def __init__(self,
  163. in_channels,
  164. out_channels,
  165. kernel_size=3,
  166. expand_ratio=0.5,
  167. num_blocks=1,
  168. add_identity=True,
  169. use_depthwise=False,
  170. act="leaky_relu"):
  171. super().__init__()
  172. mid_channels = int(out_channels * expand_ratio)
  173. self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  174. self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act)
  175. self.final_conv = ConvBNLayer(
  176. 2 * mid_channels, out_channels, 1, act=act)
  177. self.blocks = nn.Sequential(* [
  178. DarknetBottleneck(
  179. mid_channels,
  180. mid_channels,
  181. kernel_size,
  182. 1.0,
  183. add_identity,
  184. use_depthwise,
  185. act=act) for _ in range(num_blocks)
  186. ])
  187. def forward(self, x):
  188. x_short = self.short_conv(x)
  189. x_main = self.main_conv(x)
  190. x_main = self.blocks(x_main)
  191. x_final = paddle.concat((x_main, x_short), axis=1)
  192. return self.final_conv(x_final)
  193. class Channel_T(nn.Layer):
  194. def __init__(self,
  195. in_channels=[116, 232, 464],
  196. out_channels=96,
  197. act="leaky_relu"):
  198. super(Channel_T, self).__init__()
  199. self.convs = nn.LayerList()
  200. for i in range(len(in_channels)):
  201. self.convs.append(
  202. ConvBNLayer(
  203. in_channels[i], out_channels, 1, act=act))
  204. def forward(self, x):
  205. outs = [self.convs[i](x[i]) for i in range(len(x))]
  206. return outs
  207. @register
  208. @serializable
  209. class CSPPAN(nn.Layer):
  210. """Path Aggregation Network with CSP module.
  211. Args:
  212. in_channels (List[int]): Number of input channels per scale.
  213. out_channels (int): Number of output channels (used at each scale)
  214. kernel_size (int): The conv2d kernel size of this Module.
  215. num_features (int): Number of output features of CSPPAN module.
  216. num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1
  217. use_depthwise (bool): Whether to depthwise separable convolution in
  218. blocks. Default: True
  219. """
  220. def __init__(self,
  221. in_channels,
  222. out_channels,
  223. kernel_size=5,
  224. num_features=3,
  225. num_csp_blocks=1,
  226. use_depthwise=True,
  227. act='hard_swish',
  228. spatial_scales=[0.125, 0.0625, 0.03125]):
  229. super(CSPPAN, self).__init__()
  230. self.conv_t = Channel_T(in_channels, out_channels, act=act)
  231. in_channels = [out_channels] * len(spatial_scales)
  232. self.in_channels = in_channels
  233. self.out_channels = out_channels
  234. self.spatial_scales = spatial_scales
  235. self.num_features = num_features
  236. conv_func = DPModule if use_depthwise else ConvBNLayer
  237. if self.num_features == 4:
  238. self.first_top_conv = conv_func(
  239. in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
  240. self.second_top_conv = conv_func(
  241. in_channels[0], in_channels[0], kernel_size, stride=2, act=act)
  242. self.spatial_scales.append(self.spatial_scales[-1] / 2)
  243. # build top-down blocks
  244. self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
  245. self.top_down_blocks = nn.LayerList()
  246. for idx in range(len(in_channels) - 1, 0, -1):
  247. self.top_down_blocks.append(
  248. CSPLayer(
  249. in_channels[idx - 1] * 2,
  250. in_channels[idx - 1],
  251. kernel_size=kernel_size,
  252. num_blocks=num_csp_blocks,
  253. add_identity=False,
  254. use_depthwise=use_depthwise,
  255. act=act))
  256. # build bottom-up blocks
  257. self.downsamples = nn.LayerList()
  258. self.bottom_up_blocks = nn.LayerList()
  259. for idx in range(len(in_channels) - 1):
  260. self.downsamples.append(
  261. conv_func(
  262. in_channels[idx],
  263. in_channels[idx],
  264. kernel_size=kernel_size,
  265. stride=2,
  266. act=act))
  267. self.bottom_up_blocks.append(
  268. CSPLayer(
  269. in_channels[idx] * 2,
  270. in_channels[idx + 1],
  271. kernel_size=kernel_size,
  272. num_blocks=num_csp_blocks,
  273. add_identity=False,
  274. use_depthwise=use_depthwise,
  275. act=act))
  276. def forward(self, inputs):
  277. """
  278. Args:
  279. inputs (tuple[Tensor]): input features.
  280. Returns:
  281. tuple[Tensor]: CSPPAN features.
  282. """
  283. assert len(inputs) == len(self.in_channels)
  284. inputs = self.conv_t(inputs)
  285. # top-down path
  286. inner_outs = [inputs[-1]]
  287. for idx in range(len(self.in_channels) - 1, 0, -1):
  288. feat_heigh = inner_outs[0]
  289. feat_low = inputs[idx - 1]
  290. upsample_feat = self.upsample(feat_heigh)
  291. inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](
  292. paddle.concat([upsample_feat, feat_low], 1))
  293. inner_outs.insert(0, inner_out)
  294. # bottom-up path
  295. outs = [inner_outs[0]]
  296. for idx in range(len(self.in_channels) - 1):
  297. feat_low = outs[-1]
  298. feat_height = inner_outs[idx + 1]
  299. downsample_feat = self.downsamples[idx](feat_low)
  300. out = self.bottom_up_blocks[idx](paddle.concat(
  301. [downsample_feat, feat_height], 1))
  302. outs.append(out)
  303. top_features = None
  304. if self.num_features == 4:
  305. top_features = self.first_top_conv(inputs[-1])
  306. top_features = top_features + self.second_top_conv(outs[-1])
  307. outs.append(top_features)
  308. return tuple(outs)
  309. @property
  310. def out_shape(self):
  311. return [
  312. ShapeSpec(
  313. channels=self.out_channels, stride=1. / s)
  314. for s in self.spatial_scales
  315. ]
  316. @classmethod
  317. def from_config(cls, cfg, input_shape):
  318. return {'in_channels': [i.channels for i in input_shape], }