# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn.initializer import Uniform
import paddle.nn.functional as F

from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.backbones.hardnet import ConvLayer, HarDBlock
from ..shape_spec import ShapeSpec

__all__ = ['CenterNetDLAFPN', 'CenterNetHarDNetFPN']

# SGE attention
class BasicConv(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 relu=True,
                 bn=True,
                 bias_attr=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias_attr)
        self.bn = nn.BatchNorm2D(
            out_planes,
            epsilon=1e-5,
            momentum=0.01,
            weight_attr=False,
            bias_attr=False) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class ChannelPool(nn.Layer):
    def forward(self, x):
        return paddle.concat(
            (paddle.max(x, 1).unsqueeze(1), paddle.mean(x, 1).unsqueeze(1)),
            axis=1)

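# SpatialGate: squeeze the channel dim to a (max, mean) pair with ChannelPool,
# run a 7x7 conv over it, and gate the input with the resulting sigmoid mask.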
class SpatialGate(nn.Layer):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(
            2,
            1,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = F.sigmoid(x_out)  # broadcasting
        return x * scale

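# Fill a grouped Conv2DTranspose with a fixed bilinear upsampling kernel: the
# first channel gets a separable triangle filter, which is then copied to
# every other (depthwise) channel.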
def fill_up_weights(up):
    weight = up.weight.numpy()
    f = math.ceil(weight.shape[2] / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(weight.shape[2]):
        for j in range(weight.shape[3]):
            weight[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, weight.shape[0]):
        weight[c, 0, :, :] = weight[0, 0, :, :]
    up.weight.set_value(weight)

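# IDAUp (Iterative Deep Aggregation, as in the DLA paper): every level after
# the first is projected to `ch_out` channels, upsampled to the previous
# level's resolution with a bilinear deconv, and fused with it through a
# `node` conv. forward() rewrites the `inputs` list in place.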
class IDAUp(nn.Layer):
    def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
        super(IDAUp, self).__init__()
        for i in range(1, len(ch_ins)):
            ch_in = ch_ins[i]
            up_s = int(up_strides[i])
            fan_in = ch_in * 3 * 3
            stdv = 1. / math.sqrt(fan_in)
            proj = nn.Sequential(
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())
            node = nn.Sequential(
                ConvNormLayer(
                    ch_out,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())

            kernel_size = up_s * 2
            fan_in = ch_out * kernel_size * kernel_size
            stdv = 1. / math.sqrt(fan_in)
            up = nn.Conv2DTranspose(
                ch_out,
                ch_out,
                kernel_size=up_s * 2,
                stride=up_s,
                padding=up_s // 2,
                groups=ch_out,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=False)
            fill_up_weights(up)
            setattr(self, 'proj_' + str(i), proj)
            setattr(self, 'up_' + str(i), up)
            setattr(self, 'node_' + str(i), node)

    def forward(self, inputs, start_level, end_level):
        for i in range(start_level + 1, end_level):
            upsample = getattr(self, 'up_' + str(i - start_level))
            project = getattr(self, 'proj_' + str(i - start_level))
            inputs[i] = project(inputs[i])
            inputs[i] = upsample(inputs[i])
            node = getattr(self, 'node_' + str(i - start_level))
            inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))

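# DLAUp stacks one IDAUp per pyramid step, merging levels from deepest to
# shallowest; after each step, `scales` and `ch_in` are rewritten so the
# already-fused levels carry the channel count and stride of the fused output.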
class DLAUp(nn.Layer):
    def __init__(self, start_level, channels, scales, ch_in=None, dcn_v2=True):
        super(DLAUp, self).__init__()
        self.start_level = start_level
        if ch_in is None:
            ch_in = channels
        self.channels = channels
        channels = list(channels)
        scales = np.array(scales, dtype=int)
        for i in range(len(channels) - 1):
            j = -i - 2
            setattr(
                self,
                'ida_{}'.format(i),
                IDAUp(
                    ch_in[j:],
                    channels[j],
                    scales[j:] // scales[j],
                    dcn_v2=dcn_v2))
            scales[j + 1:] = scales[j]
            ch_in[j + 1:] = [channels[j] for _ in channels[j + 1:]]

    def forward(self, inputs):
        out = [inputs[-1]]  # start with 32
        for i in range(len(inputs) - self.start_level - 1):
            ida = getattr(self, 'ida_{}'.format(i))
            ida(inputs, len(inputs) - i - 2, len(inputs))
            out.insert(0, inputs[-1])
        return out

@register
@serializable
class CenterNetDLAFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [16, 32, 64, 128, 256, 512] by default, means the channels of DLA-34
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        last_level (int): the last level of input feature fed into the upsampling block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio)
        dcn_v2 (bool): whether to use DCNv2, True by default
        with_sge (bool): whether to use SGE attention, False by default
    """

    def __init__(self,
                 in_channels,
                 down_ratio=4,
                 last_level=5,
                 out_channel=0,
                 first_level=None,
                 dcn_v2=True,
                 with_sge=False):
        super(CenterNetDLAFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetDLAFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        scales = [2**i for i in range(len(in_channels[self.first_level:]))]
        self.dla_up = DLAUp(
            self.first_level,
            in_channels[self.first_level:],
            scales,
            dcn_v2=dcn_v2)
        self.out_channel = out_channel
        if out_channel == 0:
            self.out_channel = in_channels[self.first_level]
        self.ida_up = IDAUp(
            in_channels[self.first_level:self.last_level],
            self.out_channel,
            [2**i for i in range(self.last_level - self.first_level)],
            dcn_v2=dcn_v2)

        self.with_sge = with_sge
        if self.with_sge:
            self.sge_attention = SpatialGate()

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

    def forward(self, body_feats):
        dla_up_feats = self.dla_up(body_feats)

        ida_up_feats = []
        for i in range(self.last_level - self.first_level):
            ida_up_feats.append(dla_up_feats[i].clone())

        self.ida_up(ida_up_feats, 0, len(ida_up_feats))

        feat = ida_up_feats[-1]
        if self.with_sge:
            feat = self.sge_attention(feat)
        if self.down_ratio != 4:
            feat = F.interpolate(
                feat,
                scale_factor=self.down_ratio // 4,
                mode="bilinear",
                align_corners=True)
        return feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]

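# Usage sketch (hypothetical shapes; assumes a DLA-34 backbone whose six
# levels have strides 1..32 and the default channel list; dcn_v2=False only
# keeps the sketch free of deformable-conv dependencies):
#
#   fpn = CenterNetDLAFPN(in_channels=[16, 32, 64, 128, 256, 512], dcn_v2=False)
#   body_feats = [paddle.rand([1, c, 512 // 2**i, 512 // 2**i])
#                 for i, c in enumerate([16, 32, 64, 128, 256, 512])]
#   feat = fpn(body_feats)  # [1, 64, 128, 128], i.e. stride 4 w.r.t. the image
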
class TransitionUp(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super().__init__()

    def forward(self, x, skip):
        # NCHW layout: dims 2 and 3 are height and width
        h, w = skip.shape[2], skip.shape[3]
        out = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True)
        out = paddle.concat([out, skip], 1)
        return out

@register
@serializable
class CenterNetHarDNetFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [96, 214, 458, 784] by default, means the channels of HarDNet-85
        num_layers (int): HarDNet layers, 85 by default
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio) - 1
        last_level (int): the last level of input feature fed into the upsampling block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
    """

    def __init__(self,
                 in_channels,
                 num_layers=85,
                 down_ratio=4,
                 first_level=None,
                 last_level=4,
                 out_channel=0):
        super(CenterNetHarDNetFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) - 1 if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetHarDNetFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        self.last_pool = nn.AvgPool2D(kernel_size=2, stride=2)

        assert num_layers in [68, 85], "HarDNet-{} is not supported.".format(
            num_layers)
        if num_layers == 85:
            self.last_proj = ConvLayer(784, 256, kernel_size=1)
            self.last_blk = HarDBlock(768, 80, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 13]
            self.SC = [32, 32, 0]
            gr = [64, 48, 28]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 160 + self.SC[1], 96 + self.SC[2]]
            channels = [96, 214, 458, 784]
            self.skip_lv = 3
        elif num_layers == 68:
            self.last_proj = ConvLayer(654, 192, kernel_size=1)
            self.last_blk = HarDBlock(576, 72, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 11]
            self.SC = [32, 32, 0]
            gr = [48, 32, 20]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 96 + self.SC[1], 64 + self.SC[2]]
            channels = [64, 124, 328, 654]
            self.skip_lv = 2

        self.transUpBlocks = nn.LayerList([])
        self.denseBlocksUp = nn.LayerList([])
        self.conv1x1_up = nn.LayerList([])
        self.avg9x9 = nn.AvgPool2D(kernel_size=(9, 9), stride=1, padding=(4, 4))
        prev_ch = self.last_blk.get_out_ch()

        for i in range(3):
            skip_ch = channels[3 - i]
            self.transUpBlocks.append(TransitionUp(prev_ch, prev_ch))
            if i < self.skip_lv:
                cur_ch = prev_ch + skip_ch
            else:
                cur_ch = prev_ch
            self.conv1x1_up.append(
                ConvLayer(
                    cur_ch, ch_list2[i], kernel_size=1))
            cur_ch = ch_list2[i]
            cur_ch -= self.SC[i]
            cur_ch *= 3

            blk = HarDBlock(cur_ch, gr[i], 1.7, layers[i])
            self.denseBlocksUp.append(blk)
            prev_ch = blk.get_out_ch()

        prev_ch += self.SC[0] + self.SC[1] + self.SC[2]
        self.out_channel = prev_ch

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

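    # Each stage below concatenates the feature map with a 9x9 local average
    # (x2) and a copy normalized by its per-channel global sum (x3) before the
    # HarDBlock; the SC[i] channels sliced off per stage are kept aside and
    # re-attached to the final output after bilinear upsampling.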
    def forward(self, body_feats):
        x = body_feats[-1]
        x_sc = []
        x = self.last_proj(x)
        x = self.last_pool(x)
        x2 = self.avg9x9(x)
        x3 = x / (x.sum((2, 3), keepdim=True) + 0.1)
        x = paddle.concat([x, x2, x3], 1)
        x = self.last_blk(x)

        for i in range(3):
            skip_x = body_feats[3 - i]
            x_up = self.transUpBlocks[i](x, skip_x)
            x_ch = self.conv1x1_up[i](x_up)
            if self.SC[i] > 0:
                end = x_ch.shape[1]
                new_st = end - self.SC[i]
                x_sc.append(x_ch[:, new_st:, :, :])
                x_ch = x_ch[:, :new_st, :, :]
            x2 = self.avg9x9(x_ch)
            x3 = x_ch / (x_ch.sum((2, 3), keepdim=True) + 0.1)
            x_new = paddle.concat([x_ch, x2, x3], 1)
            x = self.denseBlocksUp[i](x_new)

        scs = [x]
        for i in range(3):
            if self.SC[i] > 0:
                scs.insert(
                    0,
                    F.interpolate(
                        x_sc[i],
                        size=(x.shape[2], x.shape[3]),
                        mode="bilinear",
                        align_corners=True))
        neck_feat = paddle.concat(scs, 1)
        return neck_feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
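
# Usage sketch (hypothetical; assumes a HarDNet-85 backbone providing four
# levels with the default channel list):
#
#   fpn = CenterNetHarDNetFPN(in_channels=[96, 214, 458, 784])
#   print(fpn.out_shape)  # one ShapeSpec: fpn.out_channel channels, stride 4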