yolo_fpn.py 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. import paddle.nn.functional as F
  17. from ppdet.core.workspace import register, serializable
  18. from ppdet.modeling.layers import DropBlock
  19. from ppdet.modeling.ops import get_act_fn
  20. from ..backbones.darknet import ConvBNLayer
  21. from ..shape_spec import ShapeSpec
  22. from ..backbones.csp_darknet import BaseConv, DWConv, CSPLayer
  23. __all__ = ['YOLOv3FPN', 'PPYOLOFPN', 'PPYOLOTinyFPN', 'PPYOLOPAN', 'YOLOCSPPAN']
  24. def add_coord(x, data_format):
  25. b = paddle.shape(x)[0]
  26. if data_format == 'NCHW':
  27. h, w = x.shape[2], x.shape[3]
  28. else:
  29. h, w = x.shape[1], x.shape[2]
  30. gx = paddle.cast(paddle.arange(w) / ((w - 1.) * 2.0) - 1., x.dtype)
  31. gy = paddle.cast(paddle.arange(h) / ((h - 1.) * 2.0) - 1., x.dtype)
  32. if data_format == 'NCHW':
  33. gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
  34. gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])
  35. else:
  36. gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1])
  37. gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1])
  38. gx.stop_gradient = True
  39. gy.stop_gradient = True
  40. return gx, gy
  41. class YoloDetBlock(nn.Layer):
  42. def __init__(self,
  43. ch_in,
  44. channel,
  45. norm_type,
  46. freeze_norm=False,
  47. name='',
  48. data_format='NCHW'):
  49. """
  50. YOLODetBlock layer for yolov3, see https://arxiv.org/abs/1804.02767
  51. Args:
  52. ch_in (int): input channel
  53. channel (int): base channel
  54. norm_type (str): batch norm type
  55. freeze_norm (bool): whether to freeze norm, default False
  56. name (str): layer name
  57. data_format (str): data format, NCHW or NHWC
  58. """
  59. super(YoloDetBlock, self).__init__()
  60. self.ch_in = ch_in
  61. self.channel = channel
  62. assert channel % 2 == 0, \
  63. "channel {} cannot be divided by 2".format(channel)
  64. conv_def = [
  65. ['conv0', ch_in, channel, 1, '.0.0'],
  66. ['conv1', channel, channel * 2, 3, '.0.1'],
  67. ['conv2', channel * 2, channel, 1, '.1.0'],
  68. ['conv3', channel, channel * 2, 3, '.1.1'],
  69. ['route', channel * 2, channel, 1, '.2'],
  70. ]
  71. self.conv_module = nn.Sequential()
  72. for idx, (conv_name, ch_in, ch_out, filter_size,
  73. post_name) in enumerate(conv_def):
  74. self.conv_module.add_sublayer(
  75. conv_name,
  76. ConvBNLayer(
  77. ch_in=ch_in,
  78. ch_out=ch_out,
  79. filter_size=filter_size,
  80. padding=(filter_size - 1) // 2,
  81. norm_type=norm_type,
  82. freeze_norm=freeze_norm,
  83. data_format=data_format,
  84. name=name + post_name))
  85. self.tip = ConvBNLayer(
  86. ch_in=channel,
  87. ch_out=channel * 2,
  88. filter_size=3,
  89. padding=1,
  90. norm_type=norm_type,
  91. freeze_norm=freeze_norm,
  92. data_format=data_format,
  93. name=name + '.tip')
  94. def forward(self, inputs):
  95. route = self.conv_module(inputs)
  96. tip = self.tip(route)
  97. return route, tip
  98. class SPP(nn.Layer):
  99. def __init__(self,
  100. ch_in,
  101. ch_out,
  102. k,
  103. pool_size,
  104. norm_type='bn',
  105. freeze_norm=False,
  106. name='',
  107. act='leaky',
  108. data_format='NCHW'):
  109. """
  110. SPP layer, which consist of four pooling layer follwed by conv layer
  111. Args:
  112. ch_in (int): input channel of conv layer
  113. ch_out (int): output channel of conv layer
  114. k (int): kernel size of conv layer
  115. norm_type (str): batch norm type
  116. freeze_norm (bool): whether to freeze norm, default False
  117. name (str): layer name
  118. act (str): activation function
  119. data_format (str): data format, NCHW or NHWC
  120. """
  121. super(SPP, self).__init__()
  122. self.pool = []
  123. self.data_format = data_format
  124. for size in pool_size:
  125. pool = self.add_sublayer(
  126. '{}.pool1'.format(name),
  127. nn.MaxPool2D(
  128. kernel_size=size,
  129. stride=1,
  130. padding=size // 2,
  131. data_format=data_format,
  132. ceil_mode=False))
  133. self.pool.append(pool)
  134. self.conv = ConvBNLayer(
  135. ch_in,
  136. ch_out,
  137. k,
  138. padding=k // 2,
  139. norm_type=norm_type,
  140. freeze_norm=freeze_norm,
  141. name=name,
  142. act=act,
  143. data_format=data_format)
  144. def forward(self, x):
  145. outs = [x]
  146. for pool in self.pool:
  147. outs.append(pool(x))
  148. if self.data_format == "NCHW":
  149. y = paddle.concat(outs, axis=1)
  150. else:
  151. y = paddle.concat(outs, axis=-1)
  152. y = self.conv(y)
  153. return y
  154. class CoordConv(nn.Layer):
  155. def __init__(self,
  156. ch_in,
  157. ch_out,
  158. filter_size,
  159. padding,
  160. norm_type,
  161. freeze_norm=False,
  162. name='',
  163. data_format='NCHW'):
  164. """
  165. CoordConv layer, see https://arxiv.org/abs/1807.03247
  166. Args:
  167. ch_in (int): input channel
  168. ch_out (int): output channel
  169. filter_size (int): filter size, default 3
  170. padding (int): padding size, default 0
  171. norm_type (str): batch norm type, default bn
  172. name (str): layer name
  173. data_format (str): data format, NCHW or NHWC
  174. """
  175. super(CoordConv, self).__init__()
  176. self.conv = ConvBNLayer(
  177. ch_in + 2,
  178. ch_out,
  179. filter_size=filter_size,
  180. padding=padding,
  181. norm_type=norm_type,
  182. freeze_norm=freeze_norm,
  183. data_format=data_format,
  184. name=name)
  185. self.data_format = data_format
  186. def forward(self, x):
  187. gx, gy = add_coord(x, self.data_format)
  188. if self.data_format == 'NCHW':
  189. y = paddle.concat([x, gx, gy], axis=1)
  190. else:
  191. y = paddle.concat([x, gx, gy], axis=-1)
  192. y = self.conv(y)
  193. return y
  194. class PPYOLODetBlock(nn.Layer):
  195. def __init__(self, cfg, name, data_format='NCHW'):
  196. """
  197. PPYOLODetBlock layer
  198. Args:
  199. cfg (list): layer configs for this block
  200. name (str): block name
  201. data_format (str): data format, NCHW or NHWC
  202. """
  203. super(PPYOLODetBlock, self).__init__()
  204. self.conv_module = nn.Sequential()
  205. for idx, (conv_name, layer, args, kwargs) in enumerate(cfg[:-1]):
  206. kwargs.update(
  207. name='{}.{}'.format(name, conv_name), data_format=data_format)
  208. self.conv_module.add_sublayer(conv_name, layer(*args, **kwargs))
  209. conv_name, layer, args, kwargs = cfg[-1]
  210. kwargs.update(
  211. name='{}.{}'.format(name, conv_name), data_format=data_format)
  212. self.tip = layer(*args, **kwargs)
  213. def forward(self, inputs):
  214. route = self.conv_module(inputs)
  215. tip = self.tip(route)
  216. return route, tip
  217. class PPYOLOTinyDetBlock(nn.Layer):
  218. def __init__(self,
  219. ch_in,
  220. ch_out,
  221. name,
  222. drop_block=False,
  223. block_size=3,
  224. keep_prob=0.9,
  225. data_format='NCHW'):
  226. """
  227. PPYOLO Tiny DetBlock layer
  228. Args:
  229. ch_in (list): input channel number
  230. ch_out (list): output channel number
  231. name (str): block name
  232. drop_block: whether user DropBlock
  233. block_size: drop block size
  234. keep_prob: probability to keep block in DropBlock
  235. data_format (str): data format, NCHW or NHWC
  236. """
  237. super(PPYOLOTinyDetBlock, self).__init__()
  238. self.drop_block_ = drop_block
  239. self.conv_module = nn.Sequential()
  240. cfgs = [
  241. # name, in channels, out channels, filter_size,
  242. # stride, padding, groups
  243. ['.0', ch_in, ch_out, 1, 1, 0, 1],
  244. ['.1', ch_out, ch_out, 5, 1, 2, ch_out],
  245. ['.2', ch_out, ch_out, 1, 1, 0, 1],
  246. ['.route', ch_out, ch_out, 5, 1, 2, ch_out],
  247. ]
  248. for cfg in cfgs:
  249. conv_name, conv_ch_in, conv_ch_out, filter_size, stride, padding, \
  250. groups = cfg
  251. self.conv_module.add_sublayer(
  252. name + conv_name,
  253. ConvBNLayer(
  254. ch_in=conv_ch_in,
  255. ch_out=conv_ch_out,
  256. filter_size=filter_size,
  257. stride=stride,
  258. padding=padding,
  259. groups=groups,
  260. name=name + conv_name))
  261. self.tip = ConvBNLayer(
  262. ch_in=ch_out,
  263. ch_out=ch_out,
  264. filter_size=1,
  265. stride=1,
  266. padding=0,
  267. groups=1,
  268. name=name + conv_name)
  269. if self.drop_block_:
  270. self.drop_block = DropBlock(
  271. block_size=block_size,
  272. keep_prob=keep_prob,
  273. data_format=data_format,
  274. name=name + '.dropblock')
  275. def forward(self, inputs):
  276. if self.drop_block_:
  277. inputs = self.drop_block(inputs)
  278. route = self.conv_module(inputs)
  279. tip = self.tip(route)
  280. return route, tip
  281. class PPYOLODetBlockCSP(nn.Layer):
  282. def __init__(self,
  283. cfg,
  284. ch_in,
  285. ch_out,
  286. act,
  287. norm_type,
  288. name,
  289. data_format='NCHW'):
  290. """
  291. PPYOLODetBlockCSP layer
  292. Args:
  293. cfg (list): layer configs for this block
  294. ch_in (int): input channel
  295. ch_out (int): output channel
  296. act (str): default mish
  297. name (str): block name
  298. data_format (str): data format, NCHW or NHWC
  299. """
  300. super(PPYOLODetBlockCSP, self).__init__()
  301. self.data_format = data_format
  302. self.conv1 = ConvBNLayer(
  303. ch_in,
  304. ch_out,
  305. 1,
  306. padding=0,
  307. act=act,
  308. norm_type=norm_type,
  309. name=name + '.left',
  310. data_format=data_format)
  311. self.conv2 = ConvBNLayer(
  312. ch_in,
  313. ch_out,
  314. 1,
  315. padding=0,
  316. act=act,
  317. norm_type=norm_type,
  318. name=name + '.right',
  319. data_format=data_format)
  320. self.conv3 = ConvBNLayer(
  321. ch_out * 2,
  322. ch_out * 2,
  323. 1,
  324. padding=0,
  325. act=act,
  326. norm_type=norm_type,
  327. name=name,
  328. data_format=data_format)
  329. self.conv_module = nn.Sequential()
  330. for idx, (layer_name, layer, args, kwargs) in enumerate(cfg):
  331. kwargs.update(name=name + layer_name, data_format=data_format)
  332. self.conv_module.add_sublayer(layer_name, layer(*args, **kwargs))
  333. def forward(self, inputs):
  334. conv_left = self.conv1(inputs)
  335. conv_right = self.conv2(inputs)
  336. conv_left = self.conv_module(conv_left)
  337. if self.data_format == 'NCHW':
  338. conv = paddle.concat([conv_left, conv_right], axis=1)
  339. else:
  340. conv = paddle.concat([conv_left, conv_right], axis=-1)
  341. conv = self.conv3(conv)
  342. return conv, conv
  343. @register
  344. @serializable
  345. class YOLOv3FPN(nn.Layer):
  346. __shared__ = ['norm_type', 'data_format']
  347. def __init__(self,
  348. in_channels=[256, 512, 1024],
  349. norm_type='bn',
  350. freeze_norm=False,
  351. data_format='NCHW'):
  352. """
  353. YOLOv3FPN layer
  354. Args:
  355. in_channels (list): input channels for fpn
  356. norm_type (str): batch norm type, default bn
  357. data_format (str): data format, NCHW or NHWC
  358. """
  359. super(YOLOv3FPN, self).__init__()
  360. assert len(in_channels) > 0, "in_channels length should > 0"
  361. self.in_channels = in_channels
  362. self.num_blocks = len(in_channels)
  363. self._out_channels = []
  364. self.yolo_blocks = []
  365. self.routes = []
  366. self.data_format = data_format
  367. for i in range(self.num_blocks):
  368. name = 'yolo_block.{}'.format(i)
  369. in_channel = in_channels[-i - 1]
  370. if i > 0:
  371. in_channel += 512 // (2**i)
  372. yolo_block = self.add_sublayer(
  373. name,
  374. YoloDetBlock(
  375. in_channel,
  376. channel=512 // (2**i),
  377. norm_type=norm_type,
  378. freeze_norm=freeze_norm,
  379. data_format=data_format,
  380. name=name))
  381. self.yolo_blocks.append(yolo_block)
  382. # tip layer output channel doubled
  383. self._out_channels.append(1024 // (2**i))
  384. if i < self.num_blocks - 1:
  385. name = 'yolo_transition.{}'.format(i)
  386. route = self.add_sublayer(
  387. name,
  388. ConvBNLayer(
  389. ch_in=512 // (2**i),
  390. ch_out=256 // (2**i),
  391. filter_size=1,
  392. stride=1,
  393. padding=0,
  394. norm_type=norm_type,
  395. freeze_norm=freeze_norm,
  396. data_format=data_format,
  397. name=name))
  398. self.routes.append(route)
  399. def forward(self, blocks, for_mot=False):
  400. assert len(blocks) == self.num_blocks
  401. blocks = blocks[::-1]
  402. yolo_feats = []
  403. # add embedding features output for multi-object tracking model
  404. if for_mot:
  405. emb_feats = []
  406. for i, block in enumerate(blocks):
  407. if i > 0:
  408. if self.data_format == 'NCHW':
  409. block = paddle.concat([route, block], axis=1)
  410. else:
  411. block = paddle.concat([route, block], axis=-1)
  412. route, tip = self.yolo_blocks[i](block)
  413. yolo_feats.append(tip)
  414. if for_mot:
  415. # add embedding features output
  416. emb_feats.append(route)
  417. if i < self.num_blocks - 1:
  418. route = self.routes[i](route)
  419. route = F.interpolate(
  420. route, scale_factor=2., data_format=self.data_format)
  421. if for_mot:
  422. return {'yolo_feats': yolo_feats, 'emb_feats': emb_feats}
  423. else:
  424. return yolo_feats
  425. @classmethod
  426. def from_config(cls, cfg, input_shape):
  427. return {'in_channels': [i.channels for i in input_shape], }
  428. @property
  429. def out_shape(self):
  430. return [ShapeSpec(channels=c) for c in self._out_channels]
  431. @register
  432. @serializable
  433. class PPYOLOFPN(nn.Layer):
  434. __shared__ = ['norm_type', 'data_format']
  435. def __init__(self,
  436. in_channels=[512, 1024, 2048],
  437. norm_type='bn',
  438. freeze_norm=False,
  439. data_format='NCHW',
  440. coord_conv=False,
  441. conv_block_num=2,
  442. drop_block=False,
  443. block_size=3,
  444. keep_prob=0.9,
  445. spp=False):
  446. """
  447. PPYOLOFPN layer
  448. Args:
  449. in_channels (list): input channels for fpn
  450. norm_type (str): batch norm type, default bn
  451. data_format (str): data format, NCHW or NHWC
  452. coord_conv (bool): whether use CoordConv or not
  453. conv_block_num (int): conv block num of each pan block
  454. drop_block (bool): whether use DropBlock or not
  455. block_size (int): block size of DropBlock
  456. keep_prob (float): keep probability of DropBlock
  457. spp (bool): whether use spp or not
  458. """
  459. super(PPYOLOFPN, self).__init__()
  460. assert len(in_channels) > 0, "in_channels length should > 0"
  461. self.in_channels = in_channels
  462. self.num_blocks = len(in_channels)
  463. # parse kwargs
  464. self.coord_conv = coord_conv
  465. self.drop_block = drop_block
  466. self.block_size = block_size
  467. self.keep_prob = keep_prob
  468. self.spp = spp
  469. self.conv_block_num = conv_block_num
  470. self.data_format = data_format
  471. if self.coord_conv:
  472. ConvLayer = CoordConv
  473. else:
  474. ConvLayer = ConvBNLayer
  475. if self.drop_block:
  476. dropblock_cfg = [[
  477. 'dropblock', DropBlock, [self.block_size, self.keep_prob],
  478. dict()
  479. ]]
  480. else:
  481. dropblock_cfg = []
  482. self._out_channels = []
  483. self.yolo_blocks = []
  484. self.routes = []
  485. for i, ch_in in enumerate(self.in_channels[::-1]):
  486. if i > 0:
  487. ch_in += 512 // (2**i)
  488. channel = 64 * (2**self.num_blocks) // (2**i)
  489. base_cfg = []
  490. c_in, c_out = ch_in, channel
  491. for j in range(self.conv_block_num):
  492. base_cfg += [
  493. [
  494. 'conv{}'.format(2 * j), ConvLayer, [c_in, c_out, 1],
  495. dict(
  496. padding=0,
  497. norm_type=norm_type,
  498. freeze_norm=freeze_norm)
  499. ],
  500. [
  501. 'conv{}'.format(2 * j + 1), ConvBNLayer,
  502. [c_out, c_out * 2, 3], dict(
  503. padding=1,
  504. norm_type=norm_type,
  505. freeze_norm=freeze_norm)
  506. ],
  507. ]
  508. c_in, c_out = c_out * 2, c_out
  509. base_cfg += [[
  510. 'route', ConvLayer, [c_in, c_out, 1], dict(
  511. padding=0, norm_type=norm_type, freeze_norm=freeze_norm)
  512. ], [
  513. 'tip', ConvLayer, [c_out, c_out * 2, 3], dict(
  514. padding=1, norm_type=norm_type, freeze_norm=freeze_norm)
  515. ]]
  516. if self.conv_block_num == 2:
  517. if i == 0:
  518. if self.spp:
  519. spp_cfg = [[
  520. 'spp', SPP, [channel * 4, channel, 1], dict(
  521. pool_size=[5, 9, 13],
  522. norm_type=norm_type,
  523. freeze_norm=freeze_norm)
  524. ]]
  525. else:
  526. spp_cfg = []
  527. cfg = base_cfg[0:3] + spp_cfg + base_cfg[
  528. 3:4] + dropblock_cfg + base_cfg[4:6]
  529. else:
  530. cfg = base_cfg[0:2] + dropblock_cfg + base_cfg[2:6]
  531. elif self.conv_block_num == 0:
  532. if self.spp and i == 0:
  533. spp_cfg = [[
  534. 'spp', SPP, [c_in * 4, c_in, 1], dict(
  535. pool_size=[5, 9, 13],
  536. norm_type=norm_type,
  537. freeze_norm=freeze_norm)
  538. ]]
  539. else:
  540. spp_cfg = []
  541. cfg = spp_cfg + dropblock_cfg + base_cfg
  542. name = 'yolo_block.{}'.format(i)
  543. yolo_block = self.add_sublayer(name, PPYOLODetBlock(cfg, name))
  544. self.yolo_blocks.append(yolo_block)
  545. self._out_channels.append(channel * 2)
  546. if i < self.num_blocks - 1:
  547. name = 'yolo_transition.{}'.format(i)
  548. route = self.add_sublayer(
  549. name,
  550. ConvBNLayer(
  551. ch_in=channel,
  552. ch_out=256 // (2**i),
  553. filter_size=1,
  554. stride=1,
  555. padding=0,
  556. norm_type=norm_type,
  557. freeze_norm=freeze_norm,
  558. data_format=data_format,
  559. name=name))
  560. self.routes.append(route)
  561. def forward(self, blocks, for_mot=False):
  562. assert len(blocks) == self.num_blocks
  563. blocks = blocks[::-1]
  564. yolo_feats = []
  565. # add embedding features output for multi-object tracking model
  566. if for_mot:
  567. emb_feats = []
  568. for i, block in enumerate(blocks):
  569. if i > 0:
  570. if self.data_format == 'NCHW':
  571. block = paddle.concat([route, block], axis=1)
  572. else:
  573. block = paddle.concat([route, block], axis=-1)
  574. route, tip = self.yolo_blocks[i](block)
  575. yolo_feats.append(tip)
  576. if for_mot:
  577. # add embedding features output
  578. emb_feats.append(route)
  579. if i < self.num_blocks - 1:
  580. route = self.routes[i](route)
  581. route = F.interpolate(
  582. route, scale_factor=2., data_format=self.data_format)
  583. if for_mot:
  584. return {'yolo_feats': yolo_feats, 'emb_feats': emb_feats}
  585. else:
  586. return yolo_feats
  587. @classmethod
  588. def from_config(cls, cfg, input_shape):
  589. return {'in_channels': [i.channels for i in input_shape], }
  590. @property
  591. def out_shape(self):
  592. return [ShapeSpec(channels=c) for c in self._out_channels]
  593. @register
  594. @serializable
  595. class PPYOLOTinyFPN(nn.Layer):
  596. __shared__ = ['norm_type', 'data_format']
  597. def __init__(self,
  598. in_channels=[80, 56, 34],
  599. detection_block_channels=[160, 128, 96],
  600. norm_type='bn',
  601. data_format='NCHW',
  602. **kwargs):
  603. """
  604. PPYOLO Tiny FPN layer
  605. Args:
  606. in_channels (list): input channels for fpn
  607. detection_block_channels (list): channels in fpn
  608. norm_type (str): batch norm type, default bn
  609. data_format (str): data format, NCHW or NHWC
  610. kwargs: extra key-value pairs, such as parameter of DropBlock and spp
  611. """
  612. super(PPYOLOTinyFPN, self).__init__()
  613. assert len(in_channels) > 0, "in_channels length should > 0"
  614. self.in_channels = in_channels[::-1]
  615. assert len(detection_block_channels
  616. ) > 0, "detection_block_channelslength should > 0"
  617. self.detection_block_channels = detection_block_channels
  618. self.data_format = data_format
  619. self.num_blocks = len(in_channels)
  620. # parse kwargs
  621. self.drop_block = kwargs.get('drop_block', False)
  622. self.block_size = kwargs.get('block_size', 3)
  623. self.keep_prob = kwargs.get('keep_prob', 0.9)
  624. self.spp_ = kwargs.get('spp', False)
  625. if self.spp_:
  626. self.spp = SPP(self.in_channels[0] * 4,
  627. self.in_channels[0],
  628. k=1,
  629. pool_size=[5, 9, 13],
  630. norm_type=norm_type,
  631. name='spp')
  632. self._out_channels = []
  633. self.yolo_blocks = []
  634. self.routes = []
  635. for i, (
  636. ch_in, ch_out
  637. ) in enumerate(zip(self.in_channels, self.detection_block_channels)):
  638. name = 'yolo_block.{}'.format(i)
  639. if i > 0:
  640. ch_in += self.detection_block_channels[i - 1]
  641. yolo_block = self.add_sublayer(
  642. name,
  643. PPYOLOTinyDetBlock(
  644. ch_in,
  645. ch_out,
  646. name,
  647. drop_block=self.drop_block,
  648. block_size=self.block_size,
  649. keep_prob=self.keep_prob))
  650. self.yolo_blocks.append(yolo_block)
  651. self._out_channels.append(ch_out)
  652. if i < self.num_blocks - 1:
  653. name = 'yolo_transition.{}'.format(i)
  654. route = self.add_sublayer(
  655. name,
  656. ConvBNLayer(
  657. ch_in=ch_out,
  658. ch_out=ch_out,
  659. filter_size=1,
  660. stride=1,
  661. padding=0,
  662. norm_type=norm_type,
  663. data_format=data_format,
  664. name=name))
  665. self.routes.append(route)
  666. def forward(self, blocks, for_mot=False):
  667. assert len(blocks) == self.num_blocks
  668. blocks = blocks[::-1]
  669. yolo_feats = []
  670. # add embedding features output for multi-object tracking model
  671. if for_mot:
  672. emb_feats = []
  673. for i, block in enumerate(blocks):
  674. if i == 0 and self.spp_:
  675. block = self.spp(block)
  676. if i > 0:
  677. if self.data_format == 'NCHW':
  678. block = paddle.concat([route, block], axis=1)
  679. else:
  680. block = paddle.concat([route, block], axis=-1)
  681. route, tip = self.yolo_blocks[i](block)
  682. yolo_feats.append(tip)
  683. if for_mot:
  684. # add embedding features output
  685. emb_feats.append(route)
  686. if i < self.num_blocks - 1:
  687. route = self.routes[i](route)
  688. route = F.interpolate(
  689. route, scale_factor=2., data_format=self.data_format)
  690. if for_mot:
  691. return {'yolo_feats': yolo_feats, 'emb_feats': emb_feats}
  692. else:
  693. return yolo_feats
  694. @classmethod
  695. def from_config(cls, cfg, input_shape):
  696. return {'in_channels': [i.channels for i in input_shape], }
  697. @property
  698. def out_shape(self):
  699. return [ShapeSpec(channels=c) for c in self._out_channels]
  700. @register
  701. @serializable
  702. class PPYOLOPAN(nn.Layer):
  703. __shared__ = ['norm_type', 'data_format']
  704. def __init__(self,
  705. in_channels=[512, 1024, 2048],
  706. norm_type='bn',
  707. data_format='NCHW',
  708. act='mish',
  709. conv_block_num=3,
  710. drop_block=False,
  711. block_size=3,
  712. keep_prob=0.9,
  713. spp=False):
  714. """
  715. PPYOLOPAN layer with SPP, DropBlock and CSP connection.
  716. Args:
  717. in_channels (list): input channels for fpn
  718. norm_type (str): batch norm type, default bn
  719. data_format (str): data format, NCHW or NHWC
  720. act (str): activation function, default mish
  721. conv_block_num (int): conv block num of each pan block
  722. drop_block (bool): whether use DropBlock or not
  723. block_size (int): block size of DropBlock
  724. keep_prob (float): keep probability of DropBlock
  725. spp (bool): whether use spp or not
  726. """
  727. super(PPYOLOPAN, self).__init__()
  728. assert len(in_channels) > 0, "in_channels length should > 0"
  729. self.in_channels = in_channels
  730. self.num_blocks = len(in_channels)
  731. # parse kwargs
  732. self.drop_block = drop_block
  733. self.block_size = block_size
  734. self.keep_prob = keep_prob
  735. self.spp = spp
  736. self.conv_block_num = conv_block_num
  737. self.data_format = data_format
  738. if self.drop_block:
  739. dropblock_cfg = [[
  740. 'dropblock', DropBlock, [self.block_size, self.keep_prob],
  741. dict()
  742. ]]
  743. else:
  744. dropblock_cfg = []
  745. # fpn
  746. self.fpn_blocks = []
  747. self.fpn_routes = []
  748. fpn_channels = []
  749. for i, ch_in in enumerate(self.in_channels[::-1]):
  750. if i > 0:
  751. ch_in += 512 // (2**(i - 1))
  752. channel = 512 // (2**i)
  753. base_cfg = []
  754. for j in range(self.conv_block_num):
  755. base_cfg += [
  756. # name, layer, args
  757. [
  758. '{}.0'.format(j), ConvBNLayer, [channel, channel, 1],
  759. dict(
  760. padding=0, act=act, norm_type=norm_type)
  761. ],
  762. [
  763. '{}.1'.format(j), ConvBNLayer, [channel, channel, 3],
  764. dict(
  765. padding=1, act=act, norm_type=norm_type)
  766. ]
  767. ]
  768. if i == 0 and self.spp:
  769. base_cfg[3] = [
  770. 'spp', SPP, [channel * 4, channel, 1], dict(
  771. pool_size=[5, 9, 13], act=act, norm_type=norm_type)
  772. ]
  773. cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:]
  774. name = 'fpn.{}'.format(i)
  775. fpn_block = self.add_sublayer(
  776. name,
  777. PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name,
  778. data_format))
  779. self.fpn_blocks.append(fpn_block)
  780. fpn_channels.append(channel * 2)
  781. if i < self.num_blocks - 1:
  782. name = 'fpn_transition.{}'.format(i)
  783. route = self.add_sublayer(
  784. name,
  785. ConvBNLayer(
  786. ch_in=channel * 2,
  787. ch_out=channel,
  788. filter_size=1,
  789. stride=1,
  790. padding=0,
  791. act=act,
  792. norm_type=norm_type,
  793. data_format=data_format,
  794. name=name))
  795. self.fpn_routes.append(route)
  796. # pan
  797. self.pan_blocks = []
  798. self.pan_routes = []
  799. self._out_channels = [512 // (2**(self.num_blocks - 2)), ]
  800. for i in reversed(range(self.num_blocks - 1)):
  801. name = 'pan_transition.{}'.format(i)
  802. route = self.add_sublayer(
  803. name,
  804. ConvBNLayer(
  805. ch_in=fpn_channels[i + 1],
  806. ch_out=fpn_channels[i + 1],
  807. filter_size=3,
  808. stride=2,
  809. padding=1,
  810. act=act,
  811. norm_type=norm_type,
  812. data_format=data_format,
  813. name=name))
  814. self.pan_routes = [route, ] + self.pan_routes
  815. base_cfg = []
  816. ch_in = fpn_channels[i] + fpn_channels[i + 1]
  817. channel = 512 // (2**i)
  818. for j in range(self.conv_block_num):
  819. base_cfg += [
  820. # name, layer, args
  821. [
  822. '{}.0'.format(j), ConvBNLayer, [channel, channel, 1],
  823. dict(
  824. padding=0, act=act, norm_type=norm_type)
  825. ],
  826. [
  827. '{}.1'.format(j), ConvBNLayer, [channel, channel, 3],
  828. dict(
  829. padding=1, act=act, norm_type=norm_type)
  830. ]
  831. ]
  832. cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:]
  833. name = 'pan.{}'.format(i)
  834. pan_block = self.add_sublayer(
  835. name,
  836. PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name,
  837. data_format))
  838. self.pan_blocks = [pan_block, ] + self.pan_blocks
  839. self._out_channels.append(channel * 2)
  840. self._out_channels = self._out_channels[::-1]
  841. def forward(self, blocks, for_mot=False):
  842. assert len(blocks) == self.num_blocks
  843. blocks = blocks[::-1]
  844. fpn_feats = []
  845. # add embedding features output for multi-object tracking model
  846. if for_mot:
  847. emb_feats = []
  848. for i, block in enumerate(blocks):
  849. if i > 0:
  850. if self.data_format == 'NCHW':
  851. block = paddle.concat([route, block], axis=1)
  852. else:
  853. block = paddle.concat([route, block], axis=-1)
  854. route, tip = self.fpn_blocks[i](block)
  855. fpn_feats.append(tip)
  856. if for_mot:
  857. # add embedding features output
  858. emb_feats.append(route)
  859. if i < self.num_blocks - 1:
  860. route = self.fpn_routes[i](route)
  861. route = F.interpolate(
  862. route, scale_factor=2., data_format=self.data_format)
  863. pan_feats = [fpn_feats[-1], ]
  864. route = fpn_feats[self.num_blocks - 1]
  865. for i in reversed(range(self.num_blocks - 1)):
  866. block = fpn_feats[i]
  867. route = self.pan_routes[i](route)
  868. if self.data_format == 'NCHW':
  869. block = paddle.concat([route, block], axis=1)
  870. else:
  871. block = paddle.concat([route, block], axis=-1)
  872. route, tip = self.pan_blocks[i](block)
  873. pan_feats.append(tip)
  874. if for_mot:
  875. return {'yolo_feats': pan_feats[::-1], 'emb_feats': emb_feats}
  876. else:
  877. return pan_feats[::-1]
  878. @classmethod
  879. def from_config(cls, cfg, input_shape):
  880. return {'in_channels': [i.channels for i in input_shape], }
  881. @property
  882. def out_shape(self):
  883. return [ShapeSpec(channels=c) for c in self._out_channels]
  884. @register
  885. @serializable
  886. class YOLOCSPPAN(nn.Layer):
  887. """
  888. YOLO CSP-PAN, used in YOLOv5 and YOLOX.
  889. """
  890. __shared__ = ['depth_mult', 'data_format', 'act', 'trt']
  891. def __init__(self,
  892. depth_mult=1.0,
  893. in_channels=[256, 512, 1024],
  894. depthwise=False,
  895. data_format='NCHW',
  896. act='silu',
  897. trt=False):
  898. super(YOLOCSPPAN, self).__init__()
  899. self.in_channels = in_channels
  900. self._out_channels = in_channels
  901. Conv = DWConv if depthwise else BaseConv
  902. self.data_format = data_format
  903. act = get_act_fn(
  904. act, trt=trt) if act is None or isinstance(act,
  905. (str, dict)) else act
  906. self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
  907. # top-down fpn
  908. self.lateral_convs = nn.LayerList()
  909. self.fpn_blocks = nn.LayerList()
  910. for idx in range(len(in_channels) - 1, 0, -1):
  911. self.lateral_convs.append(
  912. BaseConv(
  913. int(in_channels[idx]),
  914. int(in_channels[idx - 1]),
  915. 1,
  916. 1,
  917. act=act))
  918. self.fpn_blocks.append(
  919. CSPLayer(
  920. int(in_channels[idx - 1] * 2),
  921. int(in_channels[idx - 1]),
  922. round(3 * depth_mult),
  923. shortcut=False,
  924. depthwise=depthwise,
  925. act=act))
  926. # bottom-up pan
  927. self.downsample_convs = nn.LayerList()
  928. self.pan_blocks = nn.LayerList()
  929. for idx in range(len(in_channels) - 1):
  930. self.downsample_convs.append(
  931. Conv(
  932. int(in_channels[idx]),
  933. int(in_channels[idx]),
  934. 3,
  935. stride=2,
  936. act=act))
  937. self.pan_blocks.append(
  938. CSPLayer(
  939. int(in_channels[idx] * 2),
  940. int(in_channels[idx + 1]),
  941. round(3 * depth_mult),
  942. shortcut=False,
  943. depthwise=depthwise,
  944. act=act))
  945. def forward(self, feats, for_mot=False):
  946. assert len(feats) == len(self.in_channels)
  947. # top-down fpn
  948. inner_outs = [feats[-1]]
  949. for idx in range(len(self.in_channels) - 1, 0, -1):
  950. feat_heigh = inner_outs[0]
  951. feat_low = feats[idx - 1]
  952. feat_heigh = self.lateral_convs[len(self.in_channels) - 1 - idx](
  953. feat_heigh)
  954. inner_outs[0] = feat_heigh
  955. upsample_feat = F.interpolate(
  956. feat_heigh,
  957. scale_factor=2.,
  958. mode="nearest",
  959. data_format=self.data_format)
  960. inner_out = self.fpn_blocks[len(self.in_channels) - 1 - idx](
  961. paddle.concat(
  962. [upsample_feat, feat_low], axis=1))
  963. inner_outs.insert(0, inner_out)
  964. # bottom-up pan
  965. outs = [inner_outs[0]]
  966. for idx in range(len(self.in_channels) - 1):
  967. feat_low = outs[-1]
  968. feat_height = inner_outs[idx + 1]
  969. downsample_feat = self.downsample_convs[idx](feat_low)
  970. out = self.pan_blocks[idx](paddle.concat(
  971. [downsample_feat, feat_height], axis=1))
  972. outs.append(out)
  973. return outs
  974. @classmethod
  975. def from_config(cls, cfg, input_shape):
  976. return {'in_channels': [i.channels for i in input_shape], }
  977. @property
  978. def out_shape(self):
  979. return [ShapeSpec(channels=c) for c in self._out_channels]