ops.py 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. from numbers import Integral
  16. import math
  17. import six
  18. import paddle
  19. from paddle import fluid
  20. from paddle.fluid.layer_helper import LayerHelper
  21. from paddle.fluid.initializer import NumpyArrayInitializer
  22. from paddle.fluid.param_attr import ParamAttr
  23. from paddle.fluid.regularizer import L2Decay
  24. from ppdet.core.workspace import register, serializable
  25. from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta
  26. __all__ = [
  27. 'AnchorGenerator', 'AnchorGrid', 'DropBlock', 'RPNTargetAssign',
  28. 'GenerateProposals', 'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner',
  29. 'RoIAlign', 'RoIPool', 'MultiBoxHead', 'SSDLiteMultiBoxHead',
  30. 'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm',
  31. 'DeformConvNorm', 'MultiClassSoftNMS', 'MatrixNMS', 'LibraBBoxAssigner',
  32. 'DeformConv'
  33. ]
  34. def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
  35. out_channel = filter_size * filter_size * 3
  36. out = fluid.layers.conv2d(
  37. input,
  38. num_filters=out_channel,
  39. filter_size=filter_size,
  40. stride=stride,
  41. padding=padding,
  42. param_attr=ParamAttr(
  43. initializer=fluid.initializer.Constant(0), name=name + ".w_0"),
  44. bias_attr=ParamAttr(
  45. initializer=fluid.initializer.Constant(0),
  46. learning_rate=2.,
  47. regularizer=L2Decay(0.),
  48. name=name + ".b_0"),
  49. act=act,
  50. name=name)
  51. return out
  52. def DeformConv(input,
  53. num_filters,
  54. filter_size,
  55. stride=1,
  56. groups=1,
  57. dilation=1,
  58. lr_scale=1,
  59. initializer=None,
  60. bias_attr=False,
  61. name=None):
  62. if bias_attr:
  63. bias_para = ParamAttr(
  64. name=name + "_bias",
  65. initializer=fluid.initializer.Constant(0),
  66. regularizer=L2Decay(0.),
  67. learning_rate=lr_scale * 2)
  68. else:
  69. bias_para = False
  70. offset_mask = _conv_offset(
  71. input=input,
  72. filter_size=filter_size,
  73. stride=stride,
  74. padding=(filter_size - 1) // 2,
  75. act=None,
  76. name=name + "_conv_offset")
  77. offset_channel = filter_size**2 * 2
  78. mask_channel = filter_size**2
  79. offset, mask = fluid.layers.split(
  80. input=offset_mask,
  81. num_or_sections=[offset_channel, mask_channel],
  82. dim=1)
  83. mask = fluid.layers.sigmoid(mask)
  84. conv = fluid.layers.deformable_conv(
  85. input=input,
  86. offset=offset,
  87. mask=mask,
  88. num_filters=num_filters,
  89. filter_size=filter_size,
  90. stride=stride,
  91. padding=(filter_size - 1) // 2 * dilation,
  92. dilation=dilation,
  93. groups=groups,
  94. deformable_groups=1,
  95. im2col_step=1,
  96. param_attr=ParamAttr(
  97. name=name + "_weights",
  98. initializer=initializer,
  99. learning_rate=lr_scale),
  100. bias_attr=bias_para,
  101. name=name + ".conv2d.output.1")
  102. return conv
def DeformConvNorm(input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   norm_decay=0.,
                   norm_type='affine_channel',
                   norm_groups=32,
                   dilation=1,
                   lr_scale=1,
                   freeze_norm=False,
                   act=None,
                   norm_name=None,
                   initializer=None,
                   bias_attr=False,
                   name=None):
    """Deformable convolution (see `DeformConv`) followed by normalization.

    Args:
        input (Variable): input feature map.
        num_filters (int): number of output channels.
        filter_size (int): convolution kernel size.
        stride (int): convolution stride. Default: 1.
        groups (int): convolution groups. Default: 1.
        norm_decay (float): L2 decay applied to the norm scale/offset params.
        norm_type (str): one of 'bn', 'sync_bn', 'affine_channel', 'gn'.
        norm_groups (int): group count, used only when norm_type == 'gn'.
        dilation (int): convolution dilation. Default: 1.
        lr_scale (float): learning-rate multiplier for conv and norm params.
        freeze_norm (bool): if True, norm params train at lr 0 and their
            gradients are stopped; batch norm also uses global statistics.
        act (str|None): activation fused into the norm layer.
        norm_name (str): parameter-name prefix for the norm layer.
        initializer: initializer for the conv weights.
        bias_attr (bool): whether the conv has a bias. Default: False.
        name (str): parameter-name prefix for the conv layer.

    Returns:
        Variable: normalized (and optionally activated) feature map.
    """
    assert norm_type in ['bn', 'sync_bn', 'affine_channel', 'gn']
    conv = DeformConv(input, num_filters, filter_size, stride, groups, dilation,
                      lr_scale, initializer, bias_attr, name)

    # Freezing is implemented by zeroing the learning rate on the norm params.
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    if norm_type in ['bn', 'sync_bn']:
        # A frozen BN runs with accumulated global statistics, not batch stats.
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        # Fetch the created scale/offset vars so gradients can be stopped below.
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'affine_channel':
        # Per-channel y = scale * x + bias, initialized to the identity map.
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
  171. def ConvNorm(input,
  172. num_filters,
  173. filter_size,
  174. stride=1,
  175. groups=1,
  176. norm_decay=0.,
  177. norm_type='affine_channel',
  178. norm_groups=32,
  179. dilation=1,
  180. lr_scale=1,
  181. freeze_norm=False,
  182. act=None,
  183. norm_name=None,
  184. initializer=None,
  185. bias_attr=False,
  186. name=None):
  187. fan = num_filters
  188. if bias_attr:
  189. bias_para = ParamAttr(
  190. name=name + "_bias",
  191. initializer=fluid.initializer.Constant(value=0),
  192. learning_rate=lr_scale * 2)
  193. else:
  194. bias_para = False
  195. conv = fluid.layers.conv2d(
  196. input=input,
  197. num_filters=num_filters,
  198. filter_size=filter_size,
  199. stride=stride,
  200. padding=((filter_size - 1) // 2) * dilation,
  201. dilation=dilation,
  202. groups=groups,
  203. act=None,
  204. param_attr=ParamAttr(
  205. name=name + "_weights",
  206. initializer=initializer,
  207. learning_rate=lr_scale),
  208. bias_attr=bias_para,
  209. name=name + '.conv2d.output.1')
  210. norm_lr = 0. if freeze_norm else 1.
  211. pattr = ParamAttr(
  212. name=norm_name + '_scale',
  213. learning_rate=norm_lr * lr_scale,
  214. regularizer=L2Decay(norm_decay))
  215. battr = ParamAttr(
  216. name=norm_name + '_offset',
  217. learning_rate=norm_lr * lr_scale,
  218. regularizer=L2Decay(norm_decay))
  219. if norm_type in ['bn', 'sync_bn']:
  220. global_stats = True if freeze_norm else False
  221. out = fluid.layers.batch_norm(
  222. input=conv,
  223. act=act,
  224. name=norm_name + '.output.1',
  225. param_attr=pattr,
  226. bias_attr=battr,
  227. moving_mean_name=norm_name + '_mean',
  228. moving_variance_name=norm_name + '_variance',
  229. use_global_stats=global_stats)
  230. scale = fluid.framework._get_var(pattr.name)
  231. bias = fluid.framework._get_var(battr.name)
  232. elif norm_type == 'gn':
  233. out = fluid.layers.group_norm(
  234. input=conv,
  235. act=act,
  236. name=norm_name + '.output.1',
  237. groups=norm_groups,
  238. param_attr=pattr,
  239. bias_attr=battr)
  240. scale = fluid.framework._get_var(pattr.name)
  241. bias = fluid.framework._get_var(battr.name)
  242. elif norm_type == 'affine_channel':
  243. scale = fluid.layers.create_parameter(
  244. shape=[conv.shape[1]],
  245. dtype=conv.dtype,
  246. attr=pattr,
  247. default_initializer=fluid.initializer.Constant(1.))
  248. bias = fluid.layers.create_parameter(
  249. shape=[conv.shape[1]],
  250. dtype=conv.dtype,
  251. attr=battr,
  252. default_initializer=fluid.initializer.Constant(0.))
  253. out = fluid.layers.affine_channel(
  254. x=conv, scale=scale, bias=bias, act=act)
  255. if freeze_norm:
  256. scale.stop_gradient = True
  257. bias.stop_gradient = True
  258. return out
def DropBlock(input, block_size, keep_prob, is_test):
    """DropBlock regularization (https://arxiv.org/abs/1810.12890).

    Drops contiguous ``block_size x block_size`` regions of the feature map
    and rescales the surviving activations so their expected sum is preserved.
    Identity at inference time.
    """
    if is_test:
        return input

    def CalculateGamma(input, block_size, keep_prob):
        # Seed probability gamma = (1 - keep_prob) * feat^2
        #                          / (block_size^2 * (feat - block_size + 1)^2)
        # chosen so the expected dropped fraction equals (1 - keep_prob).
        input_shape = fluid.layers.shape(input)
        # NOTE(review): only the last spatial dim (W) is sliced out here, so
        # the formula implicitly assumes a square feature map — confirm.
        feat_shape_tmp = fluid.layers.slice(input_shape, [0], [3], [4])
        feat_shape_tmp = fluid.layers.cast(feat_shape_tmp, dtype="float32")
        feat_shape_t = fluid.layers.reshape(feat_shape_tmp, [1, 1, 1, 1])
        feat_area = fluid.layers.pow(feat_shape_t, factor=2)

        block_shape_t = fluid.layers.fill_constant(
            shape=[1, 1, 1, 1], value=block_size, dtype='float32')
        block_area = fluid.layers.pow(block_shape_t, factor=2)

        # Region where a block's top-left corner can land without clipping.
        useful_shape_t = feat_shape_t - block_shape_t + 1
        useful_area = fluid.layers.pow(useful_shape_t, factor=2)

        upper_t = feat_area * (1 - keep_prob)
        bottom_t = block_area * useful_area
        output = upper_t / bottom_t
        return output

    gamma = CalculateGamma(input, block_size=block_size, keep_prob=keep_prob)
    input_shape = fluid.layers.shape(input)
    p = fluid.layers.expand_as(gamma, input)

    input_shape_tmp = fluid.layers.cast(input_shape, dtype="int64")
    random_matrix = fluid.layers.uniform_random(
        input_shape_tmp, dtype='float32', min=0.0, max=1.0)
    # 1 where a drop seed fires (uniform sample < gamma), 0 elsewhere.
    one_zero_m = fluid.layers.less_than(random_matrix, p)
    one_zero_m.stop_gradient = True
    one_zero_m = fluid.layers.cast(one_zero_m, dtype="float32")

    # Max-pool dilates each seed into a full block_size x block_size region.
    mask_flag = fluid.layers.pool2d(
        one_zero_m,
        pool_size=block_size,
        pool_type='max',
        pool_stride=1,
        pool_padding=block_size // 2)
    mask = 1.0 - mask_flag

    # Rescale kept activations by (total elements / kept elements).
    elem_numel = fluid.layers.reduce_prod(input_shape)
    elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
    elem_numel_m.stop_gradient = True

    elem_sum = fluid.layers.reduce_sum(mask)
    elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
    elem_sum_m.stop_gradient = True

    output = fluid.layers.elementwise_mul(input,
                                          mask) * elem_numel_m / elem_sum_m
    return output
  302. @register
  303. @serializable
  304. class AnchorGenerator(object):
  305. __op__ = fluid.layers.anchor_generator
  306. __append_doc__ = True
  307. def __init__(self,
  308. stride=[16.0, 16.0],
  309. anchor_sizes=[32, 64, 128, 256, 512],
  310. aspect_ratios=[0.5, 1., 2.],
  311. variance=[1., 1., 1., 1.]):
  312. super(AnchorGenerator, self).__init__()
  313. self.anchor_sizes = anchor_sizes
  314. self.aspect_ratios = aspect_ratios
  315. self.variance = variance
  316. self.stride = stride
  317. @register
  318. @serializable
  319. class AnchorGrid(object):
  320. """Generate anchor grid
  321. Args:
  322. image_size (int or list): input image size, may be a single integer or
  323. list of [h, w]. Default: 512
  324. min_level (int): min level of the feature pyramid. Default: 3
  325. max_level (int): max level of the feature pyramid. Default: 7
  326. anchor_base_scale: base anchor scale. Default: 4
  327. num_scales: number of anchor scales. Default: 3
  328. aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
  329. """
  330. def __init__(self,
  331. image_size=512,
  332. min_level=3,
  333. max_level=7,
  334. anchor_base_scale=4,
  335. num_scales=3,
  336. aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
  337. super(AnchorGrid, self).__init__()
  338. if isinstance(image_size, Integral):
  339. self.image_size = [image_size, image_size]
  340. else:
  341. self.image_size = image_size
  342. for dim in self.image_size:
  343. assert dim % 2 ** max_level == 0, \
  344. "image size should be multiple of the max level stride"
  345. self.min_level = min_level
  346. self.max_level = max_level
  347. self.anchor_base_scale = anchor_base_scale
  348. self.num_scales = num_scales
  349. self.aspect_ratios = aspect_ratios
  350. @property
  351. def base_cell(self):
  352. if not hasattr(self, '_base_cell'):
  353. self._base_cell = self.make_cell()
  354. return self._base_cell
  355. def make_cell(self):
  356. scales = [2**(i / self.num_scales) for i in range(self.num_scales)]
  357. scales = np.array(scales)
  358. ratios = np.array(self.aspect_ratios)
  359. ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1)
  360. hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1)
  361. anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs))
  362. return anchors
  363. def make_grid(self, stride):
  364. cell = self.base_cell * stride * self.anchor_base_scale
  365. x_steps = np.arange(stride // 2, self.image_size[1], stride)
  366. y_steps = np.arange(stride // 2, self.image_size[0], stride)
  367. offset_x, offset_y = np.meshgrid(x_steps, y_steps)
  368. offset_x = offset_x.flatten()
  369. offset_y = offset_y.flatten()
  370. offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1)
  371. offsets = offsets[:, np.newaxis, :]
  372. return (cell + offsets).reshape(-1, 4)
  373. def generate(self):
  374. return [
  375. self.make_grid(2**l)
  376. for l in range(self.min_level, self.max_level + 1)
  377. ]
  378. def __call__(self):
  379. if not hasattr(self, '_anchor_vars'):
  380. anchor_vars = []
  381. helper = LayerHelper('anchor_grid')
  382. for idx, l in enumerate(range(self.min_level, self.max_level + 1)):
  383. stride = 2**l
  384. anchors = self.make_grid(stride)
  385. var = helper.create_parameter(
  386. attr=ParamAttr(name='anchors_{}'.format(idx)),
  387. shape=anchors.shape,
  388. dtype='float32',
  389. stop_gradient=True,
  390. default_initializer=NumpyArrayInitializer(anchors))
  391. anchor_vars.append(var)
  392. var.persistable = True
  393. self._anchor_vars = anchor_vars
  394. return self._anchor_vars
  395. @register
  396. @serializable
  397. class RPNTargetAssign(object):
  398. __op__ = fluid.layers.rpn_target_assign
  399. __append_doc__ = True
  400. def __init__(self,
  401. rpn_batch_size_per_im=256,
  402. rpn_straddle_thresh=0.,
  403. rpn_fg_fraction=0.5,
  404. rpn_positive_overlap=0.7,
  405. rpn_negative_overlap=0.3,
  406. use_random=True):
  407. super(RPNTargetAssign, self).__init__()
  408. self.rpn_batch_size_per_im = rpn_batch_size_per_im
  409. self.rpn_straddle_thresh = rpn_straddle_thresh
  410. self.rpn_fg_fraction = rpn_fg_fraction
  411. self.rpn_positive_overlap = rpn_positive_overlap
  412. self.rpn_negative_overlap = rpn_negative_overlap
  413. self.use_random = use_random
  414. @register
  415. @serializable
  416. class GenerateProposals(object):
  417. __op__ = fluid.layers.generate_proposals
  418. __append_doc__ = True
  419. def __init__(self,
  420. pre_nms_top_n=6000,
  421. post_nms_top_n=1000,
  422. nms_thresh=.5,
  423. min_size=.1,
  424. eta=1.):
  425. super(GenerateProposals, self).__init__()
  426. self.pre_nms_top_n = pre_nms_top_n
  427. self.post_nms_top_n = post_nms_top_n
  428. self.nms_thresh = nms_thresh
  429. self.min_size = min_size
  430. self.eta = eta
  431. @register
  432. class MaskAssigner(object):
  433. __op__ = fluid.layers.generate_mask_labels
  434. __append_doc__ = True
  435. __shared__ = ['num_classes']
  436. def __init__(self, num_classes=81, resolution=14):
  437. super(MaskAssigner, self).__init__()
  438. self.num_classes = num_classes
  439. self.resolution = resolution
  440. @register
  441. @serializable
  442. class MultiClassNMS(object):
  443. __op__ = fluid.layers.multiclass_nms
  444. __append_doc__ = True
  445. def __init__(self,
  446. score_threshold=.05,
  447. nms_top_k=-1,
  448. keep_top_k=100,
  449. nms_threshold=.5,
  450. normalized=False,
  451. nms_eta=1.0,
  452. background_label=0):
  453. super(MultiClassNMS, self).__init__()
  454. self.score_threshold = score_threshold
  455. self.nms_top_k = nms_top_k
  456. self.keep_top_k = keep_top_k
  457. self.nms_threshold = nms_threshold
  458. self.normalized = normalized
  459. self.nms_eta = nms_eta
  460. self.background_label = background_label
  461. @register
  462. @serializable
  463. class MatrixNMS(object):
  464. __op__ = 'paddle.fluid.layers.matrix_nms'
  465. __append_doc__ = True
  466. def __init__(self,
  467. score_threshold=.05,
  468. post_threshold=.05,
  469. nms_top_k=-1,
  470. keep_top_k=100,
  471. use_gaussian=False,
  472. gaussian_sigma=2.,
  473. normalized=False,
  474. background_label=0):
  475. super(MatrixNMS, self).__init__()
  476. self.score_threshold = score_threshold
  477. self.post_threshold = post_threshold
  478. self.nms_top_k = nms_top_k
  479. self.keep_top_k = keep_top_k
  480. self.normalized = normalized
  481. self.use_gaussian = use_gaussian
  482. self.gaussian_sigma = gaussian_sigma
  483. self.background_label = background_label
@register
@serializable
class MultiClassSoftNMS(object):
    """Multi-class Soft-NMS (https://arxiv.org/abs/1704.04503).

    Instead of discarding boxes that overlap the current maximum, their
    scores are decayed by a Gaussian of the overlap. Implemented in numpy
    and attached to the graph as a ``py_func`` op (runs on CPU).
    """

    def __init__(
            self,
            score_threshold=0.01,
            keep_top_k=300,
            softnms_sigma=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold  # discard dets scored below this
        self.keep_top_k = keep_top_k  # max detections kept per image
        self.softnms_sigma = softnms_sigma  # Gaussian decay sigma
        self.normalized = normalized  # True if box coords are normalized
        self.background_label = background_label  # class index to skip

    def __call__(self, bboxes, scores):
        """Attach a py_func op running soft-NMS over ``bboxes``/``scores``.

        Returns:
            Variable: LoD tensor of shape [-1, 6] with rows
            [class_id, score, x1, y1, x2, y2].
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # Output placeholder for the py_func op.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _soft_nms_for_cls(dets, sigma, thres):
            """soft_nms_for_cls"""
            # dets rows: [score, x1, y1, x2, y2].
            dets_final = []
            while len(dets) > 0:
                maxpos = np.argmax(dets[:, 0])
                dets_final.append(dets[maxpos].copy())
                ts, tx1, ty1, tx2, ty2 = dets[maxpos]
                # `scores` is a view into dets, so the write below marks the
                # picked row inside dets itself.
                scores = dets[:, 0]
                # force remove bbox at maxpos
                scores[maxpos] = -1
                x1 = dets[:, 1]
                y1 = dets[:, 2]
                x2 = dets[:, 3]
                y2 = dets[:, 4]
                # eta implements the +1 pixel convention for unnormalized boxes.
                eta = 0 if self.normalized else 1
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)
                xx1 = np.maximum(tx1, x1)
                yy1 = np.maximum(ty1, y1)
                xx2 = np.minimum(tx2, x2)
                yy2 = np.minimum(ty2, y2)
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas + areas[maxpos] - inter)
                # Gaussian decay of every score by its overlap with the pick.
                weight = np.exp(-(ovr * ovr) / sigma)
                scores = scores * weight
                idx_keep = np.where(scores >= thres)
                dets[:, 0] = scores
                dets = dets[idx_keep]
            dets_final = np.array(dets_final).reshape(-1, 5)
            return dets_final

        def _soft_nms(bboxes, scores):
            # Per-image soft-NMS over all foreground classes.
            class_nums = scores.shape[-1]
            softnms_thres = self.score_threshold
            softnms_sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Skip class 0 only when it is the background.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= softnms_thres)[0]
                scores_j = scores[inds, j]
                # Boxes may be per-class (rank 3) or shared (rank 2).
                rois_j = bboxes[inds, j, :] if len(
                    bboxes.shape) > 2 else bboxes[inds, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort descending by score before suppression.
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _soft_nms_for_cls(
                    dets_j, sigma=softnms_sigma, thres=softnms_thres)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
                                                                           1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes])
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            return pred_result

        def _batch_softnms(bboxes, scores):
            # Split the batch by LoD offsets when present, otherwise by the
            # leading dimension of a dense rank-3 tensor.
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            out_offsets = [0]
            pred_res = []
            if len(batch_offsets) > 0:
                batch_offset = batch_offsets[0]
                for i in range(len(batch_offset) - 1):
                    s, e = batch_offset[i], batch_offset[i + 1]
                    pred = _soft_nms(bboxes[s:e], scores[s:e])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            else:
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                for i in range(bboxes.shape[0]):
                    pred = _soft_nms(bboxes[i], scores[i])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            if len(pred_res) == 0:
                # Placeholder row so the output tensor is never empty.
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result
@register
@serializable
class MultiClassDiouNMS(object):
    """Multi-class NMS using a DIoU-style suppression criterion.

    A box is suppressed when ``IoU - diou_term > nms_threshold``, where
    ``diou_term`` penalizes center distance relative to the enclosing box
    diagonal (cf. https://arxiv.org/abs/1911.08287). Implemented in numpy
    and attached to the graph as a ``py_func`` op (runs on CPU).
    """

    def __init__(
            self,
            score_threshold=0.05,
            keep_top_k=100,
            nms_threshold=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold  # discard dets scored below this
        self.nms_threshold = nms_threshold  # DIoU suppression threshold
        self.keep_top_k = keep_top_k  # max detections kept per image
        self.normalized = normalized  # True if box coords are normalized
        self.background_label = background_label  # class index to skip

    def __call__(self, bboxes, scores):
        """Attach a py_func op running DIoU-NMS over ``bboxes``/``scores``.

        Returns:
            Variable: tensor of shape [-1, 6] with rows
            [class_id, score, x1, y1, x2, y2].
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # Output placeholder for the py_func op.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _calc_diou_term(dets1, dets2):
            # dets1: coords of the picked box; dets2: remaining boxes
            # (numpy arrays, so all math below is vectorized).
            eps = 1.e-10
            eta = 0 if self.normalized else 1
            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            w = x2 - x1 + eta
            h = y2 - y1 + eta
            cxg = (x1g + x2g) / 2
            cyg = (y1g + y2g) / 2
            wg = x2g - x1g + eta
            hg = y2g - y1g + eta
            # NOTE(review): w/h/wg/hg are computed but never used below.
            # Clamp so x2 >= x1 and y2 >= y1 before building the hull.
            x2 = np.maximum(x1, x2)
            y2 = np.maximum(y1, y2)
            # A or B
            xc1 = np.minimum(x1, x1g)
            yc1 = np.minimum(y1, y1g)
            xc2 = np.maximum(x2, x2g)
            yc2 = np.maximum(y2, y2g)
            # DIOU term
            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
            diou_term = (dist_intersection + eps) / (dist_union + eps)
            return diou_term

        def _diou_nms_for_cls(dets, thres):
            """_diou_nms_for_cls"""
            # dets rows: [score, x1, y1, x2, y2], pre-sorted descending by
            # score by the caller, so order[0] is always the current best.
            scores = dets[:, 0]
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            # eta implements the +1 pixel convention for unnormalized boxes.
            eta = 0 if self.normalized else 1
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            dt_num = dets.shape[0]
            order = np.array(range(dt_num))
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                xx1 = np.maximum(x1[i], x1[order[1:]])
                yy1 = np.maximum(y1[i], y1[order[1:]])
                xx2 = np.minimum(x2[i], x2[order[1:]])
                yy2 = np.minimum(y2[i], y2[order[1:]])
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
                diou_term = _calc_diou_term([x1[i], y1[i], x2[i], y2[i]], [
                    x1[order[1:]], y1[order[1:]], x2[order[1:]], y2[order[1:]]
                ])
                # Keep boxes whose distance-penalized IoU stays below thres.
                inds = np.where(ovr - diou_term <= thres)[0]
                order = order[inds + 1]
            dets_final = dets[keep]
            return dets_final

        def _diou_nms(bboxes, scores):
            # Per-image DIoU-NMS over all foreground classes.
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]
            score_threshold = self.score_threshold
            nms_threshold = self.nms_threshold
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Skip class 0 only when it is the background.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= score_threshold)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort descending by score before suppression.
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
                                                                           1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            if pred_result.shape[0] == 0:
                # Placeholder row so the output tensor is never empty.
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
@register
class BBoxAssigner(object):
    # Thin config holder: ``__op__`` makes the registry forward calls to
    # fluid.layers.generate_proposal_labels using the attributes stored
    # below; ``__append_doc__`` pulls in that op's docstring.
    __op__ = fluid.layers.generate_proposal_labels
    __append_doc__ = True
    # ``num_classes`` is shared with the global config.
    __shared__ = ['num_classes']

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True):
        # NOTE(review): the list default for bbox_reg_weights is shared
        # across instances; safe as long as it is never mutated in place.
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        # Stored under the op's expected argument names.
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample
  744. @register
  745. class LibraBBoxAssigner(object):
  746. __shared__ = ['num_classes']
  747. def __init__(self,
  748. batch_size_per_im=512,
  749. fg_fraction=.25,
  750. fg_thresh=.5,
  751. bg_thresh_hi=.5,
  752. bg_thresh_lo=0.,
  753. bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
  754. num_classes=81,
  755. shuffle_before_sample=True,
  756. is_cls_agnostic=False,
  757. num_bins=3):
  758. super(LibraBBoxAssigner, self).__init__()
  759. self.batch_size_per_im = batch_size_per_im
  760. self.fg_fraction = fg_fraction
  761. self.fg_thresh = fg_thresh
  762. self.bg_thresh_hi = bg_thresh_hi
  763. self.bg_thresh_lo = bg_thresh_lo
  764. self.bbox_reg_weights = bbox_reg_weights
  765. self.class_nums = num_classes
  766. self.use_random = shuffle_before_sample
  767. self.is_cls_agnostic = is_cls_agnostic
  768. self.num_bins = num_bins
  769. def __call__(
  770. self,
  771. rpn_rois,
  772. gt_classes,
  773. is_crowd,
  774. gt_boxes,
  775. im_info, ):
  776. return self.generate_proposal_label_libra(
  777. rpn_rois=rpn_rois,
  778. gt_classes=gt_classes,
  779. is_crowd=is_crowd,
  780. gt_boxes=gt_boxes,
  781. im_info=im_info,
  782. batch_size_per_im=self.batch_size_per_im,
  783. fg_fraction=self.fg_fraction,
  784. fg_thresh=self.fg_thresh,
  785. bg_thresh_hi=self.bg_thresh_hi,
  786. bg_thresh_lo=self.bg_thresh_lo,
  787. bbox_reg_weights=self.bbox_reg_weights,
  788. class_nums=self.class_nums,
  789. use_random=self.use_random,
  790. is_cls_agnostic=self.is_cls_agnostic,
  791. is_cascade_rcnn=False)
  792. def generate_proposal_label_libra(
  793. self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
  794. batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
  795. bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
  796. is_cls_agnostic, is_cascade_rcnn):
  797. num_bins = self.num_bins
  798. def create_tmp_var(program, name, dtype, shape, lod_level=None):
  799. return program.current_block().create_var(
  800. name=name, dtype=dtype, shape=shape, lod_level=lod_level)
  801. def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
  802. if len(pos_inds) <= num_expected:
  803. return pos_inds
  804. else:
  805. unique_gt_inds = np.unique(max_classes[pos_inds])
  806. num_gts = len(unique_gt_inds)
  807. num_per_gt = int(round(num_expected / float(num_gts)) + 1)
  808. sampled_inds = []
  809. for i in unique_gt_inds:
  810. inds = np.nonzero(max_classes == i)[0]
  811. before_len = len(inds)
  812. inds = list(set(inds) & set(pos_inds))
  813. after_len = len(inds)
  814. if len(inds) > num_per_gt:
  815. inds = np.random.choice(
  816. inds, size=num_per_gt, replace=False)
  817. sampled_inds.extend(list(inds)) # combine as a new sampler
  818. if len(sampled_inds) < num_expected:
  819. num_extra = num_expected - len(sampled_inds)
  820. extra_inds = np.array(
  821. list(set(pos_inds) - set(sampled_inds)))
  822. assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
  823. "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
  824. len(sampled_inds), len(extra_inds), len(pos_inds))
  825. if len(extra_inds) > num_extra:
  826. extra_inds = np.random.choice(
  827. extra_inds, size=num_extra, replace=False)
  828. sampled_inds.extend(extra_inds.tolist())
  829. elif len(sampled_inds) > num_expected:
  830. sampled_inds = np.random.choice(
  831. sampled_inds, size=num_expected, replace=False)
  832. return sampled_inds
  833. def sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
  834. num_bins, bg_thresh_hi):
  835. max_iou = max_overlaps.max()
  836. iou_interval = (max_iou - floor_thr) / num_bins
  837. per_num_expected = int(num_expected / num_bins)
  838. sampled_inds = []
  839. for i in range(num_bins):
  840. start_iou = floor_thr + i * iou_interval
  841. end_iou = floor_thr + (i + 1) * iou_interval
  842. tmp_set = set(
  843. np.where(
  844. np.logical_and(max_overlaps >= start_iou, max_overlaps <
  845. end_iou))[0])
  846. tmp_inds = list(tmp_set & full_set)
  847. if len(tmp_inds) > per_num_expected:
  848. tmp_sampled_set = np.random.choice(
  849. tmp_inds, size=per_num_expected, replace=False)
  850. else:
  851. tmp_sampled_set = np.array(tmp_inds, dtype=np.int)
  852. sampled_inds.append(tmp_sampled_set)
  853. sampled_inds = np.concatenate(sampled_inds)
  854. if len(sampled_inds) < num_expected:
  855. num_extra = num_expected - len(sampled_inds)
  856. extra_inds = np.array(list(full_set - set(sampled_inds)))
  857. assert len(sampled_inds)+len(extra_inds) == len(full_set), \
  858. "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
  859. len(sampled_inds), len(extra_inds), len(full_set))
  860. if len(extra_inds) > num_extra:
  861. extra_inds = np.random.choice(
  862. extra_inds, num_extra, replace=False)
  863. sampled_inds = np.concatenate([sampled_inds, extra_inds])
  864. return sampled_inds
  865. def _sample_neg(max_overlaps,
  866. max_classes,
  867. neg_inds,
  868. num_expected,
  869. floor_thr=-1,
  870. floor_fraction=0,
  871. num_bins=3,
  872. bg_thresh_hi=0.5):
  873. if len(neg_inds) <= num_expected:
  874. return neg_inds
  875. else:
  876. # balance sampling for negative samples
  877. neg_set = set(neg_inds)
  878. if floor_thr > 0:
  879. floor_set = set(
  880. np.where(
  881. np.logical_and(max_overlaps >= 0, max_overlaps <
  882. floor_thr))[0])
  883. iou_sampling_set = set(
  884. np.where(max_overlaps >= floor_thr)[0])
  885. elif floor_thr == 0:
  886. floor_set = set(np.where(max_overlaps == 0)[0])
  887. iou_sampling_set = set(
  888. np.where(max_overlaps > floor_thr)[0])
  889. else:
  890. floor_set = set()
  891. iou_sampling_set = set(
  892. np.where(max_overlaps > floor_thr)[0])
  893. floor_thr = 0
  894. floor_neg_inds = list(floor_set & neg_set)
  895. iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
  896. num_expected_iou_sampling = int(num_expected *
  897. (1 - floor_fraction))
  898. if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
  899. if num_bins >= 2:
  900. iou_sampled_inds = sample_via_interval(
  901. max_overlaps,
  902. set(iou_sampling_neg_inds),
  903. num_expected_iou_sampling, floor_thr, num_bins,
  904. bg_thresh_hi)
  905. else:
  906. iou_sampled_inds = np.random.choice(
  907. iou_sampling_neg_inds,
  908. size=num_expected_iou_sampling,
  909. replace=False)
  910. else:
  911. iou_sampled_inds = np.array(
  912. iou_sampling_neg_inds, dtype=np.int)
  913. num_expected_floor = num_expected - len(iou_sampled_inds)
  914. if len(floor_neg_inds) > num_expected_floor:
  915. sampled_floor_inds = np.random.choice(
  916. floor_neg_inds, size=num_expected_floor, replace=False)
  917. else:
  918. sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int)
  919. sampled_inds = np.concatenate(
  920. (sampled_floor_inds, iou_sampled_inds))
  921. if len(sampled_inds) < num_expected:
  922. num_extra = num_expected - len(sampled_inds)
  923. extra_inds = np.array(list(neg_set - set(sampled_inds)))
  924. if len(extra_inds) > num_extra:
  925. extra_inds = np.random.choice(
  926. extra_inds, size=num_extra, replace=False)
  927. sampled_inds = np.concatenate((sampled_inds, extra_inds))
  928. return sampled_inds
  929. def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
  930. batch_size_per_im, fg_fraction, fg_thresh,
  931. bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
  932. class_nums, use_random, is_cls_agnostic,
  933. is_cascade_rcnn):
  934. rois_per_image = int(batch_size_per_im)
  935. fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
  936. # Roidb
  937. im_scale = im_info[2]
  938. inv_im_scale = 1. / im_scale
  939. rpn_rois = rpn_rois * inv_im_scale
  940. if is_cascade_rcnn:
  941. rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
  942. boxes = np.vstack([gt_boxes, rpn_rois])
  943. gt_overlaps = np.zeros((boxes.shape[0], class_nums))
  944. box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
  945. if len(gt_boxes) > 0:
  946. proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
  947. overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
  948. overlaps_max = proposal_to_gt_overlaps.max(axis=1)
  949. # Boxes which with non-zero overlap with gt boxes
  950. overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
  951. overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
  952. overlapped_boxes_ind]]
  953. for idx in range(len(overlapped_boxes_ind)):
  954. gt_overlaps[overlapped_boxes_ind[
  955. idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
  956. overlapped_boxes_ind[idx]]
  957. box_to_gt_ind_map[overlapped_boxes_ind[
  958. idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]
  959. crowd_ind = np.where(is_crowd)[0]
  960. gt_overlaps[crowd_ind] = -1
  961. max_overlaps = gt_overlaps.max(axis=1)
  962. max_classes = gt_overlaps.argmax(axis=1)
  963. # Cascade RCNN Decode Filter
  964. if is_cascade_rcnn:
  965. ws = boxes[:, 2] - boxes[:, 0] + 1
  966. hs = boxes[:, 3] - boxes[:, 1] + 1
  967. keep = np.where((ws > 0) & (hs > 0))[0]
  968. boxes = boxes[keep]
  969. max_overlaps = max_overlaps[keep]
  970. fg_inds = np.where(max_overlaps >= fg_thresh)[0]
  971. bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
  972. max_overlaps >= bg_thresh_lo))[0]
  973. fg_rois_per_this_image = fg_inds.shape[0]
  974. bg_rois_per_this_image = bg_inds.shape[0]
  975. else:
  976. # Foreground
  977. fg_inds = np.where(max_overlaps >= fg_thresh)[0]
  978. fg_rois_per_this_image = np.minimum(fg_rois_per_im,
  979. fg_inds.shape[0])
  980. # Sample foreground if there are too many
  981. if fg_inds.shape[0] > fg_rois_per_this_image:
  982. if use_random:
  983. fg_inds = _sample_pos(max_overlaps, max_classes,
  984. fg_inds, fg_rois_per_this_image)
  985. fg_inds = fg_inds[:fg_rois_per_this_image]
  986. # Background
  987. bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
  988. max_overlaps >= bg_thresh_lo))[0]
  989. bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
  990. bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
  991. bg_inds.shape[0])
  992. assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
  993. bg_rois_per_this_image)
  994. # Sample background if there are too many
  995. if bg_inds.shape[0] > bg_rois_per_this_image:
  996. if use_random:
  997. # libra neg sample
  998. bg_inds = _sample_neg(
  999. max_overlaps,
  1000. max_classes,
  1001. bg_inds,
  1002. bg_rois_per_this_image,
  1003. num_bins=num_bins,
  1004. bg_thresh_hi=bg_thresh_hi)
  1005. bg_inds = bg_inds[:bg_rois_per_this_image]
  1006. keep_inds = np.append(fg_inds, bg_inds)
  1007. sampled_labels = max_classes[keep_inds] # N x 1
  1008. sampled_labels[fg_rois_per_this_image:] = 0
  1009. sampled_boxes = boxes[keep_inds] # N x 324
  1010. sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
  1011. sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
  1012. bbox_label_targets = _compute_targets(
  1013. sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
  1014. bbox_targets, bbox_inside_weights = _expand_bbox_targets(
  1015. bbox_label_targets, class_nums, is_cls_agnostic)
  1016. bbox_outside_weights = np.array(
  1017. bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
  1018. # Scale rois
  1019. sampled_rois = sampled_boxes * im_scale
  1020. # Faster RCNN blobs
  1021. frcn_blobs = dict(
  1022. rois=sampled_rois,
  1023. labels_int32=sampled_labels,
  1024. bbox_targets=bbox_targets,
  1025. bbox_inside_weights=bbox_inside_weights,
  1026. bbox_outside_weights=bbox_outside_weights)
  1027. return frcn_blobs
  1028. def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
  1029. assert roi_boxes.shape[0] == gt_boxes.shape[0]
  1030. assert roi_boxes.shape[1] == 4
  1031. assert gt_boxes.shape[1] == 4
  1032. targets = np.zeros(roi_boxes.shape)
  1033. bbox_reg_weights = np.asarray(bbox_reg_weights)
  1034. targets = box_to_delta(
  1035. ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)
  1036. return np.hstack([labels[:, np.newaxis], targets]).astype(
  1037. np.float32, copy=False)
  1038. def _expand_bbox_targets(bbox_targets_input, class_nums,
  1039. is_cls_agnostic):
  1040. class_labels = bbox_targets_input[:, 0]
  1041. fg_inds = np.where(class_labels > 0)[0]
  1042. bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
  1043. if not is_cls_agnostic else 4 * 2))
  1044. bbox_inside_weights = np.zeros(bbox_targets.shape)
  1045. for ind in fg_inds:
  1046. class_label = int(class_labels[
  1047. ind]) if not is_cls_agnostic else 1
  1048. start_ind = class_label * 4
  1049. end_ind = class_label * 4 + 4
  1050. bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind,
  1051. 1:]
  1052. bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
  1053. 1.0)
  1054. return bbox_targets, bbox_inside_weights
  1055. def generate_func(
  1056. rpn_rois,
  1057. gt_classes,
  1058. is_crowd,
  1059. gt_boxes,
  1060. im_info, ):
  1061. rpn_rois_lod = rpn_rois.lod()[0]
  1062. gt_classes_lod = gt_classes.lod()[0]
  1063. # convert
  1064. rpn_rois = np.array(rpn_rois)
  1065. gt_classes = np.array(gt_classes)
  1066. is_crowd = np.array(is_crowd)
  1067. gt_boxes = np.array(gt_boxes)
  1068. im_info = np.array(im_info)
  1069. rois = []
  1070. labels_int32 = []
  1071. bbox_targets = []
  1072. bbox_inside_weights = []
  1073. bbox_outside_weights = []
  1074. lod = [0]
  1075. for idx in range(len(rpn_rois_lod) - 1):
  1076. rois_si = rpn_rois_lod[idx]
  1077. rois_ei = rpn_rois_lod[idx + 1]
  1078. gt_si = gt_classes_lod[idx]
  1079. gt_ei = gt_classes_lod[idx + 1]
  1080. frcn_blobs = _sample_rois(
  1081. rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
  1082. is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
  1083. batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
  1084. bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
  1085. is_cls_agnostic, is_cascade_rcnn)
  1086. lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
  1087. rois.append(frcn_blobs['rois'])
  1088. labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
  1089. bbox_targets.append(frcn_blobs['bbox_targets'])
  1090. bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
  1091. bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
  1092. rois = np.vstack(rois)
  1093. labels_int32 = np.vstack(labels_int32)
  1094. bbox_targets = np.vstack(bbox_targets)
  1095. bbox_inside_weights = np.vstack(bbox_inside_weights)
  1096. bbox_outside_weights = np.vstack(bbox_outside_weights)
  1097. # create lod-tensor for return
  1098. # notice that the func create_lod_tensor does not work well here
  1099. ret_rois = fluid.LoDTensor()
  1100. ret_rois.set_lod([lod])
  1101. ret_rois.set(rois.astype("float32"), fluid.CPUPlace())
  1102. ret_labels_int32 = fluid.LoDTensor()
  1103. ret_labels_int32.set_lod([lod])
  1104. ret_labels_int32.set(labels_int32.astype("int32"), fluid.CPUPlace())
  1105. ret_bbox_targets = fluid.LoDTensor()
  1106. ret_bbox_targets.set_lod([lod])
  1107. ret_bbox_targets.set(
  1108. bbox_targets.astype("float32"), fluid.CPUPlace())
  1109. ret_bbox_inside_weights = fluid.LoDTensor()
  1110. ret_bbox_inside_weights.set_lod([lod])
  1111. ret_bbox_inside_weights.set(
  1112. bbox_inside_weights.astype("float32"), fluid.CPUPlace())
  1113. ret_bbox_outside_weights = fluid.LoDTensor()
  1114. ret_bbox_outside_weights.set_lod([lod])
  1115. ret_bbox_outside_weights.set(
  1116. bbox_outside_weights.astype("float32"), fluid.CPUPlace())
  1117. return ret_rois, ret_labels_int32, ret_bbox_targets, ret_bbox_inside_weights, ret_bbox_outside_weights
  1118. rois = create_tmp_var(
  1119. fluid.default_main_program(),
  1120. name=None,
  1121. dtype='float32',
  1122. shape=[-1, 4], )
  1123. bbox_inside_weights = create_tmp_var(
  1124. fluid.default_main_program(),
  1125. name=None,
  1126. dtype='float32',
  1127. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  1128. bbox_outside_weights = create_tmp_var(
  1129. fluid.default_main_program(),
  1130. name=None,
  1131. dtype='float32',
  1132. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  1133. bbox_targets = create_tmp_var(
  1134. fluid.default_main_program(),
  1135. name=None,
  1136. dtype='float32',
  1137. shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
  1138. labels_int32 = create_tmp_var(
  1139. fluid.default_main_program(),
  1140. name=None,
  1141. dtype='int32',
  1142. shape=[-1, 1], )
  1143. outs = [
  1144. rois, labels_int32, bbox_targets, bbox_inside_weights,
  1145. bbox_outside_weights
  1146. ]
  1147. fluid.layers.py_func(
  1148. func=generate_func,
  1149. x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
  1150. out=outs)
  1151. return outs
  1152. @register
  1153. class RoIAlign(object):
  1154. __op__ = fluid.layers.roi_align
  1155. __append_doc__ = True
  1156. def __init__(self, resolution=7, spatial_scale=1. / 16, sampling_ratio=0):
  1157. super(RoIAlign, self).__init__()
  1158. if isinstance(resolution, Integral):
  1159. resolution = [resolution, resolution]
  1160. self.pooled_height = resolution[0]
  1161. self.pooled_width = resolution[1]
  1162. self.spatial_scale = spatial_scale
  1163. self.sampling_ratio = sampling_ratio
  1164. @register
  1165. class RoIPool(object):
  1166. __op__ = fluid.layers.roi_pool
  1167. __append_doc__ = True
  1168. def __init__(self, resolution=7, spatial_scale=1. / 16):
  1169. super(RoIPool, self).__init__()
  1170. if isinstance(resolution, Integral):
  1171. resolution = [resolution, resolution]
  1172. self.pooled_height = resolution[0]
  1173. self.pooled_width = resolution[1]
  1174. self.spatial_scale = spatial_scale
@register
class MultiBoxHead(object):
    # Config holder: the registry forwards calls to
    # fluid.layers.multi_box_head using the attributes stored below.
    __op__ = fluid.layers.multi_box_head
    __append_doc__ = True

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
                 max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 min_max_aspect_ratios_order=False,
                 kernel_size=1,
                 pad=0):
        # NOTE(review): the list defaults are shared across instances; safe
        # as long as they are never mutated in place.
        super(MultiBoxHead, self).__init__()
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
        self.kernel_size = kernel_size
        self.pad = pad
  1206. @register
  1207. @serializable
  1208. class SSDLiteMultiBoxHead(object):
  1209. def __init__(self,
  1210. min_ratio=20,
  1211. max_ratio=90,
  1212. base_size=300,
  1213. min_sizes=None,
  1214. max_sizes=None,
  1215. aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
  1216. [2., 3.]],
  1217. steps=None,
  1218. offset=0.5,
  1219. flip=True,
  1220. clip=False,
  1221. pad=0,
  1222. conv_decay=0.0):
  1223. super(SSDLiteMultiBoxHead, self).__init__()
  1224. self.min_ratio = min_ratio
  1225. self.max_ratio = max_ratio
  1226. self.base_size = base_size
  1227. self.min_sizes = min_sizes
  1228. self.max_sizes = max_sizes
  1229. self.aspect_ratios = aspect_ratios
  1230. self.steps = steps
  1231. self.offset = offset
  1232. self.flip = flip
  1233. self.pad = pad
  1234. self.clip = clip
  1235. self.conv_decay = conv_decay
  1236. def _separable_conv(self, input, num_filters, name):
  1237. dwconv_param_attr = ParamAttr(
  1238. name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay))
  1239. num_filter1 = input.shape[1]
  1240. depthwise_conv = fluid.layers.conv2d(
  1241. input=input,
  1242. num_filters=num_filter1,
  1243. filter_size=3,
  1244. stride=1,
  1245. padding="SAME",
  1246. groups=int(num_filter1),
  1247. act=None,
  1248. use_cudnn=False,
  1249. param_attr=dwconv_param_attr,
  1250. bias_attr=False)
  1251. bn_name = name + '_bn'
  1252. bn_param_attr = ParamAttr(
  1253. name=bn_name + "_scale", regularizer=L2Decay(0.0))
  1254. bn_bias_attr = ParamAttr(
  1255. name=bn_name + "_offset", regularizer=L2Decay(0.0))
  1256. bn = fluid.layers.batch_norm(
  1257. input=depthwise_conv,
  1258. param_attr=bn_param_attr,
  1259. bias_attr=bn_bias_attr,
  1260. moving_mean_name=bn_name + '_mean',
  1261. moving_variance_name=bn_name + '_variance')
  1262. bn = fluid.layers.relu6(bn)
  1263. pwconv_param_attr = ParamAttr(
  1264. name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay))
  1265. pointwise_conv = fluid.layers.conv2d(
  1266. input=bn,
  1267. num_filters=num_filters,
  1268. filter_size=1,
  1269. stride=1,
  1270. act=None,
  1271. use_cudnn=True,
  1272. param_attr=pwconv_param_attr,
  1273. bias_attr=False)
  1274. return pointwise_conv
  1275. def __call__(self, inputs, image, num_classes):
  1276. def _permute_and_reshape(input, last_dim):
  1277. trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
  1278. compile_shape = [0, -1, last_dim]
  1279. return fluid.layers.reshape(trans, shape=compile_shape)
  1280. def _is_list_or_tuple_(data):
  1281. return (isinstance(data, list) or isinstance(data, tuple))
  1282. if self.min_sizes is None and self.max_sizes is None:
  1283. num_layer = len(inputs)
  1284. self.min_sizes = []
  1285. self.max_sizes = []
  1286. step = int(
  1287. math.floor(((self.max_ratio - self.min_ratio)) / (num_layer - 2
  1288. )))
  1289. for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1,
  1290. step):
  1291. self.min_sizes.append(self.base_size * ratio / 100.)
  1292. self.max_sizes.append(self.base_size * (ratio + step) / 100.)
  1293. self.min_sizes = [self.base_size * .10] + self.min_sizes
  1294. self.max_sizes = [self.base_size * .20] + self.max_sizes
  1295. locs, confs = [], []
  1296. boxes, mvars = [], []
  1297. for i, input in enumerate(inputs):
  1298. min_size = self.min_sizes[i]
  1299. max_size = self.max_sizes[i]
  1300. if not _is_list_or_tuple_(min_size):
  1301. min_size = [min_size]
  1302. if not _is_list_or_tuple_(max_size):
  1303. max_size = [max_size]
  1304. step = [
  1305. self.steps[i] if self.steps else 0.0, self.steps[i]
  1306. if self.steps else 0.0
  1307. ]
  1308. box, var = fluid.layers.prior_box(
  1309. input,
  1310. image,
  1311. min_sizes=min_size,
  1312. max_sizes=max_size,
  1313. steps=step,
  1314. aspect_ratios=self.aspect_ratios[i],
  1315. variance=[0.1, 0.1, 0.2, 0.2],
  1316. clip=self.clip,
  1317. flip=self.flip,
  1318. offset=0.5)
  1319. num_boxes = box.shape[2]
  1320. box = fluid.layers.reshape(box, shape=[-1, 4])
  1321. var = fluid.layers.reshape(var, shape=[-1, 4])
  1322. num_loc_output = num_boxes * 4
  1323. num_conf_output = num_boxes * num_classes
  1324. # get loc
  1325. mbox_loc = self._separable_conv(input, num_loc_output,
  1326. "loc_{}".format(i + 1))
  1327. loc = _permute_and_reshape(mbox_loc, 4)
  1328. # get conf
  1329. mbox_conf = self._separable_conv(input, num_conf_output,
  1330. "conf_{}".format(i + 1))
  1331. conf = _permute_and_reshape(mbox_conf, num_classes)
  1332. locs.append(loc)
  1333. confs.append(conf)
  1334. boxes.append(box)
  1335. mvars.append(var)
  1336. ssd_mbox_loc = fluid.layers.concat(locs, axis=1)
  1337. ssd_mbox_conf = fluid.layers.concat(confs, axis=1)
  1338. prior_boxes = fluid.layers.concat(boxes)
  1339. box_vars = fluid.layers.concat(mvars)
  1340. prior_boxes.stop_gradient = True
  1341. box_vars.stop_gradient = True
  1342. return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars
@register
@serializable
class SSDOutputDecoder(object):
    # Config holder: the registry forwards calls to
    # fluid.layers.detection_output using the attributes stored below.
    __op__ = fluid.layers.detection_output
    __append_doc__ = True

    def __init__(self,
                 nms_threshold=0.45,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0,
                 return_index=False):
        super(SSDOutputDecoder, self).__init__()
        # Stored under the op's argument names; assignment order differs
        # from the signature but has no effect.
        self.nms_threshold = nms_threshold
        self.background_label = background_label
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.score_threshold = score_threshold
        self.nms_eta = nms_eta
        self.return_index = return_index
@register
@serializable
class RetinaTargetAssign(object):
    # Config holder: the registry forwards calls to
    # fluid.layers.retinanet_target_assign using the attributes below.
    __op__ = fluid.layers.retinanet_target_assign
    __append_doc__ = True

    def __init__(self, positive_overlap=0.5, negative_overlap=0.4):
        super(RetinaTargetAssign, self).__init__()
        self.positive_overlap = positive_overlap
        self.negative_overlap = negative_overlap
@register
@serializable
class RetinaOutputDecoder(object):
    # Config holder: the registry forwards calls to
    # fluid.layers.retinanet_detection_output using the attributes below.
    __op__ = fluid.layers.retinanet_detection_output
    __append_doc__ = True

    def __init__(self,
                 score_thresh=0.05,
                 nms_thresh=0.3,
                 pre_nms_top_n=1000,
                 detections_per_im=100,
                 nms_eta=1.0):
        super(RetinaOutputDecoder, self).__init__()
        # Constructor names differ from the op's argument names; store the
        # values under the names the op expects.
        self.score_threshold = score_thresh
        self.nms_threshold = nms_thresh
        self.nms_top_k = pre_nms_top_n
        self.keep_top_k = detections_per_im
        self.nms_eta = nms_eta
@register
@serializable
class MaskMatrixNMS(object):
    """
    Matrix NMS for multi-class masks.

    Args:
        update_threshold (float): Updated threshold of category score in second time.
        pre_nms_top_n (int): Number of total instance to be kept per image before NMS
        post_nms_top_n (int): Number of total instance to be kept per image after NMS.
        kernel (str): 'linear' or 'gaussian'.
        sigma (float): std in gaussian method.

    Input:
        seg_preds (Variable): shape (n, h, w), segmentation feature maps
        seg_masks (Variable): shape (n, h, w), segmentation feature maps
        cate_labels (Variable): shape (n), mask labels in descending order
        cate_scores (Variable): shape (n), mask scores in descending order
        sum_masks (Variable): a float tensor of the sum of seg_masks

    Returns:
        Variable: cate_scores, tensors of shape (n)
    """

    def __init__(self,
                 update_threshold=0.05,
                 pre_nms_top_n=500,
                 post_nms_top_n=100,
                 kernel='gaussian',
                 sigma=2.0):
        super(MaskMatrixNMS, self).__init__()
        self.update_threshold = update_threshold
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        self.kernel = kernel
        self.sigma = sigma

    def _sort_score(self, scores, top_num):
        """Sort scores descending, truncating to ``top_num`` entries when
        there are more than that; returns a (values, indices) pair."""
        # Stashed on ``self`` so the branch closures below can see it.
        self.case_scores = scores

        def fn_1():
            # More than top_num entries: topk truncates and sorts.
            return fluid.layers.topk(self.case_scores, top_num)

        def fn_2():
            # Otherwise just sort everything descending.
            return fluid.layers.argsort(self.case_scores, descending=True)

        sort_inds = fluid.layers.case(
            pred_fn_pairs=[(fluid.layers.shape(scores)[0] > top_num, fn_1)],
            default=fn_2)
        return sort_inds

    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # NOTE(review): despite the ``None`` default, sum_masks is gathered
        # unconditionally below, so callers must always pass it — confirm.
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        seg_masks = fluid.layers.gather(seg_masks, index=sort_inds[1])
        seg_preds = fluid.layers.gather(seg_preds, index=sort_inds[1])
        sum_masks = fluid.layers.gather(sum_masks, index=sort_inds[1])
        cate_scores = sort_inds[0]
        cate_labels = fluid.layers.gather(cate_labels, index=sort_inds[1])

        # Flatten each mask so pairwise products give intersection areas.
        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        # inter.
        inter_matrix = paddle.mm(seg_masks,
                                 fluid.layers.transpose(seg_masks, [1, 0]))
        n_samples = fluid.layers.shape(cate_labels)
        # union.
        sum_masks_x = fluid.layers.reshape(
            fluid.layers.expand(
                sum_masks, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        # iou.
        iou_matrix = paddle.divide(inter_matrix,
                                   paddle.subtract(
                                       paddle.add(sum_masks_x,
                                                  fluid.layers.transpose(
                                                      sum_masks_x, [1, 0])),
                                       inter_matrix))
        # Upper triangle only: each pair counted once, higher-scoring mask
        # (earlier row) suppresses the lower-scoring one.
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = fluid.layers.reshape(
            fluid.layers.expand(
                cate_labels, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        # 1.0 where a pair shares the same class label, else 0.0.
        label_matrix = fluid.layers.cast(
            paddle.equal(cate_labels_x,
                         fluid.layers.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max(paddle.multiply(iou_matrix, label_matrix),
                                    axis=0)
        compensate_iou = fluid.layers.reshape(
            fluid.layers.expand(
                compensate_iou, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        compensate_iou = fluid.layers.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = paddle.multiply(iou_matrix, label_matrix)

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = fluid.layers.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = fluid.layers.exp(-1 * self.sigma *
                                                 (compensate_iou**2))
            decay_coefficient = paddle.min(
                paddle.divide(decay_matrix, compensate_matrix), axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = paddle.multiply(cate_scores, decay_coefficient)

        keep = fluid.layers.where(cate_scores >= self.update_threshold)
        keep = fluid.layers.squeeze(keep, axes=[1])
        # Prevent empty and increase fake data
        keep = fluid.layers.concat([
            keep, fluid.layers.cast(
                fluid.layers.shape(cate_scores)[0] - 1, 'int64')
        ])

        seg_preds = fluid.layers.gather(seg_preds, index=keep)
        cate_scores = fluid.layers.gather(cate_scores, index=keep)
        cate_labels = fluid.layers.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = fluid.layers.gather(seg_preds, index=sort_inds[1])
        cate_scores = sort_inds[0]
        cate_labels = fluid.layers.gather(cate_labels, index=sort_inds[1])
        return seg_preds, cate_scores, cate_labels