12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710 |
- # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import numpy as np
- from numbers import Integral
- import math
- import six
- import paddle
- from paddle import fluid
- from paddle.fluid.layer_helper import LayerHelper
- from paddle.fluid.initializer import NumpyArrayInitializer
- from paddle.fluid.param_attr import ParamAttr
- from paddle.fluid.regularizer import L2Decay
- from ppdet.core.workspace import register, serializable
- from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta
# Public API of this module: op wrappers and layer helpers registered with
# the ppdet workspace. (Some classes defined below, e.g. MultiClassDiouNMS,
# are intentionally not exported here.)
__all__ = [
    'AnchorGenerator', 'AnchorGrid', 'DropBlock', 'RPNTargetAssign',
    'GenerateProposals', 'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner',
    'RoIAlign', 'RoIPool', 'MultiBoxHead', 'SSDLiteMultiBoxHead',
    'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm',
    'DeformConvNorm', 'MultiClassSoftNMS', 'MatrixNMS', 'LibraBBoxAssigner',
    'DeformConv'
]
def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
    """Build the zero-initialized side conv that predicts the offset/mask
    tensor for a deformable convolution.

    Emits 3 channels per kernel position (dx, dy, mask), i.e.
    ``3 * filter_size**2`` output channels in total.
    """
    num_out = 3 * filter_size * filter_size
    # Zero init so the deformable conv starts out as a regular conv.
    weight_attr = ParamAttr(
        initializer=fluid.initializer.Constant(0), name=name + ".w_0")
    bias_para = ParamAttr(
        initializer=fluid.initializer.Constant(0),
        learning_rate=2.,
        regularizer=L2Decay(0.),
        name=name + ".b_0")
    return fluid.layers.conv2d(
        input,
        num_filters=num_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=weight_attr,
        bias_attr=bias_para,
        act=act,
        name=name)
def DeformConv(input,
               num_filters,
               filter_size,
               stride=1,
               groups=1,
               dilation=1,
               lr_scale=1,
               initializer=None,
               bias_attr=False,
               name=None):
    """Modulated deformable convolution block.

    A zero-initialized side branch (``_conv_offset``) predicts per-position
    offsets and a modulation mask, which are then fed to
    ``fluid.layers.deformable_conv``.
    """
    # Optional bias with doubled learning rate and no weight decay.
    bias_para = ParamAttr(
        name=name + "_bias",
        initializer=fluid.initializer.Constant(0),
        regularizer=L2Decay(0.),
        learning_rate=lr_scale * 2) if bias_attr else False
    offset_mask = _conv_offset(
        input=input,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        act=None,
        name=name + "_conv_offset")
    # First 2*k*k channels are (dx, dy) offsets, last k*k are the mask.
    split_sections = [filter_size**2 * 2, filter_size**2]
    offset, mask = fluid.layers.split(
        input=offset_mask, num_or_sections=split_sections, dim=1)
    mask = fluid.layers.sigmoid(mask)
    return fluid.layers.deformable_conv(
        input=input,
        offset=offset,
        mask=mask,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2 * dilation,
        dilation=dilation,
        groups=groups,
        deformable_groups=1,
        im2col_step=1,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=bias_para,
        name=name + ".conv2d.output.1")
def DeformConvNorm(input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   norm_decay=0.,
                   norm_type='affine_channel',
                   norm_groups=32,
                   dilation=1,
                   lr_scale=1,
                   freeze_norm=False,
                   act=None,
                   norm_name=None,
                   initializer=None,
                   bias_attr=False,
                   name=None):
    """Deformable convolution followed by a normalization layer.

    Args:
        input: input feature map variable.
        num_filters (int): number of output channels.
        filter_size (int): kernel size of the deformable conv.
        stride (int): conv stride. Default: 1.
        groups (int): conv groups. Default: 1.
        norm_decay (float): L2 decay applied to the norm scale/offset params.
        norm_type (str): one of 'bn', 'sync_bn', 'affine_channel', 'gn'.
        norm_groups (int): group count used when norm_type is 'gn'.
        dilation (int): conv dilation. Default: 1.
        lr_scale (float): learning-rate multiplier for conv and norm params.
        freeze_norm (bool): if True, norm params get zero learning rate and
            stop_gradient is set on them.
        act (str|None): activation fused into the norm layer.
        norm_name (str): parameter-name prefix for the norm layer.
        initializer: initializer for the conv weights.
        bias_attr (bool): whether the deformable conv has a bias.
        name (str): parameter-name prefix for the conv layer.

    Returns:
        The normalized (and optionally activated) output variable.
    """
    assert norm_type in ['bn', 'sync_bn', 'affine_channel', 'gn']
    conv = DeformConv(input, num_filters, filter_size, stride, groups, dilation,
                      lr_scale, initializer, bias_attr, name)
    # Frozen norm layers keep scale/offset fixed via a zero learning rate.
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    if norm_type in ['bn', 'sync_bn']:
        # Frozen BN uses accumulated global statistics, not batch stats.
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'affine_channel':
        # Per-channel scale/bias with no running statistics.
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
def ConvNorm(input,
             num_filters,
             filter_size,
             stride=1,
             groups=1,
             norm_decay=0.,
             norm_type='affine_channel',
             norm_groups=32,
             dilation=1,
             lr_scale=1,
             freeze_norm=False,
             act=None,
             norm_name=None,
             initializer=None,
             bias_attr=False,
             name=None):
    """Conv2d followed by a normalization layer.

    Mirrors DeformConvNorm but with a plain convolution.

    Args:
        input: input feature map variable.
        num_filters (int): number of output channels.
        filter_size (int): conv kernel size.
        stride (int): conv stride. Default: 1.
        groups (int): conv groups. Default: 1.
        norm_decay (float): L2 decay applied to the norm scale/offset params.
        norm_type (str): one of 'bn', 'sync_bn', 'affine_channel', 'gn'.
        norm_groups (int): group count used when norm_type is 'gn'.
        dilation (int): conv dilation. Default: 1.
        lr_scale (float): learning-rate multiplier for conv and norm params.
        freeze_norm (bool): if True, norm params get zero learning rate and
            stop_gradient is set on them.
        act (str|None): activation fused into the norm layer.
        norm_name (str): parameter-name prefix for the norm layer.
        initializer: initializer for the conv weights.
        bias_attr (bool): whether the conv has a bias.
        name (str): parameter-name prefix for the conv layer.

    Returns:
        The normalized (and optionally activated) output variable.
    """
    # NOTE: the unused local `fan = num_filters` from the original version
    # was removed (dead code).
    if bias_attr:
        bias_para = ParamAttr(
            name=name + "_bias",
            initializer=fluid.initializer.Constant(value=0),
            learning_rate=lr_scale * 2)
    else:
        bias_para = False
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=((filter_size - 1) // 2) * dilation,
        dilation=dilation,
        groups=groups,
        act=None,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=bias_para,
        name=name + '.conv2d.output.1')
    # Frozen norm layers keep scale/offset fixed via a zero learning rate.
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    if norm_type in ['bn', 'sync_bn']:
        # Frozen BN uses accumulated global statistics, not batch stats.
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'affine_channel':
        # Per-channel scale/bias with no running statistics.
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
def DropBlock(input, block_size, keep_prob, is_test):
    """DropBlock regularization: drop contiguous block_size x block_size
    regions of the feature map instead of independent units, then rescale
    the survivors so the expected activation sum is preserved.

    Args:
        input: NCHW feature map variable.
        block_size (int): side length of each dropped block.
        keep_prob (float): probability of keeping a unit.
        is_test (bool): when True the layer is the identity.

    Returns:
        A variable with the same shape as ``input``.
    """
    if is_test:
        return input

    def CalculateGamma(input, block_size, keep_prob):
        # gamma = (1 - keep_prob) * feat_area / (block_area * useful_area):
        # per-unit seed probability that yields the target drop rate once
        # seeds are dilated to full blocks.
        input_shape = fluid.layers.shape(input)
        # NOTE(review): only dim 3 (width) is read, so a square spatial map
        # appears to be assumed — confirm with callers.
        feat_shape_tmp = fluid.layers.slice(input_shape, [0], [3], [4])
        feat_shape_tmp = fluid.layers.cast(feat_shape_tmp, dtype="float32")
        feat_shape_t = fluid.layers.reshape(feat_shape_tmp, [1, 1, 1, 1])
        feat_area = fluid.layers.pow(feat_shape_t, factor=2)
        block_shape_t = fluid.layers.fill_constant(
            shape=[1, 1, 1, 1], value=block_size, dtype='float32')
        block_area = fluid.layers.pow(block_shape_t, factor=2)
        useful_shape_t = feat_shape_t - block_shape_t + 1
        useful_area = fluid.layers.pow(useful_shape_t, factor=2)
        upper_t = feat_area * (1 - keep_prob)
        bottom_t = block_area * useful_area
        output = upper_t / bottom_t
        return output

    gamma = CalculateGamma(input, block_size=block_size, keep_prob=keep_prob)
    input_shape = fluid.layers.shape(input)
    p = fluid.layers.expand_as(gamma, input)
    input_shape_tmp = fluid.layers.cast(input_shape, dtype="int64")
    random_matrix = fluid.layers.uniform_random(
        input_shape_tmp, dtype='float32', min=0.0, max=1.0)
    # Seed mask: 1 where a block will be dropped.
    one_zero_m = fluid.layers.less_than(random_matrix, p)
    one_zero_m.stop_gradient = True
    one_zero_m = fluid.layers.cast(one_zero_m, dtype="float32")
    # Max-pool dilates each seed into a block_size x block_size region.
    mask_flag = fluid.layers.pool2d(
        one_zero_m,
        pool_size=block_size,
        pool_type='max',
        pool_stride=1,
        pool_padding=block_size // 2)
    mask = 1.0 - mask_flag
    # Rescale by (total elements / kept elements) to keep the expected sum.
    elem_numel = fluid.layers.reduce_prod(input_shape)
    elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
    elem_numel_m.stop_gradient = True
    elem_sum = fluid.layers.reduce_sum(mask)
    elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
    elem_sum_m.stop_gradient = True
    output = fluid.layers.elementwise_mul(input,
                                          mask) * elem_numel_m / elem_sum_m
    return output
@register
@serializable
class AnchorGenerator(object):
    """Configuration holder for ``fluid.layers.anchor_generator`` (bound via
    ``__op__``); construction only records the op's keyword arguments."""
    __op__ = fluid.layers.anchor_generator
    __append_doc__ = True

    def __init__(self,
                 stride=[16.0, 16.0],
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1., 2.],
                 variance=[1., 1., 1., 1.]):
        super(AnchorGenerator, self).__init__()
        self.stride = stride
        self.variance = variance
        self.aspect_ratios = aspect_ratios
        self.anchor_sizes = anchor_sizes
- @register
- @serializable
class AnchorGrid(object):
    """Precompute dense anchor boxes for every level of a feature pyramid.

    Args:
        image_size (int or list): input image size; a single integer means a
            square [size, size] image. Default: 512
        min_level (int): lowest pyramid level. Default: 3
        max_level (int): highest pyramid level. Default: 7
        anchor_base_scale: base anchor scale relative to the stride.
            Default: 4
        num_scales: number of octave scales per level. Default: 3
        aspect_ratios: [w, h] aspect-ratio pairs.
            Default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
    """

    def __init__(self,
                 image_size=512,
                 min_level=3,
                 max_level=7,
                 anchor_base_scale=4,
                 num_scales=3,
                 aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
        super(AnchorGrid, self).__init__()
        if isinstance(image_size, Integral):
            image_size = [image_size, image_size]
        self.image_size = image_size
        # Every spatial dim must tile evenly at the coarsest stride.
        for dim in self.image_size:
            assert dim % 2 ** max_level == 0, \
                "image size should be multiple of the max level stride"
        self.min_level = min_level
        self.max_level = max_level
        self.anchor_base_scale = anchor_base_scale
        self.num_scales = num_scales
        self.aspect_ratios = aspect_ratios

    @property
    def base_cell(self):
        # Lazily build and cache the canonical origin-centered anchor cell.
        if not hasattr(self, '_base_cell'):
            self._base_cell = self.make_cell()
        return self._base_cell

    def make_cell(self):
        """Return one (num_scales * num_ratios, 4) anchor set centered at
        the origin, in [x1, y1, x2, y2] form."""
        octaves = np.array(
            [2**(i / self.num_scales) for i in range(self.num_scales)])
        ratio_arr = np.array(self.aspect_ratios)
        half_ws = 0.5 * np.outer(octaves, ratio_arr[:, 0]).reshape(-1, 1)
        half_hs = 0.5 * np.outer(octaves, ratio_arr[:, 1]).reshape(-1, 1)
        return np.hstack((-half_ws, -half_hs, half_ws, half_hs))

    def make_grid(self, stride):
        """Tile the base cell across the image at the given stride."""
        cell = self.base_cell * stride * self.anchor_base_scale
        x_centers = np.arange(stride // 2, self.image_size[1], stride)
        y_centers = np.arange(stride // 2, self.image_size[0], stride)
        grid_x, grid_y = np.meshgrid(x_centers, y_centers)
        grid_x = grid_x.flatten()
        grid_y = grid_y.flatten()
        shifts = np.stack((grid_x, grid_y, grid_x, grid_y), axis=-1)
        shifts = shifts[:, np.newaxis, :]
        return (cell + shifts).reshape(-1, 4)

    def generate(self):
        """Return one anchor array per pyramid level, finest first."""
        return [
            self.make_grid(2**lvl)
            for lvl in range(self.min_level, self.max_level + 1)
        ]

    def __call__(self):
        # Materialize the grids once as persistable graph parameters.
        if not hasattr(self, '_anchor_vars'):
            helper = LayerHelper('anchor_grid')
            anchor_vars = []
            levels = range(self.min_level, self.max_level + 1)
            for idx, lvl in enumerate(levels):
                anchors = self.make_grid(2**lvl)
                var = helper.create_parameter(
                    attr=ParamAttr(name='anchors_{}'.format(idx)),
                    shape=anchors.shape,
                    dtype='float32',
                    stop_gradient=True,
                    default_initializer=NumpyArrayInitializer(anchors))
                var.persistable = True
                anchor_vars.append(var)
            self._anchor_vars = anchor_vars
        return self._anchor_vars
@register
@serializable
class RPNTargetAssign(object):
    """Configuration holder for ``fluid.layers.rpn_target_assign`` (bound
    via ``__op__``); construction only records the op's keyword arguments."""
    __op__ = fluid.layers.rpn_target_assign
    __append_doc__ = True

    def __init__(self,
                 rpn_batch_size_per_im=256,
                 rpn_straddle_thresh=0.,
                 rpn_fg_fraction=0.5,
                 rpn_positive_overlap=0.7,
                 rpn_negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        self.use_random = use_random
        self.rpn_negative_overlap = rpn_negative_overlap
        self.rpn_positive_overlap = rpn_positive_overlap
        self.rpn_fg_fraction = rpn_fg_fraction
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_batch_size_per_im = rpn_batch_size_per_im
@register
@serializable
class GenerateProposals(object):
    """Configuration holder for ``fluid.layers.generate_proposals`` (bound
    via ``__op__``); construction only records the op's keyword arguments."""
    __op__ = fluid.layers.generate_proposals
    __append_doc__ = True

    def __init__(self,
                 pre_nms_top_n=6000,
                 post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(GenerateProposals, self).__init__()
        self.eta = eta
        self.min_size = min_size
        self.nms_thresh = nms_thresh
        self.post_nms_top_n = post_nms_top_n
        self.pre_nms_top_n = pre_nms_top_n
@register
class MaskAssigner(object):
    """Configuration holder for ``fluid.layers.generate_mask_labels`` (bound
    via ``__op__``); ``num_classes`` is shared across the workspace config."""
    __op__ = fluid.layers.generate_mask_labels
    __append_doc__ = True
    __shared__ = ['num_classes']

    def __init__(self, num_classes=81, resolution=14):
        super(MaskAssigner, self).__init__()
        self.resolution = resolution
        self.num_classes = num_classes
@register
@serializable
class MultiClassNMS(object):
    """Configuration holder for ``fluid.layers.multiclass_nms`` (bound via
    ``__op__``); construction only records the op's keyword arguments."""
    __op__ = fluid.layers.multiclass_nms
    __append_doc__ = True

    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        self.background_label = background_label
        self.nms_eta = nms_eta
        self.normalized = normalized
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.nms_top_k = nms_top_k
        self.score_threshold = score_threshold
- @register
- @serializable
class MatrixNMS(object):
    """Configuration holder for matrix NMS.

    Unlike the sibling wrappers, ``__op__`` stores the op's dotted path as a
    string rather than the callable itself.
    """
    __op__ = 'paddle.fluid.layers.matrix_nms'
    __append_doc__ = True

    def __init__(self,
                 score_threshold=.05,
                 post_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 use_gaussian=False,
                 gaussian_sigma=2.,
                 normalized=False,
                 background_label=0):
        super(MatrixNMS, self).__init__()
        self.background_label = background_label
        self.gaussian_sigma = gaussian_sigma
        self.use_gaussian = use_gaussian
        self.normalized = normalized
        self.keep_top_k = keep_top_k
        self.nms_top_k = nms_top_k
        self.post_threshold = post_threshold
        self.score_threshold = score_threshold
@register
@serializable
class MultiClassSoftNMS(object):
    # Soft-NMS implemented in numpy and attached to the graph through
    # fluid.layers.py_func: overlapping boxes are score-decayed (Gaussian)
    # rather than hard-suppressed.
    def __init__(
            self,
            score_threshold=0.01,
            keep_top_k=300,
            softnms_sigma=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold
        self.keep_top_k = keep_top_k
        self.softnms_sigma = softnms_sigma
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Run soft-NMS on predicted boxes/scores.

        Returns a float32 LoD variable of shape [-1, 6] whose rows are
        [class_id, score, x1, y1, x2, y2].
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # py_func needs a pre-created output variable.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _soft_nms_for_cls(dets, sigma, thres):
            """soft_nms_for_cls"""
            # dets rows are [score, x1, y1, x2, y2]; mutated in place.
            dets_final = []
            while len(dets) > 0:
                maxpos = np.argmax(dets[:, 0])
                dets_final.append(dets[maxpos].copy())
                ts, tx1, ty1, tx2, ty2 = dets[maxpos]
                scores = dets[:, 0]
                # force remove bbox at maxpos
                scores[maxpos] = -1
                x1 = dets[:, 1]
                y1 = dets[:, 2]
                x2 = dets[:, 3]
                y2 = dets[:, 4]
                # +1 for integer (un-normalized) coordinates.
                eta = 0 if self.normalized else 1
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)
                xx1 = np.maximum(tx1, x1)
                yy1 = np.maximum(ty1, y1)
                xx2 = np.minimum(tx2, x2)
                yy2 = np.minimum(ty2, y2)
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas + areas[maxpos] - inter)
                # Gaussian decay of overlapping boxes' scores.
                weight = np.exp(-(ovr * ovr) / sigma)
                scores = scores * weight
                idx_keep = np.where(scores >= thres)
                dets[:, 0] = scores
                dets = dets[idx_keep]
            dets_final = np.array(dets_final).reshape(-1, 5)
            return dets_final

        def _soft_nms(bboxes, scores):
            # Per-image soft-NMS over all foreground classes.
            class_nums = scores.shape[-1]
            softnms_thres = self.score_threshold
            softnms_sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Skip the background column when background_label == 0.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= softnms_thres)[0]
                scores_j = scores[inds, j]
                # Boxes may be per-class (rank 3) or shared (rank 2).
                rois_j = bboxes[inds, j, :] if len(
                    bboxes.shape) > 2 else bboxes[inds, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _soft_nms_for_cls(
                    dets_j, sigma=softnms_sigma, thres=softnms_thres)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(
                    -1, 1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes])
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            return pred_result

        def _batch_softnms(bboxes, scores):
            # Split the batch either by LoD offsets or by the leading dim.
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            out_offsets = [0]
            pred_res = []
            if len(batch_offsets) > 0:
                batch_offset = batch_offsets[0]
                for i in range(len(batch_offset) - 1):
                    s, e = batch_offset[i], batch_offset[i + 1]
                    pred = _soft_nms(bboxes[s:e], scores[s:e])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            else:
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                for i in range(bboxes.shape[0]):
                    pred = _soft_nms(bboxes[i], scores[i])
                    out_offsets.append(pred.shape[0] + out_offsets[-1])
                    pred_res.append(pred)
            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            # Placeholder row when nothing survived, to keep a valid tensor.
            if len(pred_res) == 0:
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result
@register
@serializable
class MultiClassDiouNMS(object):
    # Greedy NMS with a DIoU penalty term, implemented in numpy and attached
    # to the graph through fluid.layers.py_func.
    def __init__(
            self,
            score_threshold=0.05,
            keep_top_k=100,
            nms_threshold=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Run DIoU-NMS on predicted boxes/scores.

        Returns a float32 variable of shape [-1, 6] whose rows are
        [class_id, score, x1, y1, x2, y2].
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            # py_func needs a pre-created output variable.
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _calc_diou_term(dets1, dets2):
            # Normalized center-distance penalty:
            # (center distance)^2 / (enclosing-box diagonal)^2.
            eps = 1.e-10
            eta = 0 if self.normalized else 1
            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            w = x2 - x1 + eta
            h = y2 - y1 + eta
            cxg = (x1g + x2g) / 2
            cyg = (y1g + y2g) / 2
            wg = x2g - x1g + eta
            hg = y2g - y1g + eta
            # Clamp so x2 >= x1, y2 >= y1 (degenerate boxes).
            x2 = np.maximum(x1, x2)
            y2 = np.maximum(y1, y2)
            # A or B
            xc1 = np.minimum(x1, x1g)
            yc1 = np.minimum(y1, y1g)
            xc2 = np.maximum(x2, x2g)
            yc2 = np.maximum(y2, y2g)
            # DIOU term
            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
            diou_term = (dist_intersection + eps) / (dist_union + eps)
            return diou_term

        def _diou_nms_for_cls(dets, thres):
            """_diou_nms_for_cls"""
            # dets rows are [score, x1, y1, x2, y2], pre-sorted by score.
            scores = dets[:, 0]
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            # +1 for integer (un-normalized) coordinates.
            eta = 0 if self.normalized else 1
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            dt_num = dets.shape[0]
            order = np.array(range(dt_num))
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                xx1 = np.maximum(x1[i], x1[order[1:]])
                yy1 = np.maximum(y1[i], y1[order[1:]])
                xx2 = np.minimum(x2[i], x2[order[1:]])
                yy2 = np.minimum(y2[i], y2[order[1:]])
                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
                diou_term = _calc_diou_term([x1[i], y1[i], x2[i], y2[i]], [
                    x1[order[1:]], y1[order[1:]], x2[order[1:]], y2[order[1:]]
                ])
                # Suppress on IoU minus the DIoU center-distance penalty.
                inds = np.where(ovr - diou_term <= thres)[0]
                order = order[inds + 1]
            dets_final = dets[keep]
            return dets_final

        def _diou_nms(bboxes, scores):
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]
            score_threshold = self.score_threshold
            nms_threshold = self.nms_threshold
            keep_top_k = self.keep_top_k
            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]
            # Skip the background column when background_label == 0.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= score_threshold)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]
                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(
                    -1, 1)
            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)
            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]
            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            # Placeholder row when nothing survived, to keep a valid tensor.
            if pred_result.shape[0] == 0:
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
@register
class BBoxAssigner(object):
    """Configuration holder for ``fluid.layers.generate_proposal_labels``
    (bound via ``__op__``); ``num_classes`` is shared across the workspace
    config. Note the renamed attributes: ``num_classes`` is stored as
    ``class_nums`` and ``shuffle_before_sample`` as ``use_random``."""
    __op__ = fluid.layers.generate_proposal_labels
    __append_doc__ = True
    __shared__ = ['num_classes']

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.use_random = shuffle_before_sample
        self.class_nums = num_classes
        self.bbox_reg_weights = bbox_reg_weights
        self.bg_thresh_lo = bg_thresh_lo
        self.bg_thresh_hi = bg_thresh_hi
        self.fg_thresh = fg_thresh
        self.fg_fraction = fg_fraction
        self.batch_size_per_im = batch_size_per_im
- @register
- class LibraBBoxAssigner(object):
- __shared__ = ['num_classes']
- def __init__(self,
- batch_size_per_im=512,
- fg_fraction=.25,
- fg_thresh=.5,
- bg_thresh_hi=.5,
- bg_thresh_lo=0.,
- bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
- num_classes=81,
- shuffle_before_sample=True,
- is_cls_agnostic=False,
- num_bins=3):
- super(LibraBBoxAssigner, self).__init__()
- self.batch_size_per_im = batch_size_per_im
- self.fg_fraction = fg_fraction
- self.fg_thresh = fg_thresh
- self.bg_thresh_hi = bg_thresh_hi
- self.bg_thresh_lo = bg_thresh_lo
- self.bbox_reg_weights = bbox_reg_weights
- self.class_nums = num_classes
- self.use_random = shuffle_before_sample
- self.is_cls_agnostic = is_cls_agnostic
- self.num_bins = num_bins
- def __call__(
- self,
- rpn_rois,
- gt_classes,
- is_crowd,
- gt_boxes,
- im_info, ):
- return self.generate_proposal_label_libra(
- rpn_rois=rpn_rois,
- gt_classes=gt_classes,
- is_crowd=is_crowd,
- gt_boxes=gt_boxes,
- im_info=im_info,
- batch_size_per_im=self.batch_size_per_im,
- fg_fraction=self.fg_fraction,
- fg_thresh=self.fg_thresh,
- bg_thresh_hi=self.bg_thresh_hi,
- bg_thresh_lo=self.bg_thresh_lo,
- bbox_reg_weights=self.bbox_reg_weights,
- class_nums=self.class_nums,
- use_random=self.use_random,
- is_cls_agnostic=self.is_cls_agnostic,
- is_cascade_rcnn=False)
- def generate_proposal_label_libra(
- self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
- batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
- bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
- is_cls_agnostic, is_cascade_rcnn):
- num_bins = self.num_bins
- def create_tmp_var(program, name, dtype, shape, lod_level=None):
- return program.current_block().create_var(
- name=name, dtype=dtype, shape=shape, lod_level=lod_level)
- def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
- if len(pos_inds) <= num_expected:
- return pos_inds
- else:
- unique_gt_inds = np.unique(max_classes[pos_inds])
- num_gts = len(unique_gt_inds)
- num_per_gt = int(round(num_expected / float(num_gts)) + 1)
- sampled_inds = []
- for i in unique_gt_inds:
- inds = np.nonzero(max_classes == i)[0]
- before_len = len(inds)
- inds = list(set(inds) & set(pos_inds))
- after_len = len(inds)
- if len(inds) > num_per_gt:
- inds = np.random.choice(
- inds, size=num_per_gt, replace=False)
- sampled_inds.extend(list(inds)) # combine as a new sampler
- if len(sampled_inds) < num_expected:
- num_extra = num_expected - len(sampled_inds)
- extra_inds = np.array(
- list(set(pos_inds) - set(sampled_inds)))
- assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
- "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
- len(sampled_inds), len(extra_inds), len(pos_inds))
- if len(extra_inds) > num_extra:
- extra_inds = np.random.choice(
- extra_inds, size=num_extra, replace=False)
- sampled_inds.extend(extra_inds.tolist())
- elif len(sampled_inds) > num_expected:
- sampled_inds = np.random.choice(
- sampled_inds, size=num_expected, replace=False)
- return sampled_inds
- def sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
- num_bins, bg_thresh_hi):
- max_iou = max_overlaps.max()
- iou_interval = (max_iou - floor_thr) / num_bins
- per_num_expected = int(num_expected / num_bins)
- sampled_inds = []
- for i in range(num_bins):
- start_iou = floor_thr + i * iou_interval
- end_iou = floor_thr + (i + 1) * iou_interval
- tmp_set = set(
- np.where(
- np.logical_and(max_overlaps >= start_iou, max_overlaps <
- end_iou))[0])
- tmp_inds = list(tmp_set & full_set)
- if len(tmp_inds) > per_num_expected:
- tmp_sampled_set = np.random.choice(
- tmp_inds, size=per_num_expected, replace=False)
- else:
- tmp_sampled_set = np.array(tmp_inds, dtype=np.int)
- sampled_inds.append(tmp_sampled_set)
- sampled_inds = np.concatenate(sampled_inds)
- if len(sampled_inds) < num_expected:
- num_extra = num_expected - len(sampled_inds)
- extra_inds = np.array(list(full_set - set(sampled_inds)))
- assert len(sampled_inds)+len(extra_inds) == len(full_set), \
- "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
- len(sampled_inds), len(extra_inds), len(full_set))
- if len(extra_inds) > num_extra:
- extra_inds = np.random.choice(
- extra_inds, num_extra, replace=False)
- sampled_inds = np.concatenate([sampled_inds, extra_inds])
- return sampled_inds
- def _sample_neg(max_overlaps,
- max_classes,
- neg_inds,
- num_expected,
- floor_thr=-1,
- floor_fraction=0,
- num_bins=3,
- bg_thresh_hi=0.5):
- if len(neg_inds) <= num_expected:
- return neg_inds
- else:
- # balance sampling for negative samples
- neg_set = set(neg_inds)
- if floor_thr > 0:
- floor_set = set(
- np.where(
- np.logical_and(max_overlaps >= 0, max_overlaps <
- floor_thr))[0])
- iou_sampling_set = set(
- np.where(max_overlaps >= floor_thr)[0])
- elif floor_thr == 0:
- floor_set = set(np.where(max_overlaps == 0)[0])
- iou_sampling_set = set(
- np.where(max_overlaps > floor_thr)[0])
- else:
- floor_set = set()
- iou_sampling_set = set(
- np.where(max_overlaps > floor_thr)[0])
- floor_thr = 0
- floor_neg_inds = list(floor_set & neg_set)
- iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
- num_expected_iou_sampling = int(num_expected *
- (1 - floor_fraction))
- if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
- if num_bins >= 2:
- iou_sampled_inds = sample_via_interval(
- max_overlaps,
- set(iou_sampling_neg_inds),
- num_expected_iou_sampling, floor_thr, num_bins,
- bg_thresh_hi)
- else:
- iou_sampled_inds = np.random.choice(
- iou_sampling_neg_inds,
- size=num_expected_iou_sampling,
- replace=False)
- else:
- iou_sampled_inds = np.array(
- iou_sampling_neg_inds, dtype=np.int)
- num_expected_floor = num_expected - len(iou_sampled_inds)
- if len(floor_neg_inds) > num_expected_floor:
- sampled_floor_inds = np.random.choice(
- floor_neg_inds, size=num_expected_floor, replace=False)
- else:
- sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int)
- sampled_inds = np.concatenate(
- (sampled_floor_inds, iou_sampled_inds))
- if len(sampled_inds) < num_expected:
- num_extra = num_expected - len(sampled_inds)
- extra_inds = np.array(list(neg_set - set(sampled_inds)))
- if len(extra_inds) > num_extra:
- extra_inds = np.random.choice(
- extra_inds, size=num_extra, replace=False)
- sampled_inds = np.concatenate((sampled_inds, extra_inds))
- return sampled_inds
- def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
- batch_size_per_im, fg_fraction, fg_thresh,
- bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
- class_nums, use_random, is_cls_agnostic,
- is_cascade_rcnn):
- rois_per_image = int(batch_size_per_im)
- fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
- # Roidb
- im_scale = im_info[2]
- inv_im_scale = 1. / im_scale
- rpn_rois = rpn_rois * inv_im_scale
- if is_cascade_rcnn:
- rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
- boxes = np.vstack([gt_boxes, rpn_rois])
- gt_overlaps = np.zeros((boxes.shape[0], class_nums))
- box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
- if len(gt_boxes) > 0:
- proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
- overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
- overlaps_max = proposal_to_gt_overlaps.max(axis=1)
- # Boxes which with non-zero overlap with gt boxes
- overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
- overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
- overlapped_boxes_ind]]
- for idx in range(len(overlapped_boxes_ind)):
- gt_overlaps[overlapped_boxes_ind[
- idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
- overlapped_boxes_ind[idx]]
- box_to_gt_ind_map[overlapped_boxes_ind[
- idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]
- crowd_ind = np.where(is_crowd)[0]
- gt_overlaps[crowd_ind] = -1
- max_overlaps = gt_overlaps.max(axis=1)
- max_classes = gt_overlaps.argmax(axis=1)
- # Cascade RCNN Decode Filter
- if is_cascade_rcnn:
- ws = boxes[:, 2] - boxes[:, 0] + 1
- hs = boxes[:, 3] - boxes[:, 1] + 1
- keep = np.where((ws > 0) & (hs > 0))[0]
- boxes = boxes[keep]
- max_overlaps = max_overlaps[keep]
- fg_inds = np.where(max_overlaps >= fg_thresh)[0]
- bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
- max_overlaps >= bg_thresh_lo))[0]
- fg_rois_per_this_image = fg_inds.shape[0]
- bg_rois_per_this_image = bg_inds.shape[0]
- else:
- # Foreground
- fg_inds = np.where(max_overlaps >= fg_thresh)[0]
- fg_rois_per_this_image = np.minimum(fg_rois_per_im,
- fg_inds.shape[0])
- # Sample foreground if there are too many
- if fg_inds.shape[0] > fg_rois_per_this_image:
- if use_random:
- fg_inds = _sample_pos(max_overlaps, max_classes,
- fg_inds, fg_rois_per_this_image)
- fg_inds = fg_inds[:fg_rois_per_this_image]
- # Background
- bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
- max_overlaps >= bg_thresh_lo))[0]
- bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
- bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
- bg_inds.shape[0])
- assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
- bg_rois_per_this_image)
- # Sample background if there are too many
- if bg_inds.shape[0] > bg_rois_per_this_image:
- if use_random:
- # libra neg sample
- bg_inds = _sample_neg(
- max_overlaps,
- max_classes,
- bg_inds,
- bg_rois_per_this_image,
- num_bins=num_bins,
- bg_thresh_hi=bg_thresh_hi)
- bg_inds = bg_inds[:bg_rois_per_this_image]
- keep_inds = np.append(fg_inds, bg_inds)
- sampled_labels = max_classes[keep_inds] # N x 1
- sampled_labels[fg_rois_per_this_image:] = 0
- sampled_boxes = boxes[keep_inds] # N x 324
- sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
- sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
- bbox_label_targets = _compute_targets(
- sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
- bbox_targets, bbox_inside_weights = _expand_bbox_targets(
- bbox_label_targets, class_nums, is_cls_agnostic)
- bbox_outside_weights = np.array(
- bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
- # Scale rois
- sampled_rois = sampled_boxes * im_scale
- # Faster RCNN blobs
- frcn_blobs = dict(
- rois=sampled_rois,
- labels_int32=sampled_labels,
- bbox_targets=bbox_targets,
- bbox_inside_weights=bbox_inside_weights,
- bbox_outside_weights=bbox_outside_weights)
- return frcn_blobs
- def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
- assert roi_boxes.shape[0] == gt_boxes.shape[0]
- assert roi_boxes.shape[1] == 4
- assert gt_boxes.shape[1] == 4
- targets = np.zeros(roi_boxes.shape)
- bbox_reg_weights = np.asarray(bbox_reg_weights)
- targets = box_to_delta(
- ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)
- return np.hstack([labels[:, np.newaxis], targets]).astype(
- np.float32, copy=False)
- def _expand_bbox_targets(bbox_targets_input, class_nums,
- is_cls_agnostic):
- class_labels = bbox_targets_input[:, 0]
- fg_inds = np.where(class_labels > 0)[0]
- bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
- if not is_cls_agnostic else 4 * 2))
- bbox_inside_weights = np.zeros(bbox_targets.shape)
- for ind in fg_inds:
- class_label = int(class_labels[
- ind]) if not is_cls_agnostic else 1
- start_ind = class_label * 4
- end_ind = class_label * 4 + 4
- bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind,
- 1:]
- bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
- 1.0)
- return bbox_targets, bbox_inside_weights
- def generate_func(
- rpn_rois,
- gt_classes,
- is_crowd,
- gt_boxes,
- im_info, ):
- rpn_rois_lod = rpn_rois.lod()[0]
- gt_classes_lod = gt_classes.lod()[0]
- # convert
- rpn_rois = np.array(rpn_rois)
- gt_classes = np.array(gt_classes)
- is_crowd = np.array(is_crowd)
- gt_boxes = np.array(gt_boxes)
- im_info = np.array(im_info)
- rois = []
- labels_int32 = []
- bbox_targets = []
- bbox_inside_weights = []
- bbox_outside_weights = []
- lod = [0]
- for idx in range(len(rpn_rois_lod) - 1):
- rois_si = rpn_rois_lod[idx]
- rois_ei = rpn_rois_lod[idx + 1]
- gt_si = gt_classes_lod[idx]
- gt_ei = gt_classes_lod[idx + 1]
- frcn_blobs = _sample_rois(
- rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
- is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
- batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
- bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
- is_cls_agnostic, is_cascade_rcnn)
- lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
- rois.append(frcn_blobs['rois'])
- labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
- bbox_targets.append(frcn_blobs['bbox_targets'])
- bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
- bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
- rois = np.vstack(rois)
- labels_int32 = np.vstack(labels_int32)
- bbox_targets = np.vstack(bbox_targets)
- bbox_inside_weights = np.vstack(bbox_inside_weights)
- bbox_outside_weights = np.vstack(bbox_outside_weights)
- # create lod-tensor for return
- # notice that the func create_lod_tensor does not work well here
- ret_rois = fluid.LoDTensor()
- ret_rois.set_lod([lod])
- ret_rois.set(rois.astype("float32"), fluid.CPUPlace())
- ret_labels_int32 = fluid.LoDTensor()
- ret_labels_int32.set_lod([lod])
- ret_labels_int32.set(labels_int32.astype("int32"), fluid.CPUPlace())
- ret_bbox_targets = fluid.LoDTensor()
- ret_bbox_targets.set_lod([lod])
- ret_bbox_targets.set(
- bbox_targets.astype("float32"), fluid.CPUPlace())
- ret_bbox_inside_weights = fluid.LoDTensor()
- ret_bbox_inside_weights.set_lod([lod])
- ret_bbox_inside_weights.set(
- bbox_inside_weights.astype("float32"), fluid.CPUPlace())
- ret_bbox_outside_weights = fluid.LoDTensor()
- ret_bbox_outside_weights.set_lod([lod])
- ret_bbox_outside_weights.set(
- bbox_outside_weights.astype("float32"), fluid.CPUPlace())
- return ret_rois, ret_labels_int32, ret_bbox_targets, ret_bbox_inside_weights, ret_bbox_outside_weights
- rois = create_tmp_var(
- fluid.default_main_program(),
- name=None,
- dtype='float32',
- shape=[-1, 4], )
- bbox_inside_weights = create_tmp_var(
- fluid.default_main_program(),
- name=None,
- dtype='float32',
- shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
- bbox_outside_weights = create_tmp_var(
- fluid.default_main_program(),
- name=None,
- dtype='float32',
- shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
- bbox_targets = create_tmp_var(
- fluid.default_main_program(),
- name=None,
- dtype='float32',
- shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
- labels_int32 = create_tmp_var(
- fluid.default_main_program(),
- name=None,
- dtype='int32',
- shape=[-1, 1], )
- outs = [
- rois, labels_int32, bbox_targets, bbox_inside_weights,
- bbox_outside_weights
- ]
- fluid.layers.py_func(
- func=generate_func,
- x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
- out=outs)
- return outs
@register
class RoIAlign(object):
    """Configuration wrapper for ``fluid.layers.roi_align``."""
    __op__ = fluid.layers.roi_align
    __append_doc__ = True

    def __init__(self, resolution=7, spatial_scale=1. / 16, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        # A scalar resolution means a square pooled output.
        if isinstance(resolution, Integral):
            resolution = [resolution, resolution]
        self.pooled_height, self.pooled_width = resolution[0], resolution[1]
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
@register
class RoIPool(object):
    """Configuration wrapper for ``fluid.layers.roi_pool``."""
    __op__ = fluid.layers.roi_pool
    __append_doc__ = True

    def __init__(self, resolution=7, spatial_scale=1. / 16):
        super(RoIPool, self).__init__()
        # A scalar resolution means a square pooled output.
        if isinstance(resolution, Integral):
            resolution = [resolution, resolution]
        self.pooled_height, self.pooled_width = resolution[0], resolution[1]
        self.spatial_scale = spatial_scale
@register
class MultiBoxHead(object):
    """Configuration wrapper for ``fluid.layers.multi_box_head``."""
    __op__ = fluid.layers.multi_box_head
    __append_doc__ = True

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
                 max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 min_max_aspect_ratios_order=False,
                 kernel_size=1,
                 pad=0):
        super(MultiBoxHead, self).__init__()
        # Prior-box size/ratio configuration.
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.aspect_ratios = aspect_ratios
        # Sampling-grid configuration.
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
        # Predictor-convolution configuration.
        self.kernel_size = kernel_size
        self.pad = pad
@register
@serializable
class SSDLiteMultiBoxHead(object):
    """SSDLite multi-box head.

    Predicts box locations and class confidences with depthwise-separable
    convolutions and generates the matching prior boxes for every input
    feature map.
    """

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=None,
                 max_sizes=None,
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 clip=False,
                 pad=0,
                 conv_decay=0.0):
        super(SSDLiteMultiBoxHead, self).__init__()
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        # When both are None they are derived from the ratio range on the
        # first __call__.
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.pad = pad
        self.clip = clip
        self.conv_decay = conv_decay

    def _separable_conv(self, input, num_filters, name):
        # Depthwise 3x3 conv -> BN -> ReLU6 -> pointwise 1x1 conv.
        dwconv_param_attr = ParamAttr(
            name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay))
        num_filter1 = input.shape[1]
        depthwise_conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filter1,
            filter_size=3,
            stride=1,
            padding="SAME",
            groups=int(num_filter1),
            act=None,
            use_cudnn=False,
            param_attr=dwconv_param_attr,
            bias_attr=False)
        bn_name = name + '_bn'
        bn_param_attr = ParamAttr(
            name=bn_name + "_scale", regularizer=L2Decay(0.0))
        bn_bias_attr = ParamAttr(
            name=bn_name + "_offset", regularizer=L2Decay(0.0))
        bn = fluid.layers.batch_norm(
            input=depthwise_conv,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        bn = fluid.layers.relu6(bn)
        pwconv_param_attr = ParamAttr(
            name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay))
        pointwise_conv = fluid.layers.conv2d(
            input=bn,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            act=None,
            use_cudnn=True,
            param_attr=pwconv_param_attr,
            bias_attr=False)
        return pointwise_conv

    def __call__(self, inputs, image, num_classes):
        def _permute_and_reshape(input, last_dim):
            # NCHW -> NHWC, then flatten to (N, -1, last_dim).
            trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
            compile_shape = [0, -1, last_dim]
            return fluid.layers.reshape(trans, shape=compile_shape)

        def _is_list_or_tuple_(data):
            return isinstance(data, (list, tuple))

        if self.min_sizes is None and self.max_sizes is None:
            # Derive per-layer sizes from [min_ratio, max_ratio].
            # NOTE: this mutates self.min_sizes/self.max_sizes on the first
            # call so later calls reuse the derived values.
            num_layer = len(inputs)
            self.min_sizes = []
            self.max_sizes = []
            step = int(
                math.floor(((self.max_ratio - self.min_ratio)) / (
                    num_layer - 2)))
            for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1,
                                         step):
                self.min_sizes.append(self.base_size * ratio / 100.)
                self.max_sizes.append(self.base_size * (ratio + step) / 100.)
            self.min_sizes = [self.base_size * .10] + self.min_sizes
            self.max_sizes = [self.base_size * .20] + self.max_sizes

        locs, confs = [], []
        boxes, mvars = [], []
        for i, input in enumerate(inputs):
            min_size = self.min_sizes[i]
            max_size = self.max_sizes[i]
            if not _is_list_or_tuple_(min_size):
                min_size = [min_size]
            if not _is_list_or_tuple_(max_size):
                max_size = [max_size]
            step = [
                self.steps[i] if self.steps else 0.0,
                self.steps[i] if self.steps else 0.0
            ]
            box, var = fluid.layers.prior_box(
                input,
                image,
                min_sizes=min_size,
                max_sizes=max_size,
                steps=step,
                aspect_ratios=self.aspect_ratios[i],
                variance=[0.1, 0.1, 0.2, 0.2],
                clip=self.clip,
                flip=self.flip,
                # Fix: was hard-coded 0.5, silently ignoring a configured
                # offset; default value keeps the old behavior.
                offset=self.offset)
            num_boxes = box.shape[2]
            box = fluid.layers.reshape(box, shape=[-1, 4])
            var = fluid.layers.reshape(var, shape=[-1, 4])
            num_loc_output = num_boxes * 4
            num_conf_output = num_boxes * num_classes
            # get loc
            mbox_loc = self._separable_conv(input, num_loc_output,
                                            "loc_{}".format(i + 1))
            loc = _permute_and_reshape(mbox_loc, 4)
            # get conf
            mbox_conf = self._separable_conv(input, num_conf_output,
                                             "conf_{}".format(i + 1))
            conf = _permute_and_reshape(mbox_conf, num_classes)
            locs.append(loc)
            confs.append(conf)
            boxes.append(box)
            mvars.append(var)
        ssd_mbox_loc = fluid.layers.concat(locs, axis=1)
        ssd_mbox_conf = fluid.layers.concat(confs, axis=1)
        prior_boxes = fluid.layers.concat(boxes)
        box_vars = fluid.layers.concat(mvars)
        prior_boxes.stop_gradient = True
        box_vars.stop_gradient = True
        return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars
@register
@serializable
class SSDOutputDecoder(object):
    """Configuration wrapper for ``fluid.layers.detection_output``."""
    __op__ = fluid.layers.detection_output
    __append_doc__ = True

    def __init__(self,
                 nms_threshold=0.45,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0,
                 return_index=False):
        super(SSDOutputDecoder, self).__init__()
        # Stored in signature order for readability.
        self.nms_threshold = nms_threshold
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        self.score_threshold = score_threshold
        self.nms_eta = nms_eta
        self.background_label = background_label
        self.return_index = return_index
@register
@serializable
class RetinaTargetAssign(object):
    """Configuration wrapper for ``fluid.layers.retinanet_target_assign``."""
    __op__ = fluid.layers.retinanet_target_assign
    __append_doc__ = True

    def __init__(self, positive_overlap=0.5, negative_overlap=0.4):
        super(RetinaTargetAssign, self).__init__()
        # IoU thresholds separating positive and negative anchors.
        self.positive_overlap = positive_overlap
        self.negative_overlap = negative_overlap
@register
@serializable
class RetinaOutputDecoder(object):
    """Configuration wrapper for ``fluid.layers.retinanet_detection_output``."""
    __op__ = fluid.layers.retinanet_detection_output
    __append_doc__ = True

    def __init__(self,
                 score_thresh=0.05,
                 nms_thresh=0.3,
                 pre_nms_top_n=1000,
                 detections_per_im=100,
                 nms_eta=1.0):
        super(RetinaOutputDecoder, self).__init__()
        # Constructor argument names are remapped onto the attribute names
        # the underlying op expects.
        self.score_threshold = score_thresh
        self.nms_threshold = nms_thresh
        self.nms_top_k = pre_nms_top_n
        self.keep_top_k = detections_per_im
        self.nms_eta = nms_eta
@register
@serializable
class MaskMatrixNMS(object):
    """
    Matrix NMS for multi-class masks.

    Args:
        update_threshold (float): Updated threshold of category score in second time.
        pre_nms_top_n (int): Number of total instance to be kept per image before NMS
        post_nms_top_n (int): Number of total instance to be kept per image after NMS.
        kernel (str): 'linear' or 'gaussian'.
        sigma (float): std in gaussian method.

    Input:
        seg_preds (Variable): shape (n, h, w), segmentation feature maps
        seg_masks (Variable): shape (n, h, w), segmentation feature maps
        cate_labels (Variable): shape (n), mask labels in descending order
        cate_scores (Variable): shape (n), mask scores in descending order
        sum_masks (Variable): a float tensor of the sum of seg_masks

    Returns:
        Variable: cate_scores, tensors of shape (n)
    """

    def __init__(self,
                 update_threshold=0.05,
                 pre_nms_top_n=500,
                 post_nms_top_n=100,
                 kernel='gaussian',
                 sigma=2.0):
        super(MaskMatrixNMS, self).__init__()
        self.update_threshold = update_threshold
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        self.kernel = kernel
        self.sigma = sigma

    def _sort_score(self, scores, top_num):
        # Return (sorted_scores, indices): top-k when more than top_num
        # candidates exist, otherwise a full descending argsort.
        self.case_scores = scores

        def fn_1():
            return fluid.layers.topk(self.case_scores, top_num)

        def fn_2():
            return fluid.layers.argsort(self.case_scores, descending=True)

        sort_inds = fluid.layers.case(
            pred_fn_pairs=[(fluid.layers.shape(scores)[0] > top_num, fn_1)],
            default=fn_2)
        return sort_inds

    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # Keep at most pre_nms_top_n candidates, ordered by score.
        order = self._sort_score(cate_scores, self.pre_nms_top_n)
        top_idx = order[1]
        seg_masks = fluid.layers.gather(seg_masks, index=top_idx)
        seg_preds = fluid.layers.gather(seg_preds, index=top_idx)
        sum_masks = fluid.layers.gather(sum_masks, index=top_idx)
        cate_scores = order[0]
        cate_labels = fluid.layers.gather(cate_labels, index=top_idx)

        # Pairwise mask intersection.
        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        inter_matrix = paddle.mm(seg_masks,
                                 fluid.layers.transpose(seg_masks, [1, 0]))
        n_samples = fluid.layers.shape(cate_labels)
        # Pairwise union built from the per-mask areas.
        sum_masks_x = fluid.layers.reshape(
            fluid.layers.expand(
                sum_masks, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        # Pairwise IoU; keep upper triangle (higher-score vs lower-score).
        iou_matrix = paddle.divide(
            inter_matrix,
            paddle.subtract(
                paddle.add(sum_masks_x,
                           fluid.layers.transpose(sum_masks_x, [1, 0])),
                inter_matrix))
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # Restrict decay to same-label pairs.
        cate_labels_x = fluid.layers.reshape(
            fluid.layers.expand(
                cate_labels, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        label_matrix = fluid.layers.cast(
            paddle.equal(cate_labels_x,
                         fluid.layers.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)
        # IoU compensation: strongest same-label overlap per instance.
        compensate_iou = paddle.max(
            paddle.multiply(iou_matrix, label_matrix), axis=0)
        compensate_iou = fluid.layers.reshape(
            fluid.layers.expand(
                compensate_iou, expand_times=[n_samples]),
            shape=[n_samples, n_samples])
        compensate_iou = fluid.layers.transpose(compensate_iou, [1, 0])
        # IoU used for decay.
        decay_iou = paddle.multiply(iou_matrix, label_matrix)
        # Matrix NMS decay coefficient.
        if self.kernel == 'gaussian':
            decay_matrix = fluid.layers.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = fluid.layers.exp(-1 * self.sigma *
                                                 (compensate_iou**2))
            decay_coefficient = paddle.min(
                paddle.divide(decay_matrix, compensate_matrix), axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError
        # Decay the scores and drop low-scoring instances.
        cate_scores = paddle.multiply(cate_scores, decay_coefficient)
        keep = fluid.layers.where(cate_scores >= self.update_threshold)
        keep = fluid.layers.squeeze(keep, axes=[1])
        # Prevent empty selection by always appending the last index.
        keep = fluid.layers.concat([
            keep, fluid.layers.cast(
                fluid.layers.shape(cate_scores)[0] - 1, 'int64')
        ])
        seg_preds = fluid.layers.gather(seg_preds, index=keep)
        cate_scores = fluid.layers.gather(cate_scores, index=keep)
        cate_labels = fluid.layers.gather(cate_labels, index=keep)
        # Final top-k by the decayed scores.
        order = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = fluid.layers.gather(seg_preds, index=order[1])
        cate_scores = order[0]
        cate_labels = fluid.layers.gather(cate_labels, index=order[1])
        return seg_preds, cate_scores, cate_labels