faceboxes.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. from paddle import fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from paddle.fluid.regularizer import L2Decay
  21. from ppdet.core.workspace import register
  22. from ppdet.modeling.ops import SSDOutputDecoder
  23. __all__ = ['FaceBoxes']
  24. @register
  25. class FaceBoxes(object):
  26. """
  27. FaceBoxes: A CPU Real-time Face Detector with High Accuracy.
  28. see https://arxiv.org/abs/1708.05234
  29. Args:
  30. backbone (object): backbone instance
  31. output_decoder (object): `SSDOutputDecoder` instance
  32. densities (list|None): the densities of generated density prior boxes,
  33. this attribute should be a list or tuple of integers.
  34. fixed_sizes (list|None): the fixed sizes of generated density prior boxes,
  35. this attribute should a list or tuple of same length with `densities`.
  36. num_classes (int): number of output classes.
  37. steps (list|None): step size of adjacent prior boxes on each feature map.
  38. """
  39. __category__ = 'architecture'
  40. __inject__ = ['backbone', 'output_decoder']
  41. __shared__ = ['num_classes']
  42. def __init__(self,
  43. backbone="FaceBoxNet",
  44. output_decoder=SSDOutputDecoder().__dict__,
  45. densities=[[4, 2, 1], [1], [1]],
  46. fixed_sizes=[[32., 64., 128.], [256.], [512.]],
  47. num_classes=2,
  48. steps=[8., 16., 32.]):
  49. super(FaceBoxes, self).__init__()
  50. self.backbone = backbone
  51. self.num_classes = num_classes
  52. self.output_decoder = output_decoder
  53. if isinstance(output_decoder, dict):
  54. self.output_decoder = SSDOutputDecoder(**output_decoder)
  55. self.densities = densities
  56. self.fixed_sizes = fixed_sizes
  57. self.steps = steps
  58. def build(self, feed_vars, mode='train'):
  59. im = feed_vars['image']
  60. if mode == 'train':
  61. gt_bbox = feed_vars['gt_bbox']
  62. gt_class = feed_vars['gt_class']
  63. body_feats = self.backbone(im)
  64. locs, confs, box, box_var = self._multi_box_head(
  65. inputs=body_feats, image=im, num_classes=self.num_classes)
  66. if mode == 'train':
  67. loss = fluid.layers.ssd_loss(
  68. locs,
  69. confs,
  70. gt_bbox,
  71. gt_class,
  72. box,
  73. box_var,
  74. overlap_threshold=0.35,
  75. neg_overlap=0.35)
  76. loss = fluid.layers.reduce_sum(loss)
  77. return {'loss': loss}
  78. else:
  79. pred = self.output_decoder(locs, confs, box, box_var)
  80. return {'bbox': pred}
  81. def _multi_box_head(self, inputs, image, num_classes=2):
  82. def permute_and_reshape(input, last_dim):
  83. trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
  84. compile_shape = [0, -1, last_dim]
  85. return fluid.layers.reshape(trans, shape=compile_shape)
  86. def _is_list_or_tuple_(data):
  87. return (isinstance(data, list) or isinstance(data, tuple))
  88. locs, confs = [], []
  89. boxes, vars = [], []
  90. b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
  91. for i, input in enumerate(inputs):
  92. densities = self.densities[i]
  93. fixed_sizes = self.fixed_sizes[i]
  94. box, var = fluid.layers.density_prior_box(
  95. input,
  96. image,
  97. densities=densities,
  98. fixed_sizes=fixed_sizes,
  99. fixed_ratios=[1.],
  100. clip=False,
  101. offset=0.5,
  102. steps=[self.steps[i]] * 2)
  103. num_boxes = box.shape[2]
  104. box = fluid.layers.reshape(box, shape=[-1, 4])
  105. var = fluid.layers.reshape(var, shape=[-1, 4])
  106. num_loc_output = num_boxes * 4
  107. num_conf_output = num_boxes * num_classes
  108. # get loc
  109. mbox_loc = fluid.layers.conv2d(
  110. input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
  111. loc = permute_and_reshape(mbox_loc, 4)
  112. # get conf
  113. mbox_conf = fluid.layers.conv2d(
  114. input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
  115. conf = permute_and_reshape(mbox_conf, 2)
  116. locs.append(loc)
  117. confs.append(conf)
  118. boxes.append(box)
  119. vars.append(var)
  120. face_mbox_loc = fluid.layers.concat(locs, axis=1)
  121. face_mbox_conf = fluid.layers.concat(confs, axis=1)
  122. prior_boxes = fluid.layers.concat(boxes)
  123. box_vars = fluid.layers.concat(vars)
  124. return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
  125. def _inputs_def(self, image_shape):
  126. im_shape = [None] + image_shape
  127. # yapf: disable
  128. inputs_def = {
  129. 'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
  130. 'im_id': {'shape': [None, 1], 'dtype': 'int64', 'lod_level': 0},
  131. 'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
  132. 'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
  133. 'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
  134. }
  135. # yapf: enable
  136. return inputs_def
  137. def build_inputs(
  138. self,
  139. image_shape=[3, None, None],
  140. fields=['image', 'im_id', 'gt_bbox', 'gt_class'], # for train
  141. use_dataloader=True,
  142. iterable=False):
  143. inputs_def = self._inputs_def(image_shape)
  144. feed_vars = OrderedDict([(key, fluid.data(
  145. name=key,
  146. shape=inputs_def[key]['shape'],
  147. dtype=inputs_def[key]['dtype'],
  148. lod_level=inputs_def[key]['lod_level'])) for key in fields])
  149. loader = fluid.io.DataLoader.from_generator(
  150. feed_list=list(feed_vars.values()),
  151. capacity=16,
  152. use_double_buffer=True,
  153. iterable=iterable) if use_dataloader else None
  154. return feed_vars, loader
  155. def train(self, feed_vars):
  156. return self.build(feed_vars, 'train')
  157. def eval(self, feed_vars):
  158. return self.build(feed_vars, 'eval')
  159. def test(self, feed_vars, exclude_nms=False):
  160. assert not exclude_nms, "exclude_nms for {} is not support currently".format(
  161. self.__class__.__name__)
  162. return self.build(feed_vars, 'test')
  163. def is_bbox_normalized(self):
  164. return True