blazeface.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. from paddle import fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from paddle.fluid.regularizer import L2Decay
  21. from ppdet.core.workspace import register
  22. from ppdet.modeling.ops import SSDOutputDecoder
  23. from ppdet.modeling.losses import SSDWithLmkLoss
# Public names exported by `from <this module> import *`.
__all__ = ['BlazeFace']
  25. @register
  26. class BlazeFace(object):
  27. """
  28. BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
  29. see https://arxiv.org/abs/1907.05047
  30. Args:
  31. backbone (object): backbone instance
  32. output_decoder (object): `SSDOutputDecoder` instance
  33. min_sizes (list|None): min sizes of generated prior boxes.
  34. max_sizes (list|None): max sizes of generated prior boxes. Default: None.
  35. steps (list|None): step size of adjacent prior boxes on each feature map.
  36. num_classes (int): number of output classes
  37. use_density_prior_box (bool): whether or not use density_prior_box
  38. instead of prior_box
  39. densities (list|None): the densities of generated density prior boxes,
  40. this attribute should be a list or tuple of integers
  41. """
  42. __category__ = 'architecture'
  43. __inject__ = ['backbone', 'output_decoder']
  44. __shared__ = ['num_classes', 'with_lmk']
  45. def __init__(self,
  46. backbone="BlazeNet",
  47. output_decoder=SSDOutputDecoder().__dict__,
  48. min_sizes=[[16., 24.], [32., 48., 64., 80., 96., 128.]],
  49. max_sizes=None,
  50. steps=[8., 16.],
  51. num_classes=2,
  52. use_density_prior_box=False,
  53. densities=[[2, 2], [2, 1, 1, 1, 1, 1]],
  54. with_lmk=False,
  55. lmk_loss=SSDWithLmkLoss().__dict__):
  56. super(BlazeFace, self).__init__()
  57. self.backbone = backbone
  58. self.num_classes = num_classes
  59. self.with_lmk = with_lmk
  60. self.output_decoder = output_decoder
  61. if isinstance(output_decoder, dict):
  62. if self.with_lmk:
  63. output_decoder['return_index'] = True
  64. self.output_decoder = SSDOutputDecoder(**output_decoder)
  65. self.min_sizes = min_sizes
  66. self.max_sizes = max_sizes
  67. self.steps = steps
  68. self.use_density_prior_box = use_density_prior_box
  69. self.densities = densities
  70. self.landmark = None
  71. if self.with_lmk and isinstance(lmk_loss, dict):
  72. self.lmk_loss = SSDWithLmkLoss(**lmk_loss)
  73. def build(self, feed_vars, mode='train'):
  74. im = feed_vars['image']
  75. body_feats = self.backbone(im)
  76. locs, confs, box, box_var = self._multi_box_head(
  77. inputs=body_feats,
  78. image=im,
  79. num_classes=self.num_classes,
  80. use_density_prior_box=self.use_density_prior_box)
  81. if mode == 'train':
  82. gt_bbox = feed_vars['gt_bbox']
  83. gt_class = feed_vars['gt_class']
  84. if self.with_lmk:
  85. lmk_labels = feed_vars['gt_keypoint']
  86. lmk_ignore_flag = feed_vars["keypoint_ignore"]
  87. loss = self.lmk_loss(locs, confs, gt_bbox, gt_class,
  88. self.landmark, lmk_labels, lmk_ignore_flag,
  89. box, box_var)
  90. else:
  91. loss = fluid.layers.ssd_loss(
  92. locs,
  93. confs,
  94. gt_bbox,
  95. gt_class,
  96. box,
  97. box_var,
  98. overlap_threshold=0.35,
  99. neg_overlap=0.35)
  100. loss = fluid.layers.reduce_sum(loss)
  101. return {'loss': loss}
  102. else:
  103. if self.with_lmk:
  104. pred, face_index = self.output_decoder(locs, confs, box,
  105. box_var)
  106. return {
  107. 'bbox': pred,
  108. 'face_index': face_index,
  109. 'prior_boxes': box,
  110. 'landmark': self.landmark
  111. }
  112. else:
  113. pred = self.output_decoder(locs, confs, box, box_var)
  114. return {'bbox': pred}
  115. def _multi_box_head(self,
  116. inputs,
  117. image,
  118. num_classes=2,
  119. use_density_prior_box=False):
  120. def permute_and_reshape(input, last_dim):
  121. trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
  122. compile_shape = [0, -1, last_dim]
  123. return fluid.layers.reshape(trans, shape=compile_shape)
  124. locs, confs = [], []
  125. boxes, vars = [], []
  126. lmk_locs = []
  127. b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
  128. for i, input in enumerate(inputs):
  129. min_size = self.min_sizes[i]
  130. if use_density_prior_box:
  131. densities = self.densities[i]
  132. box, var = fluid.layers.density_prior_box(
  133. input,
  134. image,
  135. densities=densities,
  136. fixed_sizes=min_size,
  137. fixed_ratios=[1.],
  138. clip=False,
  139. offset=0.5,
  140. steps=[self.steps[i]] * 2)
  141. else:
  142. box, var = fluid.layers.prior_box(
  143. input,
  144. image,
  145. min_sizes=min_size,
  146. max_sizes=None,
  147. steps=[self.steps[i]] * 2,
  148. aspect_ratios=[1.],
  149. clip=False,
  150. flip=False,
  151. offset=0.5)
  152. num_boxes = box.shape[2]
  153. box = fluid.layers.reshape(box, shape=[-1, 4])
  154. var = fluid.layers.reshape(var, shape=[-1, 4])
  155. num_loc_output = num_boxes * 4
  156. num_conf_output = num_boxes * num_classes
  157. # get loc
  158. mbox_loc = fluid.layers.conv2d(
  159. input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
  160. loc = permute_and_reshape(mbox_loc, 4)
  161. # get conf
  162. mbox_conf = fluid.layers.conv2d(
  163. input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
  164. conf = permute_and_reshape(mbox_conf, num_classes)
  165. if self.with_lmk:
  166. # get landmark
  167. lmk_loc_output = num_boxes * 10
  168. lmk_box_loc = fluid.layers.conv2d(
  169. input,
  170. lmk_loc_output,
  171. 3,
  172. 1,
  173. 1,
  174. param_attr=ParamAttr(name='lmk' + str(i) + '_weights'),
  175. bias_attr=False)
  176. lmk_loc = permute_and_reshape(lmk_box_loc, 10)
  177. lmk_locs.append(lmk_loc)
  178. locs.append(loc)
  179. confs.append(conf)
  180. boxes.append(box)
  181. vars.append(var)
  182. face_mbox_loc = fluid.layers.concat(locs, axis=1)
  183. face_mbox_conf = fluid.layers.concat(confs, axis=1)
  184. prior_boxes = fluid.layers.concat(boxes)
  185. box_vars = fluid.layers.concat(vars)
  186. if self.with_lmk:
  187. self.landmark = fluid.layers.concat(lmk_locs, axis=1)
  188. return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
  189. def _inputs_def(self, image_shape):
  190. im_shape = [None] + image_shape
  191. # yapf: disable
  192. inputs_def = {
  193. 'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
  194. 'im_id': {'shape': [None, 1], 'dtype': 'int64', 'lod_level': 0},
  195. 'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
  196. 'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
  197. 'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
  198. 'gt_keypoint': {'shape': [None, 10], 'dtype': 'float32', 'lod_level': 1},
  199. 'keypoint_ignore': {'shape': [None, 1], 'dtype': 'float32', 'lod_level': 1},
  200. }
  201. # yapf: enable
  202. return inputs_def
  203. def build_inputs(
  204. self,
  205. image_shape=[3, None, None],
  206. fields=['image', 'im_id', 'gt_bbox', 'gt_class'], # for train
  207. use_dataloader=True,
  208. iterable=False):
  209. inputs_def = self._inputs_def(image_shape)
  210. feed_vars = OrderedDict([(key, fluid.data(
  211. name=key,
  212. shape=inputs_def[key]['shape'],
  213. dtype=inputs_def[key]['dtype'],
  214. lod_level=inputs_def[key]['lod_level'])) for key in fields])
  215. loader = fluid.io.DataLoader.from_generator(
  216. feed_list=list(feed_vars.values()),
  217. capacity=16,
  218. use_double_buffer=True,
  219. iterable=iterable) if use_dataloader else None
  220. return feed_vars, loader
  221. def train(self, feed_vars):
  222. return self.build(feed_vars, 'train')
  223. def eval(self, feed_vars):
  224. return self.build(feed_vars, 'eval')
  225. def test(self, feed_vars, exclude_nms=False):
  226. assert not exclude_nms, "exclude_nms for {} is not support currently".format(
  227. self.__class__.__name__)
  228. return self.build(feed_vars, 'test')
  229. def is_bbox_normalized(self):
  230. return True