ssd.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. import paddle.fluid as fluid
  19. from ppdet.experimental import mixed_precision_global_state
  20. from ppdet.core.workspace import register
  21. from ppdet.modeling.ops import SSDOutputDecoder
  22. __all__ = ['SSD']
  23. @register
  24. class SSD(object):
  25. """
  26. Single Shot MultiBox Detector, see https://arxiv.org/abs/1512.02325
  27. Args:
  28. backbone (object): backbone instance
  29. multi_box_head (object): `MultiBoxHead` instance
  30. output_decoder (object): `SSDOutputDecoder` instance
  31. num_classes (int): number of output classes
  32. """
  33. __category__ = 'architecture'
  34. __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'fpn']
  35. __shared__ = ['num_classes']
  36. def __init__(self,
  37. backbone,
  38. fpn=None,
  39. multi_box_head='MultiBoxHead',
  40. output_decoder=SSDOutputDecoder().__dict__,
  41. num_classes=21):
  42. super(SSD, self).__init__()
  43. self.backbone = backbone
  44. self.fpn = fpn
  45. self.multi_box_head = multi_box_head
  46. self.num_classes = num_classes
  47. self.output_decoder = output_decoder
  48. if isinstance(output_decoder, dict):
  49. self.output_decoder = SSDOutputDecoder(**output_decoder)
  50. def build(self, feed_vars, mode='train'):
  51. im = feed_vars['image']
  52. if mode == 'train' or mode == 'eval':
  53. gt_bbox = feed_vars['gt_bbox']
  54. gt_class = feed_vars['gt_class']
  55. mixed_precision_enabled = mixed_precision_global_state() is not None
  56. # cast inputs to FP16
  57. if mixed_precision_enabled:
  58. im = fluid.layers.cast(im, 'float16')
  59. # backbone
  60. body_feats = self.backbone(im)
  61. if self.fpn is not None:
  62. body_feats, spatial_scale = self.fpn.get_output(body_feats)
  63. if isinstance(body_feats, OrderedDict):
  64. body_feat_names = list(body_feats.keys())
  65. body_feats = [body_feats[name] for name in body_feat_names]
  66. # cast features back to FP32
  67. if mixed_precision_enabled:
  68. body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]
  69. locs, confs, box, box_var = self.multi_box_head(
  70. inputs=body_feats, image=im, num_classes=self.num_classes)
  71. if mode == 'train':
  72. loss = fluid.layers.ssd_loss(locs, confs, gt_bbox, gt_class, box,
  73. box_var)
  74. loss = fluid.layers.reduce_sum(loss)
  75. return {'loss': loss}
  76. else:
  77. pred = self.output_decoder(locs, confs, box, box_var)
  78. return {'bbox': pred}
  79. def _inputs_def(self, image_shape):
  80. im_shape = [None] + image_shape
  81. # yapf: disable
  82. inputs_def = {
  83. 'image': {'shape': im_shape, 'dtype': 'float32', 'lod_level': 0},
  84. 'im_id': {'shape': [None, 1], 'dtype': 'int64', 'lod_level': 0},
  85. 'gt_bbox': {'shape': [None, 4], 'dtype': 'float32', 'lod_level': 1},
  86. 'gt_class': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
  87. 'im_shape': {'shape': [None, 3], 'dtype': 'int32', 'lod_level': 0},
  88. 'is_difficult': {'shape': [None, 1], 'dtype': 'int32', 'lod_level': 1},
  89. }
  90. # yapf: enable
  91. return inputs_def
  92. def build_inputs(
  93. self,
  94. image_shape=[3, None, None],
  95. fields=['image', 'im_id', 'gt_bbox', 'gt_class'], # for train
  96. use_dataloader=True,
  97. iterable=False):
  98. inputs_def = self._inputs_def(image_shape)
  99. feed_vars = OrderedDict([(key, fluid.data(
  100. name=key,
  101. shape=inputs_def[key]['shape'],
  102. dtype=inputs_def[key]['dtype'],
  103. lod_level=inputs_def[key]['lod_level'])) for key in fields])
  104. loader = fluid.io.DataLoader.from_generator(
  105. feed_list=list(feed_vars.values()),
  106. capacity=16,
  107. use_double_buffer=True,
  108. iterable=iterable) if use_dataloader else None
  109. return feed_vars, loader
  110. def train(self, feed_vars):
  111. return self.build(feed_vars, 'train')
  112. def eval(self, feed_vars):
  113. return self.build(feed_vars, 'eval')
  114. def test(self, feed_vars, exclude_nms=False):
  115. assert not exclude_nms, "exclude_nms for {} is not support currently".format(
  116. self.__class__.__name__)
  117. return self.build(feed_vars, 'test')
  118. def is_bbox_normalized(self):
  119. # SSD use output_decoder in output layers, bbox is normalized
  120. # to range [0, 1], is_bbox_normalized is used in eval.py and infer.py
  121. return True