# vgg.py (6.4 KB)
  1. from __future__ import division
  2. import paddle
  3. import paddle.nn as nn
  4. import paddle.nn.functional as F
  5. from paddle import ParamAttr
  6. from paddle.nn import Conv2D, MaxPool2D
  7. from ppdet.core.workspace import register, serializable
  8. from ..shape_spec import ShapeSpec
# Names exported by `from <this module> import *`: only the VGG class.
__all__ = ['VGG']
  10. VGG_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
  11. class ConvBlock(nn.Layer):
  12. def __init__(self,
  13. in_channels,
  14. out_channels,
  15. groups,
  16. pool_size=2,
  17. pool_stride=2,
  18. pool_padding=0,
  19. name=None):
  20. super(ConvBlock, self).__init__()
  21. self.groups = groups
  22. self.conv0 = nn.Conv2D(
  23. in_channels=in_channels,
  24. out_channels=out_channels,
  25. kernel_size=3,
  26. stride=1,
  27. padding=1)
  28. self.conv_out_list = []
  29. for i in range(1, groups):
  30. conv_out = self.add_sublayer(
  31. 'conv{}'.format(i),
  32. Conv2D(
  33. in_channels=out_channels,
  34. out_channels=out_channels,
  35. kernel_size=3,
  36. stride=1,
  37. padding=1))
  38. self.conv_out_list.append(conv_out)
  39. self.pool = MaxPool2D(
  40. kernel_size=pool_size,
  41. stride=pool_stride,
  42. padding=pool_padding,
  43. ceil_mode=True)
  44. def forward(self, inputs):
  45. out = self.conv0(inputs)
  46. out = F.relu(out)
  47. for conv_i in self.conv_out_list:
  48. out = conv_i(out)
  49. out = F.relu(out)
  50. pool = self.pool(out)
  51. return out, pool
  52. class ExtraBlock(nn.Layer):
  53. def __init__(self,
  54. in_channels,
  55. mid_channels,
  56. out_channels,
  57. padding,
  58. stride,
  59. kernel_size,
  60. name=None):
  61. super(ExtraBlock, self).__init__()
  62. self.conv0 = Conv2D(
  63. in_channels=in_channels,
  64. out_channels=mid_channels,
  65. kernel_size=1,
  66. stride=1,
  67. padding=0)
  68. self.conv1 = Conv2D(
  69. in_channels=mid_channels,
  70. out_channels=out_channels,
  71. kernel_size=kernel_size,
  72. stride=stride,
  73. padding=padding)
  74. def forward(self, inputs):
  75. out = self.conv0(inputs)
  76. out = F.relu(out)
  77. out = self.conv1(out)
  78. out = F.relu(out)
  79. return out
  80. class L2NormScale(nn.Layer):
  81. def __init__(self, num_channels, scale=1.0):
  82. super(L2NormScale, self).__init__()
  83. self.scale = self.create_parameter(
  84. attr=ParamAttr(initializer=paddle.nn.initializer.Constant(scale)),
  85. shape=[num_channels])
  86. def forward(self, inputs):
  87. out = F.normalize(inputs, axis=1, epsilon=1e-10)
  88. # out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
  89. # out) * out
  90. out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3) * out
  91. return out
  92. @register
  93. @serializable
  94. class VGG(nn.Layer):
  95. def __init__(self,
  96. depth=16,
  97. normalizations=[20., -1, -1, -1, -1, -1],
  98. extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
  99. [128, 256, 0, 1, 3],
  100. [128, 256, 0, 1, 3]]):
  101. super(VGG, self).__init__()
  102. assert depth in [16, 19], \
  103. "depth as 16/19 supported currently, but got {}".format(depth)
  104. self.depth = depth
  105. self.groups = VGG_cfg[depth]
  106. self.normalizations = normalizations
  107. self.extra_block_filters = extra_block_filters
  108. self._out_channels = []
  109. self.conv_block_0 = ConvBlock(
  110. 3, 64, self.groups[0], 2, 2, 0, name="conv1_")
  111. self.conv_block_1 = ConvBlock(
  112. 64, 128, self.groups[1], 2, 2, 0, name="conv2_")
  113. self.conv_block_2 = ConvBlock(
  114. 128, 256, self.groups[2], 2, 2, 0, name="conv3_")
  115. self.conv_block_3 = ConvBlock(
  116. 256, 512, self.groups[3], 2, 2, 0, name="conv4_")
  117. self.conv_block_4 = ConvBlock(
  118. 512, 512, self.groups[4], 3, 1, 1, name="conv5_")
  119. self._out_channels.append(512)
  120. self.fc6 = Conv2D(
  121. in_channels=512,
  122. out_channels=1024,
  123. kernel_size=3,
  124. stride=1,
  125. padding=6,
  126. dilation=6)
  127. self.fc7 = Conv2D(
  128. in_channels=1024,
  129. out_channels=1024,
  130. kernel_size=1,
  131. stride=1,
  132. padding=0)
  133. self._out_channels.append(1024)
  134. # extra block
  135. self.extra_convs = []
  136. last_channels = 1024
  137. for i, v in enumerate(self.extra_block_filters):
  138. assert len(v) == 5, "extra_block_filters size not fix"
  139. extra_conv = self.add_sublayer("conv{}".format(6 + i),
  140. ExtraBlock(last_channels, v[0], v[1],
  141. v[2], v[3], v[4]))
  142. last_channels = v[1]
  143. self.extra_convs.append(extra_conv)
  144. self._out_channels.append(last_channels)
  145. self.norms = []
  146. for i, n in enumerate(self.normalizations):
  147. if n != -1:
  148. norm = self.add_sublayer("norm{}".format(i),
  149. L2NormScale(
  150. self.extra_block_filters[i][1], n))
  151. else:
  152. norm = None
  153. self.norms.append(norm)
  154. def forward(self, inputs):
  155. outputs = []
  156. conv, pool = self.conv_block_0(inputs['image'])
  157. conv, pool = self.conv_block_1(pool)
  158. conv, pool = self.conv_block_2(pool)
  159. conv, pool = self.conv_block_3(pool)
  160. outputs.append(conv)
  161. conv, pool = self.conv_block_4(pool)
  162. out = self.fc6(pool)
  163. out = F.relu(out)
  164. out = self.fc7(out)
  165. out = F.relu(out)
  166. outputs.append(out)
  167. if not self.extra_block_filters:
  168. return outputs
  169. # extra block
  170. for extra_conv in self.extra_convs:
  171. out = extra_conv(out)
  172. outputs.append(out)
  173. for i, n in enumerate(self.normalizations):
  174. if n != -1:
  175. outputs[i] = self.norms[i](outputs[i])
  176. return outputs
  177. @property
  178. def out_shape(self):
  179. return [ShapeSpec(channels=c) for c in self._out_channels]