123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- from __future__ import division
- import paddle
- import paddle.nn as nn
- import paddle.nn.functional as F
- from paddle import ParamAttr
- from paddle.nn import Conv2D, MaxPool2D
- from ppdet.core.workspace import register, serializable
- from ..shape_spec import ShapeSpec
- __all__ = ['VGG']
- VGG_cfg = {16: [2, 2, 3, 3, 3], 19: [2, 2, 4, 4, 4]}
- class ConvBlock(nn.Layer):
- def __init__(self,
- in_channels,
- out_channels,
- groups,
- pool_size=2,
- pool_stride=2,
- pool_padding=0,
- name=None):
- super(ConvBlock, self).__init__()
- self.groups = groups
- self.conv0 = nn.Conv2D(
- in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv_out_list = []
- for i in range(1, groups):
- conv_out = self.add_sublayer(
- 'conv{}'.format(i),
- Conv2D(
- in_channels=out_channels,
- out_channels=out_channels,
- kernel_size=3,
- stride=1,
- padding=1))
- self.conv_out_list.append(conv_out)
- self.pool = MaxPool2D(
- kernel_size=pool_size,
- stride=pool_stride,
- padding=pool_padding,
- ceil_mode=True)
- def forward(self, inputs):
- out = self.conv0(inputs)
- out = F.relu(out)
- for conv_i in self.conv_out_list:
- out = conv_i(out)
- out = F.relu(out)
- pool = self.pool(out)
- return out, pool
- class ExtraBlock(nn.Layer):
- def __init__(self,
- in_channels,
- mid_channels,
- out_channels,
- padding,
- stride,
- kernel_size,
- name=None):
- super(ExtraBlock, self).__init__()
- self.conv0 = Conv2D(
- in_channels=in_channels,
- out_channels=mid_channels,
- kernel_size=1,
- stride=1,
- padding=0)
- self.conv1 = Conv2D(
- in_channels=mid_channels,
- out_channels=out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding)
- def forward(self, inputs):
- out = self.conv0(inputs)
- out = F.relu(out)
- out = self.conv1(out)
- out = F.relu(out)
- return out
- class L2NormScale(nn.Layer):
- def __init__(self, num_channels, scale=1.0):
- super(L2NormScale, self).__init__()
- self.scale = self.create_parameter(
- attr=ParamAttr(initializer=paddle.nn.initializer.Constant(scale)),
- shape=[num_channels])
- def forward(self, inputs):
- out = F.normalize(inputs, axis=1, epsilon=1e-10)
- # out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
- # out) * out
- out = self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3) * out
- return out
- @register
- @serializable
- class VGG(nn.Layer):
- def __init__(self,
- depth=16,
- normalizations=[20., -1, -1, -1, -1, -1],
- extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
- [128, 256, 0, 1, 3],
- [128, 256, 0, 1, 3]]):
- super(VGG, self).__init__()
- assert depth in [16, 19], \
- "depth as 16/19 supported currently, but got {}".format(depth)
- self.depth = depth
- self.groups = VGG_cfg[depth]
- self.normalizations = normalizations
- self.extra_block_filters = extra_block_filters
- self._out_channels = []
- self.conv_block_0 = ConvBlock(
- 3, 64, self.groups[0], 2, 2, 0, name="conv1_")
- self.conv_block_1 = ConvBlock(
- 64, 128, self.groups[1], 2, 2, 0, name="conv2_")
- self.conv_block_2 = ConvBlock(
- 128, 256, self.groups[2], 2, 2, 0, name="conv3_")
- self.conv_block_3 = ConvBlock(
- 256, 512, self.groups[3], 2, 2, 0, name="conv4_")
- self.conv_block_4 = ConvBlock(
- 512, 512, self.groups[4], 3, 1, 1, name="conv5_")
- self._out_channels.append(512)
- self.fc6 = Conv2D(
- in_channels=512,
- out_channels=1024,
- kernel_size=3,
- stride=1,
- padding=6,
- dilation=6)
- self.fc7 = Conv2D(
- in_channels=1024,
- out_channels=1024,
- kernel_size=1,
- stride=1,
- padding=0)
- self._out_channels.append(1024)
- # extra block
- self.extra_convs = []
- last_channels = 1024
- for i, v in enumerate(self.extra_block_filters):
- assert len(v) == 5, "extra_block_filters size not fix"
- extra_conv = self.add_sublayer("conv{}".format(6 + i),
- ExtraBlock(last_channels, v[0], v[1],
- v[2], v[3], v[4]))
- last_channels = v[1]
- self.extra_convs.append(extra_conv)
- self._out_channels.append(last_channels)
- self.norms = []
- for i, n in enumerate(self.normalizations):
- if n != -1:
- norm = self.add_sublayer("norm{}".format(i),
- L2NormScale(
- self.extra_block_filters[i][1], n))
- else:
- norm = None
- self.norms.append(norm)
- def forward(self, inputs):
- outputs = []
- conv, pool = self.conv_block_0(inputs['image'])
- conv, pool = self.conv_block_1(pool)
- conv, pool = self.conv_block_2(pool)
- conv, pool = self.conv_block_3(pool)
- outputs.append(conv)
- conv, pool = self.conv_block_4(pool)
- out = self.fc6(pool)
- out = F.relu(out)
- out = self.fc7(out)
- out = F.relu(out)
- outputs.append(out)
- if not self.extra_block_filters:
- return outputs
- # extra block
- for extra_conv in self.extra_convs:
- out = extra_conv(out)
- outputs.append(out)
- for i, n in enumerate(self.normalizations):
- if n != -1:
- outputs[i] = self.norms[i](outputs[i])
- return outputs
- @property
- def out_shape(self):
- return [ShapeSpec(channels=c) for c in self._out_channels]
|