import torch.nn as nn import math import torch.utils.model_zoo as model_zoo import torch import torch.nn.functional as F __all__ = ['Res2Net', 'res2net50'] class Bottle2neck(nn.Module): expansion = 4 def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale = 4, stype='normal'): """ Constructor Args: inplanes: input channel dimensionality planes: output channel dimensionality stride: conv stride. Replaces pooling layer. downsample: None when stride = 1 baseWidth: basic width of conv3x3 scale: number of scale. type: 'normal': normal set. 'stage': first block of a new stage. """ super(Bottle2neck, self).__init__() width = int(math.floor(planes * (baseWidth/64.0))) self.conv1 = nn.Conv2d(inplanes, width*scale, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(width*scale) if scale == 1: self.nums = 1 else: self.nums = scale -1 if stype == 'stage': self.pool = nn.AvgPool2d(kernel_size=3, stride = stride, padding=1) convs = [] bns = [] for i in range(self.nums): convs.append(nn.Conv2d(width, width, kernel_size=3, stride = stride, padding=1, bias=False)) bns.append(nn.BatchNorm2d(width)) self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList(bns) self.conv3 = nn.Conv2d(width*scale, planes * self.expansion, kernel_size=1, bias=False) self.bn3 = nn.BatchNorm2d(planes * self.expansion) self.relu = nn.ReLU(inplace=True) self.downsample = downsample self.stype = stype self.scale = scale self.width = width def forward(self, x): residual = x out = self.conv1(x) out = self.bn1(out) out = self.relu(out) spx = torch.split(out, self.width, 1) for i in range(self.nums): if i==0 or self.stype=='stage': sp = spx[i] else: sp = sp + spx[i] sp = self.convs[i](sp) sp = self.relu(self.bns[i](sp)) if i==0: out = sp else: out = torch.cat((out, sp), 1) if self.scale != 1 and self.stype=='normal': out = torch.cat((out, spx[self.nums]),1) elif self.scale != 1 and self.stype=='stage': out = torch.cat((out, self.pool(spx[self.nums])),1) out = self.conv3(out) out = self.bn3(out) if self.downsample is not None: residual = self.downsample(x) out += residual out = self.relu(out) return out class Res2Net(nn.Module): def __init__(self, block, layers, baseWidth = 26, scale = 4, num_classes=1000): self.inplanes = 64 super(Res2Net, self).__init__() self.baseWidth = baseWidth self.scale = scale self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2) self.layer3 = self._make_layer(block, 256, layers[2], stride=2) self.layer4 = self._make_layer(block, 512, layers[3], stride=2) self.toplayer = nn.Conv2d(2048, 256, kernel_size=1, stride=1, padding=0) self.latlayer1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0) self.latlayer2 = nn.Conv2d( 512, 256, kernel_size=1, stride=1, padding=0) self.latlayer3 = nn.Conv2d( 256, 256, kernel_size=1, stride=1, padding=0) self.smooth1 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.smooth2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.smooth3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) self._load_pretrained_model() def _make_layer(self, block, planes, blocks, stride=1): downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(planes * block.expansion), ) layers = [] layers.append(block(self.inplanes, planes, stride, downsample=downsample, stype='stage', baseWidth = self.baseWidth, scale=self.scale)) self.inplanes = planes * block.expansion for i in range(1, blocks): layers.append(block(self.inplanes, planes, baseWidth = self.baseWidth, scale=self.scale)) return nn.Sequential(*layers) def forward(self, input): # Bottom-up x = self.conv1(input) x = self.bn1(x) x = self.relu(x) c1 = self.maxpool(x) c2 = self.layer1(c1) # x4 c3 = self.layer2(c2) #x8 c4 = self.layer3(c3) #x16 c5 = self.layer4(c4) #x32 # Top-down p5 = self.toplayer(c5) p4 = nn.functional.upsample(p5, size=c4.size()[2:], mode='bilinear') + self.latlayer1(c4) p3 = nn.functional.upsample(p4, size=c3.size()[2:], mode='bilinear') + self.latlayer2(c3) p2 = nn.functional.upsample(p3, size=c2.size()[2:], mode='bilinear') + self.latlayer3(c2) p4 = self.smooth1(p4) p3 = self.smooth2(p3) p2 = self.smooth3(p2) return p2, p3, p4, p5 def _load_pretrained_model(self): pretrain_dict = model_zoo.load_url('https://shanghuagao.oss-cn-beijing.aliyuncs.com/res2net/res2net50_26w_4s-06e79181.pth') model_dict = {} state_dict = self.state_dict() for k, v in pretrain_dict.items(): if k in state_dict: model_dict[k] = v state_dict.update(model_dict) self.load_state_dict(state_dict) def res2net50_FPN(pretrained=True, **kwargs): """Constructs a Res2Net-50 model. Res2Net-50 refers to the Res2Net-50_26w_4s. Args: pretrained (bool): If True, returns a model pre-trained on ImageNet """ model = Res2Net(Bottle2neck, [3, 4, 6, 3], baseWidth = 26, scale = 4, **kwargs) return model