esnet.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. import paddle.nn as nn
  19. import paddle.nn.functional as F
  20. from paddle import ParamAttr
  21. from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
  22. from paddle.nn.initializer import KaimingNormal
  23. from paddle.regularizer import L2Decay
  24. from ppdet.core.workspace import register, serializable
  25. from numbers import Integral
  26. from ..shape_spec import ShapeSpec
  27. from ppdet.modeling.ops import channel_shuffle
  28. from ppdet.modeling.backbones.shufflenet_v2 import ConvBNLayer
  29. __all__ = ['ESNet']
  30. def make_divisible(v, divisor=16, min_value=None):
  31. if min_value is None:
  32. min_value = divisor
  33. new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  34. if new_v < 0.9 * v:
  35. new_v += divisor
  36. return new_v
  37. class SEModule(nn.Layer):
  38. def __init__(self, channel, reduction=4):
  39. super(SEModule, self).__init__()
  40. self.avg_pool = AdaptiveAvgPool2D(1)
  41. self.conv1 = Conv2D(
  42. in_channels=channel,
  43. out_channels=channel // reduction,
  44. kernel_size=1,
  45. stride=1,
  46. padding=0,
  47. weight_attr=ParamAttr(),
  48. bias_attr=ParamAttr())
  49. self.conv2 = Conv2D(
  50. in_channels=channel // reduction,
  51. out_channels=channel,
  52. kernel_size=1,
  53. stride=1,
  54. padding=0,
  55. weight_attr=ParamAttr(),
  56. bias_attr=ParamAttr())
  57. def forward(self, inputs):
  58. outputs = self.avg_pool(inputs)
  59. outputs = self.conv1(outputs)
  60. outputs = F.relu(outputs)
  61. outputs = self.conv2(outputs)
  62. outputs = F.hardsigmoid(outputs)
  63. return paddle.multiply(x=inputs, y=outputs)
  64. class InvertedResidual(nn.Layer):
  65. def __init__(self,
  66. in_channels,
  67. mid_channels,
  68. out_channels,
  69. stride,
  70. act="relu"):
  71. super(InvertedResidual, self).__init__()
  72. self._conv_pw = ConvBNLayer(
  73. in_channels=in_channels // 2,
  74. out_channels=mid_channels // 2,
  75. kernel_size=1,
  76. stride=1,
  77. padding=0,
  78. groups=1,
  79. act=act)
  80. self._conv_dw = ConvBNLayer(
  81. in_channels=mid_channels // 2,
  82. out_channels=mid_channels // 2,
  83. kernel_size=3,
  84. stride=stride,
  85. padding=1,
  86. groups=mid_channels // 2,
  87. act=None)
  88. self._se = SEModule(mid_channels)
  89. self._conv_linear = ConvBNLayer(
  90. in_channels=mid_channels,
  91. out_channels=out_channels // 2,
  92. kernel_size=1,
  93. stride=1,
  94. padding=0,
  95. groups=1,
  96. act=act)
  97. def forward(self, inputs):
  98. x1, x2 = paddle.split(
  99. inputs,
  100. num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
  101. axis=1)
  102. x2 = self._conv_pw(x2)
  103. x3 = self._conv_dw(x2)
  104. x3 = paddle.concat([x2, x3], axis=1)
  105. x3 = self._se(x3)
  106. x3 = self._conv_linear(x3)
  107. out = paddle.concat([x1, x3], axis=1)
  108. return channel_shuffle(out, 2)
  109. class InvertedResidualDS(nn.Layer):
  110. def __init__(self,
  111. in_channels,
  112. mid_channels,
  113. out_channels,
  114. stride,
  115. act="relu"):
  116. super(InvertedResidualDS, self).__init__()
  117. # branch1
  118. self._conv_dw_1 = ConvBNLayer(
  119. in_channels=in_channels,
  120. out_channels=in_channels,
  121. kernel_size=3,
  122. stride=stride,
  123. padding=1,
  124. groups=in_channels,
  125. act=None)
  126. self._conv_linear_1 = ConvBNLayer(
  127. in_channels=in_channels,
  128. out_channels=out_channels // 2,
  129. kernel_size=1,
  130. stride=1,
  131. padding=0,
  132. groups=1,
  133. act=act)
  134. # branch2
  135. self._conv_pw_2 = ConvBNLayer(
  136. in_channels=in_channels,
  137. out_channels=mid_channels // 2,
  138. kernel_size=1,
  139. stride=1,
  140. padding=0,
  141. groups=1,
  142. act=act)
  143. self._conv_dw_2 = ConvBNLayer(
  144. in_channels=mid_channels // 2,
  145. out_channels=mid_channels // 2,
  146. kernel_size=3,
  147. stride=stride,
  148. padding=1,
  149. groups=mid_channels // 2,
  150. act=None)
  151. self._se = SEModule(mid_channels // 2)
  152. self._conv_linear_2 = ConvBNLayer(
  153. in_channels=mid_channels // 2,
  154. out_channels=out_channels // 2,
  155. kernel_size=1,
  156. stride=1,
  157. padding=0,
  158. groups=1,
  159. act=act)
  160. self._conv_dw_mv1 = ConvBNLayer(
  161. in_channels=out_channels,
  162. out_channels=out_channels,
  163. kernel_size=3,
  164. stride=1,
  165. padding=1,
  166. groups=out_channels,
  167. act="hard_swish")
  168. self._conv_pw_mv1 = ConvBNLayer(
  169. in_channels=out_channels,
  170. out_channels=out_channels,
  171. kernel_size=1,
  172. stride=1,
  173. padding=0,
  174. groups=1,
  175. act="hard_swish")
  176. def forward(self, inputs):
  177. x1 = self._conv_dw_1(inputs)
  178. x1 = self._conv_linear_1(x1)
  179. x2 = self._conv_pw_2(inputs)
  180. x2 = self._conv_dw_2(x2)
  181. x2 = self._se(x2)
  182. x2 = self._conv_linear_2(x2)
  183. out = paddle.concat([x1, x2], axis=1)
  184. out = self._conv_dw_mv1(out)
  185. out = self._conv_pw_mv1(out)
  186. return out
  187. @register
  188. @serializable
  189. class ESNet(nn.Layer):
  190. def __init__(self,
  191. scale=1.0,
  192. act="hard_swish",
  193. feature_maps=[4, 11, 14],
  194. channel_ratio=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]):
  195. super(ESNet, self).__init__()
  196. self.scale = scale
  197. if isinstance(feature_maps, Integral):
  198. feature_maps = [feature_maps]
  199. self.feature_maps = feature_maps
  200. stage_repeats = [3, 7, 3]
  201. stage_out_channels = [
  202. -1, 24, make_divisible(128 * scale), make_divisible(256 * scale),
  203. make_divisible(512 * scale), 1024
  204. ]
  205. self._out_channels = []
  206. self._feature_idx = 0
  207. # 1. conv1
  208. self._conv1 = ConvBNLayer(
  209. in_channels=3,
  210. out_channels=stage_out_channels[1],
  211. kernel_size=3,
  212. stride=2,
  213. padding=1,
  214. act=act)
  215. self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
  216. self._feature_idx += 1
  217. # 2. bottleneck sequences
  218. self._block_list = []
  219. arch_idx = 0
  220. for stage_id, num_repeat in enumerate(stage_repeats):
  221. for i in range(num_repeat):
  222. channels_scales = channel_ratio[arch_idx]
  223. mid_c = make_divisible(
  224. int(stage_out_channels[stage_id + 2] * channels_scales),
  225. divisor=8)
  226. if i == 0:
  227. block = self.add_sublayer(
  228. name=str(stage_id + 2) + '_' + str(i + 1),
  229. sublayer=InvertedResidualDS(
  230. in_channels=stage_out_channels[stage_id + 1],
  231. mid_channels=mid_c,
  232. out_channels=stage_out_channels[stage_id + 2],
  233. stride=2,
  234. act=act))
  235. else:
  236. block = self.add_sublayer(
  237. name=str(stage_id + 2) + '_' + str(i + 1),
  238. sublayer=InvertedResidual(
  239. in_channels=stage_out_channels[stage_id + 2],
  240. mid_channels=mid_c,
  241. out_channels=stage_out_channels[stage_id + 2],
  242. stride=1,
  243. act=act))
  244. self._block_list.append(block)
  245. arch_idx += 1
  246. self._feature_idx += 1
  247. self._update_out_channels(stage_out_channels[stage_id + 2],
  248. self._feature_idx, self.feature_maps)
  249. def _update_out_channels(self, channel, feature_idx, feature_maps):
  250. if feature_idx in feature_maps:
  251. self._out_channels.append(channel)
  252. def forward(self, inputs):
  253. y = self._conv1(inputs['image'])
  254. y = self._max_pool(y)
  255. outs = []
  256. for i, inv in enumerate(self._block_list):
  257. y = inv(y)
  258. if i + 2 in self.feature_maps:
  259. outs.append(y)
  260. return outs
  261. @property
  262. def out_shape(self):
  263. return [ShapeSpec(channels=c) for c in self._out_channels]