# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
from collections import OrderedDict

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay

from ppdet.core.workspace import register

__all__ = ["GhostNet"]

@register
class GhostNet(object):
    """
    scale (float): width scaling factor for GhostNet channels.
    feature_maps (list): indices of the stages whose feature maps are returned.
    conv_decay (float): weight decay for convolution layer weights.
    extra_block_filters (list): number of filters for each extra block.
    lr_mult_list (list): learning rate ratios for different blocks; a lower
        ratio is needed for pretrained models obtained via distillation
        (default: [1.0, 1.0, 1.0, 1.0, 1.0]).
    freeze_norm (bool): whether to freeze the batch norm parameters by
        setting their learning rate to 0.
    """
    def __init__(self,
                 scale,
                 feature_maps=[5, 6, 7, 8, 9, 10],
                 conv_decay=0.00001,
                 extra_block_filters=[[256, 512], [128, 256], [128, 256],
                                      [64, 128]],
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
                 freeze_norm=False):
        self.scale = scale
        self.feature_maps = feature_maps
        self.extra_block_filters = extra_block_filters
        self.end_points = []
        self.block_stride = 0
        self.conv_decay = conv_decay
        self.lr_mult_list = lr_mult_list
        self.freeze_norm = freeze_norm
        self.curr_stage = 0
        self.cfgs = [
            # kernel_size (k), expansion size (t), output channels (c),
            # use_se, stride (s)
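            # Example: the first row [3, 16, 16, 0, 1] becomes a
            # ghost_bottleneck with kernel_size=3,
            # hidden_dim=_make_divisible(16 * scale, 4),
            # output=_make_divisible(16 * scale, 4), no SE block, stride 1.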
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 1, 2],
            [5, 120, 40, 1, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 1, 1],
            [3, 672, 112, 1, 1],
            [5, 672, 160, 1, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 1, 1]
        ]
    def _conv_bn_layer(self,
                       input,
                       num_filters,
                       filter_size,
                       stride=1,
                       groups=1,
                       act=None,
                       name=None):
        # Pick the per-stage learning rate multiplier for this block.
        lr_idx = self.curr_stage // 3
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]
        norm_lr = 0. if self.freeze_norm else lr_mult
        x = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(
                regularizer=L2Decay(self.conv_decay),
                learning_rate=lr_mult,
                initializer=fluid.initializer.MSRA(),
                name=name + "_weights"),
            bias_attr=False)
        bn_name = name + "_bn"
        x = fluid.layers.batch_norm(
            input=x,
            act=act,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                learning_rate=norm_lr,
                regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                learning_rate=norm_lr,
                regularizer=L2Decay(0.0)),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")
        return x
    def se_block(self, input, num_channels, reduction_ratio=4, name=None):
        lr_idx = self.curr_stage // 3
        lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
        lr_mult = self.lr_mult_list[lr_idx]
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        squeeze = fluid.layers.fc(
            input=pool,
            size=num_channels // reduction_ratio,
            act='relu',
            param_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_1_weights'),
            bias_attr=ParamAttr(
                name=name + '_1_offset', learning_rate=lr_mult))
        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
        excitation = fluid.layers.fc(
            input=squeeze,
            size=num_channels,
            act=None,
            param_attr=ParamAttr(
                learning_rate=lr_mult,
                initializer=fluid.initializer.Uniform(-stdv, stdv),
                name=name + '_2_weights'),
            bias_attr=ParamAttr(
                name=name + '_2_offset', learning_rate=lr_mult))
        # Clipping to [0, 1] gates the excitation like a hard sigmoid.
        excitation = fluid.layers.clip(x=excitation, min=0, max=1)
        se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
        return se_scale
    def depthwise_conv(self,
                       input,
                       output,
                       kernel_size,
                       stride=1,
                       relu=False,
                       name=None):
        return self._conv_bn_layer(
            input=input,
            num_filters=output,
            filter_size=kernel_size,
            stride=stride,
            groups=input.shape[1],
            act="relu" if relu else None,
            name=name + "_depthwise")
    def ghost_module(self,
                     input,
                     output,
                     kernel_size=1,
                     ratio=2,
                     dw_size=3,
                     stride=1,
                     relu=True,
                     name=None):
        self.output = output
        init_channels = int(math.ceil(output / ratio))
        new_channels = int(init_channels * (ratio - 1))
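        # With ratio=2, new_channels == init_channels, so the concat below
        # yields init_channels * ratio channels; this equals `output` exactly
        # whenever `output` is even, which holds for every width in self.cfgs.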
        primary_conv = self._conv_bn_layer(
            input=input,
            num_filters=init_channels,
            filter_size=kernel_size,
            stride=stride,
            groups=1,
            act="relu" if relu else None,
            name=name + "_primary_conv")
        cheap_operation = self._conv_bn_layer(
            input=primary_conv,
            num_filters=new_channels,
            filter_size=dw_size,
            stride=1,
            groups=init_channels,
            act="relu" if relu else None,
            name=name + "_cheap_operation")
        out = fluid.layers.concat([primary_conv, cheap_operation], axis=1)
        return out
    def ghost_bottleneck(self,
                         input,
                         hidden_dim,
                         output,
                         kernel_size,
                         stride,
                         use_se,
                         name=None):
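        # Bottleneck flow: ghost_module expands to hidden_dim, a depthwise
        # conv downsamples when stride == 2, an optional SE block reweights
        # channels, and a second ghost_module projects back to `output`;
        # the result is added to an identity or 1x1-projected shortcut.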
        inp_channels = input.shape[1]
        x = self.ghost_module(
            input=input,
            output=hidden_dim,
            kernel_size=1,
            stride=1,
            relu=True,
            name=name + "_ghost_module_1")
        if self.block_stride == 4 and stride == 2:
            self.block_stride += 1
            if self.block_stride in self.feature_maps:
                self.end_points.append(x)
        if stride == 2:
            x = self.depthwise_conv(
                input=x,
                output=hidden_dim,
                kernel_size=kernel_size,
                stride=stride,
                relu=False,
                name=name + "_depthwise")
        if use_se:
            x = self.se_block(
                input=x, num_channels=hidden_dim, name=name + "_se")
        x = self.ghost_module(
            input=x,
            output=output,
            kernel_size=1,
            relu=False,
            name=name + "_ghost_module_2")
        if stride == 1 and inp_channels == output:
            shortcut = input
        else:
            shortcut = self.depthwise_conv(
                input=input,
                output=inp_channels,
                kernel_size=kernel_size,
                stride=stride,
                relu=False,
                name=name + "_shortcut_depthwise")
            shortcut = self._conv_bn_layer(
                input=shortcut,
                num_filters=output,
                filter_size=1,
                stride=1,
                groups=1,
                act=None,
                name=name + "_shortcut_conv")
        return fluid.layers.elementwise_add(x=x, y=shortcut, axis=-1)
    def _extra_block_dw(self,
                        input,
                        num_filters1,
                        num_filters2,
                        stride,
                        name=None):
        pointwise_conv = self._conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=int(num_filters1),
            stride=1,
            act='relu6',
            name=name + "_extra1")
        depthwise_conv = self._conv_bn_layer(
            input=pointwise_conv,
            filter_size=3,
            num_filters=int(num_filters2),
            stride=stride,
            groups=int(num_filters1),
            act='relu6',
            name=name + "_extra2_dw")
        normal_conv = self._conv_bn_layer(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2),
            stride=1,
            act='relu6',
            name=name + "_extra2_sep")
        return normal_conv
    def _make_divisible(self, v, divisor=8, min_value=None):
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
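        # e.g. the scale=1.3 stem width: _make_divisible(16 * 1.3, 4)
        #      -> int(20.8 + 2) // 4 * 4 = 20, and 20 >= 0.9 * 20.8.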
        # ensure that rounding down does not shrink the width by more than 10%
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
    def __call__(self, input):
        # build the first layer
        output_channel = int(self._make_divisible(16 * self.scale, 4))
        x = self._conv_bn_layer(
            input=input,
            num_filters=output_channel,
            filter_size=3,
            stride=2,
            groups=1,
            act="relu",
            name="conv1")
        # build the inverted residual blocks
        idx = 0
        for k, exp_size, c, use_se, s in self.cfgs:
            if s == 2:
                self.block_stride += 1
                if self.block_stride in self.feature_maps:
                    self.end_points.append(x)
            output_channel = int(self._make_divisible(c * self.scale, 4))
            hidden_channel = int(self._make_divisible(exp_size * self.scale, 4))
            x = self.ghost_bottleneck(
                input=x,
                hidden_dim=hidden_channel,
                output=output_channel,
                kernel_size=k,
                stride=s,
                use_se=use_se,
                name="_ghostbottleneck_" + str(idx))
            idx += 1
            self.curr_stage += 1
        self.block_stride += 1
        if self.block_stride in self.feature_maps:
            self.end_points.append(x)
        # extra blocks
        # check whether conv_extra is needed
        if self.block_stride < max(self.feature_maps):
            conv_extra = self._conv_bn_layer(
                x,
                num_filters=self._make_divisible(self.scale * self.cfgs[-1][1]),
                filter_size=1,
                stride=1,
                groups=1,
                act='relu6',
                name='conv' + str(idx + 2))
            self.block_stride += 1
            if self.block_stride in self.feature_maps:
                self.end_points.append(conv_extra)
            idx += 1
        for block_filter in self.extra_block_filters:
            conv_extra = self._extra_block_dw(conv_extra, block_filter[0],
                                              block_filter[1], 2,
                                              'conv' + str(idx + 2))
            self.block_stride += 1
            if self.block_stride in self.feature_maps:
                self.end_points.append(conv_extra)
            idx += 1
        return OrderedDict([('ghost_{}'.format(i), feat)
                            for i, feat in enumerate(self.end_points)])
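
if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: it assumes a
    # Paddle 1.x static-graph environment with ppdet importable. The input
    # resolution 320x320 is an arbitrary illustration.
    image = fluid.data(
        name="image", shape=[None, 3, 320, 320], dtype="float32")
    backbone = GhostNet(scale=1.3)
    # Returns an OrderedDict keyed 'ghost_0', 'ghost_1', ... with one entry
    # per index listed in feature_maps.
    feats = backbone(image)
    for feat_name, feat in feats.items():
        print(feat_name, feat.shape)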