# mobilenet_v3.py
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. from collections import OrderedDict
  18. import paddle.fluid as fluid
  19. from paddle.fluid.param_attr import ParamAttr
  20. from paddle.fluid.regularizer import L2Decay
  21. import numpy as np
  22. from ppdet.core.workspace import register
  23. from numbers import Integral
  24. __all__ = ['MobileNetV3', 'MobileNetV3RCNN']
  25. @register
  26. class MobileNetV3(object):
  27. """
  28. MobileNet v3, see https://arxiv.org/abs/1905.02244
  29. Args:
  30. scale (float): scaling factor for convolution groups proportion of mobilenet_v3.
  31. model_name (str): There are two modes, small and large.
  32. norm_type (str): normalization type, 'bn' and 'sync_bn' are supported.
  33. norm_decay (float): weight decay for normalization layer weights.
  34. conv_decay (float): weight decay for convolution layer weights.
  35. feature_maps (list): index of stages whose feature maps are returned.
  36. extra_block_filters (list): number of filter for each extra block.
  37. lr_mult_list (list): learning rate ratio of different blocks, lower learning rate ratio
  38. is need for pretrained model got using distillation(default as
  39. [1.0, 1.0, 1.0, 1.0, 1.0]).
  40. freeze_norm (bool): freeze normalization layers.
  41. multiplier (float): The multiplier by which to reduce the convolution expansion and
  42. number of channels.
  43. """
  44. __shared__ = ['norm_type']
  45. def __init__(
  46. self,
  47. scale=1.0,
  48. model_name='small',
  49. feature_maps=[5, 6, 7, 8, 9, 10],
  50. conv_decay=0.0,
  51. norm_type='bn',
  52. norm_decay=0.0,
  53. extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]],
  54. lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
  55. freeze_norm=False,
  56. multiplier=1.0):
  57. if isinstance(feature_maps, Integral):
  58. feature_maps = [feature_maps]
  59. if norm_type == 'sync_bn' and freeze_norm:
  60. raise ValueError(
  61. "The norm_type should not be sync_bn when freeze_norm is True")
  62. self.scale = scale
  63. self.model_name = model_name
  64. self.feature_maps = feature_maps
  65. self.extra_block_filters = extra_block_filters
  66. self.conv_decay = conv_decay
  67. self.norm_decay = norm_decay
  68. self.inplanes = 16
  69. self.end_points = []
  70. self.block_stride = 0
  71. self.lr_mult_list = lr_mult_list
  72. self.freeze_norm = freeze_norm
  73. self.norm_type = norm_type
  74. self.curr_stage = 0
  75. if model_name == "large":
  76. self.cfg = [
  77. # kernel_size, expand, channel, se_block, act_mode, stride
  78. [3, 16, 16, False, 'relu', 1],
  79. [3, 64, 24, False, 'relu', 2],
  80. [3, 72, 24, False, 'relu', 1],
  81. [5, 72, 40, True, 'relu', 2],
  82. [5, 120, 40, True, 'relu', 1],
  83. [5, 120, 40, True, 'relu', 1],
  84. [3, 240, 80, False, 'hard_swish', 2],
  85. [3, 200, 80, False, 'hard_swish', 1],
  86. [3, 184, 80, False, 'hard_swish', 1],
  87. [3, 184, 80, False, 'hard_swish', 1],
  88. [3, 480, 112, True, 'hard_swish', 1],
  89. [3, 672, 112, True, 'hard_swish', 1],
  90. [5, 672, 160, True, 'hard_swish', 2],
  91. [5, 960, 160, True, 'hard_swish', 1],
  92. [5, 960, 160, True, 'hard_swish', 1],
  93. ]
  94. self.cls_ch_squeeze = 960
  95. self.cls_ch_expand = 1280
  96. elif model_name == "small":
  97. self.cfg = [
  98. # kernel_size, expand, channel, se_block, act_mode, stride
  99. [3, 16, 16, True, 'relu', 2],
  100. [3, 72, 24, False, 'relu', 2],
  101. [3, 88, 24, False, 'relu', 1],
  102. [5, 96, 40, True, 'hard_swish', 2],
  103. [5, 240, 40, True, 'hard_swish', 1],
  104. [5, 240, 40, True, 'hard_swish', 1],
  105. [5, 120, 48, True, 'hard_swish', 1],
  106. [5, 144, 48, True, 'hard_swish', 1],
  107. [5, 288, 96, True, 'hard_swish', 2],
  108. [5, 576, 96, True, 'hard_swish', 1],
  109. [5, 576, 96, True, 'hard_swish', 1],
  110. ]
  111. self.cls_ch_squeeze = 576
  112. self.cls_ch_expand = 1280
  113. else:
  114. raise NotImplementedError
  115. if multiplier != 1.0:
  116. self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier)
  117. self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier)
  118. self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier)
  119. self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier)
  120. self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier)
  121. def _conv_bn_layer(self,
  122. input,
  123. filter_size,
  124. num_filters,
  125. stride,
  126. padding,
  127. num_groups=1,
  128. if_act=True,
  129. act=None,
  130. name=None,
  131. use_cudnn=True):
  132. lr_idx = self.curr_stage // 3
  133. lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
  134. lr_mult = self.lr_mult_list[lr_idx]
  135. conv = fluid.layers.conv2d(
  136. input=input,
  137. num_filters=num_filters,
  138. filter_size=filter_size,
  139. stride=stride,
  140. padding=padding,
  141. groups=num_groups,
  142. act=None,
  143. use_cudnn=use_cudnn,
  144. param_attr=ParamAttr(
  145. name=name + '_weights',
  146. learning_rate=lr_mult,
  147. regularizer=L2Decay(self.conv_decay)),
  148. bias_attr=False)
  149. bn_name = name + '_bn'
  150. bn = self._bn(conv, bn_name=bn_name)
  151. if if_act:
  152. if act == 'relu':
  153. bn = fluid.layers.relu(bn)
  154. elif act == 'hard_swish':
  155. bn = self._hard_swish(bn)
  156. elif act == 'relu6':
  157. bn = fluid.layers.relu6(bn)
  158. return bn
  159. def _bn(self, input, act=None, bn_name=None):
  160. lr_idx = self.curr_stage // 3
  161. lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
  162. lr_mult = self.lr_mult_list[lr_idx]
  163. norm_lr = 0. if self.freeze_norm else lr_mult
  164. norm_decay = self.norm_decay
  165. pattr = ParamAttr(
  166. name=bn_name + '_scale',
  167. learning_rate=norm_lr,
  168. regularizer=L2Decay(norm_decay))
  169. battr = ParamAttr(
  170. name=bn_name + '_offset',
  171. learning_rate=norm_lr,
  172. regularizer=L2Decay(norm_decay))
  173. conv = input
  174. if self.norm_type in ['bn', 'sync_bn']:
  175. global_stats = True if self.freeze_norm else False
  176. out = fluid.layers.batch_norm(
  177. input=conv,
  178. act=act,
  179. name=bn_name + '.output.1',
  180. param_attr=pattr,
  181. bias_attr=battr,
  182. moving_mean_name=bn_name + '_mean',
  183. moving_variance_name=bn_name + '_variance',
  184. use_global_stats=global_stats)
  185. scale = fluid.framework._get_var(pattr.name)
  186. bias = fluid.framework._get_var(battr.name)
  187. elif self.norm_type == 'affine_channel':
  188. scale = fluid.layers.create_parameter(
  189. shape=[conv.shape[1]],
  190. dtype=conv.dtype,
  191. attr=pattr,
  192. default_initializer=fluid.initializer.Constant(1.))
  193. bias = fluid.layers.create_parameter(
  194. shape=[conv.shape[1]],
  195. dtype=conv.dtype,
  196. attr=battr,
  197. default_initializer=fluid.initializer.Constant(0.))
  198. out = fluid.layers.affine_channel(
  199. x=conv, scale=scale, bias=bias, act=act)
  200. if self.freeze_norm:
  201. scale.stop_gradient = True
  202. bias.stop_gradient = True
  203. return out
  204. def _hard_swish(self, x):
  205. return fluid.layers.elementwise_mul(x, fluid.layers.relu6(x + 3) / 6.)
  206. def _se_block(self, input, num_out_filter, ratio=4, name=None):
  207. lr_idx = self.curr_stage // 3
  208. lr_idx = min(lr_idx, len(self.lr_mult_list) - 1)
  209. lr_mult = self.lr_mult_list[lr_idx]
  210. num_mid_filter = int(num_out_filter // ratio)
  211. pool = fluid.layers.pool2d(
  212. input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
  213. conv1 = fluid.layers.conv2d(
  214. input=pool,
  215. filter_size=1,
  216. num_filters=num_mid_filter,
  217. act='relu',
  218. param_attr=ParamAttr(
  219. name=name + '_1_weights',
  220. learning_rate=lr_mult,
  221. regularizer=L2Decay(self.conv_decay)),
  222. bias_attr=ParamAttr(
  223. name=name + '_1_offset',
  224. learning_rate=lr_mult,
  225. regularizer=L2Decay(self.conv_decay)))
  226. conv2 = fluid.layers.conv2d(
  227. input=conv1,
  228. filter_size=1,
  229. num_filters=num_out_filter,
  230. act='hard_sigmoid',
  231. param_attr=ParamAttr(
  232. name=name + '_2_weights',
  233. learning_rate=lr_mult,
  234. regularizer=L2Decay(self.conv_decay)),
  235. bias_attr=ParamAttr(
  236. name=name + '_2_offset',
  237. learning_rate=lr_mult,
  238. regularizer=L2Decay(self.conv_decay)))
  239. scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
  240. return scale
  241. def _residual_unit(self,
  242. input,
  243. num_in_filter,
  244. num_mid_filter,
  245. num_out_filter,
  246. stride,
  247. filter_size,
  248. act=None,
  249. use_se=False,
  250. name=None):
  251. input_data = input
  252. conv0 = self._conv_bn_layer(
  253. input=input,
  254. filter_size=1,
  255. num_filters=num_mid_filter,
  256. stride=1,
  257. padding=0,
  258. if_act=True,
  259. act=act,
  260. name=name + '_expand')
  261. if self.block_stride == 4 and stride == 2:
  262. self.block_stride += 1
  263. if self.block_stride in self.feature_maps:
  264. self.end_points.append(conv0)
  265. with fluid.name_scope('res_conv1'):
  266. conv1 = self._conv_bn_layer(
  267. input=conv0,
  268. filter_size=filter_size,
  269. num_filters=num_mid_filter,
  270. stride=stride,
  271. padding=int((filter_size - 1) // 2),
  272. if_act=True,
  273. act=act,
  274. num_groups=num_mid_filter,
  275. use_cudnn=False,
  276. name=name + '_depthwise')
  277. if use_se:
  278. with fluid.name_scope('se_block'):
  279. conv1 = self._se_block(
  280. input=conv1,
  281. num_out_filter=num_mid_filter,
  282. name=name + '_se')
  283. conv2 = self._conv_bn_layer(
  284. input=conv1,
  285. filter_size=1,
  286. num_filters=num_out_filter,
  287. stride=1,
  288. padding=0,
  289. if_act=False,
  290. name=name + '_linear')
  291. if num_in_filter != num_out_filter or stride != 1:
  292. return conv2
  293. else:
  294. return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
  295. def _extra_block_dw(self,
  296. input,
  297. num_filters1,
  298. num_filters2,
  299. stride,
  300. name=None):
  301. pointwise_conv = self._conv_bn_layer(
  302. input=input,
  303. filter_size=1,
  304. num_filters=int(num_filters1),
  305. stride=1,
  306. padding="SAME",
  307. act='relu6',
  308. name=name + "_extra1")
  309. depthwise_conv = self._conv_bn_layer(
  310. input=pointwise_conv,
  311. filter_size=3,
  312. num_filters=int(num_filters2),
  313. stride=stride,
  314. padding="SAME",
  315. num_groups=int(num_filters1),
  316. act='relu6',
  317. use_cudnn=False,
  318. name=name + "_extra2_dw")
  319. normal_conv = self._conv_bn_layer(
  320. input=depthwise_conv,
  321. filter_size=1,
  322. num_filters=int(num_filters2),
  323. stride=1,
  324. padding="SAME",
  325. act='relu6',
  326. name=name + "_extra2_sep")
  327. return normal_conv
  328. def _make_divisible(self, v, divisor=8, min_value=None):
  329. if min_value is None:
  330. min_value = divisor
  331. new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  332. if new_v < 0.9 * v:
  333. new_v += divisor
  334. return new_v
  335. def __call__(self, input):
  336. scale = self.scale
  337. inplanes = self.inplanes
  338. cfg = self.cfg
  339. blocks = []
  340. #conv1
  341. conv = self._conv_bn_layer(
  342. input,
  343. filter_size=3,
  344. num_filters=self._make_divisible(inplanes * scale),
  345. stride=2,
  346. padding=1,
  347. num_groups=1,
  348. if_act=True,
  349. act='hard_swish',
  350. name='conv1')
  351. i = 0
  352. inplanes = self._make_divisible(inplanes * scale)
  353. for layer_cfg in cfg:
  354. if layer_cfg[5] == 2:
  355. self.block_stride += 1
  356. if self.block_stride in self.feature_maps:
  357. self.end_points.append(conv)
  358. conv = self._residual_unit(
  359. input=conv,
  360. num_in_filter=inplanes,
  361. num_mid_filter=self._make_divisible(scale * layer_cfg[1]),
  362. num_out_filter=self._make_divisible(scale * layer_cfg[2]),
  363. act=layer_cfg[4],
  364. stride=layer_cfg[5],
  365. filter_size=layer_cfg[0],
  366. use_se=layer_cfg[3],
  367. name='conv' + str(i + 2))
  368. inplanes = self._make_divisible(scale * layer_cfg[2])
  369. i += 1
  370. self.curr_stage += 1
  371. self.block_stride += 1
  372. if self.block_stride in self.feature_maps:
  373. self.end_points.append(conv)
  374. # extra block
  375. # check whether conv_extra is needed
  376. if self.block_stride < max(self.feature_maps):
  377. conv_extra = self._conv_bn_layer(
  378. conv,
  379. filter_size=1,
  380. num_filters=self._make_divisible(scale * cfg[-1][1]),
  381. stride=1,
  382. padding="SAME",
  383. num_groups=1,
  384. if_act=True,
  385. act='hard_swish',
  386. name='conv' + str(i + 2))
  387. self.block_stride += 1
  388. if self.block_stride in self.feature_maps:
  389. self.end_points.append(conv_extra)
  390. i += 1
  391. for block_filter in self.extra_block_filters:
  392. conv_extra = self._extra_block_dw(conv_extra, block_filter[0],
  393. block_filter[1], 2,
  394. 'conv' + str(i + 2))
  395. self.block_stride += 1
  396. if self.block_stride in self.feature_maps:
  397. self.end_points.append(conv_extra)
  398. i += 1
  399. return OrderedDict([('mbv3_{}'.format(idx), feat)
  400. for idx, feat in enumerate(self.end_points)])
  401. @register
  402. class MobileNetV3RCNN(MobileNetV3):
  403. def __init__(self,
  404. scale=1.0,
  405. model_name='large',
  406. conv_decay=0.0,
  407. norm_type='bn',
  408. norm_decay=0.0,
  409. freeze_norm=True,
  410. feature_maps=[2, 3, 4, 5],
  411. lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0]):
  412. super(MobileNetV3RCNN, self).__init__(
  413. scale=scale,
  414. model_name=model_name,
  415. conv_decay=conv_decay,
  416. norm_type=norm_type,
  417. norm_decay=norm_decay,
  418. lr_mult_list=lr_mult_list,
  419. feature_maps=feature_maps,
  420. freeze_norm=freeze_norm)
  421. self.curr_stage = 0
  422. self.block_stride = 1
  423. def _residual_unit(self,
  424. input,
  425. num_in_filter,
  426. num_mid_filter,
  427. num_out_filter,
  428. stride,
  429. filter_size,
  430. act=None,
  431. use_se=False,
  432. name=None):
  433. input_data = input
  434. conv0 = self._conv_bn_layer(
  435. input=input,
  436. filter_size=1,
  437. num_filters=num_mid_filter,
  438. stride=1,
  439. padding=0,
  440. if_act=True,
  441. act=act,
  442. name=name + '_expand')
  443. feature_level = int(np.log2(self.block_stride))
  444. if feature_level in self.feature_maps and stride == 2:
  445. self.end_points.append(conv0)
  446. conv1 = self._conv_bn_layer(
  447. input=conv0,
  448. filter_size=filter_size,
  449. num_filters=num_mid_filter,
  450. stride=stride,
  451. padding=int((filter_size - 1) // 2),
  452. if_act=True,
  453. act=act,
  454. num_groups=num_mid_filter,
  455. use_cudnn=False,
  456. name=name + '_depthwise')
  457. if use_se:
  458. conv1 = self._se_block(
  459. input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
  460. conv2 = self._conv_bn_layer(
  461. input=conv1,
  462. filter_size=1,
  463. num_filters=num_out_filter,
  464. stride=1,
  465. padding=0,
  466. if_act=False,
  467. name=name + '_linear')
  468. if num_in_filter != num_out_filter or stride != 1:
  469. return conv2
  470. else:
  471. return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None)
  472. def __call__(self, input):
  473. scale = self.scale
  474. inplanes = self.inplanes
  475. cfg = self.cfg
  476. #conv1
  477. conv = self._conv_bn_layer(
  478. input,
  479. filter_size=3,
  480. num_filters=self._make_divisible(inplanes * scale),
  481. stride=2,
  482. padding=1,
  483. num_groups=1,
  484. if_act=True,
  485. act='hard_swish',
  486. name='conv1')
  487. i = 0
  488. inplanes = self._make_divisible(inplanes * scale)
  489. for layer_cfg in cfg:
  490. self.block_stride *= layer_cfg[5]
  491. conv = self._residual_unit(
  492. input=conv,
  493. num_in_filter=inplanes,
  494. num_mid_filter=self._make_divisible(scale * layer_cfg[1]),
  495. num_out_filter=self._make_divisible(scale * layer_cfg[2]),
  496. act=layer_cfg[4],
  497. stride=layer_cfg[5],
  498. filter_size=layer_cfg[0],
  499. use_se=layer_cfg[3],
  500. name='conv' + str(i + 2))
  501. inplanes = self._make_divisible(scale * layer_cfg[2])
  502. i += 1
  503. self.curr_stage += 1
  504. if np.max(self.feature_maps) >= 5:
  505. conv = self._conv_bn_layer(
  506. input=conv,
  507. filter_size=1,
  508. num_filters=self._make_divisible(scale * cfg[-1][1]),
  509. stride=1,
  510. padding=0,
  511. num_groups=1,
  512. if_act=True,
  513. act='hard_swish',
  514. name='conv_last')
  515. self.end_points.append(conv)
  516. i += 1
  517. res = OrderedDict([('mv3_{}'.format(idx), self.end_points[idx])
  518. for idx, feat_idx in enumerate(self.feature_maps)])
  519. return res