pplcnet_embedding.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
from paddle.nn.initializer import Normal, Constant, KaimingNormal, XavierNormal
from paddle.regularizer import L2Decay

from ppdet.core.workspace import register

__all__ = ['PPLCNetEmbedding']

# Each element (list) represents a depthwise block, which is composed of
# k, in_c, out_c, s, use_se.
# k: kernel_size
# in_c: input channel number in depthwise block
# out_c: output channel number in depthwise block
# s: stride in depthwise block
# use_se: whether to use SE block
NET_CONFIG = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [[3, 128, 256, 2, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False],
                [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]]
}
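
# Note on strides: together with the stride-2 stem conv (conv1 in PPLCNet
# below), the stride-2 first entries of blocks3-blocks6 give the backbone an
# overall downsampling factor of 2 ** 5 = 32 before global average pooling.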


def make_divisible(v, divisor=8, min_value=None):
    """Round channel count v to the nearest multiple of divisor, keeping the
    result no smaller than min_value and no smaller than 90% of v."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
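
# For example, with the default divisor of 8: make_divisible(16 * 2.5) == 40
# and make_divisible(16 * 0.25) == 8, so scaled channel counts stay multiples
# of 8.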


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 num_groups=1):
        super().__init__()
        self.conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)
        self.bn = BatchNorm2D(
            num_filters,
            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
        self.hardswish = nn.Hardswish()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.hardswish(x)
        return x


class DepthwiseSeparable(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 dw_size=3,
                 use_se=False):
        super().__init__()
        self.use_se = use_se
        self.dw_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_channels,
            filter_size=dw_size,
            stride=stride,
            num_groups=num_channels)
        if use_se:
            self.se = SEModule(num_channels)
        self.pw_conv = ConvBNLayer(
            num_channels=num_channels,
            filter_size=1,
            num_filters=num_filters,
            stride=1)

    def forward(self, x):
        x = self.dw_conv(x)
        if self.use_se:
            x = self.se(x)
        x = self.pw_conv(x)
        return x


class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4):
        super().__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0)
        self.relu = nn.ReLU()
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0)
        self.hardsigmoid = nn.Hardsigmoid()

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.hardsigmoid(x)
        x = paddle.multiply(x=identity, y=x)
        return x


class PPLCNet(nn.Layer):
    """
    PP-LCNet, see https://arxiv.org/abs/2109.15099.
    This code differs from the PPLCNet in ppdet/modeling/backbones/lcnet.py
    and in PaddleClas: the output here is the flattened feature of last_conv.

    Args:
        scale (float): Scale ratio of channels.
        class_expand (int): Number of output channels of last_conv.
    """

    def __init__(self, scale=1.0, class_expand=1280):
        super(PPLCNet, self).__init__()
        self.scale = scale
        self.class_expand = class_expand
        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            num_filters=make_divisible(16 * scale),
            stride=2)

        self.blocks2 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"])
        ])

        self.blocks3 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"])
        ])

        self.blocks4 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"])
        ])

        self.blocks5 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"])
        ])

        self.blocks6 = nn.Sequential(*[
            DepthwiseSeparable(
                num_channels=make_divisible(in_c * scale),
                num_filters=make_divisible(out_c * scale),
                dw_size=k,
                stride=s,
                use_se=se)
            for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"])
        ])

        self.avg_pool = AdaptiveAvgPool2D(1)
        self.last_conv = Conv2D(
            in_channels=make_divisible(NET_CONFIG["blocks6"][-1][2] * scale),
            out_channels=self.class_expand,
            kernel_size=1,
            stride=1,
            padding=0,
            bias_attr=False)
        self.hardswish = nn.Hardswish()
        self.flatten = nn.Flatten(start_axis=1, stop_axis=-1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.blocks2(x)
        x = self.blocks3(x)
        x = self.blocks4(x)
        x = self.blocks5(x)
        x = self.blocks6(x)
        x = self.avg_pool(x)
        x = self.last_conv(x)
        x = self.hardswish(x)
        x = self.flatten(x)
        return x
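
# Note: the flattened backbone output always has class_expand channels (1280 by
# default), since AdaptiveAvgPool2D(1) reduces the spatial size to 1x1 and
# last_conv projects to class_expand channels regardless of scale.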


class FC(nn.Layer):
    def __init__(self, input_ch, output_ch):
        super(FC, self).__init__()
        weight_attr = ParamAttr(initializer=XavierNormal())
        self.fc = paddle.nn.Linear(
            input_ch, output_ch, weight_attr=weight_attr)

    def forward(self, x):
        out = self.fc(x)
        return out


@register
class PPLCNetEmbedding(nn.Layer):
    """
    PPLCNet Embedding: a PPLCNet backbone followed by an FC neck.

    Args:
        scale (float): Scale ratio of channels in the PPLCNet backbone.
        input_ch (int): Number of channels of the input feature
            (the backbone output, i.e. class_expand).
        output_ch (int): Dimension of the output embedding.
    """

    def __init__(self, scale=2.5, input_ch=1280, output_ch=512):
        super(PPLCNetEmbedding, self).__init__()
        self.backbone = PPLCNet(scale=scale)
        self.neck = FC(input_ch, output_ch)

    def forward(self, x):
        feat = self.backbone(x)
        feat_out = self.neck(feat)
        return feat_out
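

# A minimal usage sketch (not part of the upstream module): build the embedding
# model and run a dummy forward pass. The 224x224 input size is an arbitrary
# choice for illustration; thanks to the adaptive average pooling inside
# PPLCNet, the embedding dimension does not depend on the input resolution.
if __name__ == "__main__":
    model = PPLCNetEmbedding(scale=2.5, input_ch=1280, output_ch=512)
    dummy = paddle.rand([1, 3, 224, 224])  # NCHW dummy image batch
    emb = model(dummy)
    print(emb.shape)  # expected: [1, 512]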