nonlocal_helper.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import ConstantInitializer


def space_nonlocal(input,
                   dim_in,
                   dim_out,
                   prefix,
                   dim_inner,
                   with_bias=False,
                   with_scale=True):
    """Embedded-Gaussian spatial non-local operation.

    theta, phi and g are 1x1 conv embeddings of the input; the theta-phi
    affinity matrix is softmax-normalized and used to aggregate g, and the
    result is projected back to dim_out channels with a final 1x1 conv.
    """
    theta = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_theta_w'),
        bias_attr=ParamAttr(
            name=prefix + '_theta_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False)

    theta_shape = theta.shape
    theta_shape_op = fluid.layers.shape(theta)
    theta_shape_op.stop_gradient = True

    # we have to use explicit batch size (to support arbitrary spacetime size)
    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
    theta = fluid.layers.transpose(theta, [0, 2, 1])

    phi = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_phi_w'),
        bias_attr=ParamAttr(
            name=prefix + '_phi_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False,
        name=prefix + '_phi')
    phi = fluid.layers.reshape(phi, [0, 0, -1])

    theta_phi = fluid.layers.matmul(theta, phi)

    g = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_g_w'),
        bias_attr=ParamAttr(
            name=prefix + '_g_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False,
        name=prefix + '_g')
    g = fluid.layers.reshape(g, [0, 0, -1])

    # scale
    if with_scale:
        theta_phi = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
    p = fluid.layers.softmax(theta_phi)

    # note g's axis[2] corresponds to p's axis[2]
    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
    p = fluid.layers.transpose(p, [0, 2, 1])
    t = fluid.layers.matmul(g, p)

    # reshape back
    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
    n = fluid.layers.slice(theta_shape_op, axes=[0], starts=[0], ends=[1])
    h = fluid.layers.slice(theta_shape_op, axes=[0], starts=[2], ends=[3])
    w = fluid.layers.slice(theta_shape_op, axes=[0], starts=[3], ends=[4])
    ch = int(theta_shape[1])
    t_re = fluid.layers.reshape(t, shape=[n, ch, h, w])
    blob_out = t_re

    # project back to dim_out channels; the zero-initialized weight makes the
    # block start as an identity mapping when used with a residual connection
    blob_out = fluid.layers.conv2d(
        input=blob_out,
        num_filters=dim_out,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(
            name=prefix + '_out_w',
            initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=prefix + '_out_b',
            initializer=ConstantInitializer(value=0.0))
        if with_bias else False,
        name=prefix + '_out')
    blob_out_shape = blob_out.shape

    return blob_out


def add_space_nonlocal(input,
                       dim_in,
                       dim_out,
                       prefix,
                       dim_inner,
                       with_bias=False,
                       with_scale=True):
    '''
    add_space_nonlocal:
        Wraps space_nonlocal with a residual connection (output = input + nonlocal).
        Non-local Neural Networks: see https://arxiv.org/abs/1711.07971
    '''
    conv = space_nonlocal(
        input,
        dim_in,
        dim_out,
        prefix,
        dim_inner,
        with_bias=with_bias,
        with_scale=with_scale)
    output = input + conv
    return output
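

if __name__ == '__main__':
    # Minimal usage sketch (hypothetical names and sizes, not part of the
    # original module): attach a residual spatial non-local block to a
    # 1024-channel, 14x14 feature map in fluid's static-graph mode.
    feat = fluid.layers.data(
        name='body_feat', shape=[1024, 14, 14], dtype='float32')
    out = add_space_nonlocal(
        feat,
        dim_in=1024,
        dim_out=1024,
        prefix='nonlocal_res4',
        dim_inner=512)  # bottleneck width; commonly chosen as dim_in // 2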