from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import ConstantInitializer
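
# Helpers for a spatial non-local (self-attention) block, built on the
# paddle.fluid static-graph API.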


def space_nonlocal(input,
                   dim_in,
                   dim_out,
                   prefix,
                   dim_inner,
                   with_bias=False,
                   with_scale=True):
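    '''
    space_nonlocal:
        Embedded Gaussian non-local block over all spatial positions.
        theta/phi/g are 1x1 conv projections of the input; the attention map
        softmax(theta * phi) aggregates g, and a final 1x1 conv (with
        zero-initialized weights) maps the result back to dim_out.
        (dim_in is unused and kept for interface symmetry.)
    '''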
    # theta: the "query" projection, (N, dim_inner, H, W)
    theta = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_theta_w'),
        bias_attr=ParamAttr(
            name=prefix + '_theta_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False)
    theta_shape = theta.shape
    # record the runtime shape so the output can be reshaped back later
    theta_shape_op = fluid.layers.shape(theta)
    theta_shape_op.stop_gradient = True

    # we have to use an explicit batch size (to support arbitrary spatial size)
    # e.g. (8, 1024, 28, 28) => (8, 1024, 784)
    theta = fluid.layers.reshape(theta, shape=[0, 0, -1])
    theta = fluid.layers.transpose(theta, [0, 2, 1])

    # phi: the "key" projection, (N, dim_inner, H, W)
    phi = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_phi_w'),
        bias_attr=ParamAttr(
            name=prefix + '_phi_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False,
        name=prefix + '_phi')
    phi = fluid.layers.reshape(phi, [0, 0, -1])

    theta_phi = fluid.layers.matmul(theta, phi)
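    # theta_phi: pairwise affinities between all spatial positions,
    # shape (N, H*W, H*W)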

    # g: the "value" projection, (N, dim_inner, H, W)
    g = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_g_w'),
        bias_attr=ParamAttr(
            name=prefix + '_g_b',
            initializer=ConstantInitializer(value=0.))
        if with_bias else False,
        name=prefix + '_g')
    g = fluid.layers.reshape(g, [0, 0, -1])

    # scale the logits by 1/sqrt(dim_inner) before the softmax
    if with_scale:
        theta_phi = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
    p = fluid.layers.softmax(theta_phi)

    # note g's axis[2] corresponds to p's axis[2]
    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
    p = fluid.layers.transpose(p, [0, 2, 1])
    t = fluid.layers.matmul(g, p)
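    # t: attention-weighted aggregation of g, shape (N, dim_inner, H*W)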

    # reshape back
    # e.g. (8, 1024, 784) => (8, 1024, 28, 28)
    n = fluid.layers.slice(theta_shape_op, axes=[0], starts=[0], ends=[1])
    h = fluid.layers.slice(theta_shape_op, axes=[0], starts=[2], ends=[3])
    w = fluid.layers.slice(theta_shape_op, axes=[0], starts=[3], ends=[4])
    ch = int(theta_shape[1])
    t_re = fluid.layers.reshape(t, shape=[n, ch, h, w])

    # final 1x1 conv back to dim_out; the weights are zero-initialized so the
    # block starts as an identity mapping inside the residual connection
    blob_out = fluid.layers.conv2d(
        input=t_re,
        num_filters=dim_out,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(
            name=prefix + '_out_w',
            initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=prefix + '_out_b',
            initializer=ConstantInitializer(value=0.0))
        if with_bias else False,
        name=prefix + '_out')
    return blob_out


def add_space_nonlocal(input,
                       dim_in,
                       dim_out,
                       prefix,
                       dim_inner,
                       with_bias=False,
                       with_scale=True):
    '''
    add_space_nonlocal:
        Non-local Neural Networks: see https://arxiv.org/abs/1711.07971
        Wraps space_nonlocal in a residual connection:
        output = input + NL(input).
    '''
    conv = space_nonlocal(
        input,
        dim_in,
        dim_out,
        prefix,
        dim_inner,
        with_bias=with_bias,
        with_scale=with_scale)
    output = input + conv
    return output
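

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal static-graph example of wiring the block into a network. The
# input shape (1024 channels, 14x14 feature map), the batch size, and the
# fluid 1.x executor boilerplate below are assumptions for demonstration
# only; the residual output keeps the input shape (N, 1024, 14, 14).
if __name__ == '__main__':
    import numpy as np

    # declare a 4-D input; fluid.layers.data prepends the batch dimension
    x = fluid.layers.data(name='x', shape=[1024, 14, 14], dtype='float32')
    y = add_space_nonlocal(
        x, dim_in=1024, dim_out=1024, prefix='nonlocal_res4',
        dim_inner=512)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    out, = exe.run(
        feed={'x': np.random.rand(2, 1024, 14, 14).astype('float32')},
        fetch_list=[y])
    print(out.shape)  # (2, 1024, 14, 14)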