# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
The copyright of pytorch/pytorch is a BSD-style license, as found in the LICENSE file.
"""

import math

import numpy as np
import paddle
import paddle.nn as nn

__all__ = [
    'uniform_',
    'normal_',
    'constant_',
    'ones_',
    'zeros_',
    'xavier_uniform_',
    'xavier_normal_',
    'kaiming_uniform_',
    'kaiming_normal_',
    'linear_init_',
    'conv_init_',
    'reset_initialized_parameter',
]


def _no_grad_uniform_(tensor, a, b):
    with paddle.no_grad():
        tensor.set_value(
            paddle.uniform(
                shape=tensor.shape, dtype=tensor.dtype, min=a, max=b))
    return tensor


def _no_grad_normal_(tensor, mean=0., std=1.):
    with paddle.no_grad():
        tensor.set_value(paddle.normal(mean=mean, std=std, shape=tensor.shape))
    return tensor


def _no_grad_fill_(tensor, value=0.):
    with paddle.no_grad():
        tensor.set_value(paddle.full_like(tensor, value, dtype=tensor.dtype))
    return tensor


def uniform_(tensor, a, b):
    """
    Modify tensor in place with values drawn from the uniform distribution U(a, b).

    Args:
        tensor (paddle.Tensor): paddle Tensor
        a (float|int): min value.
        b (float|int): max value.
    Return:
        tensor
    """
    return _no_grad_uniform_(tensor, a, b)


def normal_(tensor, mean=0., std=1.):
    """
    Modify tensor in place with values drawn from a normal distribution N(mean, std^2).

    Args:
        tensor (paddle.Tensor): paddle Tensor
        mean (float|int): mean value.
        std (float|int): std value.
    Return:
        tensor
    """
    return _no_grad_normal_(tensor, mean, std)


def constant_(tensor, value=0.):
    """
    Modify tensor in place by filling it with a constant value.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        value (float|int): value to fill tensor.
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, value)


def ones_(tensor):
    """
    Modify tensor in place by filling it with ones.

    Args:
        tensor (paddle.Tensor): paddle Tensor
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, 1)


def zeros_(tensor):
    """
    Modify tensor in place by filling it with zeros.

    Args:
        tensor (paddle.Tensor): paddle Tensor
    Return:
        tensor
    """
    return _no_grad_fill_(tensor, 0)


def _calculate_fan_in_and_fan_out(tensor, reverse=False):
    """
    Calculate (fan_in, fan_out) for a tensor.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        reverse (bool): tensor data format order, False by default as [fout, fin, ...],
            e.g. conv.weight [cout, cin, kh, kw] uses False; linear.weight [cin, cout] uses True.
    Return:
        Tuple[fan_in, fan_out]
    """
    if tensor.ndim < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    if reverse:
        num_input_fmaps, num_output_fmaps = tensor.shape[0], tensor.shape[1]
    else:
        num_input_fmaps, num_output_fmaps = tensor.shape[1], tensor.shape[0]

    receptive_field_size = 1
    if tensor.ndim > 2:
        receptive_field_size = np.prod(tensor.shape[2:])

    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out
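
# Example (illustrative shapes, an assumption rather than part of the original file):
#
#     w = paddle.zeros([64, 3, 3, 3])                 # Conv2D-style weight [cout, cin, kh, kw]
#     _calculate_fan_in_and_fan_out(w)                # -> (27, 576)
#     w = paddle.zeros([256, 128])                    # paddle Linear weight [cin, cout]
#     _calculate_fan_in_and_fan_out(w, reverse=True)  # -> (256, 128)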


def xavier_uniform_(tensor, gain=1., reverse=False):
    """
    Modify tensor in place using the Xavier (Glorot) uniform method.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        gain (float): scaling factor, 1. by default.
        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
    Return:
        tensor
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    k = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -k, k)


def xavier_normal_(tensor, gain=1., reverse=False):
    """
    Modify tensor in place using the Xavier (Glorot) normal method.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        gain (float): scaling factor, 1. by default.
        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
    Return:
        tensor
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
    return _no_grad_normal_(tensor, 0, std)
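
# For example (illustrative shape): a weight of shape [256, 512] in [fout, fin] order has
# fan_in + fan_out = 768, so the Xavier std is gain * sqrt(2 / 768) ~= 0.051, and the
# uniform bound used by xavier_uniform_ is sqrt(3) times that, ~= 0.088.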


# reference: https://pytorch.org/docs/stable/_modules/torch/nn/init.html
def _calculate_correct_fan(tensor, mode, reverse=False):
    mode = mode.lower()
    valid_modes = ['fan_in', 'fan_out']
    if mode not in valid_modes:
        raise ValueError("Mode {} not supported, please use one of {}".format(
            mode, valid_modes))

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse)

    return fan_in if mode == 'fan_in' else fan_out


def _calculate_gain(nonlinearity, param=None):
    linear_fns = [
        'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
        'conv_transpose2d', 'conv_transpose3d'
    ]
    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
        return 1
    elif nonlinearity == 'tanh':
        return 5.0 / 3
    elif nonlinearity == 'relu':
        return math.sqrt(2.0)
    elif nonlinearity == 'leaky_relu':
        if param is None:
            negative_slope = 0.01
        elif not isinstance(param, bool) and isinstance(
                param, int) or isinstance(param, float):
            # True/False are instances of int, hence check above
            negative_slope = param
        else:
            raise ValueError("negative_slope {} not a valid number".format(
                param))
        return math.sqrt(2.0 / (1 + negative_slope**2))
    elif nonlinearity == 'selu':
        return 3.0 / 4
    else:
        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))


def kaiming_uniform_(tensor,
                     a=0,
                     mode='fan_in',
                     nonlinearity='leaky_relu',
                     reverse=False):
    """
    Modify tensor in place using the Kaiming uniform method.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        a (float): negative slope of the rectifier, only used with 'leaky_relu'. 0 by default.
        mode (str): one of ['fan_in', 'fan_out'], 'fan_in' by default.
        nonlinearity (str): nonlinearity method name
        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
    Return:
        tensor
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    gain = _calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    k = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -k, k)


def kaiming_normal_(tensor,
                    a=0,
                    mode='fan_in',
                    nonlinearity='leaky_relu',
                    reverse=False):
    """
    Modify tensor in place using the Kaiming normal method.

    Args:
        tensor (paddle.Tensor): paddle Tensor
        a (float): negative slope of the rectifier, only used with 'leaky_relu'. 0 by default.
        mode (str): one of ['fan_in', 'fan_out'], 'fan_in' by default.
        nonlinearity (str): nonlinearity method name
        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
    Return:
        tensor
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    gain = _calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    return _no_grad_normal_(tensor, 0, std)
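
# For example (illustrative shape): a Conv2D weight of shape [64, 3, 3, 3] with
# mode='fan_out' and nonlinearity='relu' gives fan = 64 * 3 * 3 = 576, gain = sqrt(2),
# and std = sqrt(2) / sqrt(576) ~= 0.0589; kaiming_uniform_ additionally scales this
# std by sqrt(3) to obtain the sampling bound.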


def linear_init_(module):
    bound = 1 / math.sqrt(module.weight.shape[0])
    uniform_(module.weight, -bound, bound)
    uniform_(module.bias, -bound, bound)


def conv_init_(module):
    bound = 1 / np.sqrt(np.prod(module.weight.shape[1:]))
    uniform_(module.weight, -bound, bound)
    if module.bias is not None:
        uniform_(module.bias, -bound, bound)
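
# Both helpers use bound = 1 / sqrt(fan_in): a paddle nn.Linear weight is stored as
# [in_features, out_features], so shape[0] is fan_in, and for a Conv2D weight
# [cout, cin, kh, kw] the product of shape[1:] is cin * kh * kw. This works out to the
# same bound as PyTorch's default Linear/Conv initialization.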


def bias_init_with_prob(prior_prob=0.01):
    """initialize conv/fc bias value according to a given probability value."""
    bias_init = float(-np.log((1 - prior_prob) / prior_prob))
    return bias_init
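
# The returned bias satisfies sigmoid(bias) == prior_prob, e.g. the default
# prior_prob=0.01 gives -log(0.99 / 0.01) ~= -4.595, a common prior for the
# classification branch of detection heads.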


@paddle.no_grad()
def reset_initialized_parameter(model, include_self=True):
    """
    Reset the initialized parameters of [conv, linear, embedding, bn] sublayers
    using the corresponding default methods below.

    Args:
        model (paddle.nn.Layer): paddle Layer
        include_self (bool): passed to Layer.named_sublayers; whether the layer
            itself is included. True by default.
    Return:
        None
    """
    for _, m in model.named_sublayers(include_self=include_self):
        if isinstance(m, nn.Conv2D):
            k = float(m._groups) / (m._in_channels * m._kernel_size[0] *
                                    m._kernel_size[1])
            k = math.sqrt(k)
            _no_grad_uniform_(m.weight, -k, k)
            if hasattr(m, 'bias') and getattr(m, 'bias') is not None:
                _no_grad_uniform_(m.bias, -k, k)

        elif isinstance(m, nn.Linear):
            k = math.sqrt(1. / m.weight.shape[0])
            _no_grad_uniform_(m.weight, -k, k)
            if hasattr(m, 'bias') and getattr(m, 'bias') is not None:
                _no_grad_uniform_(m.bias, -k, k)

        elif isinstance(m, nn.Embedding):
            _no_grad_normal_(m.weight, mean=0., std=1.)

        elif isinstance(m, (nn.BatchNorm2D, nn.LayerNorm)):
            _no_grad_fill_(m.weight, 1.)
            if hasattr(m, 'bias') and getattr(m, 'bias') is not None:
                _no_grad_fill_(m.bias, 0)
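

# A minimal usage sketch (the layer sizes below are illustrative assumptions, not
# part of the original module):
if __name__ == "__main__":
    conv = nn.Conv2D(3, 8, kernel_size=3, padding=1)
    fc = nn.Linear(16, 4)
    model = nn.Sequential(conv, nn.BatchNorm2D(8))

    # Re-initialize all conv/linear/embedding/bn sublayers with the defaults above.
    reset_initialized_parameter(model)

    # Individual tensors and layers can also be initialized directly.
    kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')
    zeros_(conv.bias)
    linear_init_(fc)
    constant_(fc.bias, bias_init_with_prob(0.01))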