diff --git a/ppdet/modeling/initializer.py b/ppdet/modeling/initializer.py
new file mode 100644
index 0000000000000000000000000000000000000000..96f5eee2f8898e14aea518f1354d829c46a0392d
--- /dev/null
+++ b/ppdet/modeling/initializer.py
@@ -0,0 +1,294 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.nn as nn
+
+__all__ = [
+    'uniform_',
+    'normal_',
+    'constant_',
+    'ones_',
+    'zeros_',
+    'xavier_uniform_',
+    'xavier_normal_',
+    'kaiming_uniform_',
+    'kaiming_normal_',
+    'reset_initialized_parameter',
+]
+
+
+def _no_grad_uniform_(tensor, a, b):
+    with paddle.no_grad():
+        tensor.set_value(
+            paddle.uniform(
+                shape=tensor.shape, dtype=tensor.dtype, min=a, max=b))
+    return tensor
+
+
+def _no_grad_normal_(tensor, mean=0., std=1.):
+    with paddle.no_grad():
+        tensor.set_value(paddle.normal(mean=mean, std=std, shape=tensor.shape))
+    return tensor
+
+
+def _no_grad_fill_(tensor, value=0):
+    with paddle.no_grad():
+        tensor.set_value(
+            paddle.full(
+                shape=tensor.shape, fill_value=value, dtype=tensor.dtype))
+    return tensor
+
+
+def uniform_(tensor, a, b):
+    """
+    Modify tensor in place with values drawn from the uniform distribution U(a, b).
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        a (float|int): min value.
+        b (float|int): max value.
+    Return:
+        tensor
+    """
+    return _no_grad_uniform_(tensor, a, b)
+
+
+def normal_(tensor, mean=0., std=1.):
+    """
+    Modify tensor in place with values drawn from the normal distribution N(mean, std^2).
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        mean (float|int): mean value.
+        std (float|int): std value.
+    Return:
+        tensor
+    """
+    return _no_grad_normal_(tensor, mean, std)
+
+
+def constant_(tensor, value=0):
+    """
+    Modify tensor in place, filling it with a constant value.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        value (float|int): value to fill tensor.
+    Return:
+        tensor
+    """
+    return _no_grad_fill_(tensor, value)
+
+
+def ones_(tensor):
+    """
+    Modify tensor in place, filling it with ones.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+    Return:
+        tensor
+    """
+    return _no_grad_fill_(tensor, 1)
+
+
+def zeros_(tensor):
+    """
+    Modify tensor in place, filling it with zeros.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+    Return:
+        tensor
+    """
+    return _no_grad_fill_(tensor, 0)
+
+
+def _calculate_fan_in_and_fan_out(tensor, reverse=False):
+    """
+    Calculate (fan_in, fan_out) for tensor
+
+    Args:
+        tensor (Tensor): paddle.Tensor
+        reverse (bool): tensor data format order, False by default as [fout, fin, ...], e.g.
+            conv.weight [cout, cin, kh, kw] is False; linear.weight [cin, cout] is True.
+
+    Return:
+        Tuple[fan_in, fan_out]
+    """
+    if tensor.ndim < 2:
+        raise ValueError(
+            "Fan in and fan out cannot be computed for tensor with fewer than 2 dimensions"
+        )
+
+    if reverse:
+        num_input_fmaps, num_output_fmaps = tensor.shape[0], tensor.shape[1]
+    else:
+        num_input_fmaps, num_output_fmaps = tensor.shape[1], tensor.shape[0]
+
+    receptive_field_size = 1
+    if tensor.ndim > 2:
+        receptive_field_size = math.prod(tensor.shape[2:])
+
+    fan_in = num_input_fmaps * receptive_field_size
+    fan_out = num_output_fmaps * receptive_field_size
+
+    return fan_in, fan_out
+
+
+def xavier_uniform_(tensor, gain=1., reverse=False):
+    """
+    Modify tensor in place using the Xavier uniform initialization.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        gain (float): scaling factor, 1. by default.
+        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
+    Return:
+        tensor
+    """
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+    k = math.sqrt(3.0) * std
+    return _no_grad_uniform_(tensor, -k, k)
+
+
+def xavier_normal_(tensor, gain=1., reverse=False):
+    """
+    Modify tensor in place using the Xavier normal initialization.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        gain (float): scaling factor, 1. by default.
+        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
+    Return:
+        tensor
+    """
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
+    std = gain * math.sqrt(2.0 / float(fan_in + fan_out))
+    return _no_grad_normal_(tensor, 0, std)
+
+
+# reference: https://pytorch.org/docs/stable/_modules/torch/nn/init.html
+def _calculate_correct_fan(tensor, mode, reverse=False):
+    mode = mode.lower()
+    valid_modes = ['fan_in', 'fan_out']
+    if mode not in valid_modes:
+        raise ValueError("Mode {} not supported, please use one of {}".format(
+            mode, valid_modes))
+
+    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse)
+
+    return fan_in if mode == 'fan_in' else fan_out
+
+
+def _calculate_gain(nonlinearity, param=None):
+    linear_fns = [
+        'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
+        'conv_transpose2d', 'conv_transpose3d'
+    ]
+    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
+        return 1
+    elif nonlinearity == 'tanh':
+        return 5.0 / 3
+    elif nonlinearity == 'relu':
+        return math.sqrt(2.0)
+    elif nonlinearity == 'leaky_relu':
+        if param is None:
+            negative_slope = 0.01
+        elif (not isinstance(param, bool) and
+              isinstance(param, int)) or isinstance(param, float):
+            # True/False are instances of int, hence the bool check above
+            negative_slope = param
+        else:
+            raise ValueError("negative_slope {} not a valid number".format(
+                param))
+        return math.sqrt(2.0 / (1 + negative_slope**2))
+    elif nonlinearity == 'selu':
+        return 3.0 / 4
+    else:
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+
+
+def kaiming_uniform_(tensor,
+                     a=0,
+                     mode='fan_in',
+                     nonlinearity='leaky_relu',
+                     reverse=False):
+    """
+    Modify tensor in place using the Kaiming uniform initialization.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        a (float): the negative slope of the rectifier after this layer, only used with 'leaky_relu'.
+        mode (str): ['fan_in', 'fan_out'], 'fan_in' by default
+        nonlinearity (str): nonlinearity method name
+        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
+    Return:
+        tensor
+    """
+    fan = _calculate_correct_fan(tensor, mode, reverse)
+    gain = _calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    k = math.sqrt(3.0) * std
+    return _no_grad_uniform_(tensor, -k, k)
+
+
+def kaiming_normal_(tensor,
+                    a=0,
+                    mode='fan_in',
+                    nonlinearity='leaky_relu',
+                    reverse=False):
+    """
+    Modify tensor in place using the Kaiming normal initialization.
+    Args:
+        tensor (paddle.Tensor): paddle Tensor
+        a (float): the negative slope of the rectifier after this layer, only used with 'leaky_relu'.
+        mode (str): ['fan_in', 'fan_out'], 'fan_in' by default
+        nonlinearity (str): nonlinearity method name
+        reverse (bool): tensor data format order, False by default as [fout, fin, ...].
+    Return:
+        tensor
+    """
+    fan = _calculate_correct_fan(tensor, mode, reverse)
+    gain = _calculate_gain(nonlinearity, a)
+    std = gain / math.sqrt(fan)
+    return _no_grad_normal_(tensor, 0, std)
+
+
+@paddle.no_grad()
+def reset_initialized_parameter(model, include_self=True):
+    """
+    Reset the initialized parameters of [conv, linear, embedding, bn]
+    sublayers using the default schemes implemented above.
+
+    Args:
+        model (paddle.nn.Layer): paddle Layer
+        include_self (bool: True): passed to Layer.named_sublayers;
+            indicates whether the layer itself is included.
+    Return:
+        None
+    """
+    for _, m in model.named_sublayers(include_self=include_self):
+        if isinstance(m, nn.Conv2D):
+            # PyTorch's Conv2d default: U(-k, k) with k = sqrt(groups / fan_in).
+            k = float(m._groups) / (m._in_channels * m._kernel_size[0] *
+                                    m._kernel_size[1])
+            k = math.sqrt(k)
+            _no_grad_uniform_(m.weight, -k, k)
+            if getattr(m, 'bias', None) is not None:
+                _no_grad_uniform_(m.bias, -k, k)
+
+        elif isinstance(m, nn.Linear):
+            # paddle.nn.Linear stores weight as [in_features, out_features],
+            # so shape[0] is the fan-in.
+            k = math.sqrt(1. / m.weight.shape[0])
+            _no_grad_uniform_(m.weight, -k, k)
+            if getattr(m, 'bias', None) is not None:
+                _no_grad_uniform_(m.bias, -k, k)
+
+        elif isinstance(m, nn.Embedding):
+            _no_grad_normal_(m.weight, mean=0., std=1.)
+
+        elif isinstance(m, (nn.BatchNorm2D, nn.LayerNorm)):
+            _no_grad_fill_(m.weight, 1.)
+            if getattr(m, 'bias', None) is not None:
+                _no_grad_fill_(m.bias, 0)
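
For context, a minimal usage sketch of the in-place initializers this diff adds (not part of the diff itself; the layers and their shapes are hypothetical, chosen only to illustrate the two weight layouts the `reverse` flag distinguishes):

```python
import paddle.nn as nn

from ppdet.modeling.initializer import kaiming_normal_, xavier_uniform_, zeros_

# Hypothetical layers for illustration.
conv = nn.Conv2D(3, 16, kernel_size=3)  # weight: [cout, cin, kh, kw]
fc = nn.Linear(256, 10)                 # weight: [cin, cout]

# Conv weights already follow the default [fout, fin, ...] order.
kaiming_normal_(conv.weight, mode='fan_out', nonlinearity='relu')
zeros_(conv.bias)

# Paddle stores Linear weights transposed relative to conv, hence reverse=True
# so that fan_in/fan_out are computed from the correct axes.
xavier_uniform_(fc.weight, reverse=True)
zeros_(fc.bias)
```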
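Similarly, a sketch of `reset_initialized_parameter`, which walks `named_sublayers` and re-applies the defaults wholesale; the model below is a stand-in assumed for illustration:

```python
import paddle.nn as nn

from ppdet.modeling.initializer import reset_initialized_parameter

# Hypothetical model; only the Conv2D, BatchNorm2D and Linear sublayers match
# a branch in reset_initialized_parameter, the rest are left untouched.
model = nn.Sequential(
    nn.Conv2D(3, 16, kernel_size=3),
    nn.BatchNorm2D(16),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(16 * 30 * 30, 10))

reset_initialized_parameter(model)
```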