diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index ce975ea8423263bd099fda5b2f12add9aab088d4..ac97a8c17fa2e31dbbe7e9a218cf496476d15d0e 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -23,10 +23,12 @@ from . import unique_name
 from .data_feeder import check_variable_and_dtype, check_type, check_dtype
 
 __all__ = [
-    'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
-    'MSRA', 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer',
-    'TruncatedNormalInitializer', 'XavierInitializer', 'BilinearInitializer',
-    'MSRAInitializer', 'NumpyArrayInitializer', 'set_global_initializer'
+    'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier',
+    'XavierNormal', 'XavierUniform', 'Bilinear', 'MSRA', 'ConstantInitializer',
+    'UniformInitializer', 'NormalInitializer', 'TruncatedNormalInitializer',
+    'XavierInitializer', 'XavierNormalInitializer', 'XavierUniformInitializer',
+    'BilinearInitializer', 'MSRAInitializer', 'NumpyArrayInitializer',
+    'set_global_initializer'
 ]
 
 _global_weight_initializer_ = None
@@ -174,14 +176,22 @@ class UniformInitializer(Initializer):
             which is generally the width of the square matrix.
         diag_val (float): the value of the diagonal element to be initialized,
             default 1.0. It takes effect only if the diag_num is greater than 0.
+        name(str, optional): The default value is None. Normally there is no need for user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.
 
     Examples:
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """
 
     def __init__(self,
@@ -190,7 +200,8 @@ class UniformInitializer(Initializer):
                  seed=0,
                  diag_num=0,
                  diag_step=0,
-                 diag_val=1.0):
+                 diag_val=1.0,
+                 name=None):
         assert low is not None
         assert high is not None
         assert high >= low
@@ -274,27 +285,34 @@ class NormalInitializer(Initializer):
     """Implements the Random Normal(Gaussian) distribution initializer
 
     Args:
-        loc (float): mean of the normal distribution
-        scale (float): standard deviation of the normal distribution
+        mean (float): mean of the normal distribution
+        std (float): standard deviation of the normal distribution
         seed (int): random seed
+        name(str, optional): The default value is None. Normally there is no need for user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.
 
     Examples:
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32")
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
+            import paddle
+            from paddle import nn
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """
 
-    def __init__(self, loc=0.0, scale=1.0, seed=0):
-        assert loc is not None
-        assert scale is not None
+    def __init__(self, mean=0.0, std=1.0, seed=0, name=None):
+        assert mean is not None
+        assert std is not None
         assert seed is not None
         super(NormalInitializer, self).__init__()
-        self._mean = loc
-        self._std_dev = scale
+        self._mean = mean
+        self._std_dev = std
         self._seed = seed
 
     def __call__(self, var, block):
@@ -359,26 +377,34 @@ class TruncatedNormalInitializer(Initializer):
     """Implements the Random TruncatedNormal(Gaussian) distribution initializer
 
     Args:
-        loc (float): mean of the normal distribution
-        scale (float): standard deviation of the normal distribution
+        mean (float): mean of the normal distribution
+        std (float): standard deviation of the normal distribution
         seed (int): random seed
+        name(str, optional): The default value is None. Normally there is no need for user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.
 
     Examples:
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """
 
-    def __init__(self, loc=0.0, scale=1.0, seed=0):
-        assert loc is not None
-        assert scale is not None
+    def __init__(self, mean=0.0, std=1.0, seed=0, name=None):
+        assert mean is not None
+        assert std is not None
         assert seed is not None
         super(TruncatedNormalInitializer, self).__init__()
-        self._mean = loc
-        self._std_dev = scale
+        self._mean = mean
+        self._std_dev = std
         self._seed = seed
 
     def __call__(self, var, block):
@@ -570,6 +596,124 @@ class XavierInitializer(Initializer):
         return op
 
 
+class XavierNormalInitializer(XavierInitializer):
+    """
+    This class implements the Xavier weight initializer with a normal
+    distribution, from the paper
+    `Understanding the difficulty of training deep feedforward neural networks
+    <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio.
+
+    This initializer is designed to keep the scale of the gradients
+    approximately the same in all the layers. It draws samples from a normal
+    distribution whose mean is 0 and whose standard deviation is
+
+    .. math::
+
+        \sqrt{\\frac{2.0}{fan\_in + fan\_out}}
+
+    Args:
+        seed (int): random seed
+
+    Note:
+        fan_in and fan_out are inferred automatically from the variable
+        being initialized.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierNormal())
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierNormal())
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
+
+    """
+
+    def __init__(self, seed=0):
+        assert seed is not None
+        super(XavierNormalInitializer, self).__init__()
+        self._uniform = False
+        self._fan_in = None
+        self._fan_out = None
+        self._seed = seed
+
+
+class XavierUniformInitializer(XavierInitializer):
+    """
+    This class implements the Xavier weight initializer with a uniform
+    distribution, from the paper
+    `Understanding the difficulty of training deep feedforward neural networks
+    <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio.
+
+    This initializer is designed to keep the scale of the gradients
+    approximately the same in all the layers. It draws samples from a uniform
+    distribution within [-x, x], where
+
+    .. math::
+
+        x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
+
+    Args:
+        seed (int): random seed
+
+    Note:
+        fan_in and fan_out are inferred automatically from the variable
+        being initialized.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierUniform())
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierUniform())
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
+
+    """
+
+    def __init__(self, seed=0):
+        assert seed is not None
+        super(XavierUniformInitializer, self).__init__()
+        self._uniform = True
+        self._fan_in = None
+        self._fan_out = None
+        self._seed = seed
+
+
 class MSRAInitializer(Initializer):
     """Implements the MSRA initializer a.k.a. Kaiming Initializer
 
@@ -835,6 +979,8 @@ class NumpyArrayInitializer(Initializer):
 
     Args:
         value (numpy): numpy array to initialize the variable
+        name(str, optional): The default value is None. Normally there is no need for user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.
 
     Returns:
         A Tensor variable initialized by numpy.
 
@@ -842,14 +988,20 @@
     Examples:
         .. code-block:: python
 
-            import paddle.fluid as fluid
-            import numpy
-            x = fluid.data(name="x", shape=[2, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
+            import paddle
+            from paddle import nn
+            import numpy as np
+
+            data = paddle.ones(shape=[1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.NumpyArrayInitializer(np.array([[2, 2], [2, 2]])))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.NumpyArrayInitializer(np.array([2, 2])))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """
 
-    def __init__(self, value):
+    def __init__(self, value, name=None):
         import numpy
         assert isinstance(value, numpy.ndarray)
         super(NumpyArrayInitializer, self).__init__()
@@ -1006,5 +1158,7 @@ Uniform = UniformInitializer
 Normal = NormalInitializer
 TruncatedNormal = TruncatedNormalInitializer
 Xavier = XavierInitializer
+XavierNormal = XavierNormalInitializer
+XavierUniform = XavierUniformInitializer
 MSRA = MSRAInitializer
 Bilinear = BilinearInitializer
diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py
index 4c76af616f4a2b7a8b78967f7271270aa3875cad..82356790ec0df21fef10d75b14017b8c7df34735 100644
--- a/python/paddle/fluid/tests/unittests/test_initializer.py
+++ b/python/paddle/fluid/tests/unittests/test_initializer.py
@@ -17,11 +17,14 @@ from __future__ import print_function
 import numpy as np
 import unittest
 
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
 import paddle.fluid.initializer as initializer
 from paddle.fluid.core import VarDesc
 
+paddle.enable_static()
+
 DELTA = 0.00001
 
 
@@ -345,6 +348,98 @@ class TestXavierInitializer(unittest.TestCase):
         self.assertTrue(check_cast_op(block.ops[1]))
 
+
+class TestXavierUniformInitializer(unittest.TestCase):
+    def test_uniform_xavier_initializer(self):
+        """Test Xavier initializer with uniform distribution on
+           for matrix multiply.
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param",
+                initializer=initializer.XavierUniformInitializer())
+        self.assertEqual(len(block.ops), 1)
+        init_op = block.ops[0]
+        self.assertEqual(init_op.type, 'uniform_random')
+        limit = np.sqrt(6.0 / (param.shape[0] + param.shape[1]))
+        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
+        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
+        self.assertEqual(init_op.attr('seed'), 0)
+
+    def test_uniform_xavier_initializer_conv(self):
+        """Test Xavier initializer with uniform distribution on
+           for convolutions.
+ """ + program = framework.Program() + block = program.global_block() + for _ in range(2): + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.XavierUniformInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + receptive_field_size = float(15 * 20) + limit = np.sqrt(6.0 / ( + (param.shape[0] + param.shape[1]) * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + +class TestXavierNormalInitializer(unittest.TestCase): + def test_normal_xavier_initializer(self): + """Test Xavier initializer with normal distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + for _ in range(2): + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.XavierNormalInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + std = np.sqrt(2.0 / (param.shape[0] + param.shape[1])) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_xavier_initializer_conv(self): + """Test Xavier initializer with normal distribution on + for convolutions. + """ + program = framework.Program() + block = program.global_block() + for _ in range(2): + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.XavierNormalInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + receptive_field_size = float(15 * 20) + std = np.sqrt(2.0 / ( + (param.shape[0] + param.shape[1]) * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + class TestMSRAInitializer(unittest.TestCase): def test_uniform_msra_initializer(self): """Test MSRA initializer with uniform distribution on @@ -559,7 +654,7 @@ class TestSetGlobalInitializer(unittest.TestCase): initializer.Uniform( low=-0.5, high=0.5), bias_init=initializer.Normal( - loc=0.0, scale=2.0)) + mean=0.0, std=2.0)) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) # default initilizer of bias in layers.conv2d is ConstantInitializer diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index 489f324868a3ed345c021ae8d78285266cacafb1..050ea1dbf09d54631dba0feddda48c0c4598c09e 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -20,7 +20,9 @@ from ...fluid.initializer import MSRA #DEFINE_ALIAS from ...fluid.initializer import Normal #DEFINE_ALIAS from ...fluid.initializer import TruncatedNormal #DEFINE_ALIAS from ...fluid.initializer import Uniform #DEFINE_ALIAS -from ...fluid.initializer import Xavier #DEFINE_ALIAS +from ...fluid.initializer import XavierNormal #DEFINE_ALIAS +from ...fluid.initializer import XavierUniform #DEFINE_ALIAS +from ...fluid.initializer import NumpyArrayInitializer #DEFINE_ALIAS __all__ = [ 'Bilinear', @@ -29,5 
@@ -29,5 +31,7 @@ __all__ = [
     'Normal',
     'TruncatedNormal',
     'Uniform',
-    'Xavier',
+    'XavierNormal',
+    'XavierUniform',
+    'NumpyArrayInitializer',
 ]
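
Reviewer note (not part of the patch): a minimal usage sketch of the new paddle.nn.initializer.XavierNormal and paddle.nn.initializer.XavierUniform aliases exported above. It assumes Paddle 2.0's default imperative (dygraph) mode and the paddle.framework.ParamAttr / paddle.nn.Linear APIs already used in the docstring examples; the variable names are illustrative only.

    import paddle
    from paddle import nn

    # Xavier/Glorot uniform init for the weight, Xavier/Glorot normal init for the bias.
    weight_attr = paddle.framework.ParamAttr(
        initializer=paddle.nn.initializer.XavierUniform())
    bias_attr = paddle.framework.ParamAttr(
        initializer=paddle.nn.initializer.XavierNormal())

    # A 2-in / 2-out linear layer whose parameters are created with the new initializers.
    linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
    out = linear(paddle.ones(shape=[3, 2], dtype='float32'))

    # Inspect the Xavier-initialized parameters.
    print(linear.weight.numpy())
    print(linear.bias.numpy())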