Commit c667e001 authored by root, committed by zhangkeliang

update initializer API, including: NormalInitializer,...

update initializer API, including: NormalInitializer, TruncatedNormalInitializer, UniformInitializer, XavierNormalInitializer, XavierUniformInitializer, NumpyArrayInitializer.
Parent 35074963
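For orientation before the hunks below: a minimal sketch of the renamed API in use. This is an illustration, not part of the diff; it assumes paddle 2.x dygraph mode and the `paddle.nn.initializer` namespace this commit exports.

```python
import paddle

# Hedged sketch of the renamed initializers introduced by this commit;
# XavierUniform is one of the new aliases added to __all__ below.
weight_attr = paddle.framework.ParamAttr(
    initializer=paddle.nn.initializer.XavierUniform())
linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr)
out = linear(paddle.ones(shape=[3, 2], dtype='float32'))
```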
@@ -23,10 +23,12 @@ from . import unique_name
 from .data_feeder import check_variable_and_dtype, check_type, check_dtype

 __all__ = [
-    'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
-    'MSRA', 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer',
-    'TruncatedNormalInitializer', 'XavierInitializer', 'BilinearInitializer',
-    'MSRAInitializer', 'NumpyArrayInitializer', 'set_global_initializer'
+    'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier',
+    'XavierNormal', 'XavierUniform', 'Bilinear', 'MSRA', 'ConstantInitializer',
+    'UniformInitializer', 'NormalInitializer', 'TruncatedNormalInitializer',
+    'XavierInitializer', 'XavierNormalInitializer', 'XavierUniformInitializer',
+    'BilinearInitializer', 'MSRAInitializer', 'NumpyArrayInitializer',
+    'set_global_initializer'
 ]

 _global_weight_initializer_ = None
@@ -174,14 +176,22 @@ class UniformInitializer(Initializer):
             which is generally the width of the square matrix.
         diag_val (float): the value of the diagonal element to be initialized,
             default 1.0. It takes effect only if the diag_num is greater than 0.
+        name (str, optional): The default value is None. Normally there is no need for the user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """

     def __init__(self,
@@ -190,7 +200,8 @@ class UniformInitializer(Initializer):
                  seed=0,
                  diag_num=0,
                  diag_step=0,
-                 diag_val=1.0):
+                 diag_val=1.0,
+                 name=None):
         assert low is not None
         assert high is not None
         assert high >= low
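A reading aid for the `diag_num`/`diag_val` arguments documented above: a NumPy emulation of one plausible interpretation of the Args text. The actual `uniform_random` kernel may behave differently, and `uniform_with_diag` is a hypothetical helper, not Paddle API.

```python
import numpy as np

# Illustration only (an assumption based on the Args text, not the kernel):
# sample uniformly in [low, high), then force the first diag_num diagonal
# entries of a square matrix to diag_val.
def uniform_with_diag(n, low, high, diag_num=0, diag_val=1.0):
    w = np.random.uniform(low, high, size=(n, n))
    for i in range(min(diag_num, n)):
        w[i, i] = diag_val
    return w

w = uniform_with_diag(4, low=-0.5, high=0.5, diag_num=4)  # diagonal == 1.0
```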
@@ -274,27 +285,34 @@ class NormalInitializer(Initializer):
     """Implements the Random Normal(Gaussian) distribution initializer

     Args:
-        loc (float): mean of the normal distribution
-        scale (float): standard deviation of the normal distribution
+        mean (float): mean of the normal distribution
+        std (float): standard deviation of the normal distribution
         seed (int): random seed
+        name (str, optional): The default value is None. Normally there is no need for the user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-            x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32")
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """

-    def __init__(self, loc=0.0, scale=1.0, seed=0):
-        assert loc is not None
-        assert scale is not None
+    def __init__(self, mean=0.0, std=1.0, seed=0, name=None):
+        assert mean is not None
+        assert std is not None
         assert seed is not None
         super(NormalInitializer, self).__init__()
-        self._mean = loc
-        self._std_dev = scale
+        self._mean = mean
+        self._std_dev = std
         self._seed = seed

     def __call__(self, var, block):
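The hunk above renames the constructor keywords; a minimal migration sketch:

```python
import paddle.fluid.initializer as initializer

# Keyword rename shown in this hunk:
#   before: initializer.Normal(loc=0.0, scale=2.0)
#   after:  initializer.Normal(mean=0.0, std=2.0)
init = initializer.Normal(mean=0.0, std=2.0)
```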
@@ -359,26 +377,34 @@ class TruncatedNormalInitializer(Initializer):
     """Implements the Random TruncatedNormal(Gaussian) distribution initializer

     Args:
-        loc (float): mean of the normal distribution
-        scale (float): standard deviation of the normal distribution
+        mean (float): mean of the normal distribution
+        std (float): standard deviation of the normal distribution
         seed (int): random seed
+        name (str, optional): The default value is None. Normally there is no need for the user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[None, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """

-    def __init__(self, loc=0.0, scale=1.0, seed=0):
-        assert loc is not None
-        assert scale is not None
+    def __init__(self, mean=0.0, std=1.0, seed=0, name=None):
+        assert mean is not None
+        assert std is not None
         assert seed is not None
         super(TruncatedNormalInitializer, self).__init__()
-        self._mean = loc
-        self._std_dev = scale
+        self._mean = mean
+        self._std_dev = std
         self._seed = seed

     def __call__(self, var, block):
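For intuition about what "truncated" means here: a NumPy sketch assuming the common TensorFlow-style convention of resampling draws that fall more than two standard deviations from the mean. The operator's actual kernel may differ.

```python
import numpy as np

# Assumed convention: resample anything more than two standard deviations
# from the mean, so all values end up in [mean - 2*std, mean + 2*std].
def truncated_normal(shape, mean=0.0, std=1.0):
    samples = np.random.normal(mean, std, size=shape)
    out_of_range = np.abs(samples - mean) > 2 * std
    while out_of_range.any():
        samples[out_of_range] = np.random.normal(
            mean, std, size=out_of_range.sum())
        out_of_range = np.abs(samples - mean) > 2 * std
    return samples

w = truncated_normal((2, 2), mean=0.0, std=2.0)
```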
@@ -570,6 +596,124 @@ class XavierInitializer(Initializer):
         return op

+class XavierNormalInitializer(XavierInitializer):
+    """
+    This class implements the Xavier weight initializer from the paper
+    `Understanding the difficulty of training deep feedforward neural
+    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio, using a normal distribution.
+
+    This initializer is designed to keep the scale of the gradients
+    approximately the same in all layers. The mean is 0 and the standard
+    deviation is
+
+    .. math::
+
+        \sqrt{\\frac{2.0}{fan\_in + fan\_out}}
+
+    Args:
+        seed (int): random seed
+
+    Note:
+        fan_in and fan_out are inferred from the variable being initialized.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierNormal())
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierNormal())
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
+    """
+
+    def __init__(self, seed=0):
+        assert seed is not None
+        super(XavierNormalInitializer, self).__init__()
+        self._uniform = False
+        self._fan_in = None
+        self._fan_out = None
+        self._seed = seed
+
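A quick sanity check of the docstring formula, using the [5, 10] weight shape from `TestXavierNormalInitializer` below:

```python
import numpy as np

# std = sqrt(2 / (fan_in + fan_out)) for a [5, 10] weight.
fan_in, fan_out = 5, 10
std = np.sqrt(2.0 / (fan_in + fan_out))
print(std)  # ~0.3651, the value the gaussian_random op is checked against
```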
+class XavierUniformInitializer(XavierInitializer):
+    """
+    This class implements the Xavier weight initializer from the paper
+    `Understanding the difficulty of training deep feedforward neural
+    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio, using a uniform distribution.
+
+    This initializer is designed to keep the scale of the gradients
+    approximately the same in all layers. The range is [-x, x], where
+
+    .. math::
+
+        x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}}
+
+    Args:
+        seed (int): random seed
+
+    Note:
+        fan_in and fan_out are inferred from the variable being initialized.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from paddle import nn
+
+            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierUniform())
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None, initializer=paddle.nn.initializer.XavierUniform())
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
+    """
+
+    def __init__(self, seed=0):
+        assert seed is not None
+        super(XavierUniformInitializer, self).__init__()
+        self._uniform = True
+        self._fan_in = None
+        self._fan_out = None
+        self._seed = seed
+
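The matching check for the uniform bound, again with the [5, 10] shape used in `TestXavierUniformInitializer` below:

```python
import numpy as np

# x = sqrt(6 / (fan_in + fan_out)); weights are drawn from [-x, x].
fan_in, fan_out = 5, 10
limit = np.sqrt(6.0 / (fan_in + fan_out))
print(-limit, limit)  # ~(-0.6325, 0.6325)
```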
 class MSRAInitializer(Initializer):
     """Implements the MSRA initializer a.k.a. Kaiming Initializer
@@ -835,6 +979,8 @@ class NumpyArrayInitializer(Initializer):

     Args:
         value (numpy.ndarray): numpy array to initialize the variable
+        name (str, optional): The default value is None. Normally there is no need for the user to set this
+            property. For more information, please refer to :ref:`api_guide_Name`.

     Returns:
         A Tensor variable initialized by numpy.
@@ -842,14 +988,20 @@ class NumpyArrayInitializer(Initializer):

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-            import numpy
-            x = fluid.data(name="x", shape=[2, 1], dtype='float32')
-            fc = fluid.layers.fc(input=x, size=10,
-                param_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
+            import paddle
+            from paddle import nn
+            import numpy as np
+
+            data = paddle.ones(shape=[1, 2], dtype='float32')
+            weight_attr = paddle.framework.ParamAttr(name="linear_weight", learning_rate=1.0,
+                trainable=False, regularizer=None,
+                initializer=paddle.nn.initializer.NumpyArrayInitializer(np.array([[2., 2.], [2., 2.]])))
+            bias_attr = paddle.framework.ParamAttr(name="linear_bias", learning_rate=1.0,
+                trainable=False, regularizer=None,
+                initializer=paddle.nn.initializer.NumpyArrayInitializer(np.array([2., 2.])))
+            linear = nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
+            res = linear(data)
     """

-    def __init__(self, value):
+    def __init__(self, value, name=None):
         import numpy
         assert isinstance(value, numpy.ndarray)
         super(NumpyArrayInitializer, self).__init__()
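One point worth making explicit (my reading of the example above, not stated in the diff): the numpy array's shape must match the parameter it initializes.

```python
import numpy as np
import paddle

# Assumed constraint: a 2x2 weight and a length-2 bias for Linear(2, 2).
w_init = paddle.nn.initializer.NumpyArrayInitializer(
    np.array([[2., 2.], [2., 2.]], dtype='float32'))
b_init = paddle.nn.initializer.NumpyArrayInitializer(
    np.array([2., 2.], dtype='float32'))
linear = paddle.nn.Linear(
    2, 2,
    weight_attr=paddle.framework.ParamAttr(initializer=w_init),
    bias_attr=paddle.framework.ParamAttr(initializer=b_init))
```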
@@ -1006,5 +1158,7 @@
 Uniform = UniformInitializer
 Normal = NormalInitializer
 TruncatedNormal = TruncatedNormalInitializer
 Xavier = XavierInitializer
+XavierNormal = XavierNormalInitializer
+XavierUniform = XavierUniformInitializer
 MSRA = MSRAInitializer
 Bilinear = BilinearInitializer
@@ -17,11 +17,14 @@ from __future__ import print_function
 import numpy as np
 import unittest

+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.framework as framework
 import paddle.fluid.initializer as initializer
 from paddle.fluid.core import VarDesc

+paddle.enable_static()
+
 DELTA = 0.00001
@@ -345,6 +348,98 @@ class TestXavierInitializer(unittest.TestCase):
         self.assertTrue(check_cast_op(block.ops[1]))

+class TestXavierUniformInitializer(unittest.TestCase):
+    def test_uniform_xavier_initializer(self):
+        """Test Xavier initializer with uniform distribution
+        for matrix multiply.
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param",
+                initializer=initializer.XavierUniformInitializer())
+        self.assertEqual(len(block.ops), 1)
+        init_op = block.ops[0]
+        self.assertEqual(init_op.type, 'uniform_random')
+        limit = np.sqrt(6.0 / (param.shape[0] + param.shape[1]))
+        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
+        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
+        self.assertEqual(init_op.attr('seed'), 0)
+
+    def test_uniform_xavier_initializer_conv(self):
+        """Test Xavier initializer with uniform distribution
+        for convolutions.
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
+                lod_level=0,
+                name="param",
+                initializer=initializer.XavierUniformInitializer())
+        self.assertEqual(len(block.ops), 1)
+        init_op = block.ops[0]
+        self.assertEqual(init_op.type, 'uniform_random')
+        receptive_field_size = float(15 * 20)
+        limit = np.sqrt(6.0 / (
+            (param.shape[0] + param.shape[1]) * receptive_field_size))
+        self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA)
+        self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA)
+        self.assertEqual(init_op.attr('seed'), 0)
+
+
+class TestXavierNormalInitializer(unittest.TestCase):
+    def test_normal_xavier_initializer(self):
+        """Test Xavier initializer with normal distribution
+        for matrix multiply.
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10],
+                lod_level=0,
+                name="param",
+                initializer=initializer.XavierNormalInitializer())
+        self.assertEqual(len(block.ops), 1)
+        init_op = block.ops[0]
+        self.assertEqual(init_op.type, 'gaussian_random')
+        std = np.sqrt(2.0 / (param.shape[0] + param.shape[1]))
+        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
+        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
+        self.assertEqual(init_op.attr('seed'), 0)
+
+    def test_normal_xavier_initializer_conv(self):
+        """Test Xavier initializer with normal distribution
+        for convolutions.
+        """
+        program = framework.Program()
+        block = program.global_block()
+        for _ in range(2):
+            param = block.create_parameter(
+                dtype="float32",
+                shape=[5, 10, 15, 20],
+                lod_level=0,
+                name="param",
+                initializer=initializer.XavierNormalInitializer())
+        self.assertEqual(len(block.ops), 1)
+        init_op = block.ops[0]
+        self.assertEqual(init_op.type, 'gaussian_random')
+        receptive_field_size = float(15 * 20)
+        std = np.sqrt(2.0 / (
+            (param.shape[0] + param.shape[1]) * receptive_field_size))
+        self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA)
+        self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA)
+        self.assertEqual(init_op.attr('seed'), 0)
+
 class TestMSRAInitializer(unittest.TestCase):
     def test_uniform_msra_initializer(self):
         """Test MSRA initializer with uniform distribution on
@@ -559,7 +654,7 @@ class TestSetGlobalInitializer(unittest.TestCase):
             initializer.Uniform(
                 low=-0.5, high=0.5),
             bias_init=initializer.Normal(
-                loc=0.0, scale=2.0))
+                mean=0.0, std=2.0))
         with fluid.program_guard(main_prog, startup_prog):
             x = fluid.data(name="x", shape=[1, 3, 32, 32])
             # default initializer of bias in layers.conv2d is ConstantInitializer
...
@@ -20,7 +20,9 @@ from ...fluid.initializer import MSRA #DEFINE_ALIAS
 from ...fluid.initializer import Normal #DEFINE_ALIAS
 from ...fluid.initializer import TruncatedNormal #DEFINE_ALIAS
 from ...fluid.initializer import Uniform #DEFINE_ALIAS
-from ...fluid.initializer import Xavier #DEFINE_ALIAS
+from ...fluid.initializer import XavierNormal #DEFINE_ALIAS
+from ...fluid.initializer import XavierUniform #DEFINE_ALIAS
+from ...fluid.initializer import NumpyArrayInitializer #DEFINE_ALIAS

 __all__ = [
     'Bilinear',
@@ -29,5 +31,7 @@ __all__ = [
     'Normal',
     'TruncatedNormal',
     'Uniform',
-    'Xavier',
+    'XavierNormal',
+    'XavierUniform',
+    'NumpyArrayInitializer',
 ]
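A sketch of consuming these aliases, assuming `paddle.nn` re-exports this module as `paddle.nn.initializer`:

```python
# Imports resolve through the DEFINE_ALIAS re-exports above.
from paddle.nn.initializer import XavierNormal, XavierUniform, NumpyArrayInitializer

init = XavierUniform()
```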