Commit 86092a97 authored by qiaolongfei

add doc for XavierInitializer

Parent 69d568bd
@@ -21,7 +21,8 @@ from core import VarDesc
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
     'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
-    'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
+    'NormalInitializer', 'XavierInitializer', 'BilinearInitializer',
+    'MSRAInitializer'
 ]
 _force_init_on_cpu_ = False
@@ -246,39 +247,49 @@ class NormalInitializer(Initializer):
 class XavierInitializer(Initializer):
-    """Implements the Xavier initializer
+    """
     This class implements the Xavier weight initializer from the paper
-    Understanding the difficulty of training deep feedforward neural
-    networks[1] by Xavier Glorot and Yoshua Bengio.
+    `Understanding the difficulty of training deep feedforward neural
+    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
+    by Xavier Glorot and Yoshua Bengio.

     This initializer is designed to keep the scale of the gradients
     approximately the same in all the layers. In case of Uniform distribution,
-    the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
+    the range is [-x, x], where
+
+    .. math::
+
+        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}
+
     In case of Normal distribution, the mean is 0 and the standard deviation
-    is sqrt(2 / (fan_in + fan_out)).
+    is
+
+    .. math::
+
+        \sqrt{\frac{2.0}{fan\_in + fan\_out}}

-    References:
-        [1] Understanding the difficulty of training deep feedforward neural
-            networks. International conference on artificial intelligence and
-            statistics.
-            (http://proceedings.mlr.press/v9/glorot10a.html)
-    """
-
-    def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
-        """Constructor for XavierInitializer
-
-        Args:
-            uniform: whether to use uniform or normal distribution
-            fan_in: fan_in for Xavier initialization. If None, it is
-                    inferred from the variable.
-            fan_out: fan_out for Xavier initialization. If None, it is
-                     inferred from the variable.
-            seed: random seed
-
-        Note: It is recommended to set fan_in and fan_out to None for
-              most cases.
-        """
+    Args:
+        uniform (bool): whether to use uniform or normal distribution
+        fan_in (float): fan_in for Xavier initialization. If None, it is
+                inferred from the variable.
+        fan_out (float): fan_out for Xavier initialization. If None, it is
+                inferred from the variable.
+        seed (int): random seed
+
+    Note:
+        It is recommended to set fan_in and fan_out to None for most cases.
+
+    Examples:
+        .. code-block:: python
+
+            fc = fluid.layers.fc(
+                input=queries, size=10,
+                param_attr=fluid.initializer.Xavier(uniform=False))
+    """
+
+    def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0):
         assert uniform is not None
         assert seed is not None
         super(XavierInitializer, self).__init__()
...
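For reference, the two formulas in the updated docstring are easy to check with a short numpy sketch. The xavier() helper below is illustrative only, not Paddle's API:

import numpy as np

def xavier(fan_in, fan_out, uniform=True, rng=np.random):
    """Draw a (fan_in, fan_out) weight matrix per the documented formulas."""
    if uniform:
        # Uniform variant: range is [-x, x] with x = sqrt(6 / (fan_in + fan_out))
        x = np.sqrt(6.0 / (fan_in + fan_out))
        return rng.uniform(-x, x, size=(fan_in, fan_out))
    # Normal variant: mean 0, std = sqrt(2 / (fan_in + fan_out))
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return rng.normal(0.0, std, size=(fan_in, fan_out))

w = xavier(fan_in=256, fan_out=10, uniform=False)
print(w.std())  # close to sqrt(2 / 266), about 0.0867

Both variants give the weights variance 2 / (fan_in + fan_out) (a uniform distribution on [-x, x] has variance x**2 / 3), which is what keeps the gradient scale roughly constant across layers.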