Unverified commit e176cc40, authored by Jackwaterveg, committed by GitHub

[Initializer] Improve MSRAInitializer (#43334)

* improve MSRAInitializer

* improve the doc
Parent 4b3e8d56
@@ -676,20 +676,23 @@ class MSRAInitializer(Initializer):
     .. math::

-        x = \sqrt{\\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}

     In case of Normal distribution, the mean is 0 and the standard deviation
     is

     .. math::

-        \sqrt{\\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}

     Args:
         uniform (bool): whether to use uniform or normal distribution
-        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
         seed (int32): random seed
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
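With the default nonlinearity='relu', the new formulas reduce to the old ones: the standard Kaiming gain for relu is sqrt(2), so gain * sqrt(3 / fan_in) equals the old sqrt(6 / fan_in) uniform bound and gain / sqrt(fan_in) equals the old sqrt(2 / fan_in) std. A quick numeric check (assuming calculate_gain('relu') returns the standard sqrt(2)):

    import math

    fan_in = 64
    gain = math.sqrt(2.0)  # standard Kaiming gain for relu (assumed)

    # uniform bound: new formula matches the old sqrt(6/fan_in) for relu
    assert abs(gain * math.sqrt(3.0 / fan_in) - math.sqrt(6.0 / fan_in)) < 1e-12
    # normal std: new formula matches the old sqrt(2/fan_in) for relu
    assert abs(gain / math.sqrt(fan_in) - math.sqrt(2.0 / fan_in)) < 1e-12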
@@ -706,7 +709,12 @@ class MSRAInitializer(Initializer):
""" """
def __init__(self, uniform=True, fan_in=None, seed=0): def __init__(self,
uniform=True,
fan_in=None,
seed=0,
negative_slope=0,
nonlinearity='relu'):
"""Constructor for MSRAInitializer """Constructor for MSRAInitializer
""" """
assert uniform is not None assert uniform is not None
@@ -715,6 +723,8 @@ class MSRAInitializer(Initializer):
         self._uniform = uniform
         self._fan_in = fan_in
         self._seed = seed
+        self._negative_slope = negative_slope
+        self._nonlinearity = nonlinearity

     def __call__(self, var, block=None):
         """Initialize the input tensor with MSRA initialization.
@@ -755,13 +765,16 @@ class MSRAInitializer(Initializer):
         if framework._non_static_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                 -limit, 'max', limit, 'seed',
                                                 self._seed, 'dtype',
                                                 int(out_dtype))
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 if in_dygraph_mode():
                     place = _current_expected_place()
                     out_var = _C_ops.final_state_gaussian_random(
@@ -783,7 +796,8 @@ class MSRAInitializer(Initializer):
             return None
         else:
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 op = block.append_op(type="uniform_random",
                                      inputs={},
                                      outputs={"Out": out_var},
@@ -797,7 +811,8 @@ class MSRAInitializer(Initializer):
                                      stop_gradient=True)
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 op = block.append_op(type="gaussian_random",
                                      outputs={"Out": out_var},
                                      attrs={
...
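The hunks above call calculate_gain(self._nonlinearity, self._negative_slope), whose body is not part of this diff. A minimal sketch of what such a lookup typically does, following the standard Kaiming gain values (an assumption for illustration, not Paddle's actual implementation, which may support more activations):

    import math

    def calculate_gain_sketch(nonlinearity, param=None):
        # Hypothetical stand-in for the calculate_gain used above.
        if nonlinearity in ('linear', 'sigmoid'):
            return 1.0
        if nonlinearity == 'tanh':
            return 5.0 / 3
        if nonlinearity == 'relu':
            return math.sqrt(2.0)
        if nonlinearity == 'leaky_relu':
            a = 0.01 if param is None else param  # negative slope
            return math.sqrt(2.0 / (1 + a ** 2))
        raise ValueError("unsupported nonlinearity: %s" % nonlinearity)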
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
     .. math::

-        \sqrt{\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}

     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming normal Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
@@ -56,10 +59,12 @@ class KaimingNormal(MSRAInitializer):
""" """
def __init__(self, fan_in=None): def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingNormal, self).__init__(uniform=False, super(KaimingNormal, self).__init__(uniform=False,
fan_in=fan_in, fan_in=fan_in,
seed=0) seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
 class KaimingUniform(MSRAInitializer):
@@ -76,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
     .. math::

-        x = \sqrt{\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}

     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming uniform Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
@@ -99,7 +107,9 @@ class KaimingUniform(MSRAInitializer):
""" """
def __init__(self, fan_in=None): def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingUniform, self).__init__(uniform=True, super(KaimingUniform, self).__init__(uniform=True,
fan_in=fan_in, fan_in=fan_in,
seed=0) seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
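A minimal usage sketch of the extended interface (the layer shapes and ParamAttr wiring are illustrative; the negative_slope and nonlinearity arguments are the ones added in this diff):

    import paddle

    # Kaiming uniform init matched to a following leaky_relu with slope 0.1
    init = paddle.nn.initializer.KaimingUniform(negative_slope=0.1,
                                                nonlinearity='leaky_relu')
    linear = paddle.nn.Linear(64, 32,
                              weight_attr=paddle.ParamAttr(initializer=init))

    # With the defaults (nonlinearity='relu'), KaimingNormal/KaimingUniform
    # behave exactly as before this change.
    normal_init = paddle.nn.initializer.KaimingNormal()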