Unverified commit e176cc40, authored by Jackwaterveg, committed by GitHub

[Initializer] Improve MSRAInitializer (#43334)

* improve MSRAInitializer

* improve the doc
Parent 4b3e8d56
@@ -676,20 +676,23 @@ class MSRAInitializer(Initializer):
     .. math::

-        x = \sqrt{\\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}

     In case of Normal distribution, the mean is 0 and the standard deviation
     is

     .. math::

-        \sqrt{\\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}

     Args:
         uniform (bool): whether to use uniform or normal distribution
-        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
         seed (int32): random seed
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
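With the default nonlinearity='relu', the new formulas reduce to the old ones: the standard Kaiming gain for relu is sqrt(2), so gain * sqrt(3 / fan_in) equals the old sqrt(6 / fan_in) uniform bound and gain / sqrt(fan_in) equals the old sqrt(2 / fan_in) std. A quick numeric check (assuming calculate_gain('relu') returns the standard sqrt(2)):

    import math

    fan_in = 64
    gain = math.sqrt(2.0)  # standard Kaiming gain for relu (assumed)

    # uniform bound: new formula matches the old sqrt(6/fan_in) for relu
    assert abs(gain * math.sqrt(3.0 / fan_in) - math.sqrt(6.0 / fan_in)) < 1e-12
    # normal std: new formula matches the old sqrt(2/fan_in) for relu
    assert abs(gain / math.sqrt(fan_in) - math.sqrt(2.0 / fan_in)) < 1e-12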
@@ -706,7 +709,12 @@ class MSRAInitializer(Initializer):
""" """
def __init__(self, uniform=True, fan_in=None, seed=0): def __init__(self,
uniform=True,
fan_in=None,
seed=0,
negative_slope=0,
nonlinearity='relu'):
"""Constructor for MSRAInitializer """Constructor for MSRAInitializer
""" """
assert uniform is not None assert uniform is not None
@@ -715,6 +723,8 @@ class MSRAInitializer(Initializer):
         self._uniform = uniform
         self._fan_in = fan_in
         self._seed = seed
+        self._negative_slope = negative_slope
+        self._nonlinearity = nonlinearity

     def __call__(self, var, block=None):
         """Initialize the input tensor with MSRA initialization.
@@ -755,13 +765,16 @@ class MSRAInitializer(Initializer):
         if framework._non_static_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                 -limit, 'max', limit, 'seed',
                                                 self._seed, 'dtype',
                                                 int(out_dtype))
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 if in_dygraph_mode():
                     place = _current_expected_place()
                     out_var = _C_ops.final_state_gaussian_random(
@@ -783,7 +796,8 @@ class MSRAInitializer(Initializer):
             return None
         else:
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 op = block.append_op(type="uniform_random",
                                      inputs={},
                                      outputs={"Out": out_var},
@@ -797,7 +811,8 @@ class MSRAInitializer(Initializer):
                                      stop_gradient=True)
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 op = block.append_op(type="gaussian_random",
                                      outputs={"Out": out_var},
                                      attrs={
...
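The hunks above call calculate_gain(self._nonlinearity, self._negative_slope), whose body is not part of this diff. A minimal sketch of what such a lookup typically does, following the standard Kaiming gain values (an assumption for illustration, not Paddle's actual implementation, which may support more activations):

    import math

    def calculate_gain_sketch(nonlinearity, param=None):
        # Hypothetical stand-in for the calculate_gain used above.
        if nonlinearity in ('linear', 'sigmoid'):
            return 1.0
        if nonlinearity == 'tanh':
            return 5.0 / 3
        if nonlinearity == 'relu':
            return math.sqrt(2.0)
        if nonlinearity == 'leaky_relu':
            a = 0.01 if param is None else param  # negative slope
            return math.sqrt(2.0 / (1 + a ** 2))
        raise ValueError("unsupported nonlinearity: %s" % nonlinearity)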
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
     .. math::

-        \sqrt{\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}

     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming normal Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
@@ -56,10 +59,12 @@ class KaimingNormal(MSRAInitializer):
""" """
def __init__(self, fan_in=None): def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingNormal, self).__init__(uniform=False, super(KaimingNormal, self).__init__(uniform=False,
fan_in=fan_in, fan_in=fan_in,
seed=0) seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
 class KaimingUniform(MSRAInitializer):
@@ -76,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
     .. math::

-        x = \sqrt{\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}

     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming uniform Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.
+            If None, it will be inferred automatically. If you do not want to
+            use the in_features of the Tensor, you can set 'fan_in' yourself.
+            Default is None.
+        negative_slope (float, optional): negative slope (only used with
+            leaky_relu). Default is 0.0.
+        nonlinearity (str, optional): the non-linear function. Default is relu.

     Note:
         It is recommended to set fan_in to None for most cases.
@@ -99,7 +107,9 @@ class KaimingUniform(MSRAInitializer):
""" """
def __init__(self, fan_in=None): def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingUniform, self).__init__(uniform=True, super(KaimingUniform, self).__init__(uniform=True,
fan_in=fan_in, fan_in=fan_in,
seed=0) seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
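A minimal usage sketch of the extended interface (the layer shapes and ParamAttr wiring are illustrative; the negative_slope and nonlinearity arguments are the ones added in this diff):

    import paddle

    # Kaiming uniform init matched to a following leaky_relu with slope 0.1
    init = paddle.nn.initializer.KaimingUniform(negative_slope=0.1,
                                                nonlinearity='leaky_relu')
    linear = paddle.nn.Linear(64, 32,
                              weight_attr=paddle.ParamAttr(initializer=init))

    # With the defaults (nonlinearity='relu'), KaimingNormal/KaimingUniform
    # behave exactly as before this change.
    normal_init = paddle.nn.initializer.KaimingNormal()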