Unverified commit 1aafc31b authored by Jackwaterveg, committed by GitHub

[Cherry-pick] to Release/2.3, Improve MSRAInitializer (#43721)

* fix conflict

* improve the doc
Parent 4dcfc6df
......@@ -679,20 +679,23 @@ class MSRAInitializer(Initializer):
.. math::
x = \sqrt{\\frac{6.0}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_in}}
In case of Normal distribution, the mean is 0 and the standard deviation
is
.. math::
\sqrt{\\frac{2.0}{fan\_in}}
\frac{gain}{\sqrt{fan\_in}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
inferred from the variable. default is None.
fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
If None, it will be inferred automatically from the Tensor. If you do not want to use\
the in_features of the Tensor, you can set fan_in explicitly yourself. Default is None.
seed (int32): random seed
negative_slope (float, optional): the negative slope of the rectifier (only used when nonlinearity is 'leaky_relu'). Default is 0.0.
nonlinearity (str, optional): name of the non-linear function used to compute the gain. Default is 'relu'.
Note:
It is recommended to set fan_in to None for most cases.
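A minimal usage sketch of the updated initializer, based only on the signature added in this diff; the ParamAttr/Linear wiring is illustrative and not part of the change:

import paddle
from paddle.fluid.initializer import MSRAInitializer

# Normal (MSRA/He) initialization tuned for a leaky_relu activation;
# fan_in stays None, so it is inferred from the parameter's shape.
init = MSRAInitializer(uniform=False,
                       fan_in=None,
                       seed=0,
                       negative_slope=0.1,
                       nonlinearity='leaky_relu')
linear = paddle.nn.Linear(4, 8,
                          weight_attr=paddle.ParamAttr(initializer=init))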
......@@ -709,7 +712,12 @@ class MSRAInitializer(Initializer):
"""
def __init__(self, uniform=True, fan_in=None, seed=0):
def __init__(self,
uniform=True,
fan_in=None,
seed=0,
negative_slope=0,
nonlinearity='relu'):
"""Constructor for MSRAInitializer
"""
assert uniform is not None
......@@ -718,6 +726,8 @@ class MSRAInitializer(Initializer):
self._uniform = uniform
self._fan_in = fan_in
self._seed = seed
self._negative_slope = negative_slope
self._nonlinearity = nonlinearity
def __call__(self, var, block=None):
"""Initialize the input tensor with MSRA initialization.
......@@ -759,13 +769,16 @@ class MSRAInitializer(Initializer):
if framework._non_static_mode():
if self._uniform:
limit = np.sqrt(6.0 / float(fan_in))
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
-limit, 'max', limit, 'seed',
self._seed, 'dtype',
int(out_dtype))
else:
std = math.sqrt(2.0 / float(fan_in))
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
if in_dygraph_mode():
place = _current_expected_place()
out_var = _C_ops.final_state_gaussian_random(
......@@ -786,33 +799,33 @@ class MSRAInitializer(Initializer):
return None
else:
if self._uniform:
limit = np.sqrt(6.0 / float(fan_in))
op = block.append_op(
type="uniform_random",
inputs={},
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": int(out_dtype),
"min": -limit,
"max": limit,
"seed": self._seed
},
stop_gradient=True)
gain = calculate_gain(self._nonlinearity, self._negative_slope)
limit = gain * math.sqrt(3.0 / float(fan_in))
op = block.append_op(type="uniform_random",
inputs={},
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": int(out_dtype),
"min": -limit,
"max": limit,
"seed": self._seed
},
stop_gradient=True)
else:
std = np.sqrt(2.0 / float(fan_in))
op = block.append_op(
type="gaussian_random",
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": int(out_dtype),
"mean": 0.0,
"std": std,
"seed": self._seed
},
stop_gradient=True)
gain = calculate_gain(self._nonlinearity, self._negative_slope)
std = gain / math.sqrt(float(fan_in))
op = block.append_op(type="gaussian_random",
outputs={"Out": out_var},
attrs={
"shape": out_var.shape,
"dtype": int(out_dtype),
"mean": 0.0,
"std": std,
"seed": self._seed
},
stop_gradient=True)
if var.dtype == VarDesc.VarType.FP16 or (
var.dtype == VarDesc.VarType.BF16 and not self._uniform):
......
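With the default nonlinearity='relu', the gain is sqrt(2), so the new formulas reduce to the previous sqrt(6/fan_in) limit and sqrt(2/fan_in) std. A quick, illustrative check of that equivalence:

import math

fan_in = 128.0
gain = math.sqrt(2.0)  # gain for 'relu'

# uniform branch: gain * sqrt(3 / fan_in) == sqrt(6 / fan_in)
assert math.isclose(gain * math.sqrt(3.0 / fan_in), math.sqrt(6.0 / fan_in))
# normal branch: gain / sqrt(fan_in) == sqrt(2 / fan_in)
assert math.isclose(gain / math.sqrt(fan_in), math.sqrt(2.0 / fan_in))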
......@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
.. math::
\sqrt{\frac{2.0}{fan\_in}}
\frac{gain}{\sqrt{fan\_in}}
Args:
fan_in (float32|None): fan_in for Kaiming normal Initializer. If None, it is\
inferred from the variable. default is None.
fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
If None, it will be inferred automatically from the Tensor. If you do not want to use\
the in_features of the Tensor, you can set fan_in explicitly yourself. Default is None.
negative_slope (float, optional): the negative slope of the rectifier (only used when nonlinearity is 'leaky_relu'). Default is 0.0.
nonlinearity (str, optional): name of the non-linear function used to compute the gain. Default is 'relu'.
Note:
It is recommended to set fan_in to None for most cases.
......@@ -56,9 +59,12 @@ class KaimingNormal(MSRAInitializer):
"""
def __init__(self, fan_in=None):
super(KaimingNormal, self).__init__(
uniform=False, fan_in=fan_in, seed=0)
def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingNormal, self).__init__(uniform=False,
fan_in=fan_in,
seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
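A short usage sketch for the extended KaimingNormal; the Conv2D/ParamAttr wiring is illustrative, not part of this diff:

import paddle

# He normal initialization matched to a leaky_relu with slope 0.2
weight_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.KaimingNormal(negative_slope=0.2,
                                                    nonlinearity='leaky_relu'))
conv = paddle.nn.Conv2D(3, 16, kernel_size=3, weight_attr=weight_attr)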
class KaimingUniform(MSRAInitializer):
......@@ -75,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
.. math::
x = \sqrt{\frac{6.0}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_in}}
Args:
fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None, it is\
inferred from the variable. default is None.
fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
If None, it will be inferred automatically from the Tensor. If you do not want to use\
the in_features of the Tensor, you can set fan_in explicitly yourself. Default is None.
negative_slope (float, optional): the negative slope of the rectifier (only used when nonlinearity is 'leaky_relu'). Default is 0.0.
nonlinearity (str, optional): name of the non-linear function used to compute the gain. Default is 'relu'.
Note:
It is recommended to set fan_in to None for most cases.
......@@ -98,6 +107,9 @@ class KaimingUniform(MSRAInitializer):
"""
def __init__(self, fan_in=None):
super(KaimingUniform, self).__init__(
uniform=True, fan_in=fan_in, seed=0)
def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
super(KaimingUniform, self).__init__(uniform=True,
fan_in=fan_in,
seed=0,
negative_slope=negative_slope,
nonlinearity=nonlinearity)
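Likewise for KaimingUniform, including the documented option of overriding fan_in instead of letting it be inferred from the Tensor; again an illustrative sketch:

import paddle

# Kaiming uniform initialization with an explicit fan_in override
weight_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.KaimingUniform(fan_in=64))
linear = paddle.nn.Linear(64, 32, weight_attr=weight_attr)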