Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
1aafc31b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1aafc31b
编写于
6月 22, 2022
作者:
J
Jackwaterveg
提交者:
GitHub
6月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Cherry-pick]to Release/2.3, Improve MSRAInitializer (#43721)
* fix conflict * improve the doc
上级
4dcfc6df
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
69 addition
and
44 deletion
+69
-44
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+45
-32
python/paddle/nn/initializer/kaiming.py
python/paddle/nn/initializer/kaiming.py
+24
-12
未找到文件。
python/paddle/fluid/initializer.py
浏览文件 @
1aafc31b
...
@@ -679,20 +679,23 @@ class MSRAInitializer(Initializer):
...
@@ -679,20 +679,23 @@ class MSRAInitializer(Initializer):
.. math::
.. math::
x = \sqrt{\\frac{6.0}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_in}}
In case of Normal distribution, the mean is 0 and the standard deviation
In case of Normal distribution, the mean is 0 and the standard deviation
is
is
.. math::
.. math::
\sqrt{\\frac{2.0}{fan\_in}}
\frac{gain}{\sqrt{{fan\_in}}}
Args:
Args:
uniform (bool): whether to use uniform or normal distribution
uniform (bool): whether to use uniform or normal distribution
fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
fan_in (float32|None): fan_in (in_features) of trainable Tensor,\
inferred from the variable. default is None.
If None, it will be inferred automatically. If you don't want to use in_features of the Tensor,\
you can set the value of 'fan_in' yourself. default is None.
seed (int32): random seed
seed (int32): random seed
negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
nonlinearity(str, optional): the non-linear function. default is relu.
Note:
Note:
It is recommended to set fan_in to None for most cases.
It is recommended to set fan_in to None for most cases.
...
@@ -709,7 +712,12 @@ class MSRAInitializer(Initializer):
...
@@ -709,7 +712,12 @@ class MSRAInitializer(Initializer):
"""
"""
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
seed
=
0
):
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
seed
=
0
,
negative_slope
=
0
,
nonlinearity
=
'relu'
):
"""Constructor for MSRAInitializer
"""Constructor for MSRAInitializer
"""
"""
assert
uniform
is
not
None
assert
uniform
is
not
None
...
@@ -718,6 +726,8 @@ class MSRAInitializer(Initializer):
...
@@ -718,6 +726,8 @@ class MSRAInitializer(Initializer):
self
.
_uniform
=
uniform
self
.
_uniform
=
uniform
self
.
_fan_in
=
fan_in
self
.
_fan_in
=
fan_in
self
.
_seed
=
seed
self
.
_seed
=
seed
self
.
_negative_slope
=
negative_slope
self
.
_nonlinearity
=
nonlinearity
def
__call__
(
self
,
var
,
block
=
None
):
def
__call__
(
self
,
var
,
block
=
None
):
"""Initialize the input tensor with MSRA initialization.
"""Initialize the input tensor with MSRA initialization.
...
@@ -759,13 +769,16 @@ class MSRAInitializer(Initializer):
...
@@ -759,13 +769,16 @@ class MSRAInitializer(Initializer):
if
framework
.
_non_static_mode
():
if
framework
.
_non_static_mode
():
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
))
gain
=
calculate_gain
(
self
.
_nonlinearity
,
self
.
_negative_slope
)
limit
=
gain
*
math
.
sqrt
(
3.0
/
float
(
fan_in
))
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
out_var
=
_C_ops
.
uniform_random
(
'shape'
,
out_var
.
shape
,
'min'
,
-
limit
,
'max'
,
limit
,
'seed'
,
-
limit
,
'max'
,
limit
,
'seed'
,
self
.
_seed
,
'dtype'
,
self
.
_seed
,
'dtype'
,
int
(
out_dtype
))
int
(
out_dtype
))
else
:
else
:
std
=
math
.
sqrt
(
2.0
/
float
(
fan_in
))
gain
=
calculate_gain
(
self
.
_nonlinearity
,
self
.
_negative_slope
)
std
=
gain
/
math
.
sqrt
(
float
(
fan_in
))
if
in_dygraph_mode
():
if
in_dygraph_mode
():
place
=
_current_expected_place
()
place
=
_current_expected_place
()
out_var
=
_C_ops
.
final_state_gaussian_random
(
out_var
=
_C_ops
.
final_state_gaussian_random
(
...
@@ -786,33 +799,33 @@ class MSRAInitializer(Initializer):
...
@@ -786,33 +799,33 @@ class MSRAInitializer(Initializer):
return
None
return
None
else
:
else
:
if
self
.
_uniform
:
if
self
.
_uniform
:
limit
=
np
.
sqrt
(
6.0
/
float
(
fan_in
)
)
gain
=
calculate_gain
(
self
.
_nonlinearity
,
self
.
_negative_slope
)
op
=
block
.
append_op
(
limit
=
gain
*
math
.
sqrt
(
3.0
/
float
(
fan_in
))
type
=
"uniform_random"
,
op
=
block
.
append_op
(
type
=
"uniform_random"
,
inputs
=
{},
inputs
=
{},
outputs
=
{
"Out"
:
out_var
},
outputs
=
{
"Out"
:
out_var
},
attrs
=
{
attrs
=
{
"shape"
:
out_var
.
shape
,
"shape"
:
out_var
.
shape
,
"dtype"
:
int
(
out_dtype
),
"dtype"
:
int
(
out_dtype
),
"min"
:
-
limit
,
"min"
:
-
limit
,
"max"
:
limit
,
"max"
:
limit
,
"seed"
:
self
.
_seed
"seed"
:
self
.
_seed
},
},
stop_gradient
=
True
)
stop_gradient
=
True
)
else
:
else
:
std
=
np
.
sqrt
(
2.0
/
float
(
fan_in
)
)
gain
=
calculate_gain
(
self
.
_nonlinearity
,
self
.
_negative_slope
)
op
=
block
.
append_op
(
std
=
gain
/
math
.
sqrt
(
float
(
fan_in
))
type
=
"gaussian_random"
,
op
=
block
.
append_op
(
type
=
"gaussian_random"
,
outputs
=
{
"Out"
:
out_var
},
outputs
=
{
"Out"
:
out_var
},
attrs
=
{
attrs
=
{
"shape"
:
out_var
.
shape
,
"shape"
:
out_var
.
shape
,
"dtype"
:
int
(
out_dtype
),
"dtype"
:
int
(
out_dtype
),
"mean"
:
0.0
,
"mean"
:
0.0
,
"std"
:
std
,
"std"
:
std
,
"seed"
:
self
.
_seed
"seed"
:
self
.
_seed
},
},
stop_gradient
=
True
)
stop_gradient
=
True
)
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
or
(
if
var
.
dtype
==
VarDesc
.
VarType
.
FP16
or
(
var
.
dtype
==
VarDesc
.
VarType
.
BF16
and
not
self
.
_uniform
):
var
.
dtype
==
VarDesc
.
VarType
.
BF16
and
not
self
.
_uniform
):
...
...
python/paddle/nn/initializer/kaiming.py
浏览文件 @
1aafc31b
...
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
...
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
.. math::
.. math::
\sqrt{\frac{2.0}{fan\_in}}
\frac{gain}{\sqrt{{fan\_in}}}
Args:
Args:
fan_in (float32|None): fan_in for Kaiming normal Initializer. If None, it is\
fan_in (float32|None): fan_in (in_features) of trainable Tensor,\
inferred from the variable. default is None.
If None, it will be inferred automatically. If you don't want to use in_features of the Tensor,\
you can set the value of 'fan_in' yourself. default is None.
negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
nonlinearity(str, optional): the non-linear function. default is relu.
Note:
Note:
It is recommended to set fan_in to None for most cases.
It is recommended to set fan_in to None for most cases.
...
@@ -56,9 +59,12 @@ class KaimingNormal(MSRAInitializer):
...
@@ -56,9 +59,12 @@ class KaimingNormal(MSRAInitializer):
"""
"""
def
__init__
(
self
,
fan_in
=
None
):
def
__init__
(
self
,
fan_in
=
None
,
negative_slope
=
0.0
,
nonlinearity
=
'relu'
):
super
(
KaimingNormal
,
self
).
__init__
(
super
(
KaimingNormal
,
self
).
__init__
(
uniform
=
False
,
uniform
=
False
,
fan_in
=
fan_in
,
seed
=
0
)
fan_in
=
fan_in
,
seed
=
0
,
negative_slope
=
negative_slope
,
nonlinearity
=
nonlinearity
)
class
KaimingUniform
(
MSRAInitializer
):
class
KaimingUniform
(
MSRAInitializer
):
...
@@ -75,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
...
@@ -75,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
.. math::
.. math::
x = \sqrt{\frac{6.0}{fan\_in}}
x = gain \times \sqrt{\frac{3}{fan\_in}}
Args:
Args:
fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None, it is\
fan_in (float32|None): fan_in (in_features) of trainable Tensor,\
inferred from the variable. default is None.
If None, it will be inferred automatically. If you don't want to use in_features of the Tensor,\
you can set the value of 'fan_in' yourself. default is None.
negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0.
nonlinearity(str, optional): the non-linear function. default is relu.
Note:
Note:
It is recommended to set fan_in to None for most cases.
It is recommended to set fan_in to None for most cases.
...
@@ -98,6 +107,9 @@ class KaimingUniform(MSRAInitializer):
...
@@ -98,6 +107,9 @@ class KaimingUniform(MSRAInitializer):
"""
"""
def
__init__
(
self
,
fan_in
=
None
):
def
__init__
(
self
,
fan_in
=
None
,
negative_slope
=
0.0
,
nonlinearity
=
'relu'
):
super
(
KaimingUniform
,
self
).
__init__
(
super
(
KaimingUniform
,
self
).
__init__
(
uniform
=
True
,
uniform
=
True
,
fan_in
=
fan_in
,
seed
=
0
)
fan_in
=
fan_in
,
seed
=
0
,
negative_slope
=
negative_slope
,
nonlinearity
=
nonlinearity
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录