Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
77cac5cd
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
77cac5cd
编写于
10月 19, 2017
作者:
A
Abhinav Arora
提交者:
GitHub
10月 19, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Removing updates of Beta1 power accumulators outside the op (#4931)
上级
11bebeb2
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
15 addition
and
31 deletion
+15
-31
paddle/operators/adamax_op.cc
paddle/operators/adamax_op.cc
+1
-6
paddle/operators/adamax_op.h
paddle/operators/adamax_op.h
+1
-6
python/paddle/v2/framework/tests/test_adamax_op.py
python/paddle/v2/framework/tests/test_adamax_op.py
+13
-19
未找到文件。
paddle/operators/adamax_op.cc
浏览文件 @
77cac5cd
...
@@ -41,8 +41,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
...
@@ -41,8 +41,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
"Output(MomentOut) of AdamaxOp should not be null."
);
"Output(MomentOut) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"InfNormOut"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"InfNormOut"
),
"Output(InfNormOut) of AdamaxOp should not be null."
);
"Output(InfNormOut) of AdamaxOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Beta1PowOut"
),
"Output(Beta1PowOut) of AdamaxOp should not be null."
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
...
@@ -64,7 +62,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
...
@@ -64,7 +62,6 @@ class AdamaxOp : public framework::OperatorWithKernel {
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"InfNormOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"InfNormOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"Beta1PowOut"
,
beta1_pow_dims
);
}
}
};
};
...
@@ -86,7 +83,6 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -86,7 +83,6 @@ class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"InfNormOut"
,
AddOutput
(
"InfNormOut"
,
"(Tensor) "
"(Tensor) "
"Output exponentially weighted infinity norm"
);
"Output exponentially weighted infinity norm"
);
AddOutput
(
"Beta1PowOut"
,
"(Tensor) Output beta1 power accumulator"
);
AddAttr
<
float
>
(
"beta1"
,
AddAttr
<
float
>
(
"beta1"
,
"(float, default 0.9) "
"(float, default 0.9) "
...
@@ -113,8 +109,7 @@ Adamax updates:
...
@@ -113,8 +109,7 @@ Adamax updates:
moment_out = beta1 * moment + (1 - beta1) * grad
moment_out = beta1 * moment + (1 - beta1) * grad
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad))
inf_norm_out = max(beta2 * inf_norm + epsilon, abs(grad))
beta1_pow_out = beta1_pow * beta1
learning_rate_t = learning_rate/(1 - beta1_pow)
learning_rate_t = learning_rate/(1 - beta1_pow_out)
param_out = param - learning_rate_t * moment_out/inf_norm_out
param_out = param - learning_rate_t * moment_out/inf_norm_out
The original paper does not have an epsilon attribute.
The original paper does not have an epsilon attribute.
...
...
paddle/operators/adamax_op.h
浏览文件 @
77cac5cd
...
@@ -26,12 +26,10 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
...
@@ -26,12 +26,10 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
param_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
moment_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"MomentOut"
);
auto
inf_norm_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"InfNormOut"
);
auto
inf_norm_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"InfNormOut"
);
auto
beta1_pow_out_tensor
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Beta1PowOut"
);
param_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
param_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
inf_norm_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
inf_norm_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
beta1_pow_out_tensor
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
float
beta1
=
ctx
.
Attr
<
float
>
(
"beta1"
);
float
beta1
=
ctx
.
Attr
<
float
>
(
"beta1"
);
float
beta2
=
ctx
.
Attr
<
float
>
(
"beta2"
);
float
beta2
=
ctx
.
Attr
<
float
>
(
"beta2"
);
...
@@ -53,15 +51,12 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
...
@@ -53,15 +51,12 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
auto
moment_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment_out_tensor
);
auto
moment_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
moment_out_tensor
);
auto
inf_norm_out
=
auto
inf_norm_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
inf_norm_out_tensor
);
framework
::
EigenVector
<
T
>::
Flatten
(
*
inf_norm_out_tensor
);
auto
beta1_pow_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
beta1_pow_out_tensor
);
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
auto
place
=
ctx
.
GetEigenDevice
<
Place
>
();
moment_out
.
device
(
place
)
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
;
moment_out
.
device
(
place
)
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
;
inf_norm_out
.
device
(
place
)
=
inf_norm_out
.
device
(
place
)
=
grad
.
abs
().
cwiseMax
((
beta2
*
inf_norm
)
+
epsilon
);
grad
.
abs
().
cwiseMax
((
beta2
*
inf_norm
)
+
epsilon
);
beta1_pow_out
.
device
(
place
)
=
beta1_pow
*
beta1
;
auto
lr_t
=
lr
/
(
1
-
beta1_pow
);
auto
lr_t
=
lr
/
(
1
-
beta1_pow_out
);
Eigen
::
DSizes
<
int
,
1
>
m_dsize
(
moment_out_tensor
->
numel
());
Eigen
::
DSizes
<
int
,
1
>
m_dsize
(
moment_out_tensor
->
numel
());
param_out
.
device
(
place
)
=
param_out
.
device
(
place
)
=
param
-
lr_t
.
broadcast
(
m_dsize
)
*
(
moment_out
/
inf_norm_out
);
param
-
lr_t
.
broadcast
(
m_dsize
)
*
(
moment_out
/
inf_norm_out
);
...
...
python/paddle/v2/framework/tests/test_adamax_op.py
浏览文件 @
77cac5cd
...
@@ -31,14 +31,13 @@ class TestAdamaxOp1(OpTest):
...
@@ -31,14 +31,13 @@ class TestAdamaxOp1(OpTest):
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
self
.
inputs
,
self
.
attrs
)
self
.
attrs
)
self
.
outputs
=
{
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'InfNormOut'
:
inf_norm_out
'Beta1PowOut'
:
beta1_pow_out
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
@@ -73,14 +72,12 @@ class TestAdamaxOp2(OpTest):
...
@@ -73,14 +72,12 @@ class TestAdamaxOp2(OpTest):
}
}
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
attrs
)
self
.
inputs
,
attrs
)
self
.
outputs
=
{
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'InfNormOut'
:
inf_norm_out
'Beta1PowOut'
:
beta1_pow_out
}
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
...
@@ -117,19 +114,15 @@ class TestAdamaxOpMultipleSteps(OpTest):
...
@@ -117,19 +114,15 @@ class TestAdamaxOpMultipleSteps(OpTest):
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
self
.
attrs
=
{
'beta1'
:
beta1
,
'beta2'
:
beta2
,
'epsilon'
:
epsilon
}
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
self
.
inputs
,
self
.
attrs
)
def
test_check_output
(
self
):
def
test_check_output
(
self
):
for
_
in
range
(
self
.
num_steps
):
for
_
in
range
(
self
.
num_steps
):
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
=
adamax_step
(
param_out
,
moment_out
,
inf_norm_out
=
adamax_step
(
self
.
inputs
,
self
.
inputs
,
self
.
attrs
)
self
.
attrs
)
self
.
outputs
=
{
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'ParamOut'
:
param_out
,
'MomentOut'
:
moment_out
,
'MomentOut'
:
moment_out
,
'InfNormOut'
:
inf_norm_out
,
'InfNormOut'
:
inf_norm_out
'Beta1PowOut'
:
beta1_pow_out
}
}
# Verify output for this step
# Verify output for this step
...
@@ -139,7 +132,9 @@ class TestAdamaxOpMultipleSteps(OpTest):
...
@@ -139,7 +132,9 @@ class TestAdamaxOpMultipleSteps(OpTest):
self
.
inputs
[
'Param'
]
=
param_out
self
.
inputs
[
'Param'
]
=
param_out
self
.
inputs
[
'Moment'
]
=
moment_out
self
.
inputs
[
'Moment'
]
=
moment_out
self
.
inputs
[
'InfNorm'
]
=
inf_norm_out
self
.
inputs
[
'InfNorm'
]
=
inf_norm_out
self
.
inputs
[
'Beta1Pow'
]
=
beta1_pow_out
# Update Beta1 Power accumulator for next step
self
.
inputs
[
'Beta1Pow'
]
*=
self
.
attrs
[
'beta1'
]
# Randomize gradient for next step
# Randomize gradient for next step
self
.
inputs
[
'Grad'
]
=
np
.
random
.
uniform
(
self
.
inputs
[
'Grad'
]
=
np
.
random
.
uniform
(
...
@@ -167,11 +162,10 @@ def adamax_step(inputs, attributes):
...
@@ -167,11 +162,10 @@ def adamax_step(inputs, attributes):
moment_out
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
moment_out
=
beta1
*
moment
+
(
1
-
beta1
)
*
grad
inf_norm_out
=
np
.
maximum
(
beta2
*
inf_norm
+
epsilon
,
np
.
abs
(
grad
))
inf_norm_out
=
np
.
maximum
(
beta2
*
inf_norm
+
epsilon
,
np
.
abs
(
grad
))
beta1_pow_out
=
beta1_pow
*
beta1
lr_t
=
(
lr
/
(
1
-
beta1_pow
))
lr_t
=
(
lr
/
(
1
-
beta1_pow_out
))
param_out
=
param
-
lr_t
*
np
.
divide
(
moment_out
,
inf_norm_out
)
param_out
=
param
-
lr_t
*
np
.
divide
(
moment_out
,
inf_norm_out
)
return
param_out
,
moment_out
,
inf_norm_out
,
beta1_pow_out
return
param_out
,
moment_out
,
inf_norm_out
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录