Commit 1141db81
Authored on Dec 17, 2018 by Qiao Longfei

update test_adam_op
test=develop

Parent: 96604fda
Showing 2 changed files with 20 additions and 11 deletions (+20 −11)
paddle/fluid/operators/optimizers/adam_op.h (+1 −0)
python/paddle/fluid/tests/unittests/test_adam_op.py (+19 −11)
paddle/fluid/operators/optimizers/adam_op.h

@@ -358,6 +358,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
             lr.template data<T>(), grad_data, param.template data<T>(),
             param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
             grad_merge.rows().size(), lazy_mode);
+        VLOG(3) << "lazy_mode :" << lazy_mode;
         if (lazy_mode) {
           std::vector<int64_t> id_vector;
           size_t row_count = grad_merge.rows().size();
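The lazy_mode flag logged here controls which rows the sparse Adam kernel touches: in lazy mode only rows that actually appear in the sparse gradient are updated, while in dense mode every parameter row is updated, with absent rows receiving a zero gradient. A minimal NumPy sketch of the two strategies, mirroring the reference implementation in the test below (not Paddle's kernel; update_row stands in for the per-row Adam step):

import numpy as np

# Sketch of lazy vs. dense sparse-gradient application (illustrative).
def apply_sparse_grad(param, rows, np_grad, lazy_mode, update_row):
    if lazy_mode:
        # Touch only rows present in the sparse gradient.
        for idx, row_id in enumerate(rows):
            update_row(row_id, np_grad[idx])
    else:
        # Update every row; rows missing from the gradient get zeros,
        # which still advances the moment estimates.
        for row_id in range(param.shape[0]):
            update_value = np.zeros(np_grad[0].shape).astype("float32")
            if row_id in rows:
                update_value = np_grad[rows.index(row_id)]
            update_row(row_id, update_value)

This is exactly the if lazy_mode / else branch added to adam_step_sparse in the Python hunk that follows.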
python/paddle/fluid/tests/unittests/test_adam_op.py

@@ -219,14 +219,25 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
     moment2_out = np.zeros(shape=[height, row_numel])
     param_out = np.zeros(shape=[height, row_numel])
 
-    for idx, row_id in enumerate(rows):
+    def update_row(row_id, update_value):
         moment1_out[row_id] = beta1 * moment1[row_id] + (1 - beta1
-                                                         ) * np_grad[idx]
+                                                         ) * update_value
         moment2_out[row_id] = beta2 * moment2[row_id] + (
-            1 - beta2) * np.square(np_grad[idx])
+            1 - beta2) * np.square(update_value)
         lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
         param_out[row_id] = param[row_id] - lr_t * (moment1_out[row_id] / (
             np.sqrt(moment2_out[row_id]) + epsilon))
+
+    if lazy_mode:
+        for idx, row_id in enumerate(rows):
+            update_row(row_id, np_grad[idx])
+    else:
+        for row_id in range(param_out.shape[0]):
+            update_value = np.zeros(np_grad[0].shape).astype("float32")
+            if row_id in rows:
+                update_value = np_grad[rows.index(row_id)]
+            update_row(row_id, update_value)
+
     return param_out, moment1_out, moment2_out
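For reference, update_row implements the standard per-row Adam step; with gradient g for the row, learning rate η, and bias-correction powers β₁ᵗ (beta1_pow) and β₂ᵗ (beta2_pow), the code above computes:

\begin{aligned}
m_1 &\leftarrow \beta_1 m_1 + (1 - \beta_1)\, g \\
m_2 &\leftarrow \beta_2 m_2 + (1 - \beta_2)\, g^2 \\
\eta_t &= \eta \cdot \frac{\sqrt{1 - \beta_2^t}}{1 - \beta_1^t} \\
\theta &\leftarrow \theta - \eta_t \cdot \frac{m_1}{\sqrt{m_2} + \epsilon}
\end{aligned}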
@@ -249,6 +260,7 @@ class TestSparseAdamOp(unittest.TestCase):
             'Beta2Pow': np.array([beta2**10]).astype("float32"),
             "LearningRate": np.full((1), 2.0).astype("float32")
         }
+        self.init_output = np.full((height, row_numel), 0.0).astype("float32")
         self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}
 
         grad_selected_rows = scope.var('Grad').get_selected_rows()
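The new init_output buffer pairs with the var.set(self.init_output, place) change in the next hunk: every output tensor is pre-filled with zeros so the whole-array comparison at the end of the test starts from a known buffer. That comparison matters in dense (non-lazy) mode because a row still moves even when its gradient is zero, since the moments decay rather than reset. A toy single-value check (numbers are illustrative, not from the test, though beta2**10 matches its Beta2Pow):

import numpy as np

beta1, beta2, epsilon, lr = 0.9, 0.999, 1e-8, 2.0
m1, m2, param = 0.1, 0.05, 1.0
g = 0.0  # row absent from the sparse gradient

m1 = beta1 * m1 + (1 - beta1) * g        # decays to 0.09, does not reset
m2 = beta2 * m2 + (1 - beta2) * g * g
lr_t = lr * np.sqrt(1 - beta2**10) / (1 - beta1**10)
param -= lr_t * m1 / (np.sqrt(m2) + epsilon)
print(param)  # != 1.0: the row changed despite a zero gradient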
@@ -286,7 +298,7 @@ class TestSparseAdamOp(unittest.TestCase):
             op_args[s] = s
         for s in self.outputs:
             var = scope.var(s).get_tensor()
-            var.set(self.outputs[s], place)
+            var.set(self.init_output, place)
             op_args[s] = s
         for k in self.attrs:
             op_args[k] = self.attrs[k]
@@ -300,13 +312,9 @@ class TestSparseAdamOp(unittest.TestCase):
             actual = np.array(out_var)
             actual = actual.reshape([actual.size])
             np_array = np_array.reshape([np_array.size])
-            for idx, row_id in enumerate(self.rows):
-                j = 0
-                while j < self.row_numel:
-                    pos = row_id * self.row_numel + j
-                    self.assertLess((actual[pos] - np_array[pos]) /
-                                    actual[pos], 0.00001)
-                    j += 1
+            for i in range(np_array.size):
+                self.assertLess((actual[i] - np_array[i]), 0.00001)
 
     def test_sparse_adam(self):
         places = [core.CPUPlace()]
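The rewritten check compares every element of the flattened output against the NumPy reference with assertLess((actual[i] - np_array[i]), 0.00001). Note that this bounds only the signed difference, not its magnitude. A tighter, vectorized alternative (an assumption on my part, not what the test uses) would be:

import numpy as np

actual = np.array([1.0, 2.0, 3.0], dtype=np.float32)    # op output (dummy)
np_array = np.array([1.0, 2.0, 3.0], dtype=np.float32)  # reference (dummy)

# Bounds |actual - expected| on both sides and reports the worst
# offender, unlike a signed element-wise assertLess.
np.testing.assert_allclose(actual, np_array, rtol=0, atol=1e-5)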