Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5361911c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5361911c
编写于
12月 26, 2017
作者:
T
typhoonzero
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adam support sparse
上级
6cc4bd53
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
32 additions
and
39 deletions
+32
-39
paddle/operators/adam_op.h
paddle/operators/adam_op.h
+5
-8
python/paddle/v2/fluid/tests/test_adam_op.py
python/paddle/v2/fluid/tests/test_adam_op.py
+27
-31
未找到文件。
paddle/operators/adam_op.h
浏览文件 @
5361911c
...
...
@@ -98,13 +98,12 @@ struct SparseAdamFunctor {
const
int64_t
*
rows_
;
int64_t
row_numel_
;
int64_t
height_
;
SparseAdamFunctor
(
T
beta1
,
T
beta2
,
T
epsilon
,
const
T
*
beta1_pow
,
const
T
*
beta2_pow
,
const
T
*
mom1
,
T
*
mom1_out
,
const
T
*
mom2
,
T
*
mom2_out
,
const
T
*
lr
,
const
T
*
grad
,
const
T
*
param
,
T
*
param_out
,
const
int64_t
*
rows
,
int64_t
row_numel
,
int64_t
height
)
int64_t
row_numel
)
:
beta1_
(
beta1
),
beta2_
(
beta2
),
epsilon_
(
epsilon
),
...
...
@@ -119,8 +118,7 @@ struct SparseAdamFunctor {
param_
(
param
),
param_out_
(
param_out
),
rows_
(
rows
),
row_numel_
(
row_numel
),
height_
(
height
)
{}
row_numel_
(
row_numel
)
{}
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
for
(
int64_t
j
=
0
;
j
<
row_numel_
;
++
j
)
{
...
...
@@ -136,6 +134,7 @@ struct SparseAdamFunctor {
mom1
=
beta1_
*
mom1
+
(
1
-
beta1_
)
*
g
;
mom2
=
beta2_
*
mom2
+
(
1
-
beta2_
)
*
g
*
g
;
p
-=
lr
*
(
mom1
/
(
sqrt
(
mom2
)
+
epsilon_
));
// IMPORTANT:
// FIXME(typhoonzero): row id may be duplicate
moment1_out_
[
rows_
[
i
]
*
row_numel_
+
j
]
=
mom1
;
moment2_out_
[
rows_
[
i
]
*
row_numel_
+
j
]
=
mom2
;
...
...
@@ -195,8 +194,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto
&
grad_tensor
=
grad
.
value
();
const
T
*
grad_data
=
grad_tensor
.
template
data
<
T
>();
auto
*
rows
=
grad
.
rows
().
data
();
auto
height
=
grad
.
height
();
auto
row_numel
=
grad_tensor
.
numel
()
/
height
;
auto
row_numel
=
grad_tensor
.
numel
()
/
grad
.
rows
().
size
();
SparseAdamFunctor
<
T
>
functor
(
beta1
,
beta2
,
epsilon
,
beta1_pow
.
template
data
<
T
>(),
...
...
@@ -205,8 +203,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
mom2
.
template
data
<
T
>(),
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
lr
.
template
data
<
T
>(),
grad_data
,
param
.
template
data
<
T
>(),
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
rows
,
row_numel
,
height
);
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
rows
,
row_numel
);
platform
::
ForRange
<
DeviceContext
>
for_range
(
static_cast
<
const
DeviceContext
&>
(
ctx
.
device_context
()),
grad
.
rows
().
size
());
...
...
python/paddle/v2/fluid/tests/test_adam_op.py
浏览文件 @
5361911c
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
from
paddle.v2.fluid
import
core
from
paddle.v2.fluid.op
import
Operator
class
TestAdamOp1
(
OpTest
):
...
...
@@ -196,9 +198,9 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):
beta2
=
attributes
[
'beta2'
]
epsilon
=
attributes
[
'epsilon'
]
moment1_out
=
np
.
array
(
[
height
,
row_numel
])
moment2_out
=
np
.
array
(
[
height
,
row_numel
])
param_out
=
np
.
array
(
[
height
,
row_numel
])
moment1_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
moment2_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
param_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
for
idx
,
row_id
in
enumerate
(
rows
):
moment1_out
[
row_id
]
=
beta1
*
moment1
[
row_id
]
+
(
1
-
beta1
...
...
@@ -206,8 +208,8 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):
moment2_out
[
row_id
]
=
beta2
*
moment2
[
row_id
]
+
(
1
-
beta2
)
*
np
.
square
(
np_grad
[
idx
])
lr_t
=
lr
*
np
.
sqrt
(
1
-
beta2_pow
)
/
(
1
-
beta1_pow
)
param_out
[
row_id
]
=
param
[
row_id
]
-
lr_t
*
(
moment1_out
/
(
np
.
sqrt
(
moment2_out
)
+
epsilon
))
param_out
[
row_id
]
=
param
[
row_id
]
-
lr_t
*
(
moment1_out
[
row_id
]
/
(
np
.
sqrt
(
moment2_out
[
row_id
]
)
+
epsilon
))
return
param_out
,
moment1_out
,
moment2_out
...
...
@@ -219,13 +221,15 @@ class TestSparseAdamOp(unittest.TestCase):
height
=
10
rows
=
[
0
,
4
,
7
]
self
.
rows
=
rows
row_numel
=
12
self
.
row_numel
=
row_numel
self
.
dense_inputs
=
{
"Param"
:
np
.
full
((
height
,
row_numel
),
5.0
).
astype
(
"float32"
),
"Moment1"
:
np
.
full
((
height
,
row_numel
),
5.0
).
astype
(
"float32"
),
"Moment2"
:
np
.
full
((
height
,
row_numel
),
5.0
).
astype
(
"float32"
),
'Beta1Pow'
:
np
.
array
([
0.9
**
10
]).
astype
(
"float32"
),
'Beta2Pow'
:
np
.
array
([
0.999
**
10
]).
astype
(
"float32"
),
'Beta1Pow'
:
np
.
array
([
beta1
**
10
]).
astype
(
"float32"
),
'Beta2Pow'
:
np
.
array
([
beta2
**
10
]).
astype
(
"float32"
),
"LearningRate"
:
np
.
full
((
1
),
2.0
).
astype
(
"float32"
)
}
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'beta1'
:
beta1
,
'beta2'
:
beta2
}
...
...
@@ -245,7 +249,7 @@ class TestSparseAdamOp(unittest.TestCase):
param_out
,
mom1
,
mom2
=
adam_step_sparse
(
self
.
dense_inputs
,
self
.
attrs
,
height
,
rows
,
row_numel
,
np_array
)
self
.
outputs
=
{
"Param"
:
param_out
,
"Param
Out
"
:
param_out
,
"Moment1Out"
:
mom1
,
"Moment2Out"
:
mom2
}
...
...
@@ -261,37 +265,29 @@ class TestSparseAdamOp(unittest.TestCase):
op_args
[
key
]
=
key
for
s
in
self
.
sparse_inputs
:
op_args
[
s
]
=
s
for
s
in
self
.
outputs
:
var
=
scope
.
var
(
s
).
get_tensor
()
var
.
set
(
self
.
outputs
[
s
],
place
)
op_args
[
s
]
=
s
for
k
in
self
.
attrs
:
op_args
[
k
]
=
self
.
attrs
[
k
]
# create and run adam operator
sgd
_op
=
Operator
(
"adam"
,
**
op_args
)
sgd
_op
.
run
(
scope
,
place
)
adam
_op
=
Operator
(
"adam"
,
**
op_args
)
adam
_op
.
run
(
scope
,
place
)
for
key
,
np_array
in
self
.
outputs
.
iteritems
():
out_var
=
scope
.
var
(
key
).
get_tensor
()
actual
=
np
.
array
(
out_var
)
actual
.
reshape
([
actual
.
size
()])
np_array
.
reshape
([
np_array
.
size
()])
i
=
0
while
i
<
actual
.
size
():
self
.
assertAlmostEqual
(
actual
[
i
],
np_array
[
i
])
i
+=
1
# # rows[0] = 0, 5.0 - 2.0 * 2.0
# self.assertAlmostEqual(1.0, result_array[rows[0], 0])
# # rows[0] = 0, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[0], 2])
# # 5.0 - 2.0 * 0.0
# self.assertAlmostEqual(5.0, result_array[1, 0])
# # rows[1] = 4, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[1], 10])
# # 5.0 - 2.0 * 0.0
# self.assertAlmostEqual(5.0, result_array[5, 8])
# # rows[2] = 7, 5.0 - 2.0 * 1.0
# self.assertAlmostEqual(3.0, result_array[rows[2], 1])
# # rows[2] = 7, 5.0 - 2.0 * 4.0
# self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
actual
=
actual
.
reshape
([
actual
.
size
])
np_array
=
np_array
.
reshape
([
np_array
.
size
])
for
idx
,
row_id
in
enumerate
(
self
.
rows
):
j
=
0
while
j
<
self
.
row_numel
:
pos
=
row_id
*
self
.
row_numel
+
j
print
(
actual
[
pos
]
-
np_array
[
pos
])
/
actual
[
pos
]
self
.
assertLess
((
actual
[
pos
]
-
np_array
[
pos
])
/
actual
[
pos
],
0.00001
)
j
+=
1
def
test_sparse_sgd
(
self
):
places
=
[
core
.
CPUPlace
()]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录