Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit fc6ec6bd
Authored Dec 13, 2018 by Qiao Longfei
add sparse mode adam
Parent commit: 66b6e473
Showing 4 changed files with 51 additions and 22 deletions (+51, -22):

paddle/fluid/operators/optimizers/adam_op.cc (+5, -0)
paddle/fluid/operators/optimizers/adam_op.h (+29, -12)
python/paddle/fluid/optimizer.py (+5, -2)
python/paddle/fluid/tests/unittests/test_adam_op.py (+12, -8)
paddle/fluid/operators/optimizers/adam_op.cc

@@ -110,6 +110,11 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
                    "(float, default 1.0e-8) "
                    "Constant for numerical stability")
         .SetDefault(1.0e-8f);
+    AddAttr<bool>("sparse_mode",
+                  "(bool, default false) "
+                  "only update the parameter that has gradient in sparse update")
+        .SetDefault(false);
     AddComment(R"DOC(
 Adam Optimizer.
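The new attribute only matters when the gradient arrives as sparse rows. A minimal NumPy sketch of the intended semantics (not the operator code; the helper names, shapes, and the Python-side row handling are illustrative assumptions, while the bias-corrected step follows the dense update already used by adam_op):

import numpy as np

def adam_row(p, m1, m2, g, lr, beta1, beta2, eps, beta1_pow, beta2_pow):
    # One bias-corrected Adam step for a single row.
    m1 = beta1 * m1 + (1 - beta1) * g
    m2 = beta2 * m2 + (1 - beta2) * g * g
    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
    return p - lr_t * m1 / (np.sqrt(m2) + eps), m1, m2

def sparse_adam(param, mom1, mom2, rows, grad, lr, b1, b2, eps, b1p, b2p, sparse_mode):
    present = {r: k for k, r in enumerate(rows)}   # rows carried by the sparse gradient
    for r in range(param.shape[0]):
        if r in present:
            g = grad[present[r]]
        elif sparse_mode:
            continue                               # sparse_mode: rows without a gradient stay untouched
        else:
            g = np.zeros_like(param[r])            # old behavior: g = 0, moments still decay
        param[r], mom1[r], mom2[r] = adam_row(param[r], mom1[r], mom2[r], g,
                                              lr, b1, b2, eps, b1p, b2p)

With sparse_mode off, rows that received no gradient still drift, because their decayed first moment keeps feeding the update; with it on, those rows are skipped entirely.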
paddle/fluid/operators/optimizers/adam_op.h

@@ -177,12 +177,13 @@ struct SparseAdamFunctor {
   const int64_t* rows_;
   int64_t row_numel_;
   int64_t row_count_;
+  bool sparse_mode_;

   SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
                     const T* beta2_pow, const T* mom1, T* mom1_out,
                     const T* mom2, T* mom2_out, const T* lr, const T* grad,
                     const T* param, T* param_out, const int64_t* rows,
-                    int64_t row_numel, int64_t row_count)
+                    int64_t row_numel, int64_t row_count, bool sparse_mode)
       : beta1_(beta1),
         beta2_(beta2),
         epsilon_(epsilon),
@@ -198,13 +199,10 @@ struct SparseAdamFunctor {
         param_out_(param_out),
         rows_(rows),
         row_numel_(row_numel),
-        row_count_(row_count) {}
-
-  inline HOSTDEVICE void operator()(size_t i) const {
-    auto row_idx =
-        math::BinarySearch<int64_t>(rows_, row_count_, i / row_numel_);
-    T g = row_idx >= 0 ? grad_[row_idx * row_numel_ + i % row_numel_] : 0;
-
+        row_count_(row_count),
+        sparse_mode_(sparse_mode) {}
+
+  inline HOSTDEVICE void sparse_update(size_t i, T g) const {
     // The following code is the same as dense
     T mom1 = moment1_[i];
     T mom2 = moment2_[i];
@@ -225,6 +223,13 @@ struct SparseAdamFunctor {
     moment2_out_[i] = mom2;
     param_out_[i] = p;
   }
+
+  inline HOSTDEVICE void operator()(size_t i) const {
+    auto row_idx =
+        math::BinarySearch<int64_t>(rows_, row_count_, i / row_numel_);
+    T g = row_idx >= 0 ? grad_[row_idx * row_numel_ + i % row_numel_] : 0;
+    sparse_update(i, g);
+  }
 };

 template <typename DeviceContext, typename T>
@@ -240,6 +245,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
     using paddle::framework::LoDTensor;
     using paddle::operators::detail::Ref;

+    bool sparse_mode = ctx.Attr<bool>("sparse_mode");
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
@@ -351,11 +357,22 @@ class AdamOpKernel : public framework::OpKernel<T> {
           mom2_out.template mutable_data<T>(ctx.GetPlace()),
           lr.template data<T>(), grad_data, param.template data<T>(),
           param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
-          grad_merge.rows().size());
+          grad_merge.rows().size(), sparse_mode);
+      if (sparse_mode) {
+        size_t row_count = grad_merge.rows().size();
+        for (size_t row_index = 0; row_index < row_count; ++row_index) {
+          for (size_t offset = 0; offset < row_numel; ++offset) {
+            size_t i = rows[row_index] * row_numel + offset;
+            T g = grad_data[row_index * row_numel + offset];
+            functor.sparse_update(i, g);
+          }
+        }
+      } else {
       platform::ForRange<DeviceContext> for_range(
           static_cast<const DeviceContext&>(ctx.device_context()),
           param.numel());
       for_range(functor);
+      }
     } else {
       PADDLE_THROW("Variable type not supported by adam_op");
     }
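For reference, the flat-index arithmetic that the functor's operator() performs before delegating to sparse_update can be sketched in Python (rows is assumed sorted here, as any binary search requires; the helper name and signature are illustrative, not the kernel API):

from bisect import bisect_left

def grad_at(i, rows, grad, row_numel):
    # Map flat parameter index i to (row, offset), then look the row up in the
    # sorted sparse-row list, mirroring math::BinarySearch in SparseAdamFunctor.
    row, offset = divmod(i, row_numel)
    k = bisect_left(rows, row)
    if k < len(rows) and rows[k] == row:
        return grad[k * row_numel + offset]   # row present in the merged gradient
    return 0.0                                # absent row: gradient treated as zero

When sparse_mode is set, the kernel skips this lookup entirely: it walks only the (row_index, offset) pairs of grad_merge and calls functor.sparse_update(i, g) directly, so rows absent from the gradient are never visited.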
python/paddle/fluid/optimizer.py

@@ -663,7 +663,8 @@ class AdamOptimizer(Optimizer):
                  beta2=0.999,
                  epsilon=1e-8,
                  regularization=None,
-                 name=None):
+                 name=None,
+                 sparse_mode=False):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
@@ -676,6 +677,7 @@ class AdamOptimizer(Optimizer):
         self._beta1 = beta1
         self._beta2 = beta2
         self._epsilon = epsilon
+        self._sparse_mode = sparse_mode

     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)
@@ -729,7 +731,8 @@ class AdamOptimizer(Optimizer):
             attrs={
                 "beta1": self._beta1,
                 "beta2": self._beta2,
-                "epsilon": self._epsilon
+                "epsilon": self._epsilon,
+                "sparse_mode": self._sparse_mode
             })

         return adam_op
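A minimal usage sketch against the fluid API of this era (the network, sizes, and data names are illustrative assumptions; the only new surface is the sparse_mode keyword added above):

import paddle.fluid as fluid

ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
label = fluid.layers.data(name='label', shape=[1], dtype='float32')
emb = fluid.layers.embedding(input=ids, size=[10000, 64], is_sparse=True)
pred = fluid.layers.fc(input=emb, size=1)
cost = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=label))

# sparse_mode only changes the update for SelectedRows (sparse) gradients, such
# as the sparse embedding's; parameters with dense gradients are updated as before.
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.001, sparse_mode=True)
adam.minimize(cost)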
python/paddle/fluid/tests/unittests/test_adam_op.py

@@ -194,7 +194,8 @@ def adam_step(inputs, attributes):
     return param_out, moment1_out, moment2_out


-def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):
+def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
+                     sparse_mode):
     '''
     Simulate one step of the adam optimizer
     :param inputs: dict of inputs
@@ -230,7 +231,7 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):

 class TestSparseAdamOp(unittest.TestCase):
-    def setup(self, scope, place):
+    def setup(self, scope, place, sparse_mode):
         beta1 = 0.78
         beta2 = 0.836
         epsilon = 1e-4
@@ -262,19 +263,21 @@ class TestSparseAdamOp(unittest.TestCase):
         self.sparse_inputs = ["Grad"]

         param_out, mom1, mom2 = adam_step_sparse(
-            self.dense_inputs, self.attrs, height, rows, row_numel, np_array)
+            self.dense_inputs, self.attrs, height, rows, row_numel, np_array,
+            sparse_mode)
         self.outputs = {
             "ParamOut": param_out,
             "Moment1Out": mom1,
             "Moment2Out": mom2
         }

-    def check_with_place(self, place):
+    def check_with_place(self, place, sparse_mode):
         scope = core.Scope()
-        self.setup(scope, place)
+        self.setup(scope, place, sparse_mode)

         op_args = dict()
+        op_args['sparse_mode'] = sparse_mode
         for key, np_array in self.dense_inputs.items():
             var = scope.var(key).get_tensor()
             var.set(np_array, place)
@@ -305,12 +308,13 @@ class TestSparseAdamOp(unittest.TestCase):
                                  0.00001)
                 j += 1

-    def test_sparse_sgd(self):
+    def test_sparse_adam(self):
         places = [core.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(core.CUDAPlace(0))
         for place in places:
-            self.check_with_place(place)
+            for sparse_mode in (True, False):
+                self.check_with_place(place, sparse_mode)


 if __name__ == "__main__":