Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 3cf50b74 ("fix fuse")
Authored Aug 27, 2020 by seiriosPlus
Parent: 66321576

This change passes the LearningRate variable into the fused large-scale sparse-table optimizer ops from the pserver pass, and fixes the fused Adam and SGD CPU kernels to read the learning rate and the op attributes from the execution context (ctx) and to use the CPU device context throughout.
Showing 5 changed files with 29 additions and 24 deletions:
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc  +2  -4
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h   +10 -8
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc   +2  -3
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h    +10 -8
python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py      +5  -1
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc

@@ -82,7 +82,7 @@ class LargeScaleFuseAdamOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
     AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
+    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
     AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
     AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");

@@ -150,6 +150,4 @@ REGISTER_OPERATOR(
 REGISTER_OP_CPU_KERNEL(
     lookup_sparse_table_fuse_adam,
-    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::LargeScaleFuseAdamOpKernel<paddle::platform::CPUDeviceContext, float>);
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h

@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/distributed/large_scale_kv.h"
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"

 namespace paddle {
 namespace operators {

@@ -37,8 +38,9 @@ class LargeScaleFuseAdamOpKernel<platform::CPUDeviceContext, T>
     : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
+    using paddle::framework::LoDTensor;
     const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
     const auto *grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE(

@@ -56,8 +58,8 @@ class LargeScaleFuseAdamOpKernel<platform::CPUDeviceContext, T>
     framework::SelectedRows tmp_grad_merge;
     const framework::SelectedRows *grad_merge_ptr;
-    math::scatter::MergeAdd<DeviceContext, T> merge_func;
+    math::scatter::MergeAdd<platform::CPUDeviceContext, T> merge_func;
-    merge_func(context.template device_context<DeviceContext>(), *in_grad,
+    merge_func(ctx.template device_context<platform::CPUDeviceContext>(), grad,
                &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;

@@ -71,8 +73,8 @@ class LargeScaleFuseAdamOpKernel<platform::CPUDeviceContext, T>
     auto grad_width = grad_v.dims()[1];
     // auto is_entry = context.Attr<bool>("is_entry");
-    auto tablename = context.Attr<std::string>("tablename");
+    auto tablename = ctx.Attr<std::string>("tablename");
-    auto value_names = Attr<std::vector<std::string>>("value_names");
+    auto value_names = ctx.Attr<std::vector<std::string>>("value_names");
     auto *beta1_pow = ctx.Input<LoDTensor>("Beta1Pow");
     auto *beta2_pow = ctx.Input<LoDTensor>("Beta2Pow");

@@ -116,11 +118,11 @@ class LargeScaleFuseAdamOpKernel<platform::CPUDeviceContext, T>
     auto &moment_1 = values[1];
     auto &moment_2 = values[2];
-    T lr = *lr_;
+    T lr_ = lr[0];
     T beta1_ = beta1_pow->data<T>()[0];
     T beta2_ = beta2_pow->data<T>()[0];
-    lr *= sqrt(1 - beta1_) / (1 - beta2_);
+    lr_ *= sqrt(1 - beta1_) / (1 - beta2_);
     for (size_t i = 0; i < in_rows.size(); i++) {
       auto *m1_data = moment_1[i]->data();

@@ -131,7 +133,7 @@ class LargeScaleFuseAdamOpKernel<platform::CPUDeviceContext, T>
         auto g = grad_v.data<T>()[grad_width * i + x];
         m1_data[x] = beta1_ * m1_data[x] + (1 - beta1_) * g;
         m2_data[x] = beta2_ * m2_data[x] + (1 - beta2_) * g * g;
-        p_data[x] -= lr * (m1_data[x] / (sqrt(m2_data[x]) + epsilon));
+        p_data[x] -= lr_ * (m1_data[x] / (sqrt(m2_data[x]) + epsilon));
       }
     }
   }
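For reference, the per-row update the fixed Adam kernel computes, restated from the code above in LaTeX (lr[0] is the scalar read from the LearningRate input; the bias-correction factor is written exactly as this kernel computes it):

    lr' = lr[0] \cdot \frac{\sqrt{1 - \beta_1^{\mathrm{pow}}}}{1 - \beta_2^{\mathrm{pow}}}
    m_1 \leftarrow \beta_1 m_1 + (1 - \beta_1)\, g
    m_2 \leftarrow \beta_2 m_2 + (1 - \beta_2)\, g^2
    p \leftarrow p - lr' \cdot \frac{m_1}{\sqrt{m_2} + \epsilon}

Here \beta_1^{\mathrm{pow}} and \beta_2^{\mathrm{pow}} are the scalar values of the Beta1Pow and Beta2Pow tensors, and g is one entry of the merged gradient row.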
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc

@@ -79,7 +79,7 @@ class LargeScaleFuseSGDOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Grad",
              "(SelectedRows) Ids's type should be SelectedRows"
              "THe ids to be looked up in W.");
+    AddInput("LearningRate", "(Tensor) Learning rate of SGD");
     AddAttr<bool>("is_entry",
                   "(bool)"
                   "sparse table need entry");

@@ -117,5 +117,4 @@ REGISTER_OPERATOR(
 REGISTER_OP_CPU_KERNEL(
     lookup_sparse_table_fuse_sgd,
-    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::LargeScaleFuseSGDOpKernel<paddle::platform::CPUDeviceContext, float>);
paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h

@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/operators/distributed/large_scale_kv.h"
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/selected_rows_functor.h"

 namespace paddle {
 namespace operators {

@@ -56,8 +57,8 @@ class LargeScaleFuseSGDOpKernel<platform::CPUDeviceContext, T>
     framework::SelectedRows tmp_grad_merge;
     const framework::SelectedRows *grad_merge_ptr;
-    math::scatter::MergeAdd<DeviceContext, T> merge_func;
+    math::scatter::MergeAdd<platform::CPUDeviceContext, T> merge_func;
-    merge_func(context.template device_context<DeviceContext>(), *in_grad,
+    merge_func(ctx.template device_context<platform::CPUDeviceContext>(), grad,
                &tmp_grad_merge, true);
     grad_merge_ptr = &tmp_grad_merge;

@@ -71,8 +72,8 @@ class LargeScaleFuseSGDOpKernel<platform::CPUDeviceContext, T>
     auto grad_width = grad_v.dims()[1];
     // auto is_entry = context.Attr<bool>("is_entry");
-    auto tablename = context.Attr<std::string>("tablename");
+    auto tablename = ctx.Attr<std::string>("tablename");
-    auto value_names = Attr<std::vector<std::string>>("value_names");
+    auto value_names = ctx.Attr<std::vector<std::string>>("value_names");
     std::vector<std::vector<std::vector<float> *>> values;
     std::vector<int64_t> dims;

@@ -88,15 +89,16 @@ class LargeScaleFuseSGDOpKernel<platform::CPUDeviceContext, T>
     auto &params = values[0];
-    auto blas = math::GetBlas<DeviceContext, T>(context);
+    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
     std::vector<T> grads;
-    framework::TensorToVector(grad_v, context.device_context(), &grads);
+    framework::TensorToVector(grad_v, ctx.device_context(), &grads);
-    blas.VMUL(grads, lr[0], grads);
+    blas.SCAL(grads.size(), lr[0], grads.data());
     for (int x = 0; x < static_cast<int>(in_rows.size()); ++x) {
-      blas.VSUB(grad_width, params[x], grads.data() + grad_width * x, params);
+      blas.VSUB(grad_width, params[x]->data(), grads.data() + grad_width * x,
+                params[x]->data());
     }
   }
 };
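To see what the corrected SCAL-then-VSUB sequence computes, here is a minimal NumPy sketch (not Paddle code; the function and variable names are illustrative), assuming the gradient rows have already been deduplicated by MergeAdd:

    import numpy as np

    def fused_sgd_rows(params, grad_rows, in_rows, lr):
        # TensorToVector: copy the merged gradient into a flat buffer.
        grads = grad_rows.astype(np.float32, copy=True)
        # blas.SCAL(grads.size(), lr[0], grads.data()): scale the whole buffer once.
        grads *= lr
        # blas.VSUB per row: subtract the scaled gradient row (width grad_width)
        # from the parameter row, writing back into params[x]->data().
        for x, row_id in enumerate(in_rows):
            params[row_id] -= grads[x]
        return params

    # Illustrative usage: two touched rows (ids 7 and 9) of width 4.
    params = {7: np.ones(4, np.float32), 9: np.ones(4, np.float32)}
    fused_sgd_rows(params, np.full((2, 4), 0.5, np.float32), [7, 9], lr=0.1)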
python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py

@@ -657,13 +657,15 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
         if op.type == "sgd":
             grad = main_program.global_block().vars[op.input("Grad")[0]]
+            lr = main_program.global_block().vars[op.input("LearningRate")[0]]

             # remove origin optimzier op
             block._remove_op(opt_idx)
             block._insert_op(
                 opt_idx,
                 type="lookup_sparse_table_fuse_sgd",
-                inputs={"Grad": grad},
+                inputs={"Grad": grad,
+                        "LearningRate": lr},
                 attrs={
                     "is_entry": is_entry,
                     "tablename": table_name,

@@ -672,6 +674,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
         elif op.type == "adam":
             grad = main_program.global_block().vars[op.input("Grad")[0]]
+            lr = main_program.global_block().vars[op.input("LearningRate")[0]]
             beta1_pow = main_program.global_block().vars[op.input("Beta1Pow")[
                 0]]
             beta2_pow = main_program.global_block().vars[op.input("Beta2Pow")[

@@ -693,6 +696,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False):
                 type="lookup_sparse_table_fuse_adam",
                 inputs={
                     "Grad": grad,
+                    "LearningRate": lr,
                     "Beta1Pow": beta1_pow,
                     "Beta2Pow": beta2_pow
                 },
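To make the rewrite shape concrete, here is a self-contained toy sketch of the remove-and-insert the pass performs, with the LearningRate input this commit adds. Block and Op are hypothetical stand-ins, not Paddle's IR classes; only _remove_op/_insert_op and the input names come from the diff above:

    class Op:
        def __init__(self, type, inputs, attrs=None):
            self.type, self.inputs, self.attrs = type, inputs, attrs or {}

    class Block:
        def __init__(self, ops):
            self.ops = ops
        def _remove_op(self, idx):
            del self.ops[idx]
        def _insert_op(self, idx, **kwargs):
            self.ops.insert(idx, Op(**kwargs))

    # One optimizer op whose inputs name variables in the program.
    block = Block([Op("sgd", {"Grad": "emb@GRAD", "LearningRate": "learning_rate_0"})])
    opt_idx, old = 0, block.ops[0]

    block._remove_op(opt_idx)  # remove origin optimizer op
    block._insert_op(
        opt_idx,
        type="lookup_sparse_table_fuse_sgd",
        inputs={"Grad": old.inputs["Grad"],
                "LearningRate": old.inputs["LearningRate"]},  # added by this commit
        attrs={"is_entry": False, "tablename": "emb"})

    print(block.ops[0].type, block.ops[0].inputs)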