Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
209f24a2
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
209f24a2
编写于
10月 28, 2018
作者:
Q
Qiyang Min
提交者:
GitHub
10月 28, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14051 from velconia/accelerate_embedding_grad
[1.1] Accelerate sparse embedding grad op in CPU device
上级
9da9b192
a8b17537
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
38 addition
and
19 deletion
+38
-19
paddle/fluid/operators/lookup_table_op.cc
paddle/fluid/operators/lookup_table_op.cc
+6
-0
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+31
-19
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+1
-0
未找到文件。
paddle/fluid/operators/lookup_table_op.cc
浏览文件 @
209f24a2
...
...
@@ -81,6 +81,12 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"Otherwise the given value indicates padding the output "
"with zeros whenever lookup encounters it in Ids."
)
.
SetDefault
(
kNoPadding
);
// NOTE(minqiyang): grad_inplace is an temporal attribute,
// please do NOT set this attribute in python layer.
AddAttr
<
bool
>
(
"grad_inplace"
,
"(boolean, default false) "
"If the grad op reuse the input's variable."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Lookup Table Operator.
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
209f24a2
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -68,6 +69,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
const
auto
*
table
=
table_t
.
value
().
data
<
T
>
();
auto
*
output
=
output_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int64_t
i
=
0
;
i
<
ids_numel
;
++
i
)
{
if
(
padding_idx
!=
kNoPadding
&&
ids
[
i
]
==
padding_idx
)
{
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
...
...
@@ -75,8 +77,8 @@ class LookupTableKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
auto
id_index
=
table_t
.
Index
(
ids
[
i
]);
PADDLE_ENFORCE_GE
(
id_index
,
0
,
"the input key should be exists."
);
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
row_width
*
sizeof
(
T
)
);
blas
.
VCOPY
(
row_width
,
table
+
id_index
*
row_width
,
output
+
i
*
row_width
);
}
}
}
...
...
@@ -111,27 +113,37 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto
*
ids_data
=
ids
->
data
<
int64_t
>
();
int64_t
ids_num
=
ids
->
numel
();
framework
::
Vector
<
int64_t
>
new_rows
;
new_rows
.
reserve
(
ids_num
);
for
(
int64_t
i
=
0
;
i
<
ids_num
;
i
++
)
{
new_rows
.
push_back
(
ids_data
[
i
]);
}
std
::
vector
<
int64_t
>
new_rows
;
new_rows
.
resize
(
ids_num
);
std
::
memcpy
(
&
new_rows
[
0
],
ids_data
,
ids_num
*
sizeof
(
int64_t
));
d_table
->
set_rows
(
new_rows
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
d_table_value
->
Resize
({
ids_num
,
table_dim
[
1
]});
d_table_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
d_table
->
set_height
(
table_dim
[
0
]);
auto
*
d_output_data
=
d_output
->
data
<
T
>
();
auto
*
d_table_data
=
d_table_value
->
data
<
T
>
();
auto
d_output_dims
=
d_output
->
dims
();
PADDLE_ENFORCE_EQ
(
d_table_value
->
dims
(),
framework
::
flatten_to_2d
(
d_output_dims
,
d_output_dims
.
size
()
-
1
));
memcpy
(
d_table_data
,
d_output_data
,
sizeof
(
T
)
*
d_output
->
numel
());
// FIXME(minqiyang):
// memory optimization will NOT reuse Tensor with SelectedRows
// so we could just share the tensor here directly.
// However, the InferVarType method will infer the output SelectedRows
// to Tensor sometimes, which is a bug, so we will add an attribute
// here to indicate the inplace and remove this attribute after
// the InferVarType's bug was fixed
bool
grad_inplace
=
context
.
Attr
<
bool
>
(
"grad_inplace"
);
if
(
grad_inplace
)
{
d_table_value
->
ShareDataWith
(
*
d_output
);
}
else
{
d_table_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
d_table
->
set_height
(
table_dim
[
0
]);
auto
*
d_output_data
=
d_output
->
data
<
T
>
();
auto
*
d_table_data
=
d_table_value
->
data
<
T
>
();
auto
d_output_dims
=
d_output
->
dims
();
PADDLE_ENFORCE_EQ
(
d_table_value
->
dims
(),
framework
::
flatten_to_2d
(
d_output_dims
,
d_output_dims
.
size
()
-
1
));
memcpy
(
d_table_data
,
d_output_data
,
sizeof
(
T
)
*
d_output
->
numel
());
}
}
else
{
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
209f24a2
...
...
@@ -1159,6 +1159,7 @@ def prepare_encoder(src_word,
name
=
pos_enc_param_name
,
trainable
=
False
,
initializer
=
fluid
.
initializer
.
ConstantInitializer
(
0.001
)))
src_pos_enc
.
stop_gradient
=
True
enc_input
=
src_word_emb
+
src_pos_enc
return
layers
.
dropout
(
enc_input
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录