机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit 1cd67218
Authored on Nov 10, 2019 by Aurelius84; committed by Tao Luo on Nov 10, 2019.
Optimize memcpy if _rand_len=16 and remove data copy in GradKernel (#21099)
Parent: 78cc1ca6
Showing 1 changed file with 11 additions and 9 deletions (+11 −9):

paddle/fluid/operators/pyramid_hash_op.cc
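The commit makes two independent changes to the pyramid_hash operator: the forward kernel's embedding copy is special-cased for the _rand_len == 16 configuration so the memcpy size becomes a compile-time constant, and the gradient kernel reuses the forward pass's X_Temp_Out tensor instead of re-copying the input ids into a scratch buffer on every backward call.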
@@ -163,10 +163,14 @@ class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
                            int _space_len) const {
     for (unsigned int j = 0; j != _num_emb; j += _rand_len) {
       unsigned int pos = XXH32(hash_id, len * sizeof(T), j) % _space_len;
-      memcpy(top_pos + j, const_cast<float*>(weights + pos),
-             _rand_len * sizeof(T));
+      if (_rand_len == 16) {
+        memcpy(top_pos + j, const_cast<float*>(weights + pos), 16 * sizeof(T));
+      } else {
+        memcpy(top_pos + j, const_cast<float*>(weights + pos),
+               _rand_len * sizeof(T));
+      }
     }
   }

   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* bottom = ctx.Input<LoDTensor>("X");
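Branching on _rand_len == 16 lets the compiler see a constant 64-byte copy in the hot loop, which it can inline as a few vector load/store pairs instead of calling library memcpy with a run-time length. A minimal standalone sketch of the effect (function names are illustrative, not part of the patch):

#include <cstring>

// Length known only at run time: usually compiles to a call into libc memcpy.
void copy_dynamic(float* dst, const float* src, int rand_len) {
  std::memcpy(dst, src, rand_len * sizeof(float));
}

// Length fixed at 16 floats (64 bytes): the compiler is free to expand this
// inline into, e.g., two 32-byte vector load/store pairs, avoiding the call.
void copy_fixed16(float* dst, const float* src) {
  std::memcpy(dst, src, 16 * sizeof(float));
}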
@@ -322,6 +326,8 @@ class PyramidHashOpGrad : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, "Input(W) should not be null.");
     PADDLE_ENFORCE_EQ(ctx->HasInput("DropPos"), true,
                       "Input(DropPos) should not be null.");
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X_Temp_Out"), true,
+                      "Input(X_Temp_Out) should not be null.");
     PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
                       "Input(Out@GRAD) of PyramidHashGradOp should not be null.");
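This makes X_Temp_Out a required input of PyramidHashOpGrad, so shape inference now validates it alongside W, DropPos, and Out@GRAD.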
@@ -347,6 +353,7 @@ class PyramidHashGradOpMaker : public framework::SingleGradOpMaker<T> {
     op_desc_ptr->SetInput("X", this->Input("X"));
     op_desc_ptr->SetInput("W", this->Input("W"));
     op_desc_ptr->SetInput("DropPos", this->Output("DropPos"));
+    op_desc_ptr->SetInput("X_Temp_Out", this->Output("X_Temp_Out"));
     op_desc_ptr->SetInput(framework::GradVarName("Out"),
                           this->OutputGrad("Out"));
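Correspondingly, the grad-op maker now forwards the forward op's X_Temp_Out output into the gradient op's inputs, making the tensor available to the backward kernel below.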
@@ -380,13 +387,8 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
     int _space_len = ctx.Attr<int>("space_len");
     int _pyramid_layer = ctx.Attr<int>("pyramid_layer");
-    const auto* bottom_data_ori = bottom->data<int32_t>();
-    Tensor buff;
-    buff.Resize(framework::make_ddim({bottom->dims()[0], bottom->dims()[1]}));
-    T* bottom_data = buff.mutable_data<T>(ctx.GetPlace());
-    for (size_t i = 0; i < bottom->dims()[0]; i++) {
-      bottom_data[i] = bottom_data_ori[i];
-    }
+    auto* buff = ctx.Input<LoDTensor>("X_Temp_Out");
+    auto* bottom_data = buff->data<T>();
     int _slot_len = bottom->dims()[0];
     if (_slot_len == bottom->lod()[0].size() - 1 &&
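The backward kernel previously allocated a scratch Tensor and widened the int32 ids into it element by element on every backward call; it now reads the float copy that the forward pass already materialized as X_Temp_Out. A hypothetical sketch of the data-flow difference, with std::vector standing in for the framework tensors (identifiers are illustrative, not from the patch):

#include <vector>

// Inputs as the gradient kernel sees them.
struct Inputs {
  std::vector<int> bottom_ids;     // raw int32 ids ("X")
  std::vector<float> x_temp_out;   // float copy saved by the forward pass
};

// Before: an O(n) widening copy into a scratch buffer per backward call.
const float* bottom_data_before(const Inputs& in, std::vector<float>& scratch) {
  scratch.assign(in.bottom_ids.begin(), in.bottom_ids.end());  // int -> float
  return scratch.data();
}

// After: zero-copy, just reuse the tensor the forward pass produced.
const float* bottom_data_after(const Inputs& in) {
  return in.x_temp_out.data();
}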