Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0c5ed5f6
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0c5ed5f6
编写于
11月 22, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
enable peephole jitcode
test=develop
上级
e3b61cf5
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
27 addition
and
3 deletion
+27
-3
paddle/fluid/operators/math/jit_code.cc
paddle/fluid/operators/math/jit_code.cc
+26
-2
paddle/fluid/operators/math/jit_kernel_rnn.cc
paddle/fluid/operators/math/jit_kernel_rnn.cc
+1
-1
未找到文件。
paddle/fluid/operators/math/jit_code.cc
浏览文件 @
0c5ed5f6
...
@@ -221,10 +221,14 @@ void LSTMJitCode::generate() {
...
@@ -221,10 +221,14 @@ void LSTMJitCode::generate() {
reg64_t
reg_ptr_ct_1
=
r9
;
reg64_t
reg_ptr_ct_1
=
r9
;
reg64_t
reg_ptr_ct
=
r10
;
reg64_t
reg_ptr_ct
=
r10
;
reg64_t
reg_ptr_ht
=
r11
;
reg64_t
reg_ptr_ht
=
r11
;
reg64_t
reg_ptr_wp
=
r12
;
mov
(
reg_ptr_gates
,
ptr
[
param1
+
offsetof
(
lstm_t
,
gates
)]);
mov
(
reg_ptr_gates
,
ptr
[
param1
+
offsetof
(
lstm_t
,
gates
)]);
mov
(
reg_ptr_ct_1
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ct_1
)]);
mov
(
reg_ptr_ct_1
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ct_1
)]);
mov
(
reg_ptr_ct
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ct
)]);
mov
(
reg_ptr_ct
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ct
)]);
mov
(
reg_ptr_ht
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ht
)]);
mov
(
reg_ptr_ht
,
ptr
[
param1
+
offsetof
(
lstm_t
,
ht
)]);
if
(
use_peephole_
)
{
mov
(
reg_ptr_wp
,
ptr
[
param1
+
offsetof
(
lstm_t
,
wp
)]);
}
int
offset
=
0
;
int
offset
=
0
;
int
d
=
num_
*
sizeof
(
float
);
int
d
=
num_
*
sizeof
(
float
);
...
@@ -235,13 +239,27 @@ void LSTMJitCode::generate() {
...
@@ -235,13 +239,27 @@ void LSTMJitCode::generate() {
act
<
ymm_t
>
(
ymm_c
,
ymm_src
,
act_cand_
);
act
<
ymm_t
>
(
ymm_c
,
ymm_src
,
act_cand_
);
// i
// i
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
d
]);
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
d
]);
if
(
!
compute_c1h1_
&&
use_peephole_
)
{
ymm_t
ymm_wp
=
ymm_t
(
2
);
ymm_t
ymm_ct_1
=
ymm_t
(
3
);
vmovups
(
ymm_wp
,
ptr
[
reg_ptr_wp
+
offset
]);
vmovups
(
ymm_ct_1
,
ptr
[
reg_ptr_ct_1
+
offset
]);
vmulps
(
ymm_wp
,
ymm_ct_1
,
ymm_wp
);
vaddps
(
ymm_src
,
ymm_src
,
ymm_wp
);
}
act
<
ymm_t
>
(
ymm_i
,
ymm_src
,
act_gate_
);
act
<
ymm_t
>
(
ymm_i
,
ymm_src
,
act_gate_
);
vmulps
(
ymm_c
,
ymm_c
,
ymm_i
);
vmulps
(
ymm_c
,
ymm_c
,
ymm_i
);
if
(
!
compute_c1h1_
)
{
if
(
!
compute_c1h1_
)
{
// f
// f
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
2
*
d
]);
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
2
*
d
]);
act
<
ymm_t
>
(
ymm_f
,
ymm_src
,
act_gate_
);
vmovups
(
ymm_i
,
ptr
[
reg_ptr_ct_1
+
offset
]);
vmovups
(
ymm_i
,
ptr
[
reg_ptr_ct_1
+
offset
]);
if
(
use_peephole_
)
{
ymm_t
ymm_wp
=
ymm_t
(
3
);
vmovups
(
ymm_wp
,
ptr
[
reg_ptr_wp
+
offset
+
d
]);
vmulps
(
ymm_wp
,
ymm_i
,
ymm_wp
);
vaddps
(
ymm_src
,
ymm_src
,
ymm_wp
);
}
act
<
ymm_t
>
(
ymm_f
,
ymm_src
,
act_gate_
);
vmulps
(
ymm_f
,
ymm_f
,
ymm_i
);
vmulps
(
ymm_f
,
ymm_f
,
ymm_i
);
vaddps
(
ymm_f
,
ymm_f
,
ymm_c
);
vaddps
(
ymm_f
,
ymm_f
,
ymm_c
);
}
}
...
@@ -250,8 +268,14 @@ void LSTMJitCode::generate() {
...
@@ -250,8 +268,14 @@ void LSTMJitCode::generate() {
ymm_t
ymm_o
=
compute_c1h1_
?
ymm_f
:
ymm_c
;
ymm_t
ymm_o
=
compute_c1h1_
?
ymm_f
:
ymm_c
;
ymm_t
ymm_tmp
=
ymm_i
;
ymm_t
ymm_tmp
=
ymm_i
;
vmovups
(
ptr
[
reg_ptr_ct
+
offset
],
ymm_ct
);
// save ct
vmovups
(
ptr
[
reg_ptr_ct
+
offset
],
ymm_ct
);
// save ct
act
<
ymm_t
>
(
ymm_tmp
,
ymm_ct
,
act_cell_
);
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
3
*
d
]);
vmovups
(
ymm_src
,
ptr
[
reg_ptr_gates
+
offset
+
3
*
d
]);
if
(
use_peephole_
)
{
ymm_t
ymm_wp
=
ymm_t
(
2
);
vmovups
(
ymm_wp
,
ptr
[
reg_ptr_wp
+
offset
+
d
*
2
]);
vmulps
(
ymm_wp
,
ymm_ct
,
ymm_wp
);
vaddps
(
ymm_src
,
ymm_src
,
ymm_wp
);
}
act
<
ymm_t
>
(
ymm_tmp
,
ymm_ct
,
act_cell_
);
act
<
ymm_t
>
(
ymm_o
,
ymm_src
,
act_gate_
);
act
<
ymm_t
>
(
ymm_o
,
ymm_src
,
act_gate_
);
vmulps
(
ymm_o
,
ymm_tmp
,
ymm_o
);
vmulps
(
ymm_o
,
ymm_tmp
,
ymm_o
);
vmovups
(
ptr
[
reg_ptr_ht
+
offset
],
ymm_o
);
// save ht
vmovups
(
ptr
[
reg_ptr_ht
+
offset
],
ymm_o
);
// save ht
...
...
paddle/fluid/operators/math/jit_kernel_rnn.cc
浏览文件 @
0c5ed5f6
...
@@ -108,7 +108,7 @@ class PeepholeKernelImpl : public LSTMKernel<T> {
...
@@ -108,7 +108,7 @@ class PeepholeKernelImpl : public LSTMKernel<T> {
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
template
<
>
template
<
>
bool
PeepholeKernelImpl
<
float
>::
useJIT
(
int
d
)
{
bool
PeepholeKernelImpl
<
float
>::
useJIT
(
int
d
)
{
return
false
;
// peephole jitcode not ready yet
return
gen
::
LSTMJitCode
::
init
(
d
);
}
}
#endif
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录