Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d7ac1cc8
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d7ac1cc8
编写于
9月 05, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine seq when bs is large
上级
9dd5a177
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
59 addition
and
30 deletion
+59
-30
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+58
-29
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
+1
-1
未找到文件。
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
d7ac1cc8
...
@@ -285,18 +285,23 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -285,18 +285,23 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
act_cell(D, ct, gates + D2); \
act_cell(D, ct, gates + D2); \
blas.VMUL(D, gates + D2, gates + D3, ht)
blas.VMUL(D, gates + D2, gates + D3, ht)
#define COMPUTE_CtHt_WITHOUT_H0C0(gates, ct, ht) \
#define GET_Ct_NOH0C0(gates, ct) \
act_gate(D, gates + D, gates + D); \
/* C_t = igated * cgated*/
\
act_cand(D, gates, gates); \
act_gate(D, gates + D, gates + D); \
/* C_t = igated * cgated*/
\
act_cand(D, gates, gates); \
blas.VMUL(D, gates, gates + D, ct); \
blas.VMUL(D, gates, gates + D, ct)
/* get outgated*/
\
if (use_peepholes) { \
#define COMPUTE_CtHt_NOH0C0(gates, ct, ht) \
/* put W_oc * C_t on igated */
\
GET_Ct_NOH0C0(gates, ct); \
blas.VMUL(D, wc_data + D2, ct, gates + D); \
act_gate(D, gates + D3, gates + D3); \
blas.VADD(D, gates + D, gates + D3, gates + D3); \
GET_Ht(ct, gates, ht)
} \
act_gate(D, gates + D3, gates + D3); \
#define COMPUTE_CtHt_PEEPHOLE_NOH0C0(gates, ct, ht) \
GET_Ct_NOH0C0(gates, ct); \
/* get outgated, put W_oc * C_t on igated */
\
blas.VMUL(D, wc_data + D2, ct, gates + D); \
blas.VADD(D, gates + D, gates + D3, gates + D3); \
act_gate(D, gates + D3, gates + D3); \
GET_Ht(ct, gates, ht)
GET_Ht(ct, gates, ht)
#define COMPUTE_CtHt(gates, ct_1, ct, ht) \
#define COMPUTE_CtHt(gates, ct_1, ct, ht) \
...
@@ -354,24 +359,38 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -354,24 +359,38 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
h_out_data = h_out_data + gate_offset; \
h_out_data = h_out_data + gate_offset; \
c_out_data = c_out_data + gate_offset
c_out_data = c_out_data + gate_offset
#define PROCESS_H0C0 \
#define PROCESS_H0C0_DEFINES \
int bid = is_reverse ? N - 1 - i : i; \
int bid = is_reverse ? N - 1 - i : i; \
int seq_len = x_lod[0][bid + 1] - x_lod[0][bid]; \
int seq_len = x_lod[0][bid + 1] - x_lod[0][bid]; \
const T* prev_c_data = nullptr; \
const T* prev_c_data = nullptr; \
const T* prev_h_data = nullptr; \
const T* prev_h_data = nullptr; \
int tstart = 0; \
int tstart = 0
if (h0_data) { \
prev_h_data = h0_data + bid * D; \
#define PROCESS_H0C0_PEEPHOLE \
prev_c_data = c0_data + bid * D; \
PROCESS_H0C0_DEFINES; \
} else { \
if (h0_data) { \
COMPUTE_CtHt_WITHOUT_H0C0(xx_data, c_out_data, h_out_data); \
prev_h_data = h0_data + bid * D; \
MOVE_ONE_STEP; \
prev_c_data = c0_data + bid * D; \
tstart = 1; \
} else { \
COMPUTE_CtHt_PEEPHOLE_NOH0C0(xx_data, c_out_data, h_out_data); \
MOVE_ONE_STEP; \
tstart = 1; \
}
#define PROCESS_H0C0 \
PROCESS_H0C0_DEFINES; \
if (h0_data) { \
prev_h_data = h0_data + bid * D; \
prev_c_data = c0_data + bid * D; \
} else { \
COMPUTE_CtHt_NOH0C0(xx_data, c_out_data, h_out_data); \
MOVE_ONE_STEP; \
tstart = 1; \
}
}
if
(
use_peepholes
)
{
if
(
use_peepholes
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
PROCESS_H0C0
;
PROCESS_H0C0
_PEEPHOLE
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
COMPUTE_CtHt_PEEPHOLE
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
COMPUTE_CtHt_PEEPHOLE
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
...
@@ -380,7 +399,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -380,7 +399,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
}
}
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
PROCESS_H0C0
;
PROCESS_H0C0
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
for
(
int
step
=
tstart
;
step
<
seq_len
;
++
step
)
{
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
GEMM_WH_ADDON
(
1
,
prev_h_data
,
xx_data
);
COMPUTE_CtHt
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
COMPUTE_CtHt
(
xx_data
,
prev_c_data
,
c_out_data
,
h_out_data
);
...
@@ -388,6 +407,8 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -388,6 +407,8 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
}
}
}
}
}
}
#undef PROCESS_H0C0_DEFINES
#undef PROCESS_H0C0_PEEPHOLE
#undef PROCESS_H0C0
#undef PROCESS_H0C0
#undef MOVE_ONE_STEP
#undef MOVE_ONE_STEP
}
}
...
@@ -460,7 +481,13 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -460,7 +481,13 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
T
*
cur_h_out_data
=
batched_h_out_data
;
T
*
cur_h_out_data
=
batched_h_out_data
;
T
*
cur_c_out_data
=
batched_c_out_data
;
T
*
cur_c_out_data
=
batched_c_out_data
;
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
max_bs
;
++
i
)
{
COMPUTE_CtHt_WITHOUT_H0C0
(
cur_in_data
,
cur_c_out_data
,
cur_h_out_data
);
GET_Ct_NOH0C0
(
cur_in_data
,
cur_c_out_data
);
if
(
use_peepholes
)
{
blas
.
VMUL
(
D
,
wc_data
+
D2
,
cur_c_out_data
,
cur_in_data
+
D
);
blas
.
VADD
(
D
,
cur_in_data
+
D
,
cur_in_data
+
D3
,
cur_in_data
+
D3
);
}
act_gate
(
D
,
cur_in_data
+
D3
,
cur_in_data
+
D3
);
GET_Ht
(
cur_c_out_data
,
cur_in_data
,
cur_h_out_data
);
cur_in_data
+=
D4
;
cur_in_data
+=
D4
;
cur_c_out_data
+=
D
;
cur_c_out_data
+=
D
;
cur_h_out_data
+=
D
;
cur_h_out_data
+=
D
;
...
@@ -541,7 +568,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -541,7 +568,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt
#undef COMPUTE_CtHt
#undef COMPUTE_CtHt_WITHOUT_H0C0
#undef GET_Ct_NOH0C0
#undef COMPUTE_CtHt_NOH0C0
#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0
#undef GET_Ht
#undef GET_Ht
#undef GET_Ct
#undef GET_Ct
#undef GEMM_WH_ADDON
#undef GEMM_WH_ADDON
...
...
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
浏览文件 @
d7ac1cc8
...
@@ -183,7 +183,7 @@ class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp):
...
@@ -183,7 +183,7 @@ class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp):
self
.
is_reverse
=
True
self
.
is_reverse
=
True
class
TestFusionLSTMOpP
oo
pholesBS1
(
TestFusionLSTMOp
):
class
TestFusionLSTMOpP
ee
pholesBS1
(
TestFusionLSTMOp
):
def
set_conf
(
self
):
def
set_conf
(
self
):
self
.
use_peepholes
=
True
self
.
use_peepholes
=
True
self
.
lod
=
[[
2
]]
self
.
lod
=
[[
2
]]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录