Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
8f913295
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8f913295
编写于
8月 15, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fuse fc in lstm
上级
ddb05dff
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
136 addition
and
94 deletion
+136
-94
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+136
-90
paddle/fluid/operators/fusion_lstm_op.h
paddle/fluid/operators/fusion_lstm_op.h
+0
-4
未找到文件。
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
8f913295
...
...
@@ -14,29 +14,37 @@ limitations under the License. */
#include "paddle/fluid/operators/fusion_lstm_op.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#include "paddle/fluid/operators/math/sequence2batch.h"
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
namespace
operators
{
void
FusionLSTMOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Input"
),
"Input(Input) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Weight"
),
"Input(Weight) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"WeightX"
),
"Input(WeightX) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"WeightH"
),
"Input(WeightH) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Bias"
),
"Input(Bias) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XX"
),
"Output(XX) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Hidden"
),
"Output(Hidden) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Cell"
),
"Output(Cell) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchGate"
),
"Output(BatchGate) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Batch
ed
Gate"
),
"Output(Batch
ed
Gate) of LSTM should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"BatchCellPreAct"
),
"Output(BatchGate) of LSTM should not be null."
);
"Output(Batch
ed
Gate) of LSTM should not be null."
);
auto
in_dims
=
ctx
->
GetInputDim
(
"Input
"
);
PADDLE_ENFORCE_EQ
(
in
_dims
.
size
(),
2
,
"Input(X)'s rank must be 2."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X
"
);
PADDLE_ENFORCE_EQ
(
x
_dims
.
size
(),
2
,
"Input(X)'s rank must be 2."
);
if
(
ctx
->
HasInput
(
"H0"
))
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"C0"
),
...
...
@@ -49,15 +57,24 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
"should be the same."
);
}
int
frame_size
=
in_dims
[
1
]
/
4
;
auto
w_dims
=
ctx
->
GetInputDim
(
"Weight"
);
PADDLE_ENFORCE_EQ
(
w_dims
.
size
(),
2
,
"The rank of Input(Weight) should be 2."
);
PADDLE_ENFORCE_EQ
(
w_dims
[
0
],
frame_size
,
"The first dimension of Input(Weight) "
auto
wx_dims
=
ctx
->
GetInputDim
(
"WeightX"
);
PADDLE_ENFORCE_EQ
(
wx_dims
.
size
(),
2
,
"The rank of Input(WeightX) should be 2."
);
PADDLE_ENFORCE_EQ
(
wx_dims
[
0
],
x_dims
[
1
],
"The first dimension of Input(WeightX) "
"should be %d."
,
x_dims
[
1
]);
int
frame_size
=
wx_dims
[
1
]
/
4
;
auto
wh_dims
=
ctx
->
GetInputDim
(
"WeightH"
);
PADDLE_ENFORCE_EQ
(
wh_dims
.
size
(),
2
,
"The rank of Input(WeightH) should be 2."
);
PADDLE_ENFORCE_EQ
(
wh_dims
[
0
],
frame_size
,
"The first dimension of Input(WeightH) "
"should be %d."
,
frame_size
);
PADDLE_ENFORCE_EQ
(
w_dims
[
1
],
4
*
frame_size
,
"The second dimension of Input(Weight) "
PADDLE_ENFORCE_EQ
(
w
h
_dims
[
1
],
4
*
frame_size
,
"The second dimension of Input(Weight
H
) "
"should be 4 * %d."
,
frame_size
);
...
...
@@ -66,36 +83,35 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
"The first dimension of Input(Bias) should be 1."
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"use_peepholes"
))
{
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
7
*
frame_size
,
"The second dimension of Input(Bias) should be "
"7 * %d if enable peepholes connection"
,
frame_size
);
}
else
{
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
4
*
frame_size
,
"The second dimension of Input(Bias) should be "
"4 * %d if disable peepholes connection"
,
frame_size
);
}
PADDLE_ENFORCE
(
!
ctx
->
Attrs
().
Get
<
bool
>
(
"use_peepholes"
),
"Do not support peephole yet."
);
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
4
*
frame_size
,
"The second dimension of Input(Bias) should be "
"4 * %d if disable peepholes connection"
,
frame_size
);
framework
::
DDim
out_dims
({
in
_dims
[
0
],
frame_size
});
framework
::
DDim
out_dims
({
x
_dims
[
0
],
frame_size
});
ctx
->
SetOutputDim
(
"Hidden"
,
out_dims
);
ctx
->
SetOutputDim
(
"Cell"
,
out_dims
);
ctx
->
SetOutputDim
(
"Batch
Gate"
,
in_dims
);
ctx
->
SetOutputDim
(
"Batch
edGate"
,
{
x_dims
[
0
],
wx_dims
[
1
]}
);
ctx
->
SetOutputDim
(
"BatchCellPreAct"
,
out_dims
);
ctx
->
ShareLoD
(
"Input"
,
"Hidden"
);
ctx
->
ShareLoD
(
"Input"
,
"Cell"
);
ctx
->
ShareLoD
(
"X"
,
"Hidden"
);
ctx
->
ShareLoD
(
"X"
,
"Cell"
);
int
xx_width
=
x_dims
[
1
]
>
wx_dims
[
1
]
?
wx_dims
[
1
]
:
x_dims
[
1
];
ctx
->
SetOutputDim
(
"XX"
,
{
x_dims
[
0
],
xx_width
});
ctx
->
ShareLoD
(
"X"
,
"XX"
);
}
framework
::
OpKernelType
FusionLSTMOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"
Input
"
)
->
type
()),
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"
X
"
)
->
type
()),
ctx
.
device_context
());
}
void
FusionLSTMOpMaker
::
Make
()
{
AddInput
(
"
Input
"
,
AddInput
(
"
X
"
,
"(LoDTensor) the first input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X 4D), where T is the "
...
...
@@ -130,7 +146,12 @@ void FusionLSTMOpMaker::Make() {
AddOutput
(
"Cell"
,
"(LoDTensor) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"BatchGate"
,
AddOutput
(
"XX"
,
"(LoDTensor) the first input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X 4D), where T is the "
"total time steps in this mini-batch, D is the hidden size."
);
AddOutput
(
"BatchedGate"
,
"(LoDTensor) This LoDTensor contains input gate, forget gate "
"and output gate after the nonlinear computation. This "
"LoDTensor has the same shape as the reorganized input, which "
...
...
@@ -219,80 +240,102 @@ inline void ReorderInitState(const DeviceContext& ctx,
framework
::
Tensor
*
dst
,
bool
indexed_src
)
{
math
::
CopyMatrixRowsFunctor
<
DeviceContext
,
T
>
row_shuffle
;
dst
->
mutable_data
<
T
>
(
src
.
dims
(),
ctx
.
GetPlace
());
// TODO(TJ): check mem copy perf
row_shuffle
(
ctx
,
src
,
index_lod
,
dst
,
indexed_src
);
}
// TODO(TJ): can move to math::details
template
<
typename
DeviceContext
,
typename
T
>
inline
void
SimpleFC
(
const
math
::
BlasT
<
DeviceContext
,
T
>&
blas
,
const
int
M
,
const
int
N
,
const
int
K
,
const
T
*
A
,
const
T
*
B
,
T
*
C
,
const
T
*
bias_data
=
NULL
)
{
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
static_cast
<
T
>
(
1
),
A
,
B
,
static_cast
<
T
>
(
0
),
C
);
if
(
bias_data
)
{
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
#endif
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
blas
.
AXPY
(
N
,
static_cast
<
T
>
(
1
),
bias_data
,
C
+
i
*
N
);
}
}
}
template
<
typename
DeviceContext
,
typename
T
>
class
LSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
class
Fuison
LSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
input
=
ctx
.
Input
<
LoDTensor
>
(
"Input"
);
auto
*
weight
=
ctx
.
Input
<
Tensor
>
(
"Weight"
);
auto
*
x
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
wx
=
ctx
.
Input
<
Tensor
>
(
"WeightX"
);
// x*4D
auto
*
wh
=
ctx
.
Input
<
Tensor
>
(
"WeightH"
);
// D*4D
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
hidden_t0
=
ctx
.
Input
<
Tensor
>
(
"H0"
);
auto
*
cell_t0
=
ctx
.
Input
<
Tensor
>
(
"C0"
);
auto
*
batch_gate
=
ctx
.
Output
<
LoDTensor
>
(
"BatchGate"
);
batch_gate
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// the result after x*Wx (size: sum_words*4D) or batched_x (size:
// sum_words*x)
auto
*
xx
=
ctx
.
Output
<
LoDTensor
>
(
"XX"
);
auto
*
batched_gate
=
ctx
.
Output
<
LoDTensor
>
(
"BatchedGate"
);
auto
*
hidden_out
=
ctx
.
Output
<
LoDTensor
>
(
"Hidden"
);
hidden_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
cell_out
=
ctx
.
Output
<
LoDTensor
>
(
"Cell"
);
bool
is_reverse
=
ctx
.
Attr
<
bool
>
(
"is_reverse"
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
batched_gate_data
=
batched_gate
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
hidden_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
cell_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
bool
is_reverse
=
ctx
.
Attr
<
bool
>
(
"is_reverse"
);
const
T
*
x_data
=
x
->
data
<
T
>
();
const
T
*
wx_data
=
wx
->
data
<
T
>
();
auto
x_dims
=
x
->
dims
();
auto
wx_dims
=
wx
->
dims
();
math
::
LoDTensor2BatchFunctor
<
DeviceContext
,
T
>
to_batch
;
auto
&
device_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
to_batch
(
device_ctx
,
*
input
,
batch_gate
,
true
,
is_reverse
);
auto
in_dims
=
input
->
dims
();
int
frame_size
=
static_cast
<
int
>
(
in_dims
[
1
]
/
4
);
framework
::
DDim
dims
({
in_dims
[
0
],
frame_size
});
if
(
bias
)
{
Tensor
b
=
*
bias
;
b
.
Resize
({
bias
->
numel
(),
1
});
Tensor
gate_bias
=
b
.
Slice
(
0
,
4
*
frame_size
);
math
::
RowwiseAdd
<
DeviceContext
,
T
>
add_bias
;
add_bias
(
device_ctx
,
*
batch_gate
,
gate_bias
,
batch_gate
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
dev_ctx
);
// TODO(TJ): op test these two cases
if
(
x_dims
[
1
]
>
wx_dims
[
1
])
{
SimpleFC
<
DeviceContext
,
T
>
(
blas
,
x_dims
[
0
],
wx_dims
[
1
],
x_dims
[
1
],
x_data
,
wx_data
,
xx_data
,
bias
->
data
<
T
>
());
to_batch
(
dev_ctx
,
*
xx
,
batched_gate
,
true
,
is_reverse
);
}
else
{
to_batch
(
dev_ctx
,
*
x
,
xx
,
true
,
is_reverse
);
SimpleFC
<
DeviceContext
,
T
>
(
blas
,
x_dims
[
0
],
wx_dims
[
1
],
x_dims
[
1
],
xx_data
,
wx_data
,
batched_gate_data
,
bias
->
data
<
T
>
());
}
int
frame_size
=
static_cast
<
int
>
(
wx_dims
[
1
]
/
4
);
framework
::
DDim
out_dims
({
x_dims
[
0
],
frame_size
});
math
::
LstmMetaValue
<
T
>
lstm_value
;
if
(
bias
&&
ctx
.
Attr
<
bool
>
(
"use_peepholes"
))
{
T
*
bias_data
=
const_cast
<
T
*>
(
bias
->
data
<
T
>
());
// the code style in LstmMetaValue will be updated later.
// no peephole
lstm_value
.
check_ig
=
nullptr
;
lstm_value
.
check_fg
=
nullptr
;
lstm_value
.
check_og
=
nullptr
;
lstm_value
.
check_ig
=
bias_data
+
4
*
frame_size
;
lstm_value
.
check_fg
=
lstm_value
.
check_ig
+
frame_size
;
lstm_value
.
check_og
=
lstm_value
.
check_fg
+
frame_size
;
}
else
{
lstm_value
.
check_ig
=
nullptr
;
lstm_value
.
check_fg
=
nullptr
;
lstm_value
.
check_og
=
nullptr
;
}
lstm_value
.
prev_state_value
=
nullptr
;
Tensor
ordered_c0
;
framework
::
Vector
<
size_t
>
order
(
batch_gate
->
lod
()[
2
]);
framework
::
Vector
<
size_t
>
order
(
batch
ed
_gate
->
lod
()[
2
]);
if
(
cell_t0
)
{
// Since the batch computing for LSTM reorders the input sequence
// according to their length. The initialized cell state also needs
// to reorder.
ReorderInitState
<
DeviceContext
,
T
>
(
dev
ice_ctx
,
*
cell_t0
,
order
,
&
ordered_c0
,
true
);
ReorderInitState
<
DeviceContext
,
T
>
(
dev
_ctx
,
*
cell_t0
,
order
,
&
ordered_c0
,
true
);
lstm_value
.
prev_state_value
=
ordered_c0
.
data
<
T
>
();
}
// Use the local variable as here.
LoDTensor
batch_hidden
,
batch_cell
;
auto
*
batch_cell_pre_act
=
ctx
.
Output
<
LoDTensor
>
(
"BatchCellPreAct"
);
batch_hidden
.
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
batch_cell
.
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
batch_cell_pre_act
->
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
batch_hidden
.
mutable_data
<
T
>
(
out_
dims
,
ctx
.
GetPlace
());
batch_cell
.
mutable_data
<
T
>
(
out_
dims
,
ctx
.
GetPlace
());
batch_cell_pre_act
->
mutable_data
<
T
>
(
out_
dims
,
ctx
.
GetPlace
());
auto
batch_starts
=
batch_gate
->
lod
()[
0
];
size_t
num_batch
=
batch_starts
.
size
()
-
1
;
auto
batch_starts
=
batch
ed
_gate
->
lod
()[
0
];
size_t
max_seq_len
=
batch_starts
.
size
()
-
1
;
auto
gate_act
=
math
::
detail
::
GetActivationType
(
ctx
.
Attr
<
std
::
string
>
(
"gate_activation"
));
auto
cell_act
=
math
::
detail
::
GetActivationType
(
...
...
@@ -300,12 +343,11 @@ class LSTMKernel : public framework::OpKernel<T> {
auto
cand_act
=
math
::
detail
::
GetActivationType
(
ctx
.
Attr
<
std
::
string
>
(
"candidate_activation"
));
auto
blas
=
math
::
GetBlas
<
DeviceContext
,
T
>
(
device_ctx
);
for
(
size_t
n
=
0
;
n
<
num_batch
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
max_seq_len
;
n
++
)
{
int
bstart
=
static_cast
<
int
>
(
batch_starts
[
n
]);
int
bend
=
static_cast
<
int
>
(
batch_starts
[
n
+
1
]);
Tensor
gate_t
=
batch_gate
->
Slice
(
bstart
,
bend
);
Tensor
gate_t
=
batch
ed
_gate
->
Slice
(
bstart
,
bend
);
Tensor
out_t
=
batch_hidden
.
Slice
(
bstart
,
bend
);
Tensor
cell_t
=
batch_cell
.
Slice
(
bstart
,
bend
);
Tensor
cell_pre_act_t
=
batch_cell_pre_act
->
Slice
(
bstart
,
bend
);
...
...
@@ -316,9 +358,11 @@ class LSTMKernel : public framework::OpKernel<T> {
int
pre_h_start
=
static_cast
<
int
>
(
batch_starts
[
n
-
1
]);
int
pre_h_end
=
pre_h_start
+
cur_batch_size
;
auto
pre_hidden_t
=
batch_hidden
.
Slice
(
pre_h_start
,
pre_h_end
);
blas
.
MatMul
(
pre_hidden_t
,
false
,
*
weight
,
false
,
static_cast
<
T
>
(
1.0
),
// TODO(TJ): use gemm directly
blas
.
MatMul
(
pre_hidden_t
,
false
,
*
wh
,
false
,
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
}
else
if
(
hidden_t0
)
{
// TODO(TJ): move h0 outside for
// If n == 0 and there is no initialized hidden state, that is to say
// the H0 is zeros, the calculation W_h * H0 will be skiped.
// If n == 0 and there is initialized hidden state, calculate W_h * H0.
...
...
@@ -327,10 +371,11 @@ class LSTMKernel : public framework::OpKernel<T> {
// according to their length. The initialized hidden state also needs
// to reorder.
Tensor
ordered_h0
;
ReorderInitState
<
DeviceContext
,
T
>
(
dev
ice
_ctx
,
*
hidden_t0
,
order
,
ReorderInitState
<
DeviceContext
,
T
>
(
dev_ctx
,
*
hidden_t0
,
order
,
&
ordered_h0
,
true
);
blas
.
MatMul
(
ordered_h0
,
false
,
*
weight
,
false
,
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
// TODO(TJ): use gemm directly
blas
.
MatMul
(
ordered_h0
,
false
,
*
wh
,
false
,
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
}
lstm_value
.
gate_value
=
gate_t
.
data
<
T
>
();
...
...
@@ -338,19 +383,19 @@ class LSTMKernel : public framework::OpKernel<T> {
lstm_value
.
state_value
=
cell_t
.
data
<
T
>
();
lstm_value
.
state_active_value
=
cell_pre_act_t
.
data
<
T
>
();
math
::
LstmUnitFunctor
<
DeviceContext
,
T
>::
compute
(
dev
ice_ctx
,
lstm_value
,
frame_size
,
cur_batch_size
,
gate
_act
,
c
ell_act
,
c
and_act
);
dev
_ctx
,
lstm_value
,
frame_size
,
cur_batch_size
,
gate_act
,
cell
_act
,
cand_act
);
lstm_value
.
prev_state_value
=
lstm_value
.
state_value
;
}
math
::
Batch2LoDTensorFunctor
<
DeviceContext
,
T
>
to_seq
;
batch_hidden
.
set_lod
(
batch_gate
->
lod
());
batch_hidden
.
set_lod
(
batch
ed
_gate
->
lod
());
// restore the output hidden in LoDTensor from the batch hidden
to_seq
(
dev
ice
_ctx
,
batch_hidden
,
hidden_out
);
to_seq
(
dev_ctx
,
batch_hidden
,
hidden_out
);
batch_cell
.
set_lod
(
batch_gate
->
lod
());
batch_cell
.
set_lod
(
batch
ed
_gate
->
lod
());
// restore the output cell state in LoDTensor from the batch cell
to_seq
(
dev
ice
_ctx
,
batch_cell
,
cell_out
);
to_seq
(
dev_ctx
,
batch_cell
,
cell_out
);
}
};
...
...
@@ -358,9 +403,10 @@ class LSTMKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
lstm
,
ops
::
LSTMOp
,
ops
::
LSTMOpMaker
,
REGISTER_OPERATOR
(
fusion_lstm
,
ops
::
FusionLSTMOp
,
ops
::
Fusion
LSTMOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OP_CPU_KERNEL
(
fusion_lstm
,
ops
::
LSTMKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
LSTMKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
fusion_lstm
,
ops
::
FuisonLSTMKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
FuisonLSTMKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/fusion_lstm_op.h
浏览文件 @
8f913295
...
...
@@ -15,10 +15,6 @@ limitations under the License. */
#pragma once
// #include <string>
#include "paddle/fluid/framework/op_registry.h"
// #include "paddle/fluid/operators/math/blas.h"
// #include "paddle/fluid/operators/math/detail/activation_functions.h"
// #include "paddle/fluid/operators/math/lstm_compute.h"
// #include "paddle/fluid/operators/math/sequence2batch.h"
namespace
paddle
{
namespace
operators
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录