Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
852bc6f4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
852bc6f4
编写于
8月 15, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine fusion lstm op doc
上级
8f913295
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
36 addition
and
79 deletion
+36
-79
paddle/fluid/operators/fusion_lstm_op.cc
paddle/fluid/operators/fusion_lstm_op.cc
+36
-79
未找到文件。
paddle/fluid/operators/fusion_lstm_op.cc
浏览文件 @
852bc6f4
...
@@ -112,26 +112,22 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
...
@@ -112,26 +112,22 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
void
FusionLSTMOpMaker
::
Make
()
{
void
FusionLSTMOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(LoDTensor) the
first
input is a LodTensor, which support "
"(LoDTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X 4D), where T is the "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, D is the hidden size."
);
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"H0"
,
AddInput
(
"WeightX"
,
"(Tensor, optional) the initial hidden state is an optional "
"(Tensor) the learnable weights of X."
"input. This is a tensor with shape (N x D), where N is the "
" - The shape is (M x 4D), where M is the dim size of x, D is the "
"batch size and D is the hidden size."
)
"hidden size. "
.
AsDispensable
();
" - Weight = {W_cx, W_ix, W_fx, W_ox}"
);
AddInput
(
"C0"
,
AddInput
(
"WeightH"
,
"(Tensor, optional) the initial cell state is an optional "
"(Tensor) same as LSTMOp, the learnable hidden-hidden weights."
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
AddInput
(
"Weight"
,
"(Tensor) the learnable hidden-hidden weights."
" - The shape is (D x 4D), where D is the hidden size. "
" - The shape is (D x 4D), where D is the hidden size. "
" - Weight = {W_ch, W_ih, W_fh, W_oh}"
);
" - Weight = {W_ch, W_ih, W_fh, W_oh}"
);
AddInput
(
"Bias"
,
AddInput
(
"Bias"
,
"(Tensor) the learnable weights, which contains two parts: "
"(Tensor) the learnable weights. Almost same as LSTMOp"
"Note: we should add the fc bias into this (1x4D) in bias."
"input-hidden bias weight and peephole connections weight if "
"input-hidden bias weight and peephole connections weight if "
"setting `use_peepholes` True. "
"setting `use_peepholes` True. "
"1. `use_peepholes = False` "
"1. `use_peepholes = False` "
...
@@ -140,29 +136,31 @@ void FusionLSTMOpMaker::Make() {
...
@@ -140,29 +136,31 @@ void FusionLSTMOpMaker::Make() {
"2. `use_peepholes = True` "
"2. `use_peepholes = True` "
" - The shape is (1 x 7D). "
" - The shape is (1 x 7D). "
" - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."
);
" - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."
);
AddInput
(
"H0"
,
"(Tensor, optional) (same as LSTMOp) the initial hidden state is an "
"optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size and D is the hidden size."
)
.
AsDispensable
();
AddInput
(
"C0"
,
"(Tensor, optional) (same as LSTMOp) (the initial cell state is an "
"optional "
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
AddOutput
(
"Hidden"
,
AddOutput
(
"Hidden"
,
"(LoDTensor) the hidden state of LSTM operator. "
"(LoDTensor)
(same as LSTMOp)
the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
AddOutput
(
"Cell"
,
"(LoDTensor) the cell state of LSTM operator. "
"(LoDTensor)
(same as LSTMOp)
the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
AddOutput
(
"XX"
,
"(LoDTensor) the first input is a LodTensor, which support "
"(LoDTensor) the result after X * WeightX (size is T x 4D)"
"variable-time length input sequence. The underlying tensor in "
" or batched_X (size is T x M), this will be automatically chosen,"
"this LoDTensor is a matrix with shape (T X 4D), where T is the "
" where T is the total time steps in this mini-batch,"
"total time steps in this mini-batch, D is the hidden size."
);
" D is the hidden size, M is the dim size of x input."
);
AddOutput
(
"BatchedGate"
,
AddOutput
(
"BatchedGate"
,
"(LoDTensor) (same as LSTMOp)."
).
AsIntermediate
();
"(LoDTensor) This LoDTensor contains input gate, forget gate "
AddOutput
(
"BatchCellPreAct"
,
"(LoDTensor) (same as LSTMOp)."
)
"and output gate after the nonlinear computation. This "
"LoDTensor has the same shape as the reorganized input, which "
"is also be called batch input. The LoD size is 2. The first "
"LoD is the batch offsets and the second LoD contains the "
"indexes, which denote the position of reorganized sequence "
"in the raw input."
)
.
AsIntermediate
();
AddOutput
(
"BatchCellPreAct"
,
"(LoDTensor) This LoDTensor is obtained in the forward and used "
"in the backward."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, defalut: True) "
"(bool, defalut: True) "
...
@@ -190,46 +188,8 @@ void FusionLSTMOpMaker::Make() {
...
@@ -190,46 +188,8 @@ void FusionLSTMOpMaker::Make() {
.
SetDefault
(
"tanh"
)
.
SetDefault
(
"tanh"
)
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Long-Short Term Memory (LSTM) Operator.
Fusion Long-Short Term Memory (LSTM) Operator.
This operator fuse the X into LSTM, more details can refer to LSTM op.
The defalut implementation is diagonal/peephole connection
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
$$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) $$
$$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) $$
$$ \\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) $$
$$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) $$
$$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$
$$ h_t = o_t \\odot act_h(c_t) $$
- W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$
are diagonal weight matrices for peephole connections. In our implementation,
we use vectors to reprenset these diagonal weight matrices.
- The b terms denote bias vectors ($b_i$ is the input gate bias vector).
- $\sigma$ is the non-line activations, such as logistic sigmoid function.
- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$.
- The $\odot$ is the element-wise product of the vectors.
- $act_g$ and $act_h$ are the cell input and cell output activation functions
and `tanh` is usually used for them.
- $\tilde{c_t}$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.
Set `use_peepholes` False to disable peephole connection. The formula
is omitted here, please refer to the paper
http://www.bioinf.jku.at/publications/older/2604.pdf for details.
Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$
operations on the input $x_{t}$ are NOT included in this operator.
Users can choose to use fully-connect operator before LSTM operator.
)DOC"
);
)DOC"
);
}
}
...
@@ -266,14 +226,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -266,14 +226,12 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
x
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
wx
=
ctx
.
Input
<
Tensor
>
(
"WeightX"
);
// x*4D
auto
*
wx
=
ctx
.
Input
<
Tensor
>
(
"WeightX"
);
auto
*
wh
=
ctx
.
Input
<
Tensor
>
(
"WeightH"
);
// D*4D
auto
*
wh
=
ctx
.
Input
<
Tensor
>
(
"WeightH"
);
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
*
hidden_t0
=
ctx
.
Input
<
Tensor
>
(
"H0"
);
auto
*
hidden_t0
=
ctx
.
Input
<
Tensor
>
(
"H0"
);
auto
*
cell_t0
=
ctx
.
Input
<
Tensor
>
(
"C0"
);
auto
*
cell_t0
=
ctx
.
Input
<
Tensor
>
(
"C0"
);
// the result after x*Wx (size: sum_words*4D) or batched_x (size:
// sum_words*x)
auto
*
xx
=
ctx
.
Output
<
LoDTensor
>
(
"XX"
);
auto
*
xx
=
ctx
.
Output
<
LoDTensor
>
(
"XX"
);
auto
*
batched_gate
=
ctx
.
Output
<
LoDTensor
>
(
"BatchedGate"
);
auto
*
batched_gate
=
ctx
.
Output
<
LoDTensor
>
(
"BatchedGate"
);
auto
*
hidden_out
=
ctx
.
Output
<
LoDTensor
>
(
"Hidden"
);
auto
*
hidden_out
=
ctx
.
Output
<
LoDTensor
>
(
"Hidden"
);
...
@@ -312,7 +270,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -312,7 +270,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
lstm_value
.
check_ig
=
nullptr
;
lstm_value
.
check_ig
=
nullptr
;
lstm_value
.
check_fg
=
nullptr
;
lstm_value
.
check_fg
=
nullptr
;
lstm_value
.
check_og
=
nullptr
;
lstm_value
.
check_og
=
nullptr
;
lstm_value
.
prev_state_value
=
nullptr
;
lstm_value
.
prev_state_value
=
nullptr
;
Tensor
ordered_c0
;
Tensor
ordered_c0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录