Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c0d2ca54
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c0d2ca54
编写于
11月 03, 2017
作者:
K
kexinzhao
提交者:
Yi Wang
11月 03, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
polish_g_to_l (#5367)
上级
af760eac
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
187 addition
and
135 deletion
+187
-135
paddle/operators/gather_op.cc
paddle/operators/gather_op.cc
+20
-3
paddle/operators/gaussian_random_op.cc
paddle/operators/gaussian_random_op.cc
+24
-10
paddle/operators/gru_unit_op.cc
paddle/operators/gru_unit_op.cc
+22
-17
paddle/operators/huber_loss_op.cc
paddle/operators/huber_loss_op.cc
+4
-2
paddle/operators/increment_op.cc
paddle/operators/increment_op.cc
+8
-4
paddle/operators/l1_norm_op.cc
paddle/operators/l1_norm_op.cc
+1
-1
paddle/operators/load_op.cc
paddle/operators/load_op.cc
+8
-4
paddle/operators/lookup_table_op.cc
paddle/operators/lookup_table_op.cc
+16
-10
paddle/operators/lrn_op.cc
paddle/operators/lrn_op.cc
+41
-43
paddle/operators/lstm_op.cc
paddle/operators/lstm_op.cc
+31
-34
paddle/operators/lstm_unit_op.cc
paddle/operators/lstm_unit_op.cc
+12
-7
未找到文件。
paddle/operators/gather_op.cc
浏览文件 @
c0d2ca54
...
@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"The source input of gather op"
);
AddInput
(
"X"
,
"The source input of gather op"
);
AddInput
(
"Index"
,
"The index input of gather op"
);
AddInput
(
"Index"
,
"The index input of gather op"
);
AddOutput
(
"Out"
,
"The output of
add
op"
);
AddOutput
(
"Out"
,
"The output of
gather
op"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Gather Operator by selecting from the first axis,
Gather Operator.
$Out = X[Index]$
Out is obtained by gathering entries of the outer-most dimension
of X indexed by Index and concatenate them together.
Example:
X = [[1, 2],
[3, 4],
[5, 6]]
Index = [[1, 2]]
Then:
Out = [[3, 4],
[5, 6]]
Out = X[Index]
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/gaussian_random_op.cc
浏览文件 @
c0d2ca54
...
@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
GaussianRandomOpMaker
(
framework
::
OpProto
*
proto
,
GaussianRandomOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
framework
::
OpAttrChecker
*
op_checker
)
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
framework
::
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddOutput
(
"Out"
,
"output matrix of random op"
);
AddOutput
(
"Out"
,
"Output matrix of gaussian random op"
);
AddComment
(
R"DOC(
GaussianRandom operator.
Use to initialize tensor with gaussian random generator.
)DOC"
);
AddAttr
<
std
::
vector
<
int
>>
(
"shape"
,
"The dimension of random tensor."
);
AddAttr
<
std
::
vector
<
int
>>
(
"shape"
,
AddAttr
<
float
>
(
"mean"
,
"mean of random tensor."
).
SetDefault
(
.0
f
);
"(vector<int>) "
AddAttr
<
float
>
(
"std"
,
"std of random tensor."
).
SetDefault
(
1.0
f
);
"The dimension of random tensor."
);
AddAttr
<
float
>
(
"mean"
,
"(float, default 0.0) "
"mean of random tensor."
)
.
SetDefault
(
.0
f
);
AddAttr
<
float
>
(
"std"
,
"(float, default 1.0) "
"std of random tensor."
)
.
SetDefault
(
1.0
f
);
AddAttr
<
int
>
(
"seed"
,
AddAttr
<
int
>
(
"seed"
,
"(int, default 0) "
"Random seed of generator."
"Random seed of generator."
"0 means use system wide seed"
)
"0 means use system wide seed
.
"
)
.
SetDefault
(
0
);
.
SetDefault
(
0
);
AddAttr
<
int
>
(
"data_type"
,
"output data type"
)
AddAttr
<
int
>
(
"data_type"
,
"(int, default 5(FP32)) "
"Output data type."
)
.
SetDefault
(
framework
::
DataType
::
FP32
);
.
SetDefault
(
framework
::
DataType
::
FP32
);
AddComment
(
R"DOC(
GaussianRandom Operator.
Used to initialize tensors with gaussian random generator.
)DOC"
);
}
}
};
};
...
...
paddle/operators/gru_unit_op.cc
浏览文件 @
c0d2ca54
...
@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"HiddenPrev"
,
AddInput
(
"HiddenPrev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step."
);
"states of previous time step."
);
AddInput
(
"Weight"
,
AddInput
(
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"Weight"
,
"The elements continuous in memory can be divided into two parts. "
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The first part are weights of the update gate and reset gate "
"The elements continuous in memory can be divided into two parts. "
"with shape [frame_size, frame_size * 2], and the second part are "
"The first part are weights of the update gate and reset gate "
"weights of output candidate with shape [frame_size, frame_size]"
);
"with shape [frame_size, frame_size * 2], and the second part are "
AddInput
(
"Bias"
,
"weights of output candidate with shape [frame_size, frame_size]."
);
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
AddInput
(
"bias of the update gate, reset gate and output candidate."
)
"Bias"
,
"(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
"bias of the update gate, reset gate and output candidate."
)
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"Gate"
,
AddOutput
(
"Gate"
,
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate"
)
"output of update gate, reset gate and output candidate
.
"
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"ResetHiddenPrev"
,
AddOutput
(
"ResetHiddenPrev"
,
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
...
@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
sigmoid
)
.
SetDefault
(
sigmoid
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
GRUUnit
Op implements part calculations of the GRU unit as following:
GRUUnit
Operator.
\f[
This operator implements partial calculations of the GRU unit as follows:
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
$$
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
\f]
output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
$$
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/huber_loss_op.cc
浏览文件 @
c0d2ca54
...
@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
"The shape is same as Input(X) and will be reused in backward."
)
"The shape is same as Input(X) and will be reused in backward."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"The output tensor with shape [batch_size, 1]
which represents
"
"The output tensor with shape [batch_size, 1] "
"the huber loss."
);
"
which represents
the huber loss."
);
AddAttr
<
AttrType
>
(
"delta"
,
"Hyper parameter in huber loss."
);
AddAttr
<
AttrType
>
(
"delta"
,
"Hyper parameter in huber loss."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
HuberLoss Operator.
Huber loss is a loss function used in robust regression. We define X as the
Huber loss is a loss function used in robust regression. We define X as the
input value and Y as the target value. Huber loss can evaluate the fitness of
input value and Y as the target value. Huber loss can evaluate the fitness of
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
...
...
paddle/operators/increment_op.cc
浏览文件 @
c0d2ca54
...
@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
"(Tensor) The input tensor of increment operator"
);
AddInput
(
"X"
,
"(Tensor) The input tensor of increment operator"
);
AddOutput
(
"Out"
,
"(Tensor) The output tensor of increment operator."
);
AddOutput
(
"Out"
,
"(Tensor) The output tensor of increment operator."
);
AddComment
(
R"DOC(Increment operator
The equation is: Out = X + step
)DOC"
);
AddAttr
<
AttrType
>
(
"step"
,
AddAttr
<
AttrType
>
(
"step"
,
"(float, default 1.0) "
"The step size by which the "
"The step size by which the "
"input tensor will be incremented."
)
"input tensor will be incremented."
)
.
SetDefault
(
1.0
);
.
SetDefault
(
1.0
);
AddComment
(
R"DOC(
Increment Operator.
The equation is:
$$Out = X + step$$
)DOC"
);
}
}
};
};
...
...
paddle/operators/l1_norm_op.cc
浏览文件 @
c0d2ca54
...
@@ -57,7 +57,7 @@ L1 Norm Operator.
...
@@ -57,7 +57,7 @@ L1 Norm Operator.
Computes the L1 norm of a tensor.
Computes the L1 norm of a tensor.
Out = sum (abs(X))
$$Out = \sum{|X|}$$
)DOC"
);
)DOC"
);
}
}
...
...
paddle/operators/load_op.cc
浏览文件 @
c0d2ca54
...
@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
LoadOpProtoMaker
(
framework
::
OpProto
*
proto
,
LoadOpProtoMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddOutput
(
"Out"
,
"The tensor need to be loaded"
);
AddOutput
(
"Out"
,
"(Tensor) The tensor need to be loaded"
);
AddComment
(
R"DOC(Load Operator
Load operator will load a tensor variable from disk file.
)DOC"
);
AddAttr
<
std
::
string
>
(
"file_path"
,
AddAttr
<
std
::
string
>
(
"file_path"
,
"(string) "
"Variable will be loaded from
\"
file_path
\"
."
)
"Variable will be loaded from
\"
file_path
\"
."
)
.
AddCustomChecker
(
.
AddCustomChecker
(
[](
const
std
::
string
&
path
)
{
return
!
path
.
empty
();
});
[](
const
std
::
string
&
path
)
{
return
!
path
.
empty
();
});
AddComment
(
R"DOC(
Load Operator.
Load operator will load a tensor variable from disk file.
)DOC"
);
}
}
};
};
}
// namespace operators
}
// namespace operators
...
...
paddle/operators/lookup_table_op.cc
浏览文件 @
c0d2ca54
...
@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
framework
::
OpAttrChecker
*
op_checker
)
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"W"
,
AddInput
(
"W"
,
"An input represents embedding tensors,"
"An input represents embedding tensors,
"
"
which is a learnable parameter."
);
"which is a learnable parameter."
);
AddInput
(
"Ids"
,
AddInput
(
"Ids"
,
"An input with type int32 or int64"
"An input with type int32 or int64 "
"contains the ids to be looked up in W."
"contains the ids to be looked up in W. "
"Ids must be a column vector with rank = 2."
"Ids must be a column vector with rank = 2. "
"The 2nd dimension size must be 1"
);
"The 2nd dimension size must be 1."
);
AddOutput
(
"Out"
,
"The lookup results, which have the same type with W."
);
AddOutput
(
"Out"
,
"The lookup results, which have the same type as W."
);
AddAttr
<
bool
>
(
"is_sparse"
,
"Sparse update"
).
SetDefault
(
false
);
AddAttr
<
bool
>
(
"is_sparse"
,
"(boolean, default false) "
"Sparse update"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Lookup Table Operator.
This operator is used to perform lookups on the parameter W,
This operator is used to perform lookups on the parameter W,
then concatenated into a dense tensor.
then concatenated into a dense tensor.
The input `Ids` can carry the LoD (Level of Details) information,
The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD with input `Ids`.
or not. And the output only shares the LoD information with input Ids.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/lrn_op.cc
浏览文件 @
c0d2ca54
...
@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
LRNOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
LRNOpMaker
(
framework
::
OpProto
*
proto
,
framework
::
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"X"
,
R"DOC(
AddInput
(
"X"
,
(Tensor) The input of LRN operator. It must be a 4D tenor with NCHW format.
"(Tensor) The input of LRN operator. "
)DOC"
);
"It must be a 4D tenor with NCHW format."
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(Tensor) The output of LRN operator, which is also the 4D "
"(Tensor) The output of LRN operator, which is also the 4D "
"tensor with NCHW format."
);
"tensor with NCHW format."
);
AddOutput
(
"MidOut"
,
R"Doc(
AddOutput
(
"MidOut"
,
(Tensor)Middle result of lrn op.It's computed in forward process
"(Tensor) Middle result of LRN operator. It's computed in "
and also used in backward process.
"forward process and also used in backward process."
);
)Doc"
);
AddAttr
<
int
>
(
"n"
,
AddAttr
<
int
>
(
"n"
,
R"DOC(
"(int default 5) "
(int, default 5)n is “adjacent” kernel maps at the same spatial position.
"n is the
\"
adjacent
\"
kernel that maps "
)DOC
"
)
"at the same spatial position.
"
)
.
SetDefault
(
5
)
.
SetDefault
(
5
)
.
GreaterThan
(
0
);
.
GreaterThan
(
0
);
AddAttr
<
T
>
(
"k"
,
R"DOC(
AddAttr
<
T
>
(
"k"
,
(float, default 2.0)k is the bias.
"(float, default 2.0) "
)DOC
"
)
"k is the bias.
"
)
.
SetDefault
(
2.0
)
.
SetDefault
(
2.0
)
.
GreaterThan
(
0.0
);
.
GreaterThan
(
0.0
);
AddAttr
<
T
>
(
"alpha"
,
R"DOC(
AddAttr
<
T
>
(
"alpha"
,
(float, default 0.0001)alpha is the scale number.
"(float, default 0.0001) "
)DOC
"
)
"alpha is the scale number.
"
)
.
SetDefault
(
0.0001
)
.
SetDefault
(
0.0001
)
.
GreaterThan
(
0.0
);
.
GreaterThan
(
0.0
);
AddAttr
<
T
>
(
"beta"
,
R"DOC(
AddAttr
<
T
>
(
"beta"
,
(float, default 0.75)beta is the power number.
"(float, default 0.75) "
)DOC
"
)
"beta is the power number.
"
)
.
SetDefault
(
0.75
)
.
SetDefault
(
0.75
)
.
GreaterThan
(
0.0
);
.
GreaterThan
(
0.0
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Local Response Normalization.
Local Response Normalization Operator.
This Function comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks".
The original formula is:
This operator comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks".
Input(i, x, y)
The original formula is:
Output(i, x, y) = ----------------------------------------------
-- upper
(k + alpha * > (Input(j, x, y))^2) ^ (beta)
-- j = lower
upper is `min(C, c + n/2)`
$$
lower if `max(0, c - n/2)`
Output(i, x, y) = Input(i, x, y) / \left(
k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
(Input(j, x, y))^2
\right)^{\beta}
$$
Function implementation:
Function implementation:
inputs and outpus is NCHW format, while input.shape.ndims() is equal
4.
Inputs and outpus are in NCHW format, while input.shape.ndims() equals
4.
And the meaning of each dimension(0-3) is respectively batch size
,
And dimensions 0 ~ 3 represent batch size, feature maps, rows
,
feature maps, rows and columns
.
and columns, respectively
.
Input and Output in the above formula
is for each map(i) of one image, and
Input and Output in the formula above
is for each map(i) of one image, and
Input(i, x, y), Output(i, x, y) represents an element in an image.
Input(i, x, y), Output(i, x, y) represents an element in an image.
C is the number of feature maps of one image, and n is a hyper-parameters
C is the number of feature maps of one image. n is a hyper-parameter
is configured when Function is initialized. The sum in the denominator
configured when operator is initialized. The sum in the denominator
is the sum of the same position in the neighboring maps.
is the sum of the same positions in the neighboring maps.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/lstm_op.cc
浏览文件 @
c0d2ca54
...
@@ -103,7 +103,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -103,7 +103,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"H0"
,
AddInput
(
"H0"
,
"(Tensor, optional) the initial hidden state is an optional "
"(Tensor, optional) the initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"input. This is a tensor with shape (N x D), where N is the "
"batch size
,
D is the hidden size."
)
"batch size
and
D is the hidden size."
)
.
AsDispensable
();
.
AsDispensable
();
AddInput
(
"C0"
,
AddInput
(
"C0"
,
"(Tensor, optional) the initial cell state is an optional "
"(Tensor, optional) the initial cell state is an optional "
...
@@ -134,85 +134,82 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -134,85 +134,82 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"BatchGate"
,
AddOutput
(
"BatchGate"
,
"(LoDTensor) This LoDTensor contains input gate, forget gate "
"(LoDTensor) This LoDTensor contains input gate, forget gate "
"and output gate after the nonlinear computation. This "
"and output gate after the nonlinear computation. This "
"LoDTensor has the same shape
with
the reorganized input, which "
"LoDTensor has the same shape
as
the reorganized input, which "
"is also be called batch input. The LoD size is 2. The first "
"is also be called batch input. The LoD size is 2. The first "
"LoD is the batch offsets and the second LoD contains the "
"LoD is the batch offsets and the second LoD contains the "
"indexes, which denote the position of reorganized sequence "
"indexes, which denote the position of reorganized sequence "
"in the raw input."
)
"in the raw input."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"BatchCellPreAct"
,
AddOutput
(
"BatchCellPreAct"
,
"(LoDTensor) This LoDTensor is
got
in the forward and used "
"(LoDTensor) This LoDTensor is
obtained
in the forward and used "
"in the backward."
)
"in the backward."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddAttr
<
bool
>
(
"usePeepholes"
,
AddAttr
<
bool
>
(
"usePeepholes"
,
"(bool, defa
lut:
True) "
"(bool, defa
ult
True) "
"whether to enable diagonal/peephole connections."
)
"whether to enable diagonal/peephole connections."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddAttr
<
bool
>
(
"isReverse"
,
AddAttr
<
bool
>
(
"isReverse"
,
"(bool, defa
lut:
False) "
"(bool, defa
ult
False) "
"whether to compute reversed LSTM."
)
"whether to compute reversed LSTM."
)
.
SetDefault
(
false
);
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
AddAttr
<
std
::
string
>
(
"gateActivation"
,
"gateActivation"
,
"(string, default
:
sigmoid)"
"(string, default sigmoid)"
"The activation for input gate, forget gate and output "
"The activation for input gate, forget gate and output "
"gate, `sigmoid` by default."
)
"gate, `sigmoid` by default."
)
.
SetDefault
(
"sigmoid"
);
.
SetDefault
(
"sigmoid"
);
AddAttr
<
std
::
string
>
(
"cellActivation"
,
AddAttr
<
std
::
string
>
(
"cellActivation"
,
"(string, default
:
tanh)"
"(string, default tanh)"
"The activation for cell output, `tanh` by defalut."
)
"The activation for cell output, `tanh` by defalut."
)
.
SetDefault
(
"tanh"
);
.
SetDefault
(
"tanh"
);
AddAttr
<
std
::
string
>
(
"candidateActivation"
,
AddAttr
<
std
::
string
>
(
"candidateActivation"
,
"(string, default
:
tanh)"
"(string, default tanh)"
"The activation for candidate hidden state, "
"The activation for candidate hidden state, "
"`tanh` by default."
)
"`tanh` by default."
)
.
SetDefault
(
"tanh"
);
.
SetDefault
(
"tanh"
);
AddComment
(
R"DOC(Long-Short Term Memory (LSTM) Operator
AddComment
(
R"DOC(
Long-Short Term Memory (LSTM) Operator.
The defalut implementation is diagonal/peephole connection
[1], the formula is
The defalut implementation is diagonal/peephole connection
as follows
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
$$
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)
f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\
\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\
o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)
o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\
c_t = f_t ⊙ c_{t-1} + i_t ⊙ \tilde{c_t}
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
h_t = o_t ⊙ act_h(c_t)
h_t = o_t \odot act_h(c_t)
$$
where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix
where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix
of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
are diagonal weight matrices for peephole connections. In our imple
nmen
tion,
are diagonal weight matrices for peephole connections. In our imple
menta
tion,
W
e use vectors to reprenset these diagonal weight matrices. The b terms
w
e use vectors to reprenset these diagonal weight matrices. The b terms
denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
is the non-line acti
c
ations, such as logistic sigmoid function, and
is the non-line acti
v
ations, such as logistic sigmoid function, and
\f$i, f, o\f$ and \f$c\f$ are
respectively the input gate, forge
t gate,
\f$i, f, o\f$ and \f$c\f$ are
the input gate, forget gate, outpu
t gate,
output gate and cell activation vectors, all of which ar
e the same size as
and cell activation vectors, respectively, all of which hav
e the same size as
the cell output activation vector \f$h\f$.
the cell output activation vector \f$h\f$.
The
⊙ is the element-wise product of the vectors,
\f$act_g\f$ and \f$act_h\f$
The
\f$\odot\f$ is the element-wise product of the vectors.
\f$act_g\f$ and \f$act_h\f$
are the cell input and cell output activation functions
,
`tanh` is usually
are the cell input and cell output activation functions
and
`tanh` is usually
used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.
which is computed based on the current input and the previous hidden state.
Set `usePeepholes` False to disable peephole connection [2]. The formula
Set usePeepholes False to disable peephole connection
(http://www.bioinf.jku.at/publications/older/2604.pdf). The formula
is omitted here.
is omitted here.
@note T
hese \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
Note that t
hese \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
operations on the input
x_{t} we
re NOT included in this operator.
operations on the input
\f$x_{t}\f$ a
re NOT included in this operator.
Users can choose to use fully-connect operator before LSTM operator.
Users can choose to use fully-connect operator before LSTM operator.
[1] Hasim Sak, Andrew Senior, and Francoise Beaufays. Long short-term memory
recurrent neural network architectures for large scale acoustic modeling.
INTERSPEECH, 2014.
[2] S. Hochreiter and J. Schmidhuber. Long Short-Term Memory.
Neural Computation, 9(8):1735-1780, 1997.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/operators/lstm_unit_op.cc
浏览文件 @
c0d2ca54
...
@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
"The cell state tensor of last time-step in the Lstm Unit operator."
);
"The cell state tensor of last time-step in the Lstm Unit operator."
);
AddOutput
(
"C"
,
"The cell tensor of Lstm Unit operator."
);
AddOutput
(
"C"
,
"The cell tensor of Lstm Unit operator."
);
AddOutput
(
"H"
,
"The hidden state tensor of Lstm Unit operator."
);
AddOutput
(
"H"
,
"The hidden state tensor of Lstm Unit operator."
);
AddAttr
<
float
>
(
"forget_bias"
,
AddComment
(
R"DOC(Lstm-Unit Operator
"(float, default 0.0) "
"The forget bias of Lstm Unit."
)
.
SetDefault
(
0.0
);
AddComment
(
R"DOC(
Lstm Unit Operator
Equation:
Equation:
i, f, o, j = split(X)
C = C_prev * sigm(f + forget_bias) + sigm(i) * tanh(j)
$$
H = C * sigm(o)
i, f, o, j = split(X) \\
C = C_{prev} * sigm(f + forget\_bias) + sigm(i) * tanh(j) \\
H = C * sigm(o)
$$
)DOC"
);
)DOC"
);
AddAttr
<
float
>
(
"forget_bias"
,
"The forget bias of Lstm Unit."
)
.
SetDefault
(
0.0
);
}
}
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录