Commit c0d2ca54 authored by kexinzhao, committed by Yi Wang

polish_g_to_l (#5367)

Parent af760eac
...@@ -67,11 +67,28 @@ class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The source input of gather op");
    AddInput("Index", "The index input of gather op");
    AddOutput("Out", "The output of gather op");
    AddComment(R"DOC(
Gather Operator.

$Out = X[Index]$

Out is obtained by gathering entries of the outer-most dimension
of X indexed by Index and concatenating them together.

Example:

X = [[1, 2],
     [3, 4],
     [5, 6]]

Index = [[1, 2]]

Then:

Out = [[3, 4],
       [5, 6]]

)DOC");
  }
};
......
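As a reading aid (not part of the diff): the gather semantics described in the DOC string above can be sketched in a few lines of standalone C++. This is illustrative only, not the Paddle kernel; the 2-D shapes and the function name are assumptions.

#include <cstdint>
#include <vector>

// Gather along the outer-most dimension: out[i] = x[index[i]],
// each selected row copied whole.
std::vector<std::vector<float>> Gather(
    const std::vector<std::vector<float>>& x,
    const std::vector<int64_t>& index) {
  std::vector<std::vector<float>> out;
  out.reserve(index.size());
  for (int64_t idx : index) out.push_back(x[idx]);
  return out;
}

// With x = {{1, 2}, {3, 4}, {5, 6}} and index = {1, 2},
// Gather(x, index) returns {{3, 4}, {5, 6}}, matching the DOC example.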
...@@ -68,21 +68,35 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
  GaussianRandomOpMaker(framework::OpProto* proto,
                        framework::OpAttrChecker* op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Out", "Output matrix of gaussian random op");

    AddAttr<std::vector<int>>("shape",
                              "(vector<int>) "
                              "The dimension of random tensor.");
    AddAttr<float>("mean",
                   "(float, default 0.0) "
                   "mean of random tensor.")
        .SetDefault(.0f);
    AddAttr<float>("std",
                   "(float, default 1.0) "
                   "std of random tensor.")
        .SetDefault(1.0f);
    AddAttr<int>("seed",
                 "(int, default 0) "
                 "Random seed of generator. "
                 "0 means use system wide seed.")
        .SetDefault(0);
    AddAttr<int>("data_type",
                 "(int, default 5 (FP32)) "
                 "Output data type.")
        .SetDefault(framework::DataType::FP32);

    AddComment(R"DOC(
GaussianRandom Operator.

Used to initialize tensors with gaussian random generator.

)DOC");
  }
};
......
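For intuition, the attribute set above (shape, mean, std, seed) maps directly onto the C++ standard library. A minimal sketch, assuming a flat float buffer rather than a Paddle tensor:

#include <random>
#include <vector>

// Fill a buffer of `size` elements from N(mean, std). seed == 0 falls
// back to a non-deterministic system seed, mirroring the attribute doc.
std::vector<float> GaussianRandom(size_t size, float mean = 0.0f,
                                  float std = 1.0f, unsigned seed = 0) {
  std::mt19937 engine(seed == 0 ? std::random_device{}() : seed);
  std::normal_distribution<float> dist(mean, std);
  std::vector<float> out(size);
  for (auto& v : out) v = dist(engine);
  return out;
}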
...@@ -80,19 +80,21 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("HiddenPrev",
             "(Tensor) Matrix with shape [batch_size, frame_size] for the "
             "states of previous time step.");
    AddInput(
        "Weight",
        "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
        "The elements continuous in memory can be divided into two parts. "
        "The first part are weights of the update gate and reset gate "
        "with shape [frame_size, frame_size * 2], and the second part are "
        "weights of output candidate with shape [frame_size, frame_size].");
    AddInput(
        "Bias",
        "(Tensor) Bias vector with shape [1, frame_size * 3] concatenating "
        "bias of the update gate, reset gate and output candidate.")
        .AsDispensable();
    AddOutput("Gate",
              "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
              "output of update gate, reset gate and output candidate.")
        .AsIntermediate();
    AddOutput("ResetHiddenPrev",
              "(Tensor) Matrix with shape [batch_size, frame_size] for the "
...@@ -112,16 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
        .SetDefault(sigmoid)
        .InEnum({identity, sigmoid, tanh, relu});
    AddComment(R"DOC(
GRUUnit Operator.

This operator implements partial calculations of the GRU unit as follows:

$$
update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
$$

The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.

)DOC");
  }
};
......
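For frame_size = 1 the GRU unit equations above reduce to a few scalar operations. A sketch assuming sigmoid for actGate and tanh for actNode (the defaults), with xu/xr/xc being the already projected inputs produced by FCOp:

#include <cmath>

// One scalar GRU unit step, following the DOC formula.
float GRUUnitStep(float xu, float xr, float xc, float hidden_prev,
                  float w_u, float w_r, float w_c,
                  float b_u, float b_r, float b_c) {
  auto sigmoid = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
  float u = sigmoid(xu + w_u * hidden_prev + b_u);        // update gate
  float r = sigmoid(xr + w_r * hidden_prev + b_r);        // reset gate
  float h_cand = std::tanh(xc + w_c * (r * hidden_prev) + b_c);
  return (1.0f - u) * h_cand + u * hidden_prev;           // new hidden state
}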
...@@ -59,10 +59,12 @@ class HuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
             "The shape is the same as Input(X) and will be reused in backward.")
        .AsIntermediate();
    AddOutput("Out",
              "The output tensor with shape [batch_size, 1] "
              "which represents the huber loss.");
    AddAttr<AttrType>("delta", "Hyper parameter in huber loss.");
    AddComment(R"DOC(
HuberLoss Operator.

Huber loss is a loss function used in robust regression. We define X as the
input value and Y as the target value. Huber loss can evaluate the fitness of
X to Y. Different from MSE loss, Huber loss is more robust for outliers. The
......
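The DOC text is truncated in this view; for reference, a sketch of the standard elementwise Huber loss that the delta attribute controls, assuming the usual definition (quadratic near zero, linear in the tails):

#include <cmath>

// Standard Huber loss for one (input, target) pair:
// 0.5 * r^2 for |r| <= delta, else delta * (|r| - 0.5 * delta), r = y - x.
float HuberLoss(float x, float y, float delta) {
  float r = std::fabs(y - x);
  return r <= delta ? 0.5f * r * r : delta * (r - 0.5f * delta);
}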
...@@ -39,14 +39,18 @@ class IncrementOpMaker : public framework::OpProtoAndCheckerMaker {
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(Tensor) The input tensor of increment operator");
    AddOutput("Out", "(Tensor) The output tensor of increment operator.");
    AddAttr<AttrType>("step",
                      "(float, default 1.0) "
                      "The step size by which the "
                      "input tensor will be incremented.")
        .SetDefault(1.0);
    AddComment(R"DOC(
Increment Operator.

The equation is:
$$Out = X + step$$

)DOC");
  }
};
......
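The equation $$Out = X + step$$ is elementwise; a one-line sketch over a plain buffer:

#include <vector>

// Every element shifted by `step` (default 1.0, as in the attribute doc).
std::vector<float> Increment(std::vector<float> x, float step = 1.0f) {
  for (auto& v : x) v += step;
  return x;
}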
...@@ -57,7 +57,7 @@ L1 Norm Operator.

Computes the L1 norm of a tensor.

$$Out = \sum{|X|}$$

)DOC");
  }
......
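The formula $$Out = \sum{|X|}$$ reduces over all elements; a minimal sketch:

#include <cmath>
#include <vector>

// L1 norm: sum of absolute values over the whole tensor.
float L1Norm(const std::vector<float>& x) {
  float sum = 0.0f;
  for (float v : x) sum += std::fabs(v);
  return sum;
}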
...@@ -115,14 +115,18 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  LoadOpProtoMaker(framework::OpProto *proto,
                   framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Out", "(Tensor) The tensor that needs to be loaded");
    AddAttr<std::string>("file_path",
                         "(string) "
                         "Variable will be loaded from \"file_path\".")
        .AddCustomChecker(
            [](const std::string &path) { return !path.empty(); });
    AddComment(R"DOC(
Load Operator.

Load operator will load a tensor variable from disk file.

)DOC");
  }
};
}  // namespace operators
......
...@@ -53,21 +53,27 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
                    framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("W",
             "An input represents embedding tensors, "
             "which is a learnable parameter.");
    AddInput("Ids",
             "An input with type int32 or int64 "
             "contains the ids to be looked up in W. "
             "Ids must be a column vector with rank = 2. "
             "The 2nd dimension size must be 1.");
    AddOutput("Out", "The lookup results, which have the same type as W.");
    AddAttr<bool>("is_sparse",
                  "(boolean, default false) "
                  "Sparse update.")
        .SetDefault(false);
    AddComment(R"DOC(
Lookup Table Operator.

This operator is used to perform lookups on the parameter W;
the looked up entries are then concatenated into a dense tensor.

The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD information with input Ids.

)DOC");
  }
};
......
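Semantically this is the same row-gather pattern as the gather operator earlier in this commit, with W as the table. A sketch that ignores LoD propagation and the is_sparse update path:

#include <cstdint>
#include <vector>

// Row ids gathered from the embedding matrix W and stacked densely.
std::vector<std::vector<float>> LookupTable(
    const std::vector<std::vector<float>>& w,  // [vocab_size, emb_dim]
    const std::vector<int64_t>& ids) {         // column vector of ids
  std::vector<std::vector<float>> out;
  out.reserve(ids.size());
  for (int64_t id : ids) out.push_back(w[id]);
  return out;
}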
...@@ -45,72 +45,70 @@ class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LRNOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(Tensor) The input of LRN operator. "
             "It must be a 4D tensor with NCHW format.");
    AddOutput("Out",
              "(Tensor) The output of LRN operator, which is also the 4D "
              "tensor with NCHW format.");
    AddOutput("MidOut",
              "(Tensor) Middle result of LRN operator. It's computed in "
              "forward process and also used in backward process.");

    AddAttr<int>("n",
                 "(int, default 5) "
                 "n is the number of \"adjacent\" kernel maps "
                 "at the same spatial position.")
        .SetDefault(5)
        .GreaterThan(0);

    AddAttr<T>("k",
               "(float, default 2.0) "
               "k is the bias.")
        .SetDefault(2.0)
        .GreaterThan(0.0);

    AddAttr<T>("alpha",
               "(float, default 0.0001) "
               "alpha is the scale number.")
        .SetDefault(0.0001)
        .GreaterThan(0.0);

    AddAttr<T>("beta",
               "(float, default 0.75) "
               "beta is the power number.")
        .SetDefault(0.75)
        .GreaterThan(0.0);

    AddComment(R"DOC(
Local Response Normalization Operator.

This operator comes from the paper
"ImageNet Classification with Deep Convolutional Neural Networks".

The original formula is:

$$
Output(i, x, y) = Input(i, x, y) / \left(
k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)}
(Input(j, x, y))^2
\right)^{\beta}
$$

Function implementation:

Inputs and outputs are in NCHW format, while input.shape.ndims() equals 4.
And dimensions 0 ~ 3 represent batch size, feature maps, rows,
and columns, respectively.

Input and Output in the formula above are for each map (i) of one image, and
Input(i, x, y), Output(i, x, y) represent an element in an image.

C is the number of feature maps of one image. n is a hyper-parameter
configured when the operator is initialized. The sum in the denominator
is the sum of the same positions in the neighboring maps.

)DOC");
  }
};
......
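The formula above normalizes each channel by a sum over its n neighboring maps at the same (x, y). A single-position sketch; with 0-based channel indexing the 1-based bound min(C, c + n/2) becomes min(C - 1, c + n/2):

#include <algorithm>
#include <cmath>
#include <vector>

// `input` holds the values at one (x, y) position across all C feature
// maps; channel c is normalized over its n neighboring maps.
float LRNAtChannel(const std::vector<float>& input, int c, int n = 5,
                   float k = 2.0f, float alpha = 1e-4f, float beta = 0.75f) {
  int C = static_cast<int>(input.size());
  int lower = std::max(0, c - n / 2);
  int upper = std::min(C - 1, c + n / 2);
  float sum = 0.0f;
  for (int j = lower; j <= upper; ++j) sum += input[j] * input[j];
  return input[c] / std::pow(k + alpha * sum, beta);
}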
...@@ -103,7 +103,7 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("H0",
             "(Tensor, optional) the initial hidden state is an optional "
             "input. This is a tensor with shape (N x D), where N is the "
             "batch size and D is the hidden size.")
        .AsDispensable();
    AddInput("C0",
             "(Tensor, optional) the initial cell state is an optional "
...@@ -134,85 +134,82 @@ class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput("BatchGate",
              "(LoDTensor) This LoDTensor contains input gate, forget gate "
              "and output gate after the nonlinear computation. This "
              "LoDTensor has the same shape as the reorganized input, which "
              "is also called batch input. The LoD size is 2. The first "
              "LoD is the batch offsets and the second LoD contains the "
              "indexes, which denote the position of reorganized sequence "
              "in the raw input.")
        .AsIntermediate();
    AddOutput("BatchCellPreAct",
              "(LoDTensor) This LoDTensor is obtained in the forward and used "
              "in the backward.")
        .AsIntermediate();
    AddAttr<bool>("usePeepholes",
                  "(bool, default True) "
                  "whether to enable diagonal/peephole connections.")
        .SetDefault(true);
    AddAttr<bool>("isReverse",
                  "(bool, default False) "
                  "whether to compute reversed LSTM.")
        .SetDefault(false);
    AddAttr<std::string>(
        "gateActivation",
        "(string, default sigmoid) "
        "The activation for input gate, forget gate and output "
        "gate, `sigmoid` by default.")
        .SetDefault("sigmoid");
    AddAttr<std::string>("cellActivation",
                         "(string, default tanh) "
                         "The activation for cell output, `tanh` by default.")
        .SetDefault("tanh");
    AddAttr<std::string>("candidateActivation",
                         "(string, default tanh) "
                         "The activation for candidate hidden state, "
                         "`tanh` by default.")
        .SetDefault("tanh");
    AddComment(R"DOC(
Long-Short Term Memory (LSTM) Operator.

The default implementation is diagonal/peephole connection
(https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:

$$
i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\
\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\
o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
h_t = o_t \odot act_h(c_t)
$$

where the W terms denote weight matrices (e.g. \f$W_{xi}\f$ is the matrix
of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
are diagonal weight matrices for peephole connections. In our implementation,
we use vectors to represent these diagonal weight matrices. The b terms
denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
is the non-linear activation, such as the logistic sigmoid function, and
\f$i, f, o\f$ and \f$c\f$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector \f$h\f$.

The \f$\odot\f$ is the element-wise product of the vectors. \f$act_g\f$ and \f$act_h\f$
are the cell input and cell output activation functions and `tanh` is usually
used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.

Set usePeepholes False to disable peephole connection
(http://www.bioinf.jku.at/publications/older/2604.pdf). The formula
is omitted here.

Note that these \f$W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}\f$
operations on the input \f$x_{t}\f$ are NOT included in this operator.
Users can choose to use a fully-connected operator before the LSTM operator.

)DOC");
  }
};
......
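For frame size 1 the peephole formulas above become plain scalar arithmetic. A sketch assuming tanh for both act_g and act_h, with the input projections (W_{ix}x_t etc.) precomputed as xi/xf/xc/xo, per the note that they are not part of this operator:

#include <cmath>

struct LSTMState { float c, h; };

// One scalar peephole LSTM step following the DOC formula.
LSTMState LSTMStep(float xi, float xf, float xc, float xo,
                   float h_prev, float c_prev,
                   float w_ih, float w_fh, float w_ch, float w_oh,
                   float w_ic, float w_fc, float w_oc,
                   float b_i, float b_f, float b_c, float b_o) {
  auto sigm = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
  float i = sigm(xi + w_ih * h_prev + w_ic * c_prev + b_i);
  float f = sigm(xf + w_fh * h_prev + w_fc * c_prev + b_f);
  float c_cand = std::tanh(xc + w_ch * h_prev + b_c);   // act_g = tanh
  float c = f * c_prev + i * c_cand;
  float o = sigm(xo + w_oh * h_prev + w_oc * c + b_o);  // peephole uses new c
  float h = o * std::tanh(c);                           // act_h = tanh
  return {c, h};
}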
...@@ -57,17 +57,22 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
        "The cell state tensor of last time-step in the Lstm Unit operator.");
    AddOutput("C", "The cell tensor of Lstm Unit operator.");
    AddOutput("H", "The hidden state tensor of Lstm Unit operator.");
    AddAttr<float>("forget_bias",
                   "(float, default 0.0) "
                   "The forget bias of Lstm Unit.")
        .SetDefault(0.0);
    AddComment(R"DOC(
Lstm Unit Operator.

Equation:

$$
i, f, o, j = split(X) \\
C = C_{prev} * sigm(f + forget\_bias) + sigm(i) * tanh(j) \\
H = C * sigm(o)
$$

)DOC");
  }
};
......
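A scalar sketch of the equation above, with X already split into its i, f, o, j quarters:

#include <cmath>

struct CH { float c, h; };

// One scalar LstmUnit step following the DOC equation.
CH LstmUnitStep(float i, float f, float o, float j, float c_prev,
                float forget_bias = 0.0f) {
  auto sigm = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
  float c = c_prev * sigm(f + forget_bias) + sigm(i) * std::tanh(j);
  float h = c * sigm(o);  // H = C * sigm(o)
  return {c, h};
}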