From 2ac9a3d8dcc64ed06c09c42bf55e5be15b7ca329 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Tue, 31 Oct 2017 18:38:23 +0800 Subject: [PATCH] follow comments. --- paddle/framework/tensor_impl.h | 2 +- paddle/operators/linear_chain_crf_op.cc | 25 ++++++++++--------- paddle/operators/linear_chain_crf_op.h | 14 +++++++---- .../tests/test_linear_chain_crf_op.py | 3 +++ 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 46dc6fbdff..bcccdd5881 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -235,7 +235,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound."); PADDLE_ENFORCE_LT( begin_idx, end_idx, - "The start row index must be smaller than the end row index."); + "The start row index must be lesser than the end row index."); if (dims_[0] == 1) { return *this; diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 06d71d26be..605dbba5af 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -26,9 +26,8 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "Emission", "(LoDTensor, default: LoDTensor). " "The unscaled emission weight matrix for the linear chain CRF. " - "This input is a LoDTensor with shape [N x D] where N is the total " - "element number of all input squences in a mini-batch, " - "and D is the total tag number."); + "This input is a LoDTensor with shape [N x D] where N is the size of " + "the mini-batch and D is the total tag number."); AddInput( "Transition", "(Tensor, default: Tensor). A Tensor with shape [(D + 2) x D]. " @@ -36,7 +35,7 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "See more details in the operator's comments."); AddInput( "Label", - "(LoDTensor, default: LoDTensor). 
The groundtruth which is a 2-D " + "(LoDTensor, default: LoDTensor). The ground truth which is a 2-D " "LoDTensor with shape [N x 1], where N is the total element number in " "a mini-batch."); AddOutput( @@ -77,12 +76,13 @@ variables. CRF learns the conditional probability \f$P(Y|X)\f$, where Linear chain CRF is a special case of CRF that is useful for sequence labeling task. Sequence labeling tasks do not assume a lot of conditional -independences among inputs. They only concern about the input and the output -being linear sequences. Thus, the graph model of such a CRF is a simple chain -or a line, which results in the linear chain CRF. +independences among inputs. The only constraint they impose is that the input +and output must be linear sequences. Thus, the graph of such a CRF is a simple +chain or a line, which results in the linear chain CRF. This operator implements the Forward-Backward algorithm for the linear chain -CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf for reference. +CRF. Please see http://www.cs.columbia.edu/~mcollins/fb.pdf and +http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for reference. Equation: @@ -111,7 +111,7 @@ NOTE: transition features. The emission feature weights are NOT computed in this operator. They MUST be computed first before this operator is called. -2. Because this operator performs globally normaliztion over all possible +2. Because this operator performs global normalization over all possible sequences internally, it expects UNSCALED emission feature weights. Please do not call this op with the emission feature being output of any nonlinear activation. @@ -171,9 +171,10 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Alpha", emission_dims); ctx->SetOutputDim("EmissionExps", emission_dims); ctx->SetOutputDim("TransitionExps", transition_dims); - // (TODO caoying) This is tricky. The 1st dimension of Output(LogLikelihood) + // TODO(caoying) This is tricky. 
The 1st dimension of Output(LogLikelihood) // is the sequence number in a mini-batch. The dimension set here should be - // resized to its correct size in the function Compute. + // resized to its correct size in the function Compute. Fix this once we can + // get LoD information in the InferShape interface. ctx->SetOutputDim("LogLikelihood", {emission_dims[0], 1}); } @@ -236,7 +237,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of output of the linear_chain_crf_grad - // operator is determined by its input: graidents of LogLikelihood. + // operator is determined by its input: gradients of LogLikelihood. framework::DataType IndicateDataType( const framework::ExecutionContext& ctx) const override { return framework::ToDataType( diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index e14672c78a..24c8b4052d 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -188,7 +188,6 @@ class LinearChainCRFOpKernel : public framework::OpKernel { const LoDTensor& src, LoDTensor* dst) { dst->mutable_data(src.dims(), platform::CPUPlace()); dst->CopyFrom(src, platform::CPUPlace(), ctx); - }; copyLoDTensor(ctx, emission_weights_src, emission_weights_dst); @@ -248,7 +247,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { for (size_t i = 0; i < tag_num; ++i) { T sum = 0.; for (size_t j = 0; j < tag_num; ++j) { - sum += alpha_value[(k - 1) * tag_num + j] * + sum += alpha_value[(k - 1) * tag_num + j] * // (*) w_exps[(j + state_trans_base_idx) * tag_num + i]; } alpha_value[k * tag_num + i] = x_exps[k * tag_num + i] * sum; @@ -291,7 +290,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // These local variables hold the inputs and outputs, garanteeing them on // CPU memory, to provide a consistent reference. 
// TODO(caoying) Fix this by moving all these local variables into the - // class's data members once we can profile the training process. + // class's data members once we can profile the training process, or + // implementing a real GPU kernel for CRF. Tensor* label = nullptr; Tensor label_tensor; Tensor* emission_exps = nullptr; @@ -344,6 +344,9 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { transition_grad = ctx.Output(framework::GradVarName("Transition")); } + + // TODO(caoying) Fix this constraint. When the Input(Emission) is from the + // data reader operator, it can have no gradients. PADDLE_ENFORCE(emission_grad, "Output(Emission@Grad) should not be null."); emission_grad->mutable_data(platform::CPUPlace()); math::SetConstant()(ctx.device_context(), @@ -458,7 +461,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { for (size_t i = 0; i < tag_num; ++i) { T sum = 0.; for (size_t j = 0; j < tag_num; ++j) { - sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * + sum += w_exps[(i + state_trans_base_idx) * tag_num + j] * // (**) x_exps[(k + 1) * tag_num + j] * beta_value[(k + 1) * tag_num + j]; } @@ -493,7 +496,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { auto x_exps_mat = EigenMatrix::From(emission_exps); - // TODO(caoying): Fix this to avoid using this local variable. + // TODO(caoying): Fix this to avoid using this local variable once we can + // profile the training process.
Tensor tmp; tmp.mutable_data(beta->dims(), platform::CPUPlace()); auto tmp_mat = EigenMatrix::From(tmp); diff --git a/python/paddle/v2/framework/tests/test_linear_chain_crf_op.py b/python/paddle/v2/framework/tests/test_linear_chain_crf_op.py index 1cc6dc1aaa..6f06a66c82 100644 --- a/python/paddle/v2/framework/tests/test_linear_chain_crf_op.py +++ b/python/paddle/v2/framework/tests/test_linear_chain_crf_op.py @@ -83,6 +83,9 @@ class LinearChainCrfForward(object): class TestLinearChainCrfOp(OpTest): def set_test_data(self): + # TODO(caoying) Fix the unittest by adding the boundary cases where + # sequence lengths are 1, 2, and 3. + SEQ_NUM = 3 TAG_NUM = 17 MAX_SEQ_LEN = 5 -- GitLab