From 55ba7f610b4d1767c057d92a8c7ef31c5d3b9c84 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Thu, 14 Mar 2019 13:51:01 +0000 Subject: [PATCH] fix numeric error test=develop --- paddle/fluid/operators/cross_entropy_op.cc | 14 +++- paddle/fluid/operators/cross_entropy_op.h | 73 ++++++++++++++----- python/paddle/fluid/layers/nn.py | 2 + .../tests/unittests/test_cross_entropy2_op.py | 11 ++- 4 files changed, 78 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 7e744e68e9..be65cd535c 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -248,10 +248,15 @@ class CrossEntropyOp2 : public CrossEntropyOpBase { PADDLE_ENFORCE(ctx->HasOutput("XShape"), "Output(XShape) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("MatchX"), + "Output(MatchX) should be not null."); + auto x_dims = ctx->GetInputDim("X"); auto x_dims_vec = framework::vectorize(x_dims); x_dims_vec.push_back(0); ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); + x_dims[x_dims.size() - 1] = 1; + ctx->SetOutputDim("MatchX", x_dims); ctx->ShareLoD("X", /*->*/ "XShape"); } @@ -265,6 +270,11 @@ class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { public: using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("MatchX"), "Input(MatchX) must exist"); + CrossEntropyGradientOpBase::InferShape(ctx); + } + protected: virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { auto x_shape = ctx->GetInputDim("XShape"); @@ -295,6 +305,8 @@ class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { "with 'X' except that the last dimension size is 1.
It " "represents the cross entropy loss."); AddOutput("XShape", "Temporaily variable to save shape and LoD of X."); + AddOutput("MatchX", + "X value that matches label, used for gradient computation."); AddAttr("ignore_index", "(int, default -100), Specifies a target value that is" "ignored and does not contribute to the input gradient." @@ -327,7 +339,7 @@ class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker { std::unique_ptr op(new framework::OpDesc()); op->SetType("cross_entropy_grad2"); op->SetInput("Label", Input("Label")); - op->SetInput("Y", Output("Y")); + op->SetInput("MatchX", Output("MatchX")); op->SetInput("XShape", Output("XShape")); op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); op->SetOutput(framework::GradVarName("X"), InputGrad("X")); diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 05609e4bc2..7eb663773e 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -138,15 +138,48 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel { } }; +template +struct HardLabelCrossEntropyForwardFunctor { + HardLabelCrossEntropyForwardFunctor(const T* x, T* y, T* match_x, + const int64_t* label, + int64_t ignore_index, + int64_t feature_size) + : x_(x), + y_(y), + match_x_(match_x), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void operator()(int64_t idx) const { + auto label = label_[idx]; + if (label != ignore_index_) { + auto match_x = x_[idx * feature_size_ + label]; + y_[idx] = -math::TolerableValue()(real_log(match_x)); + match_x_[idx] = match_x; + } else { + y_[idx] = 0; + match_x_[idx] = 0; // any value is ok + } + } + + const T* x_; + T* y_; + T* match_x_; + const int64_t* label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + template struct HardLabelCrossEntropyBackwardFunctor { - HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, 
const T* dy, + HardLabelCrossEntropyBackwardFunctor(T* dx, const T* dy, + const T* match_x, const int64_t* label, int64_t ignore_index, int64_t feature_size) : dx_(dx), - y_(y), dy_(dy), + match_x_(match_x), label_(label), ignore_index_(ignore_index), feature_size_(feature_size) {} @@ -156,15 +189,15 @@ struct HardLabelCrossEntropyBackwardFunctor { auto col_idx = idx % feature_size_; auto label = label_[row_idx]; if (label == col_idx && label != ignore_index_) { - dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]); + dx_[idx] = -dy_[row_idx] / match_x_[row_idx]; } else { dx_[idx] = 0; } } T* dx_; - const T* y_; const T* dy_; + const T* match_x_; const int64_t* label_; int64_t ignore_index_; int64_t feature_size_; }; @@ -174,20 +207,26 @@ template class CrossEntropyOpKernel2 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x_original = ctx.Input("X"); - int rank = x_original->dims().size(); - - auto x = framework::ReshapeToMatrix(*x_original, rank - 1); - auto label = - framework::ReshapeToMatrix(*ctx.Input("Label"), rank - 1); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); auto* y = ctx.Output("Y"); - y->mutable_data(ctx.GetPlace()); + auto* match_x = ctx.Output("MatchX"); + + auto& x_dims = x->dims(); + auto feature_size = x_dims[x_dims.size() - 1]; + auto batch_size = framework::product(x->dims()) / feature_size; + + auto* p_x = x->data(); + auto* p_label = label->data(); + auto* p_y = y->mutable_data(ctx.GetPlace()); + auto* p_match_x = match_x->mutable_data(ctx.GetPlace()); auto ignore_index = ctx.Attr("ignore_index"); - math::CrossEntropyFunctor()( - ctx.template device_context(), y, &x, &label, false, - ignore_index); + platform::ForRange for_range( + ctx.template device_context(), batch_size); + for_range(HardLabelCrossEntropyForwardFunctor( + p_x, p_y, p_match_x, p_label, ignore_index, feature_size)); } }; @@ -196,13 +235,13 @@ class CrossEntropyGradientOpKernel2 : public
framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* dx = ctx.Output(framework::GradVarName("X")); - auto* y = ctx.Input("Y"); auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* match_x = ctx.Input("MatchX"); auto* label = ctx.Input("Label"); auto* p_dx = dx->mutable_data(ctx.GetPlace()); - auto* p_y = y->data(); auto* p_dy = dy->data(); + auto* p_match_x = match_x->data(); auto* p_label = label->data(); int64_t ignore_index = ctx.Attr("ignore_index"); @@ -214,7 +253,7 @@ class CrossEntropyGradientOpKernel2 : public framework::OpKernel { ctx.template device_context(), batch_size * feature_size); for_range(HardLabelCrossEntropyBackwardFunctor( - p_dx, p_y, p_dy, p_label, ignore_index, feature_size)); + p_dx, p_dy, p_match_x, p_label, ignore_index, feature_size)); } }; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4f384ce37d..2b9b880f77 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1450,11 +1450,13 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex): helper = LayerHelper('cross_entropy2', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) xshape = helper.create_variable_for_type_inference(dtype=input.dtype) + match_x = helper.create_variable_for_type_inference(dtype=input.dtype) helper.append_op( type='cross_entropy2', inputs={'X': [input], 'Label': [label]}, outputs={'Y': [out], + 'MatchX': [match_x], 'XShape': [xshape]}, attrs={'ignore_index': ignore_index}) return out diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py index c29d422361..55029c18d6 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -24,11 +24,13 @@ class CrossEntropy2OpTestBase(OpTest): def calc_output(self, logits, label, ignore_index):
ret = np.zeros(shape=label.shape, dtype=logits.dtype) + match_x = np.zeros(shape=label.shape, dtype=logits.dtype) for idx in six.moves.range(label.shape[0]): if label[idx] == ignore_index: continue - ret[idx] = -np.log(logits[idx][label[idx]]) - return ret + match_x[idx] = logits[idx][label[idx]] + ret[idx] = -np.log(match_x[idx]) + return ret, match_x def setUp(self): self.shape, self.dtype, self.ignore_index = self.initParameters() @@ -39,12 +41,13 @@ class CrossEntropy2OpTestBase(OpTest): label = np.random.random_integers( low=0, high=feature_size - 1, size=self.shape[0:-1] + [1]).astype('int64') - outputs = self.calc_output( + outputs, match_x = self.calc_output( np.reshape(logits, [batch_size, feature_size]), np.reshape(label, [batch_size, 1]), self.ignore_index) self.inputs = {'X': logits, 'Label': label} self.outputs = { 'Y': np.reshape(outputs, label.shape), + 'MatchX': np.reshape(match_x, label.shape), 'XShape': np.zeros( shape=logits.shape, dtype=logits.dtype) } @@ -57,7 +60,7 @@ class CrossEntropy2OpTestBase(OpTest): self.check_grad( inputs_to_check=['X'], output_names=['Y'], - no_grad_set=['XShape', 'Label']) + no_grad_set=['XShape', 'MatchX', 'Label']) class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): -- GitLab