From 9b3e79ac098712eac9d196f39b6c17a1abdf8026 Mon Sep 17 00:00:00 2001 From: sneaxiy Date: Fri, 22 Mar 2019 03:28:59 +0000 Subject: [PATCH] cherry-pick mem op to release/1.3 test=release/1.3 --- paddle/fluid/framework/executor.cc | 40 +-- paddle/fluid/framework/executor.h | 17 +- paddle/fluid/operators/cross_entropy_op.cc | 244 +++++++++++++++--- paddle/fluid/operators/cross_entropy_op.cu | 10 + paddle/fluid/operators/cross_entropy_op.h | 120 +++++++++ paddle/fluid/operators/expand_op.cc | 27 +- paddle/fluid/operators/expand_op.cu | 8 +- paddle/fluid/operators/math.h | 56 ++++ paddle/fluid/operators/recurrent_op.cc | 8 +- python/paddle/fluid/executor.py | 2 +- python/paddle/fluid/layers/nn.py | 18 ++ .../tests/unittests/test_cross_entropy2_op.py | 82 ++++++ .../tests/unittests/test_dist_transpiler.py | 20 +- 13 files changed, 578 insertions(+), 74 deletions(-) create mode 100644 paddle/fluid/operators/math.h create mode 100644 python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 4323883fa5..c8ce86fcb9 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -14,6 +14,10 @@ limitations under the License. */ #include "paddle/fluid/framework/executor.h" #include +#include +#include +#include +#include #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/lod_rank_table.h" @@ -74,11 +78,11 @@ static std::unordered_map GetNonPersistableReferenceCounts( ExecutorPrepareContext::ExecutorPrepareContext( const framework::ProgramDesc& prog, size_t block_id, - const std::vector& skip_ref_cnt_vars) - : prog_(prog), block_id_(block_id) { - if (GetEagerDeletionThreshold() >= 0) { - global_ref_cnts_ = GetNonPersistableReferenceCounts(prog.Block(block_id), - skip_ref_cnt_vars); + const std::vector& keep_vars, bool force_disable_gc) + : prog_(prog), block_id_(block_id), force_disable_gc_(force_disable_gc) { + if (GetEagerDeletionThreshold() >= 0 && !force_disable_gc_) { + global_ref_cnts_ = + GetNonPersistableReferenceCounts(prog.Block(block_id), keep_vars); } } @@ -183,13 +187,15 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, } void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, - bool create_local_scope, bool create_vars) { + bool create_local_scope, bool create_vars, + const std::vector& skip_ref_cnt_vars, + bool force_disable_gc) { platform::RecordBlock b(block_id); if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc); #ifdef PADDLE_WITH_NGRAPH if (FLAGS_use_ngraph) operators::NgraphEngine::EnableNgraph(pdesc); #endif - auto ctx = Prepare(pdesc, block_id); + auto ctx = Prepare(pdesc, block_id, skip_ref_cnt_vars, force_disable_gc); RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars); } @@ -356,9 +362,9 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, std::unique_ptr Executor::Prepare( const ProgramDesc& program, int block_id, - const std::vector& skip_ref_cnt_vars) { - std::unique_ptr ctx( - new ExecutorPrepareContext(program, block_id, skip_ref_cnt_vars)); + const std::vector& skip_ref_cnt_vars, bool force_disable_gc) { + std::unique_ptr ctx(new ExecutorPrepareContext( + program, block_id, skip_ref_cnt_vars, force_disable_gc)); PADDLE_ENFORCE_LT(static_cast(block_id), program.Size()); auto& block = program.Block(block_id); for (auto& op_desc : block.AllOps()) { @@ -369,7 +375,8 @@ std::unique_ptr Executor::Prepare( std::vector> Executor::Prepare( const ProgramDesc& 
program, const std::vector& block_ids, - const std::vector>& skip_ref_cnt_vars) { + const std::vector>& skip_ref_cnt_vars, + bool force_disable_gc) { PADDLE_ENFORCE( skip_ref_cnt_vars.empty() || skip_ref_cnt_vars.size() == block_ids.size(), "skip_ref_cnt_vars should be either empty or equals to block number %d", @@ -379,9 +386,11 @@ std::vector> Executor::Prepare( for (auto& bid : block_ids) { ExecutorPrepareContext* ctx; if (skip_ref_cnt_vars.empty()) { - ctx = new ExecutorPrepareContext(program, bid); + ctx = new ExecutorPrepareContext(program, bid, std::vector(), + force_disable_gc); } else { - ctx = new ExecutorPrepareContext(program, bid, skip_ref_cnt_vars[idx]); + ctx = new ExecutorPrepareContext(program, bid, skip_ref_cnt_vars[idx], + force_disable_gc); } PADDLE_ENFORCE_LT(static_cast(bid), program.Size()); auto& block = program.Block(bid); @@ -408,8 +417,9 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, int64_t max_memory_size = GetEagerDeletionThreshold(); std::unique_ptr gc; - // skip while_op and while_grad_op temporarily - if (max_memory_size >= 0 && !keep_kids) { + // FIXME(zjl): recurrent_op is rather complex, we would + // disable gc forcely in recurrent_op + if (!ctx->force_disable_gc_ && max_memory_size >= 0 && !keep_kids) { ctx->ResetReferenceCount(); #ifdef PADDLE_WITH_CUDA if (platform::is_gpu_place(place_)) { diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 5a040ac641..65cb9e51ab 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include +#include #include +#include #include #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/op_info.h" @@ -30,7 +32,8 @@ namespace framework { struct ExecutorPrepareContext { ExecutorPrepareContext(const framework::ProgramDesc& prog, size_t block_id, const std::vector& skip_ref_cnt_vars = - std::vector()); + std::vector(), + bool force_disable_gc = false); ~ExecutorPrepareContext(); @@ -38,6 +41,7 @@ struct ExecutorPrepareContext { const framework::ProgramDesc& prog_; size_t block_id_; + bool force_disable_gc_; std::vector> ops_; std::unordered_map global_ref_cnts_; @@ -66,7 +70,10 @@ class Executor { * Scope */ void Run(const ProgramDesc& prog, Scope* scope, int block_id, - bool create_local_scope = true, bool create_vars = true); + bool create_local_scope = true, bool create_vars = true, + const std::vector& skip_ref_cnt_vars = + std::vector(), + bool force_disable_gc = false); // This API is very slow. void Run(const ProgramDesc& program, Scope* scope, @@ -79,12 +86,14 @@ class Executor { static std::unique_ptr Prepare( const ProgramDesc& program, int block_id, const std::vector& skip_ref_cnt_vars = - std::vector()); + std::vector(), + bool force_disable_gc = false); static std::vector> Prepare( const ProgramDesc& program, const std::vector& block_ids, const std::vector>& skip_ref_cnt_vars = - std::vector>()); + std::vector>(), + bool force_disable_gc = false); void CreateVariables(const ProgramDesc& pdesc, Scope* scope, int block_id); diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 1968e54b00..a617b9fb1d 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,18 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/cross_entropy_op.h" +#include #include +#include namespace paddle { namespace operators { -class CrossEntropyOp : public framework::OperatorWithKernel { +class CrossEntropyOpBase : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); + PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); auto x_dims = ctx->GetInputDim("X"); @@ -32,14 +35,24 @@ class CrossEntropyOp : public framework::OperatorWithKernel { int rank = x_dims.size(); PADDLE_ENFORCE_EQ(rank, label_dims.size(), "Input(X) and Input(Label) shall have the same rank."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "Input(X) and Input(Label) shall have the same shape " - "except the last dimension."); - if (ctx->Attrs().Get("soft_label")) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "If Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "Input(X) and Input(Label) shall have the same shape " + "except the last dimension."); + } + + if (IsSoftLabel(ctx)) { + if (check) { + PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], + "If Attr(soft_label) == true, the last dimension of " + "Input(X) and Input(Label) should be equal."); + } } else { PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL, "If Attr(softLabel) == false, the last dimension of " @@ -60,21 +73,24 @@ class CrossEntropyOp : public framework::OperatorWithKernel { return framework::OpKernelType(ctx.Input("X")->type(), ctx.device_context()); } + + virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { + return ctx->Attrs().Get("soft_label"); + } }; -class CrossEntropyGradientOp : public framework::OperatorWithKernel { +class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + void InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), "Input(Y@GRAD) shoudl be not null."); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Output(X@GRAD) should be not null."); - auto x_dims = ctx->GetInputDim("X"); + auto x_dims = GetXDim(ctx); auto label_dims = ctx->GetInputDim("Label"); auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); int rank = x_dims.size(); @@ -82,27 +98,40 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { "Input(Y@Grad) and Input(X) should have the same rank."); PADDLE_ENFORCE_EQ(label_dims.size(), rank, "Input(Label) and Input(X) should have the same rank."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(label_dims, 0, rank - 1), - "The Input(X) and Input(Label) should have the same " - "shape except 
the last dimension."); - PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), - framework::slice_ddim(dy_dims, 0, rank - 1), - "The Input(X) and Input(Y@Grad) should have the same " - "shape except the last dimension."); - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - if (ctx->Attrs().Get("soft_label")) { - PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1], - "When Attr(soft_label) == true, the last dimension of " - "Input(X) and Input(Label) should be equal."); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(x_dims) <= 0 || + framework::product(label_dims) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(label_dims, 0, rank - 1), + "The Input(X) and Input(Label) should have the same " + "shape except the last dimension."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), + framework::slice_ddim(dy_dims, 0, rank - 1), + "The Input(X) and Input(Y@Grad) should have the same " + "shape except the last dimension."); + } + if (IsSoftLabel(ctx)) { + if (check) { + PADDLE_ENFORCE_EQ( + x_dims[rank - 1], label_dims[rank - 1], + "When Attr(soft_label) == true, the last dimension of " + "Input(X) and Input(Label) should be equal."); + } } else { PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1, "When Attr(soft_label) == false, the last dimension of " "Input(Label) should be 1."); } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); + PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, + "The last dimension of Input(Y@Grad) should be 1."); + ctx->SetOutputDim(framework::GradVarName("X"), x_dims); + ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X")); } protected: @@ -110,8 +139,28 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { // is determined by its input "X". framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + return framework::OpKernelType( + ctx.Input(framework::GradVarName("Y"))->type(), + ctx.device_context()); + } + + virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { + return ctx->GetInputDim("X"); + } + + virtual const char* VarNameWithXLoD() const { return "X"; } + + virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { + return ctx->Attrs().Get("soft_label"); + } +}; + +class CrossEntropyOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; } }; @@ -179,22 +228,132 @@ or not. But the output only shares the LoD information with input X. 
} }; -class CrossEntropyOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { +class CrossEntropyGradientOp : public CrossEntropyGradientOpBase { + public: + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); + CrossEntropyGradientOpBase::InferShape(ctx); + } +}; + +class CrossEntropyOp2 : public CrossEntropyOpBase { + public: + using CrossEntropyOpBase::CrossEntropyOpBase; + + void InferShape(framework::InferShapeContext* ctx) const override { + CrossEntropyOpBase::InferShape(ctx); + + PADDLE_ENFORCE(ctx->HasOutput("XShape"), + "Output(XShape) should be not null."); + + PADDLE_ENFORCE(ctx->HasOutput("MatchX"), + "Output(MatchX) should be not null."); + auto x_dims = ctx->GetInputDim("X"); + auto x_dims_vec = framework::vectorize(x_dims); + x_dims_vec.push_back(0); + ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); + x_dims[x_dims.size() - 1] = 1; + ctx->SetOutputDim("MatchX", x_dims); + ctx->ShareLoD("X", /*->*/ "XShape"); + } + protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; + bool IsSoftLabel(framework::InferShapeContext* ctx) const override { + return false; } }; + +class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { + public: + using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("MatchX"), "Input(MatchX) must exist"); + CrossEntropyGradientOpBase::InferShape(ctx); + } + + protected: + virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { + auto x_shape = ctx->GetInputDim("XShape"); + return framework::DDim(x_shape.Get(), x_shape.size() - 1); + } + + virtual const char* VarNameWithXLoD() const { return "XShape"; } + + virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { + return false; + } +}; + +class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", + "(Tensor, default Tensor), a tensor whose last dimension " + "size is equal to the number of classes. This input is a " + "probability computed by the previous operator, which is almost " + "always the result of a softmax operator."); + AddInput( + "Label", + "(Tensor), the tensor which represents the ground truth. It has the " + "same shape with 'X' except the last dimension. One hot Tensor."); + AddOutput("Y", + "(Tensor, default Tensor), a tensor whose shape is same " + "with 'X' except that the last dimension size is 1. It " + "represents the cross entropy loss."); + AddOutput("XShape", "Temporaily variable to save shape and LoD of X."); + AddOutput("MatchX", + "X value that matches label, used for gradient computation."); + AddAttr("ignore_index", + "(int, default -100), Specifies a target value that is" + "ignored and does not contribute to the input gradient." + "Only valid if soft_label is set to False") + .SetDefault(-100); + AddComment(R"DOC( +Hard-label CrossEntropy Operator. + +The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. +The matrix's second dimension(row length) is as same as the original last +dimension, and the first dimension(column length) is the product of all other +original dimensions. Then the softmax computation will take palce on each raw +of flattened matrixs. + +Only support hard label. 
+ +Both the input X and Label can carry the LoD (Level of Details) information, +or not. But the output only shares the LoD information with input X. + +)DOC"); + } +}; + +class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("cross_entropy_grad2"); + op->SetInput("Label", Input("Label")); + op->SetInput("MatchX", Output("MatchX")); + op->SetInput("XShape", Output("XShape")); + op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; -REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, - ops::CrossEntropyOpInferVarType, +REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase, + ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, @@ -202,3 +361,14 @@ REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, REGISTER_OP_CPU_KERNEL(cross_entropy_grad, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel); + +REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, + ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, + ops::CrossEntropyGradOpDescMaker2); +REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); +REGISTER_OP_CPU_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); +REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu index fcd34383a8..243e7f52c1 100644 --- a/paddle/fluid/operators/cross_entropy_op.cu +++ b/paddle/fluid/operators/cross_entropy_op.cu @@ -27,3 +27,13 @@ REGISTER_OP_CUDA_KERNEL( cross_entropy_grad, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel); + +REGISTER_OP_CUDA_KERNEL(cross_entropy2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2, + ops::CrossEntropyOpKernel2); + +REGISTER_OP_CUDA_KERNEL( + cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2, + ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index f123e11542..7eb663773e 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" @@ -137,5 +138,124 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel { } }; +template +struct HardLabelCrossEntropyForwardFunctor { + HardLabelCrossEntropyForwardFunctor(const T* x, T* y, T* match_x, + const int64_t* label, + int64_t ignore_index, + int64_t feature_size) + : x_(x), + y_(y), + match_x_(match_x), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void operator()(int64_t idx) const { + auto label = label_[idx]; + if (label != ignore_index_) { + auto match_x = x_[idx * feature_size_ + label]; + y_[idx] = -math::TolerableValue()(real_log(match_x)); + match_x_[idx] = match_x; + } else { + y_[idx] = 0; + match_x_[idx] = 0; // any value is ok + } + } + + const T* x_; + T* y_; + T* match_x_; + const int64_t* label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +struct HardLabelCrossEntropyBackwardFunctor { + HardLabelCrossEntropyBackwardFunctor(T* dx, const T* dy, const T* match_x, + const int64_t* label, + int64_t ignore_index, + int64_t feature_size) + : dx_(dx), + dy_(dy), + match_x_(match_x), + label_(label), + ignore_index_(ignore_index), + feature_size_(feature_size) {} + + HOSTDEVICE void operator()(int64_t idx) const { + auto row_idx = idx / feature_size_; + auto col_idx = idx % feature_size_; + auto label = label_[row_idx]; + if (label == col_idx && label != ignore_index_) { + dx_[idx] = -dy_[row_idx] / match_x_[row_idx]; + } else { + dx_[idx] = 0; + } + } + + T* dx_; + const T* dy_; + const T* match_x_; + const int64_t* label_; + int64_t ignore_index_; + int64_t feature_size_; +}; + +template +class CrossEntropyOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* y = ctx.Output("Y"); + auto* match_x = ctx.Output("MatchX"); + + auto& x_dims = x->dims(); + auto feature_size = x_dims[x_dims.size() - 1]; + auto batch_size = framework::product(x->dims()) / feature_size; + + auto* p_x = x->data(); + auto* p_label = label->data(); + auto* p_y = y->mutable_data(ctx.GetPlace()); + auto* p_match_x = match_x->mutable_data(ctx.GetPlace()); + + auto ignore_index = ctx.Attr("ignore_index"); + + platform::ForRange for_range( + ctx.template device_context(), batch_size); + for_range(HardLabelCrossEntropyForwardFunctor( + p_x, p_y, p_match_x, p_label, ignore_index, feature_size)); + } +}; + +template +class CrossEntropyGradientOpKernel2 : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* match_x = ctx.Input("MatchX"); + auto* label = ctx.Input("Label"); + + auto* p_dx = dx->mutable_data(ctx.GetPlace()); + auto* p_dy = dy->data(); + auto* p_match_x = match_x->data(); + auto* p_label = label->data(); + + int64_t ignore_index = ctx.Attr("ignore_index"); + int rank = dx->dims().size(); + int64_t feature_size = dx->dims()[rank - 1]; + int64_t batch_size = framework::product(dx->dims()) / feature_size; + + platform::ForRange for_range( + ctx.template device_context(), + batch_size * feature_size); + 
for_range(HardLabelCrossEntropyBackwardFunctor( + p_dx, p_dy, p_match_x, p_label, ignore_index, feature_size)); + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 6aa4c76b9c..fcb2be9363 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/expand_op.h" +#include #include namespace paddle { @@ -138,15 +139,35 @@ class ExpandGradOp : public framework::OperatorWithKernel { } }; +class ExpandGradOpDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("expand_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetAttrMap(Attrs()); + return op; + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::ExpandGradOpDescMaker); REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp); REGISTER_OP_CPU_KERNEL( - expand, ops::ExpandKernel); + expand, ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CPU_KERNEL( expand_grad, - ops::ExpandGradKernel); + ops::ExpandGradKernel, + ops::ExpandGradKernel); diff --git a/paddle/fluid/operators/expand_op.cu b/paddle/fluid/operators/expand_op.cu index d95c9b6180..50a506b294 100644 --- a/paddle/fluid/operators/expand_op.cu +++ b/paddle/fluid/operators/expand_op.cu @@ -15,7 +15,11 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - expand, ops::ExpandKernel); + expand, ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel, + ops::ExpandKernel); REGISTER_OP_CUDA_KERNEL( expand_grad, - ops::ExpandGradKernel); + ops::ExpandGradKernel, + ops::ExpandGradKernel); diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h new file mode 100644 index 0000000000..f18611588f --- /dev/null +++ b/paddle/fluid/operators/math.h @@ -0,0 +1,56 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/hostdevice.h" + +#include "math.h" // NOLINT + +namespace paddle { +namespace operators { + +inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) { + return static_cast(::expf(static_cast(x))); +} + +inline HOSTDEVICE float real_exp(float x) { return ::expf(x); } + +inline HOSTDEVICE double real_exp(double x) { return ::exp(x); } + +inline HOSTDEVICE platform::float16 real_log(platform::float16 x) { + return static_cast(::logf(static_cast(x))); +} + +inline HOSTDEVICE float real_log(float x) { return ::logf(x); } + +inline HOSTDEVICE double real_log(double x) { return ::log(x); } + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index a1e02a3fd0..eb39b3119c 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -270,7 +270,9 @@ class RecurrentOp : public RecurrentBase { // Every inputs are linked now, execute! executor.Run(*program, &cur_scope, block->ID(), - false /*create_local_scope*/); + false /*create_local_scope*/, true /*create_vars*/, + std::vector() /*skip_ref_cnt_vars*/, + true /*force_disable_gc*/); // get device context from pool platform::DeviceContextPool &pool = @@ -385,7 +387,9 @@ class RecurrentGradOp : public RecurrentBase { VLOG(5) << "Recurrent memory linking finished "; // Run step block with cur_scope executor.Run(*program, &cur_scope, block->ID(), - false /*create_local_scope*/); + false /*create_local_scope*/, true /*create_vars*/, + std::vector() /*skip_ref_cnt_vars*/, + true /*force_disable_gc*/); VLOG(5) << "executor.Run finished "; diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index fc3b24fa81..42a12fbfb1 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -588,7 +588,7 @@ class Executor(object): fetch_var_name=fetch_var_name) self._feed_data(program, feed, feed_var_name, scope) - exe.run(program.desc, scope, 0, True, True) + exe.run(program.desc, scope, 0, True, True, fetch_var_name) outs = self._fetch_data(fetch_list, fetch_var_name, scope) if return_numpy: outs = as_numpy(outs) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index cdd64e89b4..0d5d7d247e 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1398,6 +1398,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) """ + if not soft_label: + return cross_entropy2(input, label, ignore_index) helper = LayerHelper('cross_entropy', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) helper.append_op( @@ -1410,6 +1412,22 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): return out +def cross_entropy2(input, label, ignore_index=kIgnoreIndex): + helper = LayerHelper('cross_entropy2', **locals()) + out = 
helper.create_variable_for_type_inference(dtype=input.dtype) + xshape = helper.create_variable_for_type_inference(dtype=input.dtype) + match_x = helper.create_variable_for_type_inference(dtype=input.dtype) + helper.append_op( + type='cross_entropy2', + inputs={'X': [input], + 'Label': [label]}, + outputs={'Y': [out], + 'MatchX': [match_x], + 'XShape': [xshape]}, + attrs={'ignore_index': ignore_index}) + return out + + def bpr_loss(input, label, name=None): """ Bayesian Personalized Ranking Loss Operator. diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py new file mode 100644 index 0000000000..55029c18d6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py @@ -0,0 +1,82 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from op_test import OpTest +import unittest +import numpy as np +import six + + +class CrossEntropy2OpTestBase(OpTest): + def initParameters(self): + return [32, 64], 'float32', -100 + + def calc_output(self, logits, label, ignore_index): + ret = np.zeros(shape=label.shape, dtype=logits.dtype) + match_x = np.zeros(shape=label.shape, dtype=logits.dtype) + for idx in six.moves.range(label.shape[0]): + if label[idx] == ignore_index: + continue + match_x[idx] = logits[idx][label[idx]] + ret[idx] = -np.log(match_x[idx]) + return ret, match_x + + def setUp(self): + self.shape, self.dtype, self.ignore_index = self.initParameters() + self.op_type = 'cross_entropy2' + feature_size = int(self.shape[-1]) + batch_size = int(np.prod(self.shape) / feature_size) + logits = (np.random.random(size=self.shape) + 1).astype(self.dtype) + label = np.random.random_integers( + low=0, high=feature_size - 1, + size=self.shape[0:-1] + [1]).astype('int64') + outputs, match_x = self.calc_output( + np.reshape(logits, [batch_size, feature_size]), + np.reshape(label, [batch_size, 1]), self.ignore_index) + self.inputs = {'X': logits, 'Label': label} + self.outputs = { + 'Y': np.reshape(outputs, label.shape), + 'MatchX': np.reshape(match_x, label.shape), + 'XShape': np.zeros( + shape=logits.shape, dtype=logits.dtype) + } + self.attrs = {'ignore_index': self.ignore_index} + + def test_check_output(self): + self.check_output(no_check_set=['XShape']) + + def test_check_grad(self): + self.check_grad( + inputs_to_check=['X'], + output_names=['Y'], + no_grad_set=['XShape', 'MatchX', 'Label']) + + +class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): + def initParameters(self): + return [32, 64], 'float64', 3 + + +class CrossEntropy2OpTest3(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', -100 + + +class CrossEntropy2OpTest4(CrossEntropy2OpTestBase): + def initParameters(self): + return [4, 8, 16, 32], 'float32', 3 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py 
index 3566fed215..f89c9c2abe 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -515,8 +515,8 @@ class TestLocalLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -555,8 +555,8 @@ class TestDistLookupTable(TestDistLookupTableBase): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -603,8 +603,8 @@ class TestAsyncLocalLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -643,8 +643,8 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -831,8 +831,8 @@ class TestRemoteLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', -- GitLab
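
The new cross_entropy2 operator added above computes hard-label cross entropy and caches the matched probability in MatchX so the backward pass no longer needs to keep X alive. The following NumPy sketch is illustrative only (the function name and shapes are assumptions, not part of the patch); it mirrors what HardLabelCrossEntropyForwardFunctor does per sample, including the ignore_index handling.

    # Illustrative sketch of the forward functor's math (not part of the patch).
    import numpy as np

    def hard_label_cross_entropy_forward(x, label, ignore_index=-100):
        # x: [batch, num_classes] probabilities; label: [batch] int64 class ids
        batch = x.shape[0]
        y = np.zeros(batch, dtype=x.dtype)
        match_x = np.zeros(batch, dtype=x.dtype)
        for i in range(batch):
            if label[i] == ignore_index:
                continue  # loss and match_x stay 0, as in the C++ functor
            match_x[i] = x[i, label[i]]       # cached as MatchX for the backward pass
            y[i] = -np.log(match_x[i])
        return y, match_x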
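
The gradient kernel reads only Label, MatchX and Y@GRAD, which is the memory saving this cherry-pick is after. A sketch of HardLabelCrossEntropyBackwardFunctor under the same assumptions: only the matched column of each row receives gradient, computed from the cached MatchX instead of re-reading X.

    # Illustrative sketch of the backward functor's math (not part of the patch).
    import numpy as np

    def hard_label_cross_entropy_backward(dy, match_x, label, num_classes,
                                          ignore_index=-100):
        # dy, match_x: [batch]; label: [batch] int64; returns dx: [batch, num_classes]
        batch = dy.shape[0]
        dx = np.zeros((batch, num_classes), dtype=dy.dtype)
        for i in range(batch):
            if label[i] != ignore_index:
                dx[i, label[i]] = -dy[i] / match_x[i]
        return dx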
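
CrossEntropyOp2 also emits an XShape output: based on the InferShape code above, it stores X's dimensions with a trailing 0 appended, so CrossEntropyGradientOp2 can recover X's shape and share its LoD without retaining X's data. A small sketch of that convention (variable names are illustrative):

    import numpy as np

    x = np.random.rand(32, 64).astype('float32')
    xshape_dims = list(x.shape) + [0]     # what CrossEntropyOp2 sets for XShape
    recovered_x_dims = xshape_dims[:-1]   # what CrossEntropyGradientOp2::GetXDim recovers
    assert tuple(recovered_x_dims) == x.shape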
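
On the Python side, fluid.layers.cross_entropy with soft_label=False is now routed to the new cross_entropy2 op, so existing models pick up the optimization without code changes. A minimal usage sketch, assuming the fluid 1.3 API:

    import paddle.fluid as fluid

    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = fluid.layers.fc(input=image, size=10, act='softmax')
    # With this patch, the default soft_label=False path appends a
    # cross_entropy2 op instead of cross_entropy.
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)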