From 513bc99702e8d8fd36c34de3aa813c0229442d6b Mon Sep 17 00:00:00 2001
From: caoying03
Date: Fri, 8 Sep 2017 18:49:40 +0800
Subject: [PATCH] softmax with cross entropy as a cost operator.

---
 paddle/operators/softmax_with_cost_op.cc      |  82 --------------
 .../softmax_with_cross_entropy_op.cc          | 102 ++++++++++++++++++
 .../softmax_with_cross_entropy_op.cu          |  25 +++++
 ...t_op.h => softmax_with_cross_entropy_op.h} |  22 ++--
 paddle/pybind/pybind.cc                       |   2 +-
 5 files changed, 139 insertions(+), 94 deletions(-)
 delete mode 100644 paddle/operators/softmax_with_cost_op.cc
 create mode 100644 paddle/operators/softmax_with_cross_entropy_op.cc
 create mode 100644 paddle/operators/softmax_with_cross_entropy_op.cu
 rename paddle/operators/{softmax_with_cost_op.h => softmax_with_cross_entropy_op.h} (51%)

diff --git a/paddle/operators/softmax_with_cost_op.cc b/paddle/operators/softmax_with_cost_op.cc
deleted file mode 100644
index a4537691a03..00000000000
--- a/paddle/operators/softmax_with_cost_op.cc
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-:A
-limitations under the License. */
-
-#include "paddle/operators/softmax_op.h"
-
-namespace paddle {
-namespace operators {
-
-class SoftmaxWithLossOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto logits = ctx.Input<Tensor>("logits");
-    PADDLE_ENFORCE(logits->dims().size() == 2UL,
-                   "The input of softmax_with_loss_op should be a 2-d tensor.");
-    PADDLE_ENFORCE(ctx.Input<Tensor>("lables")->dims().size() == 1UL,
-                   "The label should be a 1-d tensor.");
-    ctx.Output<Tensor>("loss")->Resize({logits->dims()[0]});
-  }
-};
-
-class SoftmaxWithLossOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  SoftmaxWithLossOpMaker(framework::OpProto *proto,
-                         framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("logits",
-             "The unscaled log probabilities which is a 2-D tensor with"
-             "shape [N x K]. N is the batch_size, and K is the class number.");
-    AddInput("label", "The ground truth. A 1-D tensor with shape N.");
-    AddOutput("loss", "A 1-D tensor with shape N.");
-    AddComment(R"DOC(
-Cross entropy loss with softmax are used as the output layer extensively. This
-operator computes the softmax normalized values for each row of the input
-tensor, after which cross-entropy loss is then computed. This provides a more
-numerically stable gradient.
-
-Because this operators performs a softmax on logits internally, it expects
-unscaled logits. Please do not call this op with the output of softmax operator,
-which will produce incorrect results.
-
-This operators expects mutually exclusive hard labels, each sample in a batch
-is in exactly one class with probabilities 1. Each sample in the batch with one
-and only one label.
-)DOC"); - } -}; - -class SoftmaxWithLossOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(const framework::InferShapeContext &ctx) const override {} -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP(softmax, ops::SoftmaxWithLossOp, ops::SoftmaxWithLossOpMaker, - softmax_grad, ops::SoftmaxWithLossOpGrad); -REGISTER_OP_CPU_KERNEL( - softmax, ops::SoftmaxWithLossKernel); -REGISTER_OP_CPU_KERNEL( - softmax_grad, - ops::SoftmaxWithLossGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc new file mode 100644 index 00000000000..2edf00766ed --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/softmax_with_cross_entropy_op.h" + +namespace paddle { +namespace operators { + +class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto logits = ctx.Input("logits"); + PADDLE_ENFORCE( + logits->dims().size() == 2UL, + "The input of softmax_with_cross_entropy should be a 2-d tensor."); + PADDLE_ENFORCE(ctx.Input("lables")->dims().size() == 1UL, + "The label should be a 1-d tensor."); + ctx.Output("Y")->Resize({logits->dims()[0]}); + } +}; + +class SoftmaxWithCrossEntropyOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + SoftmaxWithCrossEntropyOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("logits", + "The unscaled log probabilities which is a 2-D tensor with" + "shape [N x K]. N is the batch_size, and K is the class number."); + AddInput("label", "The ground truth. A 1-D tensor with shape N."); + AddOutput("Y", "A 1-D tensor with shape N."); + AddComment(R"DOC( +Cross entropy loss with softmax are used as the output layer extensively. This +operator computes the softmax normalized values for each row of the input +tensor, after which cross-entropy loss is then computed. This provides a more +numerically stable gradient. + +Because this operators performs a softmax on logits internally, it expects +unscaled logits. Please do not call this op with the output of softmax operator, +which will produce incorrect results. + +This operators expects mutually exclusive hard labels, each sample in a batch +is in exactly one class with probabilities 1. Each sample in the batch with one +and only one label. 
+)DOC"); + } +}; + +class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) should be not null."); + PADDLE_ENFORCE_EQ(ctx.Input("Y")->dims(), + ctx.Input(framework::GradVarName("Y"))->dims(), + "Input(Y) and its gradients should have a same shape."); + + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("labels"), + "Input(lables) should be not null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("logits")), + "Input(logits@GRAD) should be not null."); + PADDLE_ENFORCE_EQ( + ctx.Input("logits")->dims(), + ctx.Input(framework::GradVarName("logits"))->dims(), + "Input(logits) and its gradients should have a same shape."); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP(softmax_with_cross_entropy, ops::SoftmaxWithCrossEntropyOp, + ops::SoftmaxWithCrossEntropyOpMaker, + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyOpGrad); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_CPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cu b/paddle/operators/softmax_with_cross_entropy_op.cu new file mode 100644 index 00000000000..c9d47cc4aae --- /dev/null +++ b/paddle/operators/softmax_with_cross_entropy_op.cu @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "softmax_with_cross_entropy_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy, + ops::SoftmaxWithCrossEntropyKernel); +REGISTER_OP_GPU_KERNEL( + softmax_with_cross_entropy_grad, + ops::SoftmaxWithCrossEntropyGradKernel); diff --git a/paddle/operators/softmax_with_cost_op.h b/paddle/operators/softmax_with_cross_entropy_op.h similarity index 51% rename from paddle/operators/softmax_with_cost_op.h rename to paddle/operators/softmax_with_cross_entropy_op.h index fb544842b70..418fb540b8a 100644 --- a/paddle/operators/softmax_with_cost_op.h +++ b/paddle/operators/softmax_with_cross_entropy_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
 
-   http://www.apache.org/licenses/LICENSE-2.0
+       http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 
 #pragma once
 #include "paddle/framework/eigen.h"
@@ -25,13 +25,13 @@ template <typename T, int MajorType = Eigen::RowMajor,
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename Place, typename T>
-class SoftmaxWithLossKernel : public framework::OpKernel {
+class SoftmaxWithCrossEntropyKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {}
 };
 
 template <typename Place, typename T>
-class SoftmaxWithLossGradKernel : public framework::OpKernel {
+class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {}
 };
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 7d363b3108c..a059cd0b818 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -37,7 +37,7 @@ USE_OP(mul);
 USE_OP(mean);
 USE_OP(sigmoid);
 USE_OP(softmax);
-USE_OP(softmax_with_loss);
+USE_OP(softmax_with_cross_entropy);
 USE_OP(rowwise_add);
 USE_OP(fill_zeros_like);
 USE_NO_KERNEL_OP(recurrent);
-- 
GitLab
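Note: the CPU and GPU kernels registered in this patch have empty Compute()
bodies; only the operator interface and shape checks are added here. As a
reading aid, below is a minimal, self-contained C++ sketch of the fused
computation the DOC comment describes. It is not PaddlePaddle code: the
function names, the use of std::vector, and the float element type are
illustrative assumptions, and it handles one row of logits with one hard
label.

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Loss for one row of unscaled logits x and a hard label index:
//   loss = log(sum_j exp(x_j)) - x_label
// Subtracting the row maximum before exponentiating keeps every exponent
// <= 0, so exp() cannot overflow; this is the numerical-stability benefit
// of fusing softmax with the cross-entropy loss.
float SoftmaxCrossEntropyRow(const std::vector<float>& logits, int label) {
  float max_logit = logits[0];
  for (float v : logits) max_logit = std::fmax(max_logit, v);
  float sum_exp = 0.0f;
  for (float v : logits) sum_exp += std::exp(v - max_logit);
  // log-sum-exp(x) - x_label, with the max added back after the log.
  return max_logit + std::log(sum_exp) - logits[label];
}

// Gradient w.r.t. the logits reduces to softmax(x) - one_hot(label), so
// the fused backward pass never divides by small softmax probabilities.
std::vector<float> SoftmaxCrossEntropyGradRow(const std::vector<float>& logits,
                                              int label) {
  float max_logit = logits[0];
  for (float v : logits) max_logit = std::fmax(max_logit, v);
  float sum_exp = 0.0f;
  for (float v : logits) sum_exp += std::exp(v - max_logit);
  std::vector<float> grad(logits.size());
  for (std::size_t j = 0; j < logits.size(); ++j) {
    grad[j] = std::exp(logits[j] - max_logit) / sum_exp;  // softmax_j
  }
  grad[label] -= 1.0f;  // subtract the one-hot target
  return grad;
}

int main() {
  const std::vector<float> logits = {2.0f, 1.0f, 0.1f};
  const int label = 0;  // this sample is in class 0 with probability 1
  std::printf("loss = %f\n", SoftmaxCrossEntropyRow(logits, label));
  const std::vector<float> grad = SoftmaxCrossEntropyGradRow(logits, label);
  std::printf("dloss/dlogits = [%f, %f, %f]\n", grad[0], grad[1], grad[2]);
  return 0;
}

The operator applies this computation row-wise over the N rows of the
[N x K] logits tensor, producing the 1-D loss output Y of shape [N].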