diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
deleted file mode 100644
index e745dfa96b6c98d9546d37becac13c8352a38bfe..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/phi/kernels/funcs/axis_utils.h"
-
-namespace paddle {
-namespace operators {
-
-class SoftmaxWithCrossEntropyOpMaker
-    : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Logits",
-             "(Tensor, default: Tensor<float>), The input tensor of unscaled "
-             "log probabilities, whose dimension :attr:`axis` should be scaled "
-             "by softmax.");
-    AddInput(
-        "Label",
-        "(Tensor) The input tensor of groud truth label. If :attr:`soft_label` "
-        "is set to false, Label is a Tensor<int64> in same shape with "
-        "Input(Logits) except the shape in dimension :attr:`axis` as 1. If "
-        "soft_label is set to true, Label is a Tensor<float/double> in same "
-        "shape with Input(Logits).");
-    AddOutput(
-        "Softmax",
-        "(Tensor, default: Tensor<float>), A tensor in same shape with "
-        "Input(Logits). "
-        "The outputs value of softmax activation by given the input batch, "
-        "which will be used in backward calculation.")
-        .AsIntermediate();
-    AddOutput("Loss",
-              "(Tensor, default: Tensor<float>), A tensor in same shape with "
-              "Input(Logits) "
-              "except the shape in dimension :attr:`axis` as 1. The cross "
-              "entropy loss.");
-    AddAttr<bool>(
-        "soft_label",
-        "(bool, default: false), A flag to indicate whether to interpretant "
-        "the given labels as soft labels.")
-        .SetDefault(false);
-    AddAttr<bool>(
-        "use_softmax",
-        "(bool, default: true), A flag to indicate whether to do softmax ")
-        .SetDefault(true);
-    AddAttr<bool>(
-        "numeric_stable_mode",
-        "(bool, default: true), A flag to indicate whether to use more "
-        "numerically stable algorithm. This flag is only valid when "
-        "soft_label is false and GPU is used.")
-        .SetDefault(true);
-    AddAttr<int>(
-        "ignore_index",
-        "(int, default -100), Specifies a target value that is ignored and"
-        "does not contribute to the input gradient. Only valid if soft_label"
-        "is set to False")
-        .SetDefault(-100);
-    AddAttr<int>("axis",
-                 "The dimension index of Input(Logits) to perform softmax,"
-                 "default -1 for last dimension")
-        .SetDefault(-1);
-    AddComment(R"DOC(
-Softmax With Cross Entropy Operator.
-
-Cross entropy loss with softmax is used as the output layer extensively. This
-operator computes the softmax normalized values for each row of the input
-tensor, after which cross-entropy loss is computed. This provides a more
-numerically stable gradient.
-
-Because this operator performs a softmax on logits internally, it expects
-unscaled logits. This operator should not be used with the output of
-softmax operator since that would produce incorrect results.
-
-When the attribute soft_label is set false, this operators expects mutually
-exclusive hard labels, each sample in a batch is in exactly one class with a
-probability of 1.0. Each sample in the batch will have a single label.
-
-The equation is as follows:
-
-1) Hard label (one-hot label, so every sample has exactly one class)
-
-$$Loss_j =  -\text{Logit}_{Label_j} +
-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
-j = 1,..., K$$
-
-2) Soft label (each sample can have a distribution over all classes)
-
-$$Loss_j =  -\sum_{i=0}^{K}\text{Label}_i \left(\text{Logit}_i -
-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
-j = 1,...,K$$
-
-)DOC");
-  }
-};
-
-class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Logits"),
-        true,
-        platform::errors::InvalidArgument("Input(Logits) should be not null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Label"),
-        true,
-        platform::errors::InvalidArgument("Input(Label) should be not null."));
-
-    PADDLE_ENFORCE_EQ(ctx->HasOutput("Softmax"),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Output(Softmax) should be not null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("Loss"),
-        true,
-        platform::errors::InvalidArgument("Output(Loss) should be not null."));
-
-    auto axis = ctx->Attrs().Get<int>("axis");
-    auto logits_dims = ctx->GetInputDim("Logits");
-    auto labels_dims = ctx->GetInputDim("Label");
-    auto logits_rank = logits_dims.size();
-    PADDLE_ENFORCE_GE(axis,
-                      -logits_rank,
-                      platform::errors::InvalidArgument(
-                          "Attr(axis) value should be in range [-R, R-1], "
-                          "R is the rank of Input(Logits)."));
-    PADDLE_ENFORCE_LT(axis,
-                      logits_rank,
-                      platform::errors::InvalidArgument(
-                          "Attr(axis) value should be in range [-R, R-1], "
-                          "R is the rank of Input(Logits)."));
-
-    axis = phi::funcs::CanonicalAxis(axis, logits_rank);
-    for (int i = 0; i < logits_rank; i++) {
-      if (i != axis) {
-        if (ctx->IsRuntime() || (logits_dims[i] > 0 && labels_dims[i] > 0)) {
-          PADDLE_ENFORCE_EQ(logits_dims[i],
-                            labels_dims[i],
-                            platform::errors::InvalidArgument(
-                                "Input(Logits) and Input(Label) should in "
-                                "same shape in dimensions except axis."));
-        }
-      }
-    }
-
-    auto numeric_stable_mode = ctx->Attrs().Get<bool>("numeric_stable_mode");
-    if (axis != logits_rank - 1) {
-      PADDLE_ENFORCE_EQ(numeric_stable_mode,
-                        true,
-                        platform::errors::InvalidArgument(
-                            "Attr(axis) can only be -1 "
-                            "when not in numeric_stable_mode."));
-    }
-
-    bool soft_label = ctx->Attrs().Get<bool>("soft_label");
-    if (soft_label) {
-      if (ctx->IsRuntime() ||
-          (logits_dims[axis] > 0 && labels_dims[axis] > 0)) {
-        PADDLE_ENFORCE_EQ(logits_dims[axis],
-                          labels_dims[axis],
-                          platform::errors::InvalidArgument(
-                              "If Attr(soft_label) == true, "
-                              "the axis dimension of "
-                              "Input(X) and Input(Label) should be equal."));
-      }
-    } else {
-      if (ctx->IsRuntime() || labels_dims[axis] > 0) {
-        PADDLE_ENFORCE_EQ(
-            labels_dims[axis],
-            1UL,
-            platform::errors::InvalidArgument("If Attr(soft_label) == false, "
-                                              "the axis dimension of "
-                                              "Input(Label) should be 1."));
-      }
-    }
-
-    ctx->SetOutputDim("Softmax", logits_dims);
-
-    logits_dims[axis] = 1;
-    ctx->SetOutputDim("Loss", logits_dims);
-
-    ctx->ShareLoD("Logits", /*->*/ "Softmax");
-    ctx->ShareLoD("Logits", /*->*/ "Loss");
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(
-        OperatorWithKernel::IndicateVarDataType(ctx, "Logits"), ctx.GetPlace());
-  }
-};
-
-class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Loss")),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Input(Loss@Grad) should not be null."));
-    PADDLE_ENFORCE_EQ(ctx->HasInput("Softmax"),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Input(Softmax) should be not null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Label"),
-        true,
-        platform::errors::InvalidArgument("Input(Label) should be not null."));
-    PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Logits")),
-                      true,
-                      platform::errors::InvalidArgument(
-                          "Output(Logits@Grad) should be not null."));
-
-    auto axis = ctx->Attrs().Get<int>("axis");
-    auto softmax_dims = ctx->GetInputDim("Softmax");
-    auto labels_dims = ctx->GetInputDim("Label");
-    auto softmax_rank = softmax_dims.size();
-    PADDLE_ENFORCE_GE(axis,
-                      -softmax_rank,
-                      platform::errors::InvalidArgument(
-                          "Attr(axis) value should be in range [-R, R-1], "
-                          "R is the rank of Input(Logits)."));
-    PADDLE_ENFORCE_LT(axis,
-                      softmax_rank,
-                      platform::errors::InvalidArgument(
-                          "Attr(axis) value should be in range [-R, R-1], "
-                          "R is the rank of Input(Logits)."));
-
-    axis = phi::funcs::CanonicalAxis(axis, softmax_rank);
-    for (int i = 0; i < softmax_rank; i++) {
-      if (i != axis) {
-        if (ctx->IsRuntime() || (softmax_dims[i] > 0 && labels_dims[i] > 0)) {
-          PADDLE_ENFORCE_EQ(
-              softmax_dims[i],
-              labels_dims[i],
-              platform::errors::InvalidArgument(
-                  "Input(Logits) and Input(Label) should in same shape in "
-                  "dimensions except axis."));
-        }
-      }
-    }
-
-    bool soft_label = ctx->Attrs().Get<bool>("soft_label");
-    if (soft_label) {
-      if (ctx->IsRuntime() ||
-          (softmax_dims[axis] > 0 && labels_dims[axis] > 0)) {
-        PADDLE_ENFORCE_EQ(softmax_dims[axis],
-                          labels_dims[axis],
-                          platform::errors::InvalidArgument(
-                              "If Attr(soft_label) == true, "
-                              "the axis dimension of "
-                              "Input(X) and Input(Label) should be equal."));
-      }
-    } else {
-      if (ctx->IsRuntime() || labels_dims[axis] > 0) {
-        PADDLE_ENFORCE_EQ(
-            labels_dims[axis],
-            1UL,
-            platform::errors::InvalidArgument("If Attr(soft_label) == false, "
-                                              "the axis dimension of "
-                                              "Input(Label) should be 1."));
-      }
-    }
-
-    ctx->SetOutputDim(framework::GradVarName("Logits"),
-                      ctx->GetInputDim("Softmax"));
-  }
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(
-                              ctx, framework::GradVarName("Loss")),
-                          ctx.GetPlace());
-  }
-};
-
-template <typename T>
-class SoftmaxGradMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> grad_op) const override {
-    grad_op->SetType("softmax_with_cross_entropy_grad");
-    grad_op->SetInput("Label", this->Input("Label"));
-    grad_op->SetInput("Softmax", this->Output("Softmax"));
-    grad_op->SetInput(framework::GradVarName("Loss"), this->OutputGrad("Loss"));
-    grad_op->SetOutput(framework::GradVarName("Logits"),
-                       this->InputGrad("Logits"));
-    grad_op->SetAttrMap(this->Attrs());
-  }
-};
-
-DECLARE_INPLACE_OP_INFERER(SoftmaxWithCrossEntropyInplaceInferer,
-                           {"Logits", "Softmax"});
-
-DECLARE_INPLACE_OP_INFERER(SoftmaxWithCrossEntropyGradInplaceInferer,
-                           {"Softmax", framework::GradVarName("Logits")});
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-
-REGISTER_OPERATOR(softmax_with_cross_entropy,
-                  ops::SoftmaxWithCrossEntropyOp,
-                  ops::SoftmaxWithCrossEntropyOpMaker,
-                  ops::SoftmaxGradMaker<paddle::framework::OpDesc>,
-                  ops::SoftmaxGradMaker<paddle::imperative::OpBase>,
-                  ops::SoftmaxWithCrossEntropyInplaceInferer);
-REGISTER_OPERATOR(softmax_with_cross_entropy_grad,
-                  ops::SoftmaxWithCrossEntropyOpGrad,
-                  ops::SoftmaxWithCrossEntropyGradInplaceInferer);
-
-REGISTER_OP_VERSION(softmax_with_cross_entropy)
-    .AddCheckpoint(
-        R"ROC(
-              Add a new attribute [use_softmax] )ROC",
-        paddle::framework::compatible::OpVersionDesc().NewAttr(
-            "use_softmax", "A flag to indicate whether to do softmax", true));
diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index 0a6062dd8294c52febb3c82a15861feaaf155136..2b49b6950f68aca5431776deba5ed8538cea3f71 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -348,6 +348,17 @@
     func : crop_grad
     data_type : x
 
+- backward_op : cross_entropy_with_softmax_grad
+  forward : cross_entropy_with_softmax (Tensor input, Tensor label, bool soft_label=false, bool use_softmax=true, bool numeric_stable_mode=true, int ignore_index=-100, int axis=-1) -> Tensor(softmax), Tensor(loss)
+  args : (Tensor label, Tensor softmax, Tensor loss_grad, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis)
+  output : Tensor(input_grad)
+  infer_meta :
+    func : CrossEntropyWithSoftmaxGradInferMeta
+  kernel :
+    func : cross_entropy_with_softmax_grad
+    data_type : loss_grad
+  inplace : (softmax -> input_grad)
+
 - backward_op : cross_grad
   forward : cross (Tensor x, Tensor y, int axis = 9) -> Tensor(out)
   args : (Tensor x, Tensor y, Tensor out_grad, int axis)
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index b655f379cf71ebe31ecc2345e725086e5a008640..25582edca0c937a03b70071bbc4750f7063f5c3a 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -239,17 +239,6 @@
     kernel :
       func : conv3d_transpose_grad
 
-- backward_op : cross_entropy_with_softmax_grad
-  forward : cross_entropy_with_softmax (Tensor input, Tensor label, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis) -> Tensor(softmax), Tensor(loss)
-  args : (Tensor label, Tensor softmax, Tensor loss_grad, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis)
-  output : Tensor(input_grad)
-  infer_meta :
-    func : CrossEntropyWithSoftmaxGradInferMeta
-  kernel :
-    func : cross_entropy_with_softmax_grad
-    data_type : softmax
-  inplace : (softmax -> input_grad)
-
 - backward_op : cumsum_grad
   forward : cumsum(Tensor x, Scalar axis, bool flatten, bool exclusive, bool reverse) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, Scalar axis, bool flatten, bool exclusive, bool reverse)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index d827e7eabbfa7b0ab38073bcead528c249826fb2..972f85070ba8f8a04ea304ebf01f21c9f1cfb353 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -322,17 +322,6 @@
   output : Tensor(out)
   invoke : copy_to_impl(x, place, blocking)
 
-# Part of python API paddle.nn.functional.cross_entropy
-- op : cross_entropy_with_softmax
-  args : (Tensor input, Tensor label, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis)
-  output : Tensor(softmax), Tensor(loss)
-  infer_meta :
-    func : CrossEntropyWithSoftmaxInferMeta
-  kernel :
-    func : cross_entropy_with_softmax
-    data_type : input
-  backward : cross_entropy_with_softmax_grad
-
 - op : cumsum
   args : (Tensor x, Scalar axis, bool flatten, bool exclusive, bool reverse)
   output : Tensor(out)
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 552895cf25fde4d86fe97801f3c99606a4aeb9e1..e9a790e912871d5461f92aa471e91b5898ecca7a 100644
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -456,6 +456,13 @@
   outputs :
     out : Out
 
+- op : cross_entropy_with_softmax (softmax_with_cross_entropy)
+  backward : cross_entropy_with_softmax_grad (softmax_with_cross_entropy_grad)
+  inputs :
+    {input : Logits, label : Label}
+  outputs :
+    {softmax : Softmax, loss : Loss}
+
 - op : cumprod
   backward : cumprod_grad
   inputs :
diff --git a/paddle/phi/api/yaml/op_version.yaml b/paddle/phi/api/yaml/op_version.yaml
index 8014103fad92af56b8a51c706232cf76f0064cc5..b36cd86f78ae8a983794b79e6f05acaf89d449ae 100644
--- a/paddle/phi/api/yaml/op_version.yaml
+++ b/paddle/phi/api/yaml/op_version.yaml
@@ -191,6 +191,14 @@
       - add_input : ShiftsTensor
         comment : The number of places by which the elements of the tensor are shifted.
 
+- op : softmax_with_cross_entropy
+  version :
+    - checkpoint : Add a new attribute [use_softmax]
+      action :
+        - add_attr : use_softmax
+          comment : A flag to indicate whether to do softmax
+          default : "true"
+
 - op : trace
   version :
     - checkpoint : Upgrade trace add a new attribute [axis2]
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index 40e47845fe90022246d2253d7c0802a5ee29383e..5d5eb8d9fc3fe3a726e9f84760cd5afc26ae943b 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -380,6 +380,18 @@
     data_type : x
   backward : cross_grad
 
+# Part of python API paddle.nn.functional.cross_entropy
+- op : cross_entropy_with_softmax
+  args : (Tensor input, Tensor label, bool soft_label=false, bool use_softmax=true, bool numeric_stable_mode=true, int ignore_index=-100, int axis=-1)
+  output : Tensor(softmax), Tensor(loss)
+  inplace : (input -> softmax)
+  infer_meta :
+    func : CrossEntropyWithSoftmaxInferMeta
+  kernel :
+    func : cross_entropy_with_softmax
+    data_type : input
+  backward : cross_entropy_with_softmax_grad
+
 - op : cumprod
   args : (Tensor x, int dim)
   output : Tensor(out)
diff --git a/paddle/phi/ops/compat/softmax_with_cross_entropy_sig.cc b/paddle/phi/ops/compat/softmax_with_cross_entropy_sig.cc
deleted file mode 100644
index c75d4f711dc0fd3456e5b67c5bfa4b59cee77ce3..0000000000000000000000000000000000000000
--- a/paddle/phi/ops/compat/softmax_with_cross_entropy_sig.cc
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature SoftmaxWithCrossEntropyOpArgumentMapping(
-    const ArgumentMappingContext& ctx) {
-  return KernelSignature("cross_entropy_with_softmax",
-                         {"Logits", "Label"},
-                         {"soft_label",
-                          "use_softmax",
-                          "numeric_stable_mode",
-                          "ignore_index",
-                          "axis"},
-                         {"Softmax", "Loss"});
-}
-
-KernelSignature SoftmaxWithCrossEntropyGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx) {
-  return KernelSignature("cross_entropy_with_softmax_grad",
-                         {"Label", "Softmax", "Loss@GRAD"},
-                         {"soft_label",
-                          "use_softmax",
-                          "numeric_stable_mode",
-                          "ignore_index",
-                          "axis"},
-                         {"Logits@GRAD"});
-}
-
-}  // namespace phi
-
-PD_REGISTER_BASE_KERNEL_NAME(softmax_with_cross_entropy,
-                             cross_entropy_with_softmax);
-PD_REGISTER_BASE_KERNEL_NAME(softmax_with_cross_entropy_grad,
-                             cross_entropy_with_softmax_grad);
-
-PD_REGISTER_ARG_MAPPING_FN(softmax_with_cross_entropy,
-                           phi::SoftmaxWithCrossEntropyOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(softmax_with_cross_entropy_grad,
-                           phi::SoftmaxWithCrossEntropyGradOpArgumentMapping);
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 874faafc92338c417e6162c9e9ddfe546dde5673..85c96de4bfbc79c94657111b576d787f93e7f80f 100644
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -313,13 +313,6 @@ def fluid_softmax_with_cross_entropy(
         loss = helper.create_variable_for_type_inference(dtype=logits.dtype)
 
         outputs = {'Softmax': softmax, 'Loss': loss}
-        if core.is_compiled_with_custom_device(
-            "npu"
-        ) or core.is_compiled_with_custom_device("mlu"):
-            backprop = helper.create_variable_for_type_inference(
-                dtype=logits.dtype
-            )
-            outputs['Backprop'] = backprop
         helper.append_op(
             type='softmax_with_cross_entropy',
             inputs={'Logits': logits, 'Label': label},
@@ -2768,13 +2761,6 @@ def cross_entropy(
         out = helper.create_variable_for_type_inference(dtype=input.dtype)
 
         outputs = {'Softmax': softmax, 'Loss': out}
-        if core.is_compiled_with_custom_device(
-            "npu"
-        ) or core.is_compiled_with_custom_device("mlu"):
-            backprop = helper.create_variable_for_type_inference(
-                dtype=input.dtype
-            )
-            outputs['Backprop'] = backprop
         helper.append_op(
             type='softmax_with_cross_entropy',
            inputs={'Logits': input, 'Label': label},
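
For readers following the migration, the two loss formulas quoted in the deleted operator's DOC block can be sanity-checked with a small NumPy sketch. This is an illustrative reproduction of those formulas (hard-label and soft-label cross entropy computed on top of a numerically stabilized softmax), not code from this PR; the helper name cross_entropy_with_softmax_ref is made up for the example.

import numpy as np


def cross_entropy_with_softmax_ref(logits, label, soft_label=False, axis=-1):
    # Softmax via a shifted log-sum-exp, mirroring the numeric_stable_mode idea.
    shifted = logits - logits.max(axis=axis, keepdims=True)
    log_z = np.log(np.exp(shifted).sum(axis=axis, keepdims=True))
    log_softmax = shifted - log_z
    softmax = np.exp(log_softmax)
    if soft_label:
        # Loss_j = -sum_i Label_i * (Logit_i - log sum_k exp(Logit_k))
        loss = -(label * log_softmax).sum(axis=axis, keepdims=True)
    else:
        # Loss_j = -Logit_{Label_j} + log sum_i exp(Logit_i); here Label holds
        # class ids with size 1 on the class axis, matching soft_label=false.
        loss = -np.take_along_axis(log_softmax, label.astype(np.int64), axis=axis)
    return softmax, loss


logits = np.random.randn(4, 5).astype("float32")
hard_label = np.random.randint(0, 5, size=(4, 1))
softmax, loss = cross_entropy_with_softmax_ref(logits, hard_label)
print(softmax.shape, loss.shape)  # (4, 5) (4, 1)

The keepdims behaviour mirrors the InferShape logic in the deleted operator, where Loss keeps the shape of Logits except that the softmax axis is reduced to 1.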