From a91964c8fed2d1438b1bf9aa70e503121e4499dd Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Thu, 14 Mar 2019 19:55:23 +0800
Subject: [PATCH] Revert "PaddingRNN model memory optimize"

test=develop
---
 paddle/fluid/operators/cross_entropy_op.cc    | 177 ++----------------
 paddle/fluid/operators/cross_entropy_op.cu    |  10 -
 paddle/fluid/operators/cross_entropy_op.h     |  81 --------
 paddle/fluid/operators/expand_op.cc           |  19 +-
 paddle/fluid/operators/math.h                 |  42 -----
 paddle/fluid/operators/math/cross_entropy.cu  |  13 +-
 paddle/fluid/operators/selu_op.h              |   5 +-
 .../sequence_ops/sequence_softmax_op.cu       |   4 +-
 .../sigmoid_cross_entropy_with_logits_op.cu   |   6 +-
 python/paddle/fluid/layers/nn.py              |  16 --
 .../tests/unittests/test_cross_entropy2_op.py |  79 --------
 .../tests/unittests/test_dist_transpiler.py   |  20 +-
 12 files changed, 53 insertions(+), 419 deletions(-)
 delete mode 100644 paddle/fluid/operators/math.h
 delete mode 100644 python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py

diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc
index 7e744e68e97..3adc7baebdd 100644
--- a/paddle/fluid/operators/cross_entropy_op.cc
+++ b/paddle/fluid/operators/cross_entropy_op.cc
@@ -13,21 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/cross_entropy_op.h"
-#include
 #include
-#include
 
 namespace paddle {
 namespace operators {
 
-class CrossEntropyOpBase : public framework::OperatorWithKernel {
+class CrossEntropyOp : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
-
    PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
 
    auto x_dims = ctx->GetInputDim("X");
@@ -46,8 +43,7 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel {
                      "Input(X) and Input(Label) shall have the same shape "
                      "except the last dimension.");
    }
-
-    if (IsSoftLabel(ctx)) {
+    if (ctx->Attrs().Get("soft_label")) {
      if (check) {
        PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
                          "If Attr(soft_label) == true, the last dimension of "
@@ -73,24 +69,21 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel {
    return framework::OpKernelType(ctx.Input("X")->type(),
                                   ctx.device_context());
  }
-
-  virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const {
-    return ctx->Attrs().Get("soft_label");
-  }
};

-class CrossEntropyGradientOpBase : public framework::OperatorWithKernel {
+class CrossEntropyGradientOp : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext* ctx) const {
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) should be not null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
                   "Input(Y@GRAD) shoudl be not null.");
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
                   "Output(X@GRAD) should be not null.");

-    auto x_dims = GetXDim(ctx);
+    auto x_dims = ctx->GetInputDim("X");
    auto label_dims = ctx->GetInputDim("Label");
    auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
    int rank =
x_dims.size(); @@ -115,7 +108,9 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { "The Input(X) and Input(Y@Grad) should have the same " "shape except the last dimension."); } - if (IsSoftLabel(ctx)) { + PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, + "The last dimension of Input(Y@Grad) should be 1."); + if (ctx->Attrs().Get("soft_label")) { if (check) { PADDLE_ENFORCE_EQ( x_dims[rank - 1], label_dims[rank - 1], @@ -128,10 +123,7 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { "Input(Label) should be 1."); } ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, - "The last dimension of Input(Y@Grad) should be 1."); - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD(VarNameWithXLoD(), framework::GradVarName("X")); + ctx->ShareLoD("X", framework::GradVarName("X")); } protected: @@ -139,28 +131,8 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { // is determined by its input "X". framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - ctx.Input(framework::GradVarName("Y"))->type(), - ctx.device_context()); - } - - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - return ctx->GetInputDim("X"); - } - - virtual const char* VarNameWithXLoD() const { return "X"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return ctx->Attrs().Get("soft_label"); - } -}; - -class CrossEntropyOpInferVarType - : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map GetInputOutputWithSameType() - const override { - return std::unordered_map{{"X", /*->*/ "Y"}}; + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.device_context()); } }; @@ -228,122 +200,22 @@ or not. But the output only shares the LoD information with input X. 
} }; -class CrossEntropyGradientOp : public CrossEntropyGradientOpBase { - public: - using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - CrossEntropyGradientOpBase::InferShape(ctx); - } -}; - -class CrossEntropyOp2 : public CrossEntropyOpBase { - public: - using CrossEntropyOpBase::CrossEntropyOpBase; - - void InferShape(framework::InferShapeContext* ctx) const override { - CrossEntropyOpBase::InferShape(ctx); - - PADDLE_ENFORCE(ctx->HasOutput("XShape"), - "Output(XShape) should be not null."); - - auto x_dims = ctx->GetInputDim("X"); - auto x_dims_vec = framework::vectorize(x_dims); - x_dims_vec.push_back(0); - ctx->SetOutputDim("XShape", framework::make_ddim(x_dims_vec)); - ctx->ShareLoD("X", /*->*/ "XShape"); - } - - protected: - bool IsSoftLabel(framework::InferShapeContext* ctx) const override { - return false; - } -}; - -class CrossEntropyGradientOp2 : public CrossEntropyGradientOpBase { - public: - using CrossEntropyGradientOpBase::CrossEntropyGradientOpBase; - - protected: - virtual framework::DDim GetXDim(framework::InferShapeContext* ctx) const { - auto x_shape = ctx->GetInputDim("XShape"); - return framework::DDim(x_shape.Get(), x_shape.size() - 1); - } - - virtual const char* VarNameWithXLoD() const { return "XShape"; } - - virtual bool IsSoftLabel(framework::InferShapeContext* ctx) const { - return false; - } -}; - -class CrossEntropyOpMaker2 : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "(Tensor, default Tensor), a tensor whose last dimension " - "size is equal to the number of classes. This input is a " - "probability computed by the previous operator, which is almost " - "always the result of a softmax operator."); - AddInput( - "Label", - "(Tensor), the tensor which represents the ground truth. It has the " - "same shape with 'X' except the last dimension. One hot Tensor."); - AddOutput("Y", - "(Tensor, default Tensor), a tensor whose shape is same " - "with 'X' except that the last dimension size is 1. It " - "represents the cross entropy loss."); - AddOutput("XShape", "Temporaily variable to save shape and LoD of X."); - AddAttr("ignore_index", - "(int, default -100), Specifies a target value that is" - "ignored and does not contribute to the input gradient." - "Only valid if soft_label is set to False") - .SetDefault(-100); - AddComment(R"DOC( -Hard-label CrossEntropy Operator. - -The input 'X' and 'Label' will first be logically flattened to 2-D matrixs. -The matrix's second dimension(row length) is as same as the original last -dimension, and the first dimension(column length) is the product of all other -original dimensions. Then the softmax computation will take palce on each raw -of flattened matrixs. - -Only support hard label. - -Both the input X and Label can carry the LoD (Level of Details) information, -or not. But the output only shares the LoD information with input X. 
- -)DOC"); - } -}; - -class CrossEntropyGradOpDescMaker2 : public framework::SingleGradOpDescMaker { - public: - using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - +class CrossEntropyOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { protected: - std::unique_ptr Apply() const override { - std::unique_ptr op(new framework::OpDesc()); - op->SetType("cross_entropy_grad2"); - op->SetInput("Label", Input("Label")); - op->SetInput("Y", Output("Y")); - op->SetInput("XShape", Output("XShape")); - op->SetInput(framework::GradVarName("Y"), OutputGrad("Y")); - op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - op->SetAttrMap(Attrs()); - return op; + std::unordered_map GetInputOutputWithSameType() + const override { + return std::unordered_map{{"X", /*->*/ "Y"}}; } }; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; using CPUCtx = paddle::platform::CPUDeviceContext; -REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOpBase, - ops::CrossEntropyOpMaker, ops::CrossEntropyOpInferVarType, +REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker, + ops::CrossEntropyOpInferVarType, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp); REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, @@ -351,14 +223,3 @@ REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel, REGISTER_OP_CPU_KERNEL(cross_entropy_grad, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel); - -REGISTER_OPERATOR(cross_entropy2, ops::CrossEntropyOp2, - ops::CrossEntropyOpMaker2, ops::CrossEntropyOpInferVarType, - ops::CrossEntropyGradOpDescMaker2); -REGISTER_OPERATOR(cross_entropy_grad2, ops::CrossEntropyGradientOp2); -REGISTER_OP_CPU_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); -REGISTER_OP_CPU_KERNEL(cross_entropy_grad2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy_op.cu b/paddle/fluid/operators/cross_entropy_op.cu index 243e7f52c1e..fcd34383a85 100644 --- a/paddle/fluid/operators/cross_entropy_op.cu +++ b/paddle/fluid/operators/cross_entropy_op.cu @@ -27,13 +27,3 @@ REGISTER_OP_CUDA_KERNEL( cross_entropy_grad, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel, ops::CrossEntropyGradientOpKernel); - -REGISTER_OP_CUDA_KERNEL(cross_entropy2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2, - ops::CrossEntropyOpKernel2); - -REGISTER_OP_CUDA_KERNEL( - cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2, - ops::CrossEntropyGradientOpKernel2); diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 05609e4bc20..f123e11542d 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -15,7 +15,6 @@ limitations under the License. 
*/ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/for_range.h" @@ -138,85 +137,5 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel { } }; -template -struct HardLabelCrossEntropyBackwardFunctor { - HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, const T* dy, - const int64_t* label, - int64_t ignore_index, - int64_t feature_size) - : dx_(dx), - y_(y), - dy_(dy), - label_(label), - ignore_index_(ignore_index), - feature_size_(feature_size) {} - - HOSTDEVICE void operator()(int64_t idx) const { - auto row_idx = idx / feature_size_; - auto col_idx = idx % feature_size_; - auto label = label_[row_idx]; - if (label == col_idx && label != ignore_index_) { - dx_[idx] = -dy_[row_idx] * real_exp(y_[row_idx]); - } else { - dx_[idx] = 0; - } - } - - T* dx_; - const T* y_; - const T* dy_; - const int64_t* label_; - int64_t ignore_index_; - int64_t feature_size_; -}; - -template -class CrossEntropyOpKernel2 : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x_original = ctx.Input("X"); - int rank = x_original->dims().size(); - - auto x = framework::ReshapeToMatrix(*x_original, rank - 1); - auto label = - framework::ReshapeToMatrix(*ctx.Input("Label"), rank - 1); - auto* y = ctx.Output("Y"); - y->mutable_data(ctx.GetPlace()); - - auto ignore_index = ctx.Attr("ignore_index"); - - math::CrossEntropyFunctor()( - ctx.template device_context(), y, &x, &label, false, - ignore_index); - } -}; - -template -class CrossEntropyGradientOpKernel2 : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* y = ctx.Input("Y"); - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* label = ctx.Input("Label"); - - auto* p_dx = dx->mutable_data(ctx.GetPlace()); - auto* p_y = y->data(); - auto* p_dy = dy->data(); - auto* p_label = label->data(); - - int64_t ignore_index = ctx.Attr("ignore_index"); - int rank = dx->dims().size(); - int64_t feature_size = dx->dims()[rank - 1]; - int64_t batch_size = framework::product(dx->dims()) / feature_size; - - platform::ForRange for_range( - ctx.template device_context(), - batch_size * feature_size); - for_range(HardLabelCrossEntropyBackwardFunctor( - p_dx, p_y, p_dy, p_label, ignore_index, feature_size)); - } -}; - } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index fcb2be93635..44a2f37b667 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/expand_op.h" -#include #include namespace paddle { @@ -139,28 +138,12 @@ class ExpandGradOp : public framework::OperatorWithKernel { } }; -class ExpandGradOpDescMaker : public framework::SingleGradOpDescMaker { - public: - using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; - - protected: - std::unique_ptr Apply() const override { - std::unique_ptr op(new framework::OpDesc()); - op->SetType("expand_grad"); - op->SetInput("X", Input("X")); - op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); - op->SetOutput(framework::GradVarName("X"), InputGrad("X")); - op->SetAttrMap(Attrs()); - return op; - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker, - ops::ExpandGradOpDescMaker); + paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp); REGISTER_OP_CPU_KERNEL( expand, ops::ExpandKernel, diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h deleted file mode 100644 index 8cc24200d37..00000000000 --- a/paddle/fluid/operators/math.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/hostdevice.h" - -#include "math.h" // NOLINT - -namespace paddle { -namespace operators { - -inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) { - return static_cast(::expf(static_cast(x))); -} - -inline HOSTDEVICE float real_exp(float x) { return ::expf(x); } - -inline HOSTDEVICE double real_exp(double x) { return ::exp(x); } - -inline HOSTDEVICE platform::float16 real_log(platform::float16 x) { - return static_cast(::logf(static_cast(x))); -} - -inline HOSTDEVICE float real_log(float x) { return ::logf(x); } - -inline HOSTDEVICE double real_log(double x) { return ::log(x); } - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index 44cbdf2e988..cb200ec8d6e 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/cuda_device_function.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -21,6 +20,17 @@ namespace paddle { namespace operators { namespace math { +namespace { + +__device__ __forceinline__ float real_log(float x) { return logf(x); } + +__device__ __forceinline__ double real_log(double x) { return log(x); } + +__device__ __forceinline__ platform::float16 real_log( + const platform::float16& val) { + return static_cast(logf(static_cast(val))); +} + template __global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label, const int N, const int D, @@ -51,6 +61,7 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label, Y[blockIdx.x] = -val; } } +} // namespace template class CrossEntropyFunctor { diff --git a/paddle/fluid/operators/selu_op.h b/paddle/fluid/operators/selu_op.h index b2fc834c42f..bdb506885c9 100644 --- a/paddle/fluid/operators/selu_op.h +++ b/paddle/fluid/operators/selu_op.h @@ -15,12 +15,13 @@ limitations under the License. */ #pragma once #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/for_range.h" - namespace paddle { namespace operators { +static HOSTDEVICE float real_exp(float x) { return expf(x); } +static HOSTDEVICE float real_exp(double x) { return exp(x); } + template struct SeluFunctor { SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr) diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index a9dc0a4fda2..cc5e9821903 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -14,7 +14,6 @@ limitations under the License. */ #include #include // NOLINT -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h" namespace paddle { @@ -22,6 +21,9 @@ namespace operators { using LoDTensor = framework::LoDTensor; +__device__ __forceinline__ float real_exp(float x) { return expf(x); } +__device__ __forceinline__ double real_exp(double x) { return exp(x); } + template using BlockReduce = cub::BlockReduce; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu index aea69de6434..2a4570ef5ce 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cu @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "cub/cub.cuh" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h" #include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/hostdevice.h" @@ -22,6 +21,11 @@ namespace operators { using Tensor = framework::Tensor; +static HOSTDEVICE float real_exp(float x) { return expf(x); } +static HOSTDEVICE float real_exp(double x) { return exp(x); } +static HOSTDEVICE float real_log(float x) { return logf(x); } +static HOSTDEVICE float real_log(double x) { return log(x); } + static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 67ff1425ac8..d0bff52e434 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1432,8 +1432,6 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) """ - if not soft_label: - return cross_entropy2(input, label, ignore_index) helper = LayerHelper('cross_entropy', **locals()) out = helper.create_variable_for_type_inference(dtype=input.dtype) helper.append_op( @@ -1446,20 +1444,6 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): return out -def cross_entropy2(input, label, ignore_index=kIgnoreIndex): - helper = LayerHelper('cross_entropy2', **locals()) - out = helper.create_variable_for_type_inference(dtype=input.dtype) - xshape = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='cross_entropy2', - inputs={'X': [input], - 'Label': [label]}, - outputs={'Y': [out], - 'XShape': [xshape]}, - attrs={'ignore_index': ignore_index}) - return out - - def bpr_loss(input, label, name=None): """ Bayesian Personalized Ranking Loss Operator. diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py deleted file mode 100644 index c29d422361b..00000000000 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy2_op.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from op_test import OpTest -import unittest -import numpy as np -import six - - -class CrossEntropy2OpTestBase(OpTest): - def initParameters(self): - return [32, 64], 'float32', -100 - - def calc_output(self, logits, label, ignore_index): - ret = np.zeros(shape=label.shape, dtype=logits.dtype) - for idx in six.moves.range(label.shape[0]): - if label[idx] == ignore_index: - continue - ret[idx] = -np.log(logits[idx][label[idx]]) - return ret - - def setUp(self): - self.shape, self.dtype, self.ignore_index = self.initParameters() - self.op_type = 'cross_entropy2' - feature_size = int(self.shape[-1]) - batch_size = int(np.prod(self.shape) / feature_size) - logits = (np.random.random(size=self.shape) + 1).astype(self.dtype) - label = np.random.random_integers( - low=0, high=feature_size - 1, - size=self.shape[0:-1] + [1]).astype('int64') - outputs = self.calc_output( - np.reshape(logits, [batch_size, feature_size]), - np.reshape(label, [batch_size, 1]), self.ignore_index) - self.inputs = {'X': logits, 'Label': label} - self.outputs = { - 'Y': np.reshape(outputs, label.shape), - 'XShape': np.zeros( - shape=logits.shape, dtype=logits.dtype) - } - self.attrs = {'ignore_index': self.ignore_index} - - def test_check_output(self): - self.check_output(no_check_set=['XShape']) - - def test_check_grad(self): - self.check_grad( - inputs_to_check=['X'], - output_names=['Y'], - no_grad_set=['XShape', 'Label']) - - -class CrossEntropy2OpTest2(CrossEntropy2OpTestBase): - def initParameters(self): - return [32, 64], 'float64', 3 - - -class CrossEntropy2OpTest3(CrossEntropy2OpTestBase): - def initParameters(self): - return [4, 8, 16, 32], 'float32', -100 - - -class CrossEntropy2OpTest4(CrossEntropy2OpTestBase): - def initParameters(self): - return [4, 8, 16, 32], 'float32', 3 - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index f81d4fda50b..12132477d28 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -524,8 +524,8 @@ class TestLocalLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -564,8 +564,8 @@ class TestDistLookupTable(TestDistLookupTableBase): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -612,8 +612,8 @@ class TestAsyncLocalLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 
'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', @@ -652,8 +652,8 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase): ops = [ 'split_ids', 'prefetch', 'merge_ids', 'sequence_pool', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', - 'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant', - 'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send', + 'elementwise_add', 'cross_entropy', 'mean', 'fill_constant', + 'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', @@ -841,8 +841,8 @@ class TestRemoteLookupTable(TestDistLookupTableBase): ops = [ 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add', - 'cross_entropy2', 'mean', 'fill_constant', 'mean_grad', - 'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad', + 'cross_entropy', 'mean', 'fill_constant', 'mean_grad', + 'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad', 'split_selected_rows', 'send', 'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad', -- GitLab
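
For context on what this revert removes: the hard-label loss computed by the deleted cross_entropy2 operator (and checked by the deleted test_cross_entropy2_op.py) can be expressed as a small NumPy reference. This is only an illustrative sketch mirroring the deleted test's calc_output helper; the function name is made up here, while the [N, D] probability input, the int64 label, and the ignore_index default of -100 follow the deleted code.

import numpy as np

def hard_label_cross_entropy(probs, label, ignore_index=-100):
    # probs: [N, D] rows of probabilities (e.g. softmax output).
    # label: [N] int64 class ids; rows whose label == ignore_index contribute 0.
    loss = np.zeros((label.shape[0], 1), dtype=probs.dtype)
    for i in range(label.shape[0]):
        if label[i] == ignore_index:
            continue
        loss[i, 0] = -np.log(probs[i, label[i]])
    return loss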
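The deleted HardLabelCrossEntropyBackwardFunctor in cross_entropy_op.h routes the gradient only through the labelled column, recovering the forward probability from the saved loss (since y = -log(probs[label]), exp(y) = 1 / probs[label]). A matching sketch, reusing the NumPy import above; shapes and names are again illustrative assumptions:

def hard_label_cross_entropy_grad(y, dy, label, num_classes, ignore_index=-100):
    # y, dy: [N, 1] forward loss and upstream gradient; label: [N] int64.
    # dx[i, label[i]] = -dy[i] * exp(y[i]) = -dy[i] / probs[i, label[i]]; 0 elsewhere.
    dx = np.zeros((label.shape[0], num_classes), dtype=y.dtype)
    for i in range(label.shape[0]):
        if label[i] != ignore_index:
            dx[i, label[i]] = -dy[i, 0] * np.exp(y[i, 0])
    return dx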
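The soft-label path kept by this revert (the SoftCrossEntropyKernel in math/cross_entropy.cu, i.e. the soft_label == true branch of the remaining cross_entropy op) reduces each soft label row against the log-probabilities. A one-line sketch under the same assumptions:

def soft_label_cross_entropy(probs, label):
    # probs, label: [N, D]; each label row is a soft distribution over D classes.
    # Y[i] = -sum_j label[i, j] * log(probs[i, j]), returned with shape [N, 1].
    return -(label * np.log(probs)).sum(axis=1, keepdims=True)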