From cfdde0ec13838e7720d114c8c173d8c05fd99537 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Thu, 15 Apr 2021 11:50:32 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Deepmd=20Support=E3=80=91add=20IsIniti?= =?UTF-8?q?alized=20and=20tanh=20double=20grad=20(#32188)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add IsInitialized * rm additional log and add tanh double grad * rename is_initialized --- paddle/fluid/extension/include/ext_tensor.h | 3 + paddle/fluid/extension/src/ext_tensor.cc | 18 ++-- paddle/fluid/framework/custom_tensor_test.cc | 17 ++++ paddle/fluid/operators/activation_op.cc | 48 +++++++++++ paddle/fluid/operators/activation_op.cu | 13 +++ paddle/fluid/operators/activation_op.h | 83 ++++++++++++++++++- .../unittests/test_activation_nn_grad.py | 23 +++++ 7 files changed, 195 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/extension/include/ext_tensor.h b/paddle/fluid/extension/include/ext_tensor.h index 52606b2a7f5..fa91490e6cd 100644 --- a/paddle/fluid/extension/include/ext_tensor.h +++ b/paddle/fluid/extension/include/ext_tensor.h @@ -113,6 +113,9 @@ class PD_DLL_DECL Tensor { /// \brief Cast datatype from one to another Tensor cast(const DataType& target_type) const; + /// \brief Check Tensor is initialized + bool is_initialized() const; + #ifdef PADDLE_WITH_CUDA /// \bref Get current stream of Tensor cudaStream_t stream() const; diff --git a/paddle/fluid/extension/src/ext_tensor.cc b/paddle/fluid/extension/src/ext_tensor.cc index e9705e2101c..8b2f7cc5bf1 100644 --- a/paddle/fluid/extension/src/ext_tensor.cc +++ b/paddle/fluid/extension/src/ext_tensor.cc @@ -103,15 +103,6 @@ void GpuCopy(T *src, T *dst, PlaceType src_plc, PlaceType dst_plc, void Tensor::reshape(const std::vector &shape) { GET_CASTED_TENSOR auto new_dim = framework::make_ddim(shape); - if (tensor->numel() != framework::product(new_dim)) { - LOG(WARNING) << "Custom Op: Calling reshape to a new shape which is bigger " - "or smaller" - << "than original shape will not change your tensor's memory " - "Please call" - << "paddle::Tensor::mutable_data() after to reallocate " - "your tensor's size." 
- << std::endl; - } tensor->Resize(new_dim); } @@ -393,6 +384,15 @@ int64_t Tensor::size() const { return tensor->numel(); } +bool Tensor::is_initialized() const { + GET_CASTED_TENSOR; + if (tensor->IsInitialized()) { + return true; + } else { + return false; + } +} + #ifdef PADDLE_WITH_CUDA cudaStream_t Tensor::stream() const { if (!stream_.IsStreamSet()) { diff --git a/paddle/fluid/framework/custom_tensor_test.cc b/paddle/fluid/framework/custom_tensor_test.cc index 8d6fd4efd5a..a65dcbd55f9 100644 --- a/paddle/fluid/framework/custom_tensor_test.cc +++ b/paddle/fluid/framework/custom_tensor_test.cc @@ -220,6 +220,21 @@ void GroupTestDtypeConvert() { paddle::DataType::FLOAT16); } +void TestInitilized() { + paddle::Tensor test_tensor(paddle::PlaceType::kCPU); + CHECK(test_tensor.is_initialized() == false); + test_tensor.reshape({1, 1}); + test_tensor.mutable_data(); + CHECK(test_tensor.is_initialized() == true); + float* tensor_data = test_tensor.data(); + for (int i = 0; i < test_tensor.size(); i++) { + tensor_data[i] = 0.5; + } + for (int i = 0; i < test_tensor.size(); i++) { + CHECK(tensor_data[i] == 0.5); + } +} + TEST(CustomTensor, copyTest) { VLOG(2) << "TestCopy"; GroupTestCopy(); @@ -233,4 +248,6 @@ TEST(CustomTensor, copyTest) { GroupTestCast(); VLOG(2) << "TestDtypeConvert"; GroupTestDtypeConvert(); + VLOG(2) << "TestInitilized"; + TestInitilized(); } diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 94f2eb3672b..1cac9ed9f1d 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -782,6 +782,26 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel { } }; +template +class TanhDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { + public: + using ::paddle::framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("tanh_grad_grad"); + // input1: Out + op->SetInput("Out", this->Input("Out")); + // input2: ddx + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetInput("DOut", this->Input(framework::GradVarName("Out"))); + op->SetAttrMap(this->Attrs()); + // output: ddy + op->SetOutput("DOutNew", this->InputGrad("Out")); + op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out"))); + } +}; + // ReluGrad: dx = dy if y >= 0 else 0 // ReluGradGrad: ddy = ddx if y >= 0 else 0 template @@ -1041,6 +1061,34 @@ namespace plat = paddle::platform; FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL); +/* ========================== tanh register ============================= */ +REGISTER_OPERATOR( + tanh, ops::ActivationOp, ops::TanhOpMaker, ops::ActivationOpInferVarType, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::framework::OpDesc>, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::imperative::OpBase>, + std::conditional>(), + ops::ActFwdInplaceInferer, void>::type); +REGISTER_OPERATOR(tanh_grad, ops::ActivationOpGrad, + ops::ActivationGradOpInplaceInferer, + ops::TanhDoubleGradMaker, + ops::TanhDoubleGradMaker) +REGISTER_OPERATOR( + tanh_grad_grad, + ops::ActivationOpDoubleGrad::FwdDeps()>, + ops::ActivationDoubleGradOpInplaceInferer); + +REGISTER_ACTIVATION_CPU_KERNEL(tanh, Tanh, TanhFunctor, TanhGradFunctor); +REGISTER_OP_CPU_KERNEL( + tanh_grad_grad, ops::TanhDoubleGradKernel>, + ops::TanhDoubleGradKernel>, + ops::TanhDoubleGradKernel>); +/* 
========================================================================== */ + /* ========================== relu register ============================= */ REGISTER_OPERATOR( relu, ops::ActivationOp, ops::ReluOpMaker, ops::ActivationOpInferVarType, diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu index 04f329088fa..781a97c1ffc 100644 --- a/paddle/fluid/operators/activation_op.cu +++ b/paddle/fluid/operators/activation_op.cu @@ -468,6 +468,19 @@ REGISTER_OP_CUDA_KERNEL( ops::ReluGradGradFunctor>); /* ========================================================================== */ +/* =========================== tanh register ============================ */ +REGISTER_ACTIVATION_CUDA_KERNEL(tanh, Tanh, TanhFunctor, TanhGradFunctor); + +REGISTER_OP_CUDA_KERNEL( + tanh_grad_grad, + ops::TanhDoubleGradKernel>, + ops::TanhDoubleGradKernel>, + ops::TanhDoubleGradKernel>); +/* ========================================================================== */ + /* =========================== sqrt register ============================= */ REGISTER_ACTIVATION_CUDA_KERNEL(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index fb5c4db91ec..fb9f956f17c 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -366,6 +366,36 @@ struct TanhGradFunctor : public BaseActivationFunctor { static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } }; +template +struct TanhGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* Out, + const framework::Tensor* ddX, const framework::Tensor* dOut, + framework::Tensor* dOutNew, framework::Tensor* ddOut) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "TanhGradGrad")); + auto out = framework::EigenVector::Flatten( + GET_DATA_SAFELY(Out, "Input", "Out", "TanhGradGrad")); + // tanh grad grad : ddout = (1 - out^2) * ddx, dout = - (dout_old * 2 * out + // * ddx) + if (dOutNew) { + auto dout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Input", "DOut", "TanhGradGrad")); + auto dout_new = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dOutNew, "Output", "DOutNew", "SquareGradGrad")); + dout_new.device(*d) = + static_cast(-1) * dout * static_cast(2) * out * ddx; + } + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SquareGradGrad")); + ddout.device(*d) = (static_cast(1) - out * out) * ddx; + } + } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } +}; + // tanhshrink(x) = x - tanh(x) // where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)) template @@ -1734,6 +1764,58 @@ inline void ExtractDoubleGradTensorWithInputDOut( } } +template +class TanhDoubleGradKernel + : public framework::OpKernel { + public: + using T = typename Functor::ELEMENT_TYPE; + void Compute(const framework::ExecutionContext& ctx) const override { + const framework::Tensor *Out, *ddX, *dOut; + framework::Tensor *dOutNew, *ddOut; + Out = ddX = dOut = nullptr; + dOutNew = ddOut = nullptr; + + // extract ddx(input) and out(input) + auto ddx_var = ctx.InputVar("DDX"); + auto out_var = ctx.InputVar("Out"); + PADDLE_ENFORCE_NOT_NULL( + ddx_var, platform::errors::NotFound( + "Cannot get input Variable ddx, variable name = %s", + ctx.InputName("DDX"))); + PADDLE_ENFORCE_NOT_NULL( + out_var, 
platform::errors::NotFound( + "Cannot get input Variable out, variable name = %s", + ctx.InputName("Out"))); + ddX = ctx.Input("DDX"); + Out = ctx.Input("Out"); + + // set output ddout + auto ddout_var = ctx.OutputVar("DDOut"); + if (ddout_var) { + ddOut = ctx.Output("DDOut"); + } + + // extract dOut(intput) + auto dout_var = ctx.InputVar("DOut"); + PADDLE_ENFORCE_NOT_NULL( + dout_var, platform::errors::NotFound( + "Cannot get input Variable dout_var, variable name = %s", + ctx.InputName("DOut"))); + dOut = ctx.Input("DOut"); + + // set output dout_new + auto dout_new_var = ctx.OutputVar("DOutNew"); + if (dout_new_var) { + dOutNew = ctx.Output("DOutNew"); + } + + if (dOutNew) dOutNew->mutable_data(Out->dims(), ctx.GetPlace()); + if (ddOut) ddOut->mutable_data(Out->dims(), ctx.GetPlace()); + auto& place = ctx.template device_context(); + Functor functor; + functor(place, Out, ddX, dOut, dOutNew, ddOut); + } +}; template class SquareDoubleGradKernel : public framework::OpKernel { @@ -2048,7 +2130,6 @@ struct LogGradGradFunctor : public BaseActivationFunctor { #define FOR_EACH_ACTIVATION_OP(__macro) \ __macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ __macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ - __macro(tanh, Tanh, TanhFunctor, TanhGradFunctor); \ __macro(atan, Atan, AtanFunctor, AtanGradFunctor); \ __macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ __macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor); \ diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index cfa487a8354..6c35d445b43 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -18,6 +18,7 @@ import unittest import numpy as np import paddle.fluid as fluid +import paddle import paddle.fluid.layers as layers import paddle.fluid.core as core import gradient_checker @@ -25,6 +26,28 @@ import gradient_checker from decorator_helper import prog_scope +class TestTanhDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + shape = [2, 3, 7, 9] + eps = 0.0005 + dtype = np.float64 + x = layers.data('x', shape, False, dtype=dtype) + x.persistable = True + y = paddle.tanh(x) + x_arr = np.random.uniform(-1, 1, shape).astype(dtype) + x_arr[np.abs(x_arr) < 0.005] = 0.002 + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + class TestReluDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): -- GitLab
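
Reviewer note (not part of the patch): a minimal NumPy sketch that sanity-checks the formulas implemented by TanhGradGradFunctor. The names tanh_grad, out, dout and ddx below are illustrative and mirror the functor's Out/DOut/DDX inputs and DOutNew/DDOut outputs; nothing here uses Paddle itself.

import numpy as np

def tanh_grad(out, dout):
    # first-order tanh backward, as in TanhGradFunctor: dX = dOut * (1 - Out^2)
    return dout * (1.0 - out * out)

rng = np.random.default_rng(0)
out = np.tanh(rng.uniform(-1.0, 1.0, size=(3, 4)))
dout = rng.uniform(-1.0, 1.0, size=(3, 4))
ddx = rng.uniform(-1.0, 1.0, size=(3, 4))  # upstream grad w.r.t. dX (the "DDX" input)
eps = 1e-6

# DDOut = (1 - Out^2) * DDX: directional derivative of dX w.r.t. dOut along DDX
num_ddout = (tanh_grad(out, dout + eps * ddx) -
             tanh_grad(out, dout - eps * ddx)) / (2.0 * eps)
assert np.allclose(num_ddout, (1.0 - out * out) * ddx)

# DOutNew = -2 * Out * dOut * DDX: directional derivative of dX w.r.t. Out along DDX
num_dout_new = (tanh_grad(out + eps * ddx, dout) -
                tanh_grad(out - eps * ddx, dout)) / (2.0 * eps)
assert np.allclose(num_dout_new, -2.0 * out * dout * ddx)

print("TanhGradGradFunctor formulas match central differences")

Since tanh_grad is quadratic in out and linear in dout, the central differences above are exact up to floating-point roundoff, so the default np.allclose tolerances suffice.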
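
Reviewer note (not part of the patch): a hedged end-to-end sketch of how the new tanh_grad_grad kernel is reached from Python, assuming a Paddle build that already contains this patch and the default dygraph mode. This is not part of the PR's unit tests, which exercise the same path through gradient_checker.double_grad_check in static graph mode.

import numpy as np
import paddle

x = paddle.to_tensor(
    np.random.uniform(-1.0, 1.0, [2, 3, 7, 9]), dtype='float64', stop_gradient=False)
y = paddle.tanh(x)

# first-order grad; create_graph=True keeps the graph so the double-grad op can run
(dx,) = paddle.grad(y, x, create_graph=True)
# second-order grad; elementwise this is d^2 tanh(x)/dx^2 = -2 * tanh(x) * (1 - tanh(x)^2)
(ddx,) = paddle.grad(dx, x)

expected = -2.0 * paddle.tanh(x) * (1.0 - paddle.tanh(x) ** 2)
print(np.allclose(ddx.numpy(), expected.numpy()))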