diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 95214484dca99e718f8ec62225ce40ce3ffd7323..a640a6c745ccb7e7cda0e47b17104ec990fce0dd 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
   }
 };
 
+// log Grad: dx = dout / x
+// log Grad Grad: ddout = ddx / x; dx = -(dout / x) * (ddx / x)
+template <typename T>
+class LogDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
+ public:
+  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+ protected:
+  void Apply(GradOpPtr<T> op) const override {
+    op->SetType("log_grad_grad");
+    op->SetInput("X", this->Input("X"));
+    // X@GRAD@GRAD: ddx
+    op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
+    op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
+    op->SetAttrMap(this->Attrs());
+    // X@GRAD: dx
+    op->SetOutput("DX", this->InputGrad("X"));
+    // Out@GRAD@GRAD: ddy
+    op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
+  }
+};
+
 DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer,
                            {framework::GradVarName("Out"),
                             framework::GradVarName("X")});
@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL(
                              ops::AbsGradGradFunctor<double>>);
 /* ========================================================================== */
 
+/* ========================== Log register ==================================*/
+REGISTER_OPERATOR(
+    log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType,
+    ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
+                               paddle::framework::OpDesc>,
+    ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
+                               paddle::imperative::OpBase>,
+    ops::ActFwdInplaceInferer);
+REGISTER_OPERATOR(log_grad, ops::ActivationOpGrad,
+                  ops::ActivationGradOpInplaceInferer,
+                  ops::LogDoubleGradMaker<paddle::framework::OpDesc>,
+                  ops::LogDoubleGradMaker<paddle::imperative::OpBase>);
+
+REGISTER_OPERATOR(
+    log_grad_grad,
+    ops::ActivationOpDoubleGrad<ops::LogGradGradFunctor<float>::FwdDeps()>,
+    ops::ActivationDoubleGradOpInplaceInferer);
+
+REGISTER_ACTIVATION_CPU_KERNEL(log, Log, LogFunctor, LogGradFunctor);
+
+REGISTER_OP_CPU_KERNEL(
+    log_grad_grad, ops::LogDoubleGradKernel<plat::CPUDeviceContext,
+                                            ops::LogGradGradFunctor<float>>,
+    ops::LogDoubleGradKernel<plat::CPUDeviceContext,
+                             ops::LogGradGradFunctor<double>>,
+    ops::LogDoubleGradKernel<plat::CPUDeviceContext,
+                             ops::LogGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
+
 /* ========================== register checkpoint ===========================*/
 REGISTER_OP_VERSION(leaky_relu)
     .AddCheckpoint(
diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu
index 072d952d2618d2c9dbbe27641dcd951a634753a0..839776ad58d0352cd9bdd59530951e4eea1120b3 100644
--- a/paddle/fluid/operators/activation_op.cu
+++ b/paddle/fluid/operators/activation_op.cu
@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL(
     ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
                                     ops::SquareGradGradFunctor<plat::float16>>);
 /* ========================================================================== */
+
+/* ========================== Log register ==================================*/
+REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, LogGradFunctor);
+
+REGISTER_OP_CUDA_KERNEL(
+    log_grad_grad, ops::LogDoubleGradKernel<plat::CUDADeviceContext,
+                                            ops::LogGradGradFunctor<float>>,
+    ops::LogDoubleGradKernel<plat::CUDADeviceContext,
+                             ops::LogGradGradFunctor<double>>,
+    ops::LogDoubleGradKernel<plat::CUDADeviceContext,
+                             ops::LogGradGradFunctor<plat::float16>>);
+/* ========================================================================== */
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 646f546bffb2ced3830c119b5f24f6d3fcad0e78..a5c613297a473c326a2d239ad57ac0cca5a165f3 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel
   }
 };
 
+template <typename DeviceContext, typename Functor>
+class LogDoubleGradKernel
+    : public SquareDoubleGradKernel<DeviceContext, Functor> {};
+
 template <typename DeviceContext, typename Functor>
 class ELUDoubleGradKernel
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
@@ -1852,6 +1856,37 @@ class PowGradKernel
     functor(*place, x, out, dout, dx);
   }
 };
+
+template <typename T>
+struct LogGradGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device>
+  void operator()(const Device& dev, const framework::Tensor* X,
+                  const framework::Tensor* ddX, framework::Tensor* ddOut,
+                  const framework::Tensor* dOut, framework::Tensor* dX) const {
+    auto* d = dev.eigen_device();
+    auto ddx = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad"));
+    auto x = framework::EigenVector<T>::Flatten(
+        GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad"));
+    // ddout = ddx / x; dx = -(dout / x) * (ddx / x)
+    // calculate dx first, so ddout can inplace ddx
+    if (dX) {
+      auto dout = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(dOut, "Output", "DOut", "LogGradGrad"));
+      auto dx = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad"));
+      dx.device(*d) = dout * static_cast<T>(-1) * ddx / (x * x);
+    }
+    if (ddOut) {
+      auto ddout = framework::EigenVector<T>::Flatten(
+          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad"));
+      ddout.device(*d) = ddx * static_cast<T>(1) / x;
+    }
+  }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
@@ -1872,7 +1907,6 @@ class PowGradKernel
   __macro(cosh, Cosh, CoshFunctor, CoshGradFunctor);                          \
   __macro(round, Round, RoundFunctor, ZeroGradFunctor);                       \
   __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor);  \
-  __macro(log, Log, LogFunctor, LogGradFunctor);                              \
   __macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor);                      \
   __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor);                      \
   __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor);         \
diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
index c97cca654a7c47c1581c94a242eac9554bc87887..6c4834b84f91f68f51b65bfc831775966732b36c 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestLogDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        shape = [2, 3, 7, 9]
+        eps = 1e-6
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        x.persistable = True
+        y = layers.log(x)
+
+        x_arr = np.random.uniform(0.1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x], y, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == "__main__":
     unittest.main()