diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
index 04c87c1b2ac398f8f75265c80bef5326aea15dce..b1ec10ea86c836acce6cf7d83bfc53866de06afd 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
@@ -13,10 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseSubDoubleGradDescMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_sub_grad_grad");
+    op->SetInput("Y", Input("Y"));
+    op->SetInput("DOut", Input(framework::GradVarName("Out")));
+    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
+    op->SetInput("DDY", OutputGrad(framework::GradVarName("Y")));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
 REGISTER_ELEMWISE_GRAD_MAKER(elementwise_sub, Sub);
-REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_sub, "Sub", "Out = X - Y");
+REGISTER_ELEMWISE_EXPLICIT_OP_WITHOUT_GRAD(elementwise_sub, "Sub",
+                                           "Out = X - Y");
+
+REGISTER_OPERATOR(elementwise_sub_grad, ops::ElementwiseOpExplicitGrad,
+                  ops::ElementwiseGradOpInplace,
+                  ops::ElementwiseGradNoBufVarsInference,
+                  ops::ElementwiseSubDoubleGradDescMaker);
+REGISTER_OPERATOR(elementwise_sub_grad_grad,
+                  ops::ElementwiseOpDoubleGradWithoutDXDY);
 
 REGISTER_OP_CPU_KERNEL(
     elementwise_sub,
@@ -30,3 +68,13 @@ REGISTER_OP_CPU_KERNEL(
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ElementwiseSubGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_sub_grad_grad,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        float>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        double>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
index f2adf1c83730c317cd4f4d2a4039c0f94da9df7b..52fad7fd04b0083c81089899d4dab80853441ca7 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cu
@@ -33,3 +33,13 @@ REGISTER_OP_CUDA_KERNEL(
     ops::ElementwiseSubGradKernel<paddle::platform::CUDADeviceContext, int>,
     ops::ElementwiseSubGradKernel<paddle::platform::CUDADeviceContext,
                                   int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_sub_grad_grad,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        float>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        double>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        int>,
+    ops::ElementwiseSubDoubleGradKernel<paddle::platform::CUDADeviceContext,
+                                        int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
index 770323fe5a8fe7c1051b418b2541ab4c669635b4..5049d587b582a71981f45a72dc5bfc133dadb52d 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h
@@ -68,5 +68,33 @@ class ElementwiseSubGradKernel : public ElemwiseGradKernel<T> {
         ctx, *x, *y, *out, *dout, axis, dx, dy, SubGradDX<T>(), SubGradDY<T>());
   }
 };
+
+template <typename DeviceContext, typename T>
+class ElementwiseSubDoubleGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    using Tensor = framework::Tensor;
+
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* dout = ctx.Input<Tensor>("DOut");
+    auto* ddx = ctx.Input<Tensor>("DDX");
+    auto* ddy = ctx.Input<Tensor>("DDY");
+
+    auto* ddout = ctx.Output<Tensor>("DDOut");
+
+    // DDOut = ddx - ddy
+    if (ddout) {
+      Tensor ddx_safe, ddy_safe;
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, dout, ddx, &ddx_safe);
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, y, ddy, &ddy_safe);
+
+      ddout->mutable_data<T>(ctx.GetPlace());
+      int axis = ctx.Attr<int>("axis");
+      ElementwiseComputeEx<SubFunctor<T>, DeviceContext, T>(
+          ctx, &ddx_safe, &ddy_safe, axis, SubFunctor<T>(), ddout);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py
index 5710c93b7900115003158780f2ddf59abbc831f5..b8c9d73e4f3c35796a983460b4d26f95a4a5d6ef 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -169,7 +169,7 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -219,7 +219,7 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -246,7 +246,7 @@ class TestElementwiseAddDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -273,7 +273,7 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
         # the shape of input variable shoule be clearly specified, not inlcude -1.
-        shape = [7, 9]
+        shape = [2, 3, 5, 7]
         eps = 0.005
         dtype = np.float64
 
@@ -296,6 +296,60 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
             self.func(p)
 
 
+class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and should not include -1.
+        shape = [2, 3, 5, 7]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape, False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_sub(x, y)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and should not include -1.
+        shape = [2, 3, 5, 7]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape[:-1], False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_sub(x, y, axis=0)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 class TestMulDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
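Note on the math behind the new elementwise_sub_grad_grad kernel: Out = X - Y is linear, so the first-order gradients are dX = dOut and dY = -dOut, and perturbing those gradients by DDX and DDY perturbs dOut by exactly DDOut = DDX - DDY. Y and DOut are only consulted for shape and dtype when DDX or DDY is absent, which is what GetDoubleGradSafeTensor handles by substituting zeros. The sketch below is illustrative only and not part of the patch; the helper name sub_double_grad and its axis handling are assumptions that mimic Paddle's broadcast rule with NumPy. It reproduces the rule the kernel applies, including the axis=0 broadcast exercised by TestElementwiseSubBroadcastDoubleGradCheck.

# Illustrative sketch, not part of the patch: the rule computed by
# elementwise_sub_grad_grad. A missing DDX/DDY behaves as a zero tensor,
# which is what GetDoubleGradSafeTensor supplies in the C++ kernel.
import numpy as np

def sub_double_grad(ddx, ddy, out_shape, y_shape, axis=-1):
    ddx = np.zeros(out_shape) if ddx is None else ddx
    ddy = np.zeros(y_shape) if ddy is None else ddy
    # Paddle aligns y's dims with x's starting at `axis`; pad trailing
    # singleton dims so NumPy broadcasting lines up the same way.
    if ddy.ndim < ddx.ndim:
        start = axis if axis >= 0 else ddx.ndim - ddy.ndim
        ddy = ddy.reshape(ddy.shape + (1,) * (ddx.ndim - start - ddy.ndim))
    return ddx - ddy  # DDOut = ddx - ddy

shape = (2, 3, 5, 7)
ddx = np.random.uniform(-1, 1, shape)
ddy = np.random.uniform(-1, 1, shape[:-1])
ddout = sub_double_grad(ddx, ddy, shape, shape[:-1], axis=0)
assert ddout.shape == shape
assert np.allclose(ddout, ddx - ddy[..., np.newaxis])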