diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index 2b3fc06dcb79b8c6b46de7abf51bdb2c47acca1c..bf12d8a1a6de1374e8b99691f8f5713617321111 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -13,10 +13,48 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include <memory>
+#include <string>
 #include "paddle/fluid/operators/elementwise/elementwise_op.h"
-namespace ops = paddle::operators;
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseAddDoubleGradDescMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_add_grad_grad");
+    op->SetInput("Y", Input("Y"));
+    op->SetInput("DOut", Input(framework::GradVarName("Out")));
+    op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
+    op->SetInput("DDY", OutputGrad(framework::GradVarName("Y")));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
+    return op;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 REGISTER_ELEMWISE_GRAD_MAKER(elementwise_add, Add);
-REGISTER_ELEMWISE_EXPLICIT_OP(elementwise_add, "Add", "Out = X + Y");
+REGISTER_ELEMWISE_EXPLICIT_OP_WITHOUT_GRAD(elementwise_add, "Add",
+                                           "Out = X + Y");
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(elementwise_add_grad, ops::ElementwiseOpExplicitGrad,
+                  ops::ElementwiseGradOpInplace,
+                  ops::ElementwiseGradNoBufVarsInference,
+                  ops::ElementwiseAddDoubleGradDescMaker);
+REGISTER_OPERATOR(elementwise_add_grad_grad,
+                  ops::ElementwiseOpDoubleGradWithoutDXDY);
 
 REGISTER_OP_CPU_KERNEL(
     elementwise_add,
@@ -30,3 +68,13 @@ REGISTER_OP_CPU_KERNEL(
     ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ElementwiseAddGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CPU_KERNEL(
+    elementwise_add_grad_grad,
+    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        float>,
+    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        double>,
+    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int>,
+    ops::ElementwiseAddDoubleGradKernel<paddle::platform::CPUDeviceContext,
+                                        int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
index fc38653ce1132ec9e05074fb739335970f6b9256..8320272b4b69ad24fbd1e94eb25fda53abe8c3fc 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu
@@ -31,3 +31,9 @@ REGISTER_OP_CUDA_KERNEL(
     ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int>,
     ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, int64_t>,
     ops::ElementwiseAddGradKernel<plat::CUDADeviceContext, plat::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    elementwise_add_grad_grad,
+    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, float>,
+    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, double>,
+    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, int>,
+    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h
index ba8ca1ad4f71732921e9ef3fe0d0dce69e27f733..7f8b0ffe92fd40d7944f05282c4edc8271547e00 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h
@@ -161,5 +161,31 @@ class ElementwiseAddGradKernel : public ElemwiseGradKernel<T> {
   }
 };
 
+template <typename DeviceContext, typename T>
+class ElementwiseAddDoubleGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    using Tensor = framework::Tensor;
+
+    auto *y = ctx.Input<Tensor>("Y");
+    auto *dout = ctx.Input<Tensor>("DOut");
+    auto *ddx = ctx.Input<Tensor>("DDX");
+    auto *ddy = ctx.Input<Tensor>("DDY");
+
+    auto *ddout = ctx.Output<Tensor>("DDOut");
+
+    // ddOut = ddx + ddy
+    if (ddout) {
+      Tensor ddx_safe, ddy_safe;
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, dout, ddx, &ddx_safe);
+      GetDoubleGradSafeTensor<DeviceContext, T>(ctx, y, ddy, &ddy_safe);
+
+      ddout->mutable_data<T>(ctx.GetPlace());
+      default_elementwise_add<DeviceContext, T>(ctx, &ddx_safe, &ddy_safe,
+                                                ddout);
+    }
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index c6615b635e0640f5cefa9521018fd91ac1519041..c251cc722703cbd6388e911c6899415e4240cfda 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -236,7 +236,35 @@ class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel {
 
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
-    auto input_data_type = ctx.Input<Tensor>("DDX")->type();
+    auto input_data_type = ctx.Input<Tensor>("DOut")->type();
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (platform::CanMKLDNNBeUsed(ctx)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  }
+};
+
+class ElementwiseOpDoubleGradWithoutDXDY
+    : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  using Tensor = framework::Tensor;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    if (ctx->HasOutput("DDOut")) {
+      ctx->ShareDim("DOut", "DDOut");
+      ctx->ShareLoD("DOut", "DDOut");
+    }
+  }
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    auto input_data_type = ctx.Input<Tensor>("DOut")->type();
 
 #ifdef PADDLE_WITH_MKLDNN
     if (platform::CanMKLDNNBeUsed(ctx)) {
@@ -359,3 +387,16 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ElementwiseGradNoBufVarsInference, "Y");
                     ::paddle::operators::ElementwiseOpExplicitGrad,          \
                     ::paddle::operators::ElementwiseGradOpInplace,           \
                     ::paddle::operators::ElementwiseGradNoBufVarsInference)
+
+#define REGISTER_ELEMWISE_EXPLICIT_OP_WITHOUT_GRAD(op_type, op_name, equation) \
+  class __ElemwiseOp##op_type##Maker__                                       \
+      : public ::paddle::operators::ElementwiseOpMaker {                     \
+   protected:                                                                \
+    virtual std::string GetName() const { return op_name; }                  \
+    virtual std::string GetEquation() const { return equation; }             \
+  };                                                                         \
+  REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp,             \
+                    __ElemwiseOp##op_type##Maker__,                          \
+                    ::paddle::operators::ElementwiseOpInferVarType,          \
+                    op_type##GradMaker,                                      \
+                    ::paddle::operators::ElementwiseOpInplace);
diff --git a/python/paddle/fluid/tests/unittests/gradient_checker.py b/python/paddle/fluid/tests/unittests/gradient_checker.py
index 87c917873cd97f7512621d45f64b2ae9e76bd33b..98ca93caeb6e75db5d5f54bbc4cfa4eac4635b17 100644
--- a/python/paddle/fluid/tests/unittests/gradient_checker.py
+++ b/python/paddle/fluid/tests/unittests/gradient_checker.py
@@ -196,17 +196,23 @@ def _compute_analytical_jacobian(program, x, y, place, scope):
     x = _as_list(x)
     jacobian = make_jacobian(x, y_size, np_type)
 
+    # filter out the None entries in dx, since DX/DY may be None in the kernel
+    # only fetch the non-None dx entries in exe.run
+    filted = [(i, dxi) for i, dxi in enumerate(dx) if dxi is not None]
+    filted_idx, filted_dx = zip(*filted)
+
     for i in six.moves.xrange(y_size):
         _set_item(dy_t, i, 1, np_type)
 
-        dx_res = exe.run(program, scope=scope, fetch_list=dx)
+        dx_res = exe.run(program, scope=scope, fetch_list=filted_dx)
 
-        for j in six.moves.xrange(len(x)):
+        for j in six.moves.xrange(len(filted_dx)):
+            dx_idx = filted_idx[j]
             if dx_res[j] is not None:
-                jacobian[j][:, i] = dx_res[j].flatten()
+                jacobian[dx_idx][:, i] = dx_res[j].flatten()
             else:
-                jacobian[j][:, i] = np.zeros(
-                    dx[j].shape, dtype=np_type).flatten()
+                jacobian[dx_idx][:, i] = np.zeros(
+                    dx[dx_idx].shape, dtype=np_type).flatten()
 
         _set_item(dy_t, i, 0, np_type)
diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py
index 2ef722c9133e1c8306074705384488a1c1f28a10..083cfdd21fa46374fbc2c5a0eced13913f900f46 100644
--- a/python/paddle/fluid/tests/unittests/test_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py
@@ -193,6 +193,60 @@ class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase):
         self.func(p)
 
 
+class TestElementwiseAddDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified, not include -1.
+        shape = [7, 9]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape, False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_add(x, y)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified, not include -1.
+        shape = [7, 9]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', shape, False, dtype)
+        y = layers.data('y', shape[:-1], False, dtype)
+        x.persistable = True
+        y.persistable = True
+        out = layers.elementwise_add(x, y, axis=0)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+        y_arr = np.random.uniform(-1, 1, shape[:-1]).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 class TestMulDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
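
Note on the computation behind the patch: for Out = X + Y the double-grad kernel only has to form DDOut = DDX + DDY, and a DDX or DDY that is not provided behaves as a zero tensor (this is what GetDoubleGradSafeTensor supplies, and it is also why gradient_checker.py now filters None entries out of the fetch list, per the patch's own comment). Below is a minimal NumPy sketch of that reference computation for the same-shape case only; the function name and signature are illustrative and not part of the patch.

    import numpy as np

    def elementwise_add_double_grad_ref(ddx, ddy, x_shape, y_shape,
                                        dtype=np.float64):
        # A missing (None) DDX/DDY behaves as a zero tensor of the
        # corresponding input's shape, mirroring GetDoubleGradSafeTensor.
        ddx = np.zeros(x_shape, dtype=dtype) if ddx is None else ddx
        ddy = np.zeros(y_shape, dtype=dtype) if ddy is None else ddy
        return ddx + ddy  # DDOut = DDX + DDY

When both ddx and ddy are given this is just their elementwise sum; the broadcast test case (axis=0) additionally relies on Paddle's axis-based broadcasting inside default_elementwise_add rather than plain NumPy broadcasting.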