diff --git a/paddle/fluid/op_use_default_grad_op_maker.spec b/paddle/fluid/op_use_default_grad_op_maker.spec index 403be1fc2c97a189a541c0c887eaadfe4266a124..a2355d2deee5784f85a65ba32bf1440a55fb6bed 100644 --- a/paddle/fluid/op_use_default_grad_op_maker.spec +++ b/paddle/fluid/op_use_default_grad_op_maker.spec @@ -29,7 +29,6 @@ prelu quantize rank_loss reduce_max -reduce_mean reduce_min reduce_prod reduce_sum diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc index 072bc34d3e23a48c8d856a51b0d5a6facc7ececf..14593ea54ff24eca19d67939493987d8deabb261 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc @@ -13,8 +13,77 @@ // limitations under the License. #include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" +#include +#include +#include -REGISTER_REDUCE_OP(reduce_mean); +namespace paddle { +namespace operators { + +// NOTE(dengkaipeng): Input(Out) is unnecessary in reduce_mean_grad +// calculation, but will incur a reduce_mean_grad op after +// reduce_mean_grad_grad, delete Input(Out) here. +// This change has no effect on reduce_mean_grad calculations. 
+class ReduceMeanOpGradDescMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + std::unique_ptr Apply() const override { + std::unique_ptr op(new framework::OpDesc()); + op->SetType("reduce_mean_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetAttrMap(Attrs()); + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + return op; + } +}; + +class ReduceMeanDoubleGradMaker : public framework::GradOpDescMakerBase { + public: + using framework::GradOpDescMakerBase::GradOpDescMakerBase; + + std::vector> operator()() const override { + std::vector> ops; + auto x_grads = InputGrad("X"); + auto x_gg = OutputGrad(framework::GradVarName("X")); // input ddx + if (!x_grads.empty()) { + auto* x_grad_op = new framework::OpDesc(); + x_grad_op->SetType("scale"); + x_grad_op->SetInput("X", x_gg); + x_grad_op->SetOutput("Out", x_grads); + x_grad_op->SetAttr("scale", 0.0f); + ops.emplace_back(x_grad_op); + } + + auto out_grads = InputGrad(framework::GradVarName("Out")); + if (!out_grads.empty()) { + auto* out_grad_op = new framework::OpDesc(); + out_grad_op->SetType("reduce_mean"); + out_grad_op->SetInput("X", x_gg); + out_grad_op->SetAttrMap(Attrs()); + out_grad_op->SetOutput("Out", out_grads); + ops.emplace_back(out_grad_op); + } + + return ops; + } +}; + +} // namespace operators +} // namespace paddle + +class __reduce_meanMaker__ : public ops::ReduceOpMaker { + protected: + virtual std::string GetName() const { return "reduce_mean"; } + virtual std::string GetOpType() const { return "Reduce reduce_mean"; } +}; + +REGISTER_OPERATOR(reduce_mean, ops::ReduceOp, __reduce_meanMaker__, + ops::ReduceMeanOpGradDescMaker); +REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp, + ops::ReduceMeanDoubleGradMaker); REGISTER_OP_CPU_KERNEL(reduce_mean, ops::ReduceKernel, diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h 
b/paddle/fluid/operators/reduce_ops/reduce_op.h index c86591fdafa3d33bb3c7d75bf9f4f3b041a7a9cb..67fd3e1dad4b9c6036ac2c8f7f0fe5ec951c8e98 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -88,6 +88,10 @@ class ReduceGradKernel : public framework::OpKernel { auto* output = context.Output(framework::GradVarName("X")); output->mutable_data(context.GetPlace()); + // NOTE(dengkaipeng): Out is unnecessary in some reduce kernels and + // may not be set as Input in grad Maker, use Out_grad to replace here + if (!input1) input1 = input2; + if (reduce_all) { auto x = EigenVector::Flatten(*input0); auto x_reduce = EigenVector::From(*input1); diff --git a/python/paddle/fluid/tests/unittests/test_nn_grad.py b/python/paddle/fluid/tests/unittests/test_nn_grad.py index 083cfdd21fa46374fbc2c5a0eced13913f900f46..be0f4b239b6855e0f95f7c399abcfbc8ab64962a 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_nn_grad.py @@ -166,6 +166,29 @@ class TestElementwiseMulDoubleGradCheck(unittest.TestCase): self.func(p) + +class TestReduceMeanWithDimDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + shape = [7, 11] + eps = 0.05 + dtype = np.float64 + + x = layers.data('x', shape, False, dtype) + x.persistable = True + y = layers.reduce_mean(x, dim=0) + x_arr = np.random.uniform(-1, 1, shape).astype(dtype) + + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + class TestElementwiseMulBroadcastDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place):