Patch summary (paddle/operators/reduce_op.cc and paddle/operators/reduce_op.h):
  * reduce_op.cc: drops the `using framework::LoDTensor;` alias; after resizing
    "Out", shares the LoD of "X" with "Out" when `dim != 0`; expands the help
    text of the `dim` attribute (the text states "default 0", matching the
    unchanged `.SetDefault(0)`).
  * reduce_op.h: adds a comment on how MaxOrMinGradFunctor treats ties; renames
    ReduceGradKernel's private helper ReduceCompute to ReduceGradCompute and
    removes its `output != nullptr` guard; deletes ReduceGradEigenFreeKernel.
NOTE(review): template argument lists (e.g. after `template`, `AddAttr`,
`static_cast`, `context.Input`, `EigenTensor`) look stripped from this copy of
the patch; confirm against the upstream commit before applying.

diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc
index 89f54fe74b61e1a1c12dd4498a5aca2ccff1fa0e..61b33d4bbd59e259fcb89f0c22bdc992eea15947 100644
--- a/paddle/operators/reduce_op.cc
+++ b/paddle/operators/reduce_op.cc
@@ -18,7 +18,6 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
-using framework::LoDTensor;
 
 class ReduceOp : public framework::OperatorWithKernel {
  public:
@@ -46,7 +45,11 @@ class ReduceOp : public framework::OperatorWithKernel {
       dims_vector.erase(dims_vector.begin() + dim);
     }
     auto out_dims = framework::make_ddim(dims_vector);
-    ctx.Output("Out")->Resize(out_dims);
+    ctx.Output("Out")->Resize(out_dims);
+    if (dim != 0) {
+      // Only pass LoD when not reducing on the first dim
+      ctx.ShareLoD("X", /*->*/ "Out");
+    }
   }
 };
 
@@ -81,9 +84,12 @@ class ReduceOpMaker : public framework::OpProtoAndCheckerMaker {
         "X",
         "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
     AddOutput("Out", "(Tensor) The result tensor.");
-    AddAttr("dim",
-            "(int, default 0) The dimension to reduce. "
-            "Must be in the range [-rank(input), rank(input))")
+    AddAttr(
+        "dim",
+        "(int, default 0) The dimension to reduce. "
+        "Must be in the range [-rank(input), rank(input)). "
+        "If `dim < 0`, the dim to reduce is `rank + dim`. "
+        "Noting that reducing on the first dim will make the LoD info lost.")
         .SetDefault(0);
     AddAttr("keep_dim",
             "(bool, default false) "
diff --git a/paddle/operators/reduce_op.h b/paddle/operators/reduce_op.h
index 972bd7bd4645d991ec466726d927e92216171186..2fbf94e34f3961a9b3140fb682a7c479f3b71f4d 100644
--- a/paddle/operators/reduce_op.h
+++ b/paddle/operators/reduce_op.h
@@ -80,6 +80,8 @@ struct MaxOrMinGradFunctor {
     auto equals = x == y.broadcast(dim);
     auto ones = dx.constant(1);
     auto zeros = dx.constant(0);
+    // If there are multiple minimum or maximum elements, the subgradient of
+    // each is the set [0, 1], and we pass gradient to all of them here.
     dx.device(place) = dy.broadcast(dim) * equals.select(ones, zeros);
   }
 };
@@ -145,102 +147,52 @@ class ReduceGradKernel : public framework::OpKernel {
     int rank = context.Input("X")->dims().size();
     switch (rank) {
       case 1:
-        ReduceCompute<1>(context);
+        ReduceGradCompute<1>(context);
         break;
       case 2:
-        ReduceCompute<2>(context);
+        ReduceGradCompute<2>(context);
         break;
       case 3:
-        ReduceCompute<3>(context);
+        ReduceGradCompute<3>(context);
         break;
       case 4:
-        ReduceCompute<4>(context);
+        ReduceGradCompute<4>(context);
         break;
       case 5:
-        ReduceCompute<5>(context);
+        ReduceGradCompute<5>(context);
         break;
       case 6:
-        ReduceCompute<6>(context);
+        ReduceGradCompute<6>(context);
         break;
     }
   }
 
  private:
   template
-  void ReduceCompute(const framework::ExecutionContext& context) const {
+  void ReduceGradCompute(const framework::ExecutionContext& context) const {
     auto* input0 = context.Input("X");
     auto* input1 = context.Input("Out");
     auto* input2 = context.Input(framework::GradVarName("Out"));
     auto* output = context.Output(framework::GradVarName("X"));
 
-    if (output != nullptr) {
-      output->mutable_data(context.GetPlace());
-      auto x = EigenTensor::From(*input0);
-      auto x_grad = EigenTensor::From(*output);
-      auto x_rank = static_cast(x.dimensions().size());
-      int dim = static_cast(context.Attr("dim"));
-      if (dim < 0) dim = x_rank + dim;
-      DDim dims = input0->dims();
-      dims[dim] = 1;
-      auto x_reduce = EigenTensor::From(*input1, dims);
-      auto x_reduce_grad = EigenTensor::From(*input2, dims);
-
-      Eigen::array braodcast_dim;
-      for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1;
-      braodcast_dim[dim] = input0->dims()[dim];
-      auto& place = context.GetEigenDevice();
-      Functor functor;
-      functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim,
-              braodcast_dim[dim]);
-    }
-  }
-};
-
-// For EigenTensor unsupported reduce
-template
-class ReduceGradEigenFreeKernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input("X");
-    auto* out = context.Input("Out");
-    auto* x_grad = context.Output(framework::GradVarName("X"));
-    auto* out_grad = context.Input(framework::GradVarName("Out"));
-    if (x_grad != nullptr) {
-      DDim dims = x->dims();
-      int rank = dims.size();
-      int dim = static_cast(context.Attr("dim"));
-      if (dim < 0) dim = rank + dim;
-
-      auto* x_data = x->data();
-      auto* x_grad_data = x_grad->mutable_data(context.GetPlace());
-      auto* out_data = out->data();
-      auto* out_grad_data = out_grad->data();
-
-      int outer_count = 1;
-      int inner_count = 1;
-      int mid_count = dims[dim];
-      for (int i = 0; i < dim; ++i) {
-        outer_count *= dims[i];
-      }
-      for (int i = dim + 1; i < rank; ++i) {
-        inner_count *= dims[i];
-      }
-
-      int x_offset = 0;  // offset on raw data
-      int out_offset = 0;  // offset on reduced data
-      Functor functor;
-      for (int i = 0; i < outer_count; ++i) {
-        for (int j = 0; j < inner_count; ++j) {
-          out_offset = inner_count * i + j;
-          for (int k = 0; k < mid_count; ++k) {
-            x_offset = (inner_count * mid_count) * i + inner_count * k + j;
-            functor(x_data + x_offset, out_data + out_offset,
-                    x_grad_data + x_offset, out_grad_data + out_offset,
-                    mid_count);
-          }
-        }
-      }
-    }
+    output->mutable_data(context.GetPlace());
+    auto x = EigenTensor::From(*input0);
+    auto x_grad = EigenTensor::From(*output);
+    auto x_rank = static_cast(x.dimensions().size());
+    int dim = static_cast(context.Attr("dim"));
+    if (dim < 0) dim = x_rank + dim;
+    DDim dims = input0->dims();
+    dims[dim] = 1;
+    auto x_reduce = EigenTensor::From(*input1, dims);
+    auto x_reduce_grad = EigenTensor::From(*input2, dims);
+
+    Eigen::array braodcast_dim;
+    for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1;
+    braodcast_dim[dim] = input0->dims()[dim];
+    auto& place = context.GetEigenDevice();
+    Functor functor;
+    functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim,
+            braodcast_dim[dim]);
   }
 };
 