Add reduce_op

3994e91a · guosheng · a2393fc1 · 3994e91a · 3994e91a · 3994e91a
4 changed files
--- a/paddle/operators/reduce_op.cc
+++ b/paddle/operators/reduce_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include "paddle/operators/reduce_op.h"
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+using framework::DDim;
+// Forward operator type registered for reduce_sum / reduce_mean / reduce_max /
+// reduce_min. It only performs shape inference.
+class ReduceOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  // Validates Input(X) and the `dim` attribute, then resizes Output(Out) to
+  // the shape of X with the reduced dimension set to 1.
+  //
+  // Raises (via PADDLE_ENFORCE_*) when X is missing, when rank(X) > 6, or when
+  // `dim` lies outside [-rank(X), rank(X)).
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto x_rank = x_dims.size();
+    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported");
+    int dim = ctx.Attr<int>("dim");
+    // Negative values count from the last dimension.
+    if (dim < 0) dim = x_rank + dim;
+    // A `dim` below -rank is still negative after the normalisation above and
+    // would index before the start of dims_vector, so check both bounds.
+    PADDLE_ENFORCE_GE(
+        dim, 0,
+        "The dim should be in the range [-rank(input), rank(input))");
+    PADDLE_ENFORCE_LT(
+        dim, x_rank,
+        "The dim should be in the range [-rank(input), rank(input))");
+    // TODO: read the `keep_dim` attribute. Until then Out always retains the
+    // reduced dimension with extent 1; any change here has to be checked
+    // against how ReduceKernel reshapes Out.
+    bool keep_dim = true;
+    auto dims_vector = vectorize(x_dims);
+    if (keep_dim || x_rank == 1) {
+      // A rank-1 input never drops its only dimension.
+      dims_vector[dim] = 1;
+    } else {
+      dims_vector.erase(dims_vector.begin() + dim);
+    }
+    auto out_dims = framework::make_ddim(dims_vector);
+    ctx.Output<Tensor>("Out")->Resize(out_dims);
+  }
+};
+// Gradient operator type registered for the reduce ops. It only performs
+// shape inference: X@GRAD, when requested, gets the shape of X.
+class ReduceGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  // Validates Input(X), Input(Out@GRAD) and the `dim` attribute, then resizes
+  // Output(X@GRAD) to the dims of X if that output is present.
+  //
+  // Raises (via PADDLE_ENFORCE_*) when X or Out@GRAD is missing, when
+  // rank(X) > 6, or when `dim` lies outside [-rank(X), rank(X)).
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
+                            "Input(Out@GRAD) should not be null");
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto x_rank = x_dims.size();
+    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported");
+    int dim = ctx.Attr<int>("dim");
+    // Negative values count from the last dimension.
+    if (dim < 0) dim = x_rank + dim;
+    // A `dim` below -rank stays negative after normalisation; reject it here
+    // rather than letting the kernel index out of range.
+    PADDLE_ENFORCE_GE(
+        dim, 0,
+        "The dim should be in the range [-rank(input), rank(input))");
+    PADDLE_ENFORCE_LT(
+        dim, x_rank,
+        "The dim should be in the range [-rank(input), rank(input))");
+    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    if (x_grad) x_grad->Resize(x_dims);
+  }
+};
+// Declares the proto of reduce_sum: input X, output Out, and the attributes
+// `dim` (default 0) and `keep_dim` (default false).
+class ReduceSumOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReduceSumOpMaker(framework::OpProto *proto,
+                   framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(
+        "X",
+        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
+    AddOutput("Out", "(Tensor) The result tensor.");
+    AddComment(R"DOC(
+ReduceSum operator computes the sum of input tensor along the given dimension. 
+The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
+)DOC");
+    AddAttr<int>("dim",
+                 "(int, default 0) The dimension to reduce. "
+                 "Must be in the range [-rank(input), rank(input))")
+        .SetDefault(0);
+    AddAttr<bool>("keep_dim",
+                  "(bool, default false) "
+                  "If true, retain the reduced dimension with length 1.")
+        .SetDefault(false);
+  }
+};
+// Declares the proto of reduce_mean: input X, output Out, and the attributes
+// `dim` (default 0) and `keep_dim` (default false).
+class ReduceMeanOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReduceMeanOpMaker(framework::OpProto *proto,
+                    framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(
+        "X",
+        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
+    AddOutput("Out", "(Tensor) The result tensor.");
+    AddComment(R"DOC(
+ReduceMean operator computes the mean of input tensor along the given dimension. 
+The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
+)DOC");
+    AddAttr<int>("dim",
+                 "(int, default 0) The dimension to reduce. "
+                 "Must be in the range [-rank(input), rank(input))")
+        .SetDefault(0);
+    AddAttr<bool>("keep_dim",
+                  "(bool, default false) "
+                  "If true, retain the reduced dimension with length 1.")
+        .SetDefault(false);
+  }
+};
+// Declares the proto of reduce_max: input X, output Out, and the attributes
+// `dim` (default 0) and `keep_dim` (default false).
+class ReduceMaxOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReduceMaxOpMaker(framework::OpProto *proto,
+                   framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(
+        "X",
+        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
+    AddOutput("Out", "(Tensor) The result tensor.");
+    AddComment(R"DOC(
+ReduceMax operator computes the maximum of input tensor along the given dimension. 
+The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
+)DOC");
+    AddAttr<int>("dim",
+                 "(int, default 0) The dimension to reduce. "
+                 "Must be in the range [-rank(input), rank(input))")
+        .SetDefault(0);
+    AddAttr<bool>("keep_dim",
+                  "(bool, default false) "
+                  "If true, retain the reduced dimension with length 1.")
+        .SetDefault(false);
+  }
+};
+// Declares the proto of reduce_min: input X, output Out, and the attributes
+// `dim` (default 0) and `keep_dim` (default false).
+class ReduceMinOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReduceMinOpMaker(framework::OpProto *proto,
+                   framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(
+        "X",
+        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
+    AddOutput("Out", "(Tensor) The result tensor.");
+    AddComment(R"DOC(
+ReduceMin operator computes the minimum of input tensor along the given dimension. 
+The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
+)DOC");
+    AddAttr<int>("dim",
+                 "(int, default 0) The dimension to reduce. "
+                 "Must be in the range [-rank(input), rank(input))")
+        .SetDefault(0);
+    AddAttr<bool>("keep_dim",
+                  "(bool, default false) "
+                  "If true, retain the reduced dimension with length 1.")
+        .SetDefault(false);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+// Operator and CPU kernel registrations. Every reduce op shares the ReduceOp /
+// ReduceGradOp operator classes; only the OpMaker and the functor plugged into
+// the kernel templates differ. Kernels are registered for float only.
+namespace ops = paddle::operators;
+// reduce_sum and its gradient.
+REGISTER_OP(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, reduce_sum_grad,
+            ops::ReduceGradOp);
+REGISTER_OP_CPU_KERNEL(
+    reduce_sum,
+    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::SumFunctor>);
+REGISTER_OP_CPU_KERNEL(reduce_sum_grad,
+                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
+                                             ops::SumGradFunctor>);
+// reduce_mean and its gradient.
+REGISTER_OP(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker,
+            reduce_mean_grad, ops::ReduceGradOp);
+REGISTER_OP_CPU_KERNEL(
+    reduce_mean,
+    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MeanFunctor>);
+REGISTER_OP_CPU_KERNEL(reduce_mean_grad,
+                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
+                                             ops::MeanGradFunctor>);
+// reduce_max and its gradient; the gradient functor is shared with reduce_min.
+REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad,
+            ops::ReduceGradOp);
+REGISTER_OP_CPU_KERNEL(
+    reduce_max,
+    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MaxFunctor>);
+REGISTER_OP_CPU_KERNEL(reduce_max_grad,
+                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
+                                             ops::MaxOrMinGradFunctor>);
+// reduce_min and its gradient. The proto must come from ReduceMinOpMaker so
+// the registered documentation describes a minimum, not a maximum.
+REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, reduce_min_grad,
+            ops::ReduceGradOp);
+REGISTER_OP_CPU_KERNEL(
+    reduce_min,
+    ops::ReduceKernel<paddle::platform::CPUPlace, float, ops::MinFunctor>);
+REGISTER_OP_CPU_KERNEL(reduce_min_grad,
+                       ops::ReduceGradKernel<paddle::platform::CPUPlace, float,
+                                             ops::MaxOrMinGradFunctor>);
--- a/paddle/operators/reduce_op.cu
+++ b/paddle/operators/reduce_op.cu
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#define EIGEN_USE_GPU
+#include "paddle/operators/reduce_op.h"
+// GPU kernel registrations (float only). The operators themselves are
+// registered in reduce_op.cc.
+namespace ops = paddle::operators;
+// reduce_sum and its gradient. The gradient uses ReduceGradKernel, the
+// Eigen-based gradient kernel template defined in reduce_op.h.
+REGISTER_OP_GPU_KERNEL(
+    reduce_sum,
+    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::SumFunctor>);
+REGISTER_OP_GPU_KERNEL(reduce_sum_grad,
+                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
+                                             ops::SumGradFunctor>);
+// reduce_mean and its gradient.
+REGISTER_OP_GPU_KERNEL(
+    reduce_mean,
+    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MeanFunctor>);
+REGISTER_OP_GPU_KERNEL(reduce_mean_grad,
+                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
+                                             ops::MeanGradFunctor>);
+// reduce_max and its gradient.
+REGISTER_OP_GPU_KERNEL(
+    reduce_max,
+    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MaxFunctor>);
+REGISTER_OP_GPU_KERNEL(reduce_max_grad,
+                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
+                                             ops::MaxOrMinGradFunctor>);
+// reduce_min and its gradient; shares MaxOrMinGradFunctor with reduce_max.
+REGISTER_OP_GPU_KERNEL(
+    reduce_min,
+    ops::ReduceKernel<paddle::platform::GPUPlace, float, ops::MinFunctor>);
+REGISTER_OP_GPU_KERNEL(reduce_min_grad,
+                       ops::ReduceGradKernel<paddle::platform::GPUPlace, float,
+                                             ops::MaxOrMinGradFunctor>);
\ No newline at end of file
--- a/paddle/operators/reduce_op.h
+++ b/paddle/operators/reduce_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#pragma once
+#include "paddle/operators/math/math_function.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+// Short local names for the framework tensor and shape types.
+using Tensor = framework::Tensor;
+using DDim = framework::DDim;
+// Alias of framework::EigenTensor taking the same template parameters; the
+// defaults declared here are row-major layout and Eigen::DenseIndex indices.
+template <typename T, size_t D, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
+// Forward functor of reduce_sum: evaluates `in.sum(dim)` on device `place`
+// and stores the result in `out`.
+struct SumFunctor {
+  template <class Device, class Input, class Output, class Axes>
+  void operator()(const Device& place, Input& in, Output& out,
+                  const Axes& dim) {
+    auto summed = in.sum(dim);
+    out.device(place) = summed;
+  }
+};
+// Backward functor of reduce_sum: the incoming gradient is broadcast by the
+// per-dimension factors in `dim` and written to `in_grad`. `in`, `out` and
+// `size` belong to the common gradient-functor signature and are not read.
+struct SumGradFunctor {
+  template <class Device, class Grad, class ConstInput, class Reduced,
+            class Bcast>
+  void operator()(const Device& place, ConstInput& in, Grad& in_grad,
+                  Reduced& out, Reduced& out_grad, const Bcast& dim,
+                  int size) {
+    auto expanded = out_grad.broadcast(dim);
+    in_grad.device(place) = expanded;
+  }
+};
+// Forward functor of reduce_mean: evaluates `in.mean(dim)` on device `place`
+// and stores the result in `out`.
+struct MeanFunctor {
+  template <class Device, class Input, class Output, class Axes>
+  void operator()(const Device& place, Input& in, Output& out,
+                  const Axes& dim) {
+    auto averaged = in.mean(dim);
+    out.device(place) = averaged;
+  }
+};
+// Backward functor of reduce_mean: broadcasts the incoming gradient by `dim`
+// and divides it element-wise by `size` (ReduceGradKernel passes the extent of
+// the reduced dimension). `in` and `out` are not read.
+struct MeanGradFunctor {
+  template <class Device, class Grad, class ConstInput, class Reduced,
+            class Bcast>
+  void operator()(const Device& place, ConstInput& in, Grad& in_grad,
+                  Reduced& out, Reduced& out_grad, const Bcast& dim,
+                  int size) {
+    auto expanded = out_grad.broadcast(dim);
+    auto divisor = in_grad.constant(size);
+    in_grad.device(place) = expanded / divisor;
+  }
+};
+// Forward functor of reduce_max: evaluates `in.maximum(dim)` on device `place`
+// and stores the result in `out`.
+struct MaxFunctor {
+  template <class Device, class Input, class Output, class Axes>
+  void operator()(const Device& place, Input& in, Output& out,
+                  const Axes& dim) {
+    auto largest = in.maximum(dim);
+    out.device(place) = largest;
+  }
+};
+// Forward functor of reduce_min: evaluates `in.minimum(dim)` on device `place`
+// and stores the result in `out`.
+struct MinFunctor {
+  template <class Device, class Input, class Output, class Axes>
+  void operator()(const Device& place, Input& in, Output& out,
+                  const Axes& dim) {
+    auto smallest = in.minimum(dim);
+    out.device(place) = smallest;
+  }
+};
+// Backward functor shared by reduce_max and reduce_min. The broadcast incoming
+// gradient is passed through at every position where `in` equals the broadcast
+// reduction result `out` and zeroed elsewhere, so tied elements each receive
+// the full gradient. `size` is not read.
+struct MaxOrMinGradFunctor {
+  template <class Device, class Grad, class ConstInput, class Reduced,
+            class Bcast>
+  void operator()(const Device& place, ConstInput& in, Grad& in_grad,
+                  Reduced& out, Reduced& out_grad, const Bcast& dim,
+                  int size) {
+    auto pass = in_grad.constant(1);
+    auto block = in_grad.constant(0);
+    // True wherever the input element is the extremum of its reduced slice.
+    auto hits = in == out.broadcast(dim);
+    in_grad.device(place) = out_grad.broadcast(dim) * hits.select(pass, block);
+  }
+};
+// Forward kernel shared by the reduce ops: applies Functor to Input(X) along
+// the attribute `dim` and stores the result in Output(Out).
+//
+// Template parameters:
+//   Place   - device tag handed to ExecutionContext::GetEigenDevice.
+//   T       - element type of X and Out.
+//   Functor - forward functor invoked as functor(place, x, out, reduce_dim).
+template <typename Place, typename T, typename Functor>
+class ReduceKernel : public framework::OpKernel {
+ public:
+  // EigenTensor takes the rank as a template argument, so the runtime rank of
+  // X is mapped onto one ReduceCompute instantiation per supported rank.
+  void Compute(const framework::ExecutionContext& context) const override {
+    int rank = context.Input<Tensor>("X")->dims().size();
+    switch (rank) {
+      case 1:
+        ReduceCompute<1>(context);
+        break;
+      case 2:
+        ReduceCompute<2>(context);
+        break;
+      case 3:
+        ReduceCompute<3>(context);
+        break;
+      case 4:
+        ReduceCompute<4>(context);
+        break;
+      case 5:
+        ReduceCompute<5>(context);
+        break;
+      case 6:
+        ReduceCompute<6>(context);
+        break;
+      default:
+        // Fail loudly instead of returning with Out never allocated.
+        PADDLE_ENFORCE(rank >= 1 && rank <= 6,
+                       "Tensors with rank from 1 to 6 are supported");
+    }
+  }
+ private:
+  // Runs the reduction for a rank-D input.
+  template <size_t D>
+  void ReduceCompute(const framework::ExecutionContext& context) const {
+    auto* input = context.Input<Tensor>("X");
+    auto* output = context.Output<Tensor>("Out");
+    output->mutable_data<T>(context.GetPlace());
+    auto x = EigenTensor<T, D>::From(*input);
+    auto x_rank = static_cast<int>(x.dimensions().size());
+    int dim = context.Attr<int>("dim");
+    // Negative values count from the last dimension.
+    if (dim < 0) dim = x_rank + dim;
+    auto reduce_dim = Eigen::array<int, 1>({{dim}});
+    // Functor writes into a view of Out that has rank D - 1 (rank 1 when
+    // D == 1). The view's shape is derived from X rather than from Out's own
+    // dims, so it is correct whether Out keeps the reduced dimension with
+    // extent 1 or drops it; both layouts hold the same number of elements.
+    auto dims_vector = vectorize(input->dims());
+    if (x_rank > 1) {
+      dims_vector.erase(dims_vector.begin() + dim);
+    } else {
+      dims_vector[dim] = 1;
+    }
+    DDim dims = framework::make_ddim(dims_vector);
+    auto out = EigenTensor < T, D == 1 ? 1 : (D - 1) > ::From(*output, dims);
+    auto& place = context.GetEigenDevice<Place>();
+    Functor functor;
+    functor(place, x, out, reduce_dim);
+  }
+};
+// Backward kernel shared by the reduce ops: lets Functor compute X@GRAD from
+// X, Out and Out@GRAD along the attribute `dim`.
+//
+// Template parameters:
+//   Place   - device tag handed to ExecutionContext::GetEigenDevice.
+//   T       - element type of all tensors involved.
+//   Functor - gradient functor invoked as
+//             functor(place, x, x_grad, out, out_grad, broadcast_dim, size).
+template <typename Place, typename T, typename Functor>
+class ReduceGradKernel : public framework::OpKernel {
+ public:
+  // EigenTensor takes the rank as a template argument, so the runtime rank of
+  // X is mapped onto one ReduceCompute instantiation per supported rank.
+  void Compute(const framework::ExecutionContext& context) const override {
+    int rank = context.Input<Tensor>("X")->dims().size();
+    switch (rank) {
+      case 1:
+        ReduceCompute<1>(context);
+        break;
+      case 2:
+        ReduceCompute<2>(context);
+        break;
+      case 3:
+        ReduceCompute<3>(context);
+        break;
+      case 4:
+        ReduceCompute<4>(context);
+        break;
+      case 5:
+        ReduceCompute<5>(context);
+        break;
+      case 6:
+        ReduceCompute<6>(context);
+        break;
+      default:
+        // Fail loudly instead of returning with X@GRAD never allocated.
+        PADDLE_ENFORCE(rank >= 1 && rank <= 6,
+                       "Tensors with rank from 1 to 6 are supported");
+    }
+  }
+ private:
+  // Computes the gradient for a rank-D input. Does nothing when X@GRAD is not
+  // requested.
+  template <size_t D>
+  void ReduceCompute(const framework::ExecutionContext& context) const {
+    auto* input0 = context.Input<Tensor>("X");
+    auto* input1 = context.Input<Tensor>("Out");
+    auto* input2 = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* output = context.Output<Tensor>(framework::GradVarName("X"));
+    if (output == nullptr) return;
+    output->mutable_data<T>(context.GetPlace());
+    auto x = EigenTensor<T, D>::From(*input0);
+    auto x_grad = EigenTensor<T, D>::From(*output);
+    auto x_rank = static_cast<int>(x.dimensions().size());
+    int dim = context.Attr<int>("dim");
+    // Negative values count from the last dimension.
+    if (dim < 0) dim = x_rank + dim;
+    // View Out and Out@GRAD with the shape of X whose reduced dimension is 1,
+    // i.e. as rank-D tensors that can be broadcast back to X's shape.
+    DDim dims = input0->dims();
+    dims[dim] = 1;
+    auto x_reduce = EigenTensor<T, D>::From(*input1, dims);
+    auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims);
+    // Broadcast factor: 1 everywhere except the reduced dimension, which is
+    // repeated to its original extent.
+    Eigen::array<int, D> broadcast_dim;
+    for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
+    broadcast_dim[dim] = input0->dims()[dim];
+    auto& place = context.GetEigenDevice<Place>();
+    Functor functor;
+    functor(place, x, x_grad, x_reduce, x_reduce_grad, broadcast_dim,
+            broadcast_dim[dim]);
+  }
+};
+// Gradient kernel for reductions that EigenTensor does not support. It walks
+// the raw buffers element by element, treating them as contiguous with the
+// last dimension varying fastest, and calls
+//   functor(x_ptr, x_grad_ptr, out_ptr, out_grad_ptr, reduced_extent)
+// once per element of X. Does nothing when X@GRAD is not requested.
+template <typename T, typename Functor>
+class ReduceGradEigenFreeKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Input<Tensor>("Out");
+    auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
+    if (x_grad == nullptr) return;
+    DDim dims = x->dims();
+    int rank = dims.size();
+    int dim = static_cast<int>(context.Attr<int>("dim"));
+    // Negative values count from the last dimension.
+    if (dim < 0) dim = rank + dim;
+    auto* x_data = x->data<T>();
+    auto* x_grad_data = x_grad->mutable_data<T>(context.GetPlace());
+    auto* out_data = out->data<T>();
+    auto* out_grad_data = out_grad->data<T>();
+    // Split the shape around the reduced dimension: [outer, mid, inner].
+    int outer_count = 1;
+    for (int d = 0; d < dim; ++d) outer_count *= dims[d];
+    int mid_count = dims[dim];
+    int inner_count = 1;
+    for (int d = dim + 1; d < rank; ++d) inner_count *= dims[d];
+    Functor functor;
+    for (int outer = 0; outer < outer_count; ++outer) {
+      for (int inner = 0; inner < inner_count; ++inner) {
+        // Position of this (outer, inner) pair in the reduced tensors.
+        int reduced_pos = inner_count * outer + inner;
+        for (int mid = 0; mid < mid_count; ++mid) {
+          // Position of the (outer, mid, inner) element in X / X@GRAD.
+          int raw_pos =
+              (inner_count * mid_count) * outer + inner_count * mid + inner;
+          functor(x_data + raw_pos, x_grad_data + raw_pos,
+                  out_data + reduced_pos, out_grad_data + reduced_pos,
+                  mid_count);
+        }
+      }
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/python/paddle/v2/framework/tests/test_reduce_op.py
+++ b/python/paddle/v2/framework/tests/test_reduce_op.py
+import unittest
+import numpy as np
+from gradient_checker import GradientChecker, create_op
+from op_test_util import OpTestMeta
+from paddle.v2.framework.op import Operator
+class TestSumOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "reduce_sum"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.attrs = {'dim': -2}
+        out = self.inputs['X'].sum(axis=self.attrs['dim'])
+        self.outputs = {'Out': out}
+class TestSumGradOp(GradientChecker):
+    def test_normal(self):
+        op = Operator("reduce_sum", X="X", Out="Out", dim=-2)
+        # use small size to decrease the error of numerical calculation
+        inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.check_grad(op, inputs, set(["X"]), "Out")
+    def test_1d_tensor(self):
+        op = Operator("reduce_sum", X="X", Out="Out", dim=0)
+        # use small size to decrease the error of numerical calculation
+        inputs = {'X': np.random.random(10).astype("float32")}
+        self.check_grad(op, inputs, set(["X"]), "Out")
+class TestKeepdimSumOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "reduce_sum"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.attrs = {'dim': -2}
+        out = self.inputs['X'].sum(axis=self.attrs['dim'], keepdims=True)
+        self.outputs = {'Out': out}
+class TestMeanOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "reduce_mean"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.attrs = {'dim': -1}
+        out = self.inputs['X'].mean(axis=self.attrs['dim'])
+        self.outputs = {'Out': out}
+class TestMeanGradOp(GradientChecker):
+    def test_normal(self):
+        op = Operator("reduce_mean", X="X", Out="Out", dim=-2)
+        # use small size to decrease the error of numerical calculation
+        inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.check_grad(op, inputs, set(["X"]), "Out")
+    def test_1d_tensor(self):
+        op = Operator("reduce_mean", X="X", Out="Out", dim=0)
+        # use small size to decrease the error of numerical calculation
+        inputs = {'X': np.random.random(10).astype("float32")}
+        self.check_grad(op, inputs, set(["X"]), "Out")
+class TestMaxOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "reduce_max"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.attrs = {'dim': -1}
+        out = self.inputs['X'].max(axis=self.attrs['dim'])
+        self.outputs = {'Out': out}
+class TestMinOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "reduce_max"
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype("float32")}
+        self.attrs = {'dim': -2}
+        out = self.inputs['X'].min(axis=self.attrs['dim'])
+        self.outputs = {'Out': out}
+# Run every test case in this module when the file is executed directly.
+if __name__ == '__main__':
+    unittest.main()