From 8b3bf28c6b5da73d919b0414361473bee638f414 Mon Sep 17 00:00:00 2001
From: guosheng
Date: Thu, 21 Sep 2017 11:12:32 +0800
Subject: [PATCH] Refine reduce_op and follow comments

---
 paddle/operators/CMakeLists.txt |   7 ++
 paddle/operators/reduce_op.cc   | 147 ++++++++++++++------------------
 paddle/operators/reduce_op.h    |  63 +++++++-------
 3 files changed, 103 insertions(+), 114 deletions(-)

diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index f8b0bce6815..eec0d0b5958 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -61,6 +61,13 @@ function(op_library TARGET)
     # It's enough to just adding one operator to pybind
     file(APPEND ${pybind_file} "USE_OP(sigmoid);\n")
   endif()
+
+  # reduce_op contains several operators
+  if ("${TARGET}" STREQUAL "reduce_op")
+    set(pybind_flag 1)
+    # It's enough to just adding one operator to pybind
+    file(APPEND ${pybind_file} "USE_OP(reduce_sum);\n")
+  endif()
 
   # pybind USE_NO_KERNEL_OP
   file(READ ${TARGET}.cc TARGET_CONTENT)
diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc
index 20e6319730c..89f54fe74b6 100644
--- a/paddle/operators/reduce_op.cc
+++ b/paddle/operators/reduce_op.cc
@@ -18,7 +18,7 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
-using framework::DDim;
+using framework::LoDTensor;
 
 class ReduceOp : public framework::OperatorWithKernel {
  public:
@@ -26,18 +26,19 @@ class ReduceOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
+                            "Input(X) of ReduceOp should not be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"),
+                            "Output(Out) of ReduceOp should not be null.");
     auto x_dims = ctx.Input<Tensor>("X")->dims();
     auto x_rank = x_dims.size();
-    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported");
+    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported.");
     int dim = ctx.Attr<int>("dim");
     if (dim < 0) dim = x_rank + dim;
     PADDLE_ENFORCE_LT(
         dim, x_rank,
-        "The dim should be in the range [-rank(input), rank(input))");
-    PADDLE_ENFORCE_GE(ctx.Attr<int>("keep_dim"), 0, "keep_dim must be 0 or 1");
-    PADDLE_ENFORCE_LE(ctx.Attr<int>("keep_dim"), 1, "keep_dim must be 0 or 1");
-    bool keep_dim = ctx.Attr<int>("keep_dim") == 1;
+        "The dim should be in the range [-rank(input), rank(input)).");
+    bool keep_dim = ctx.Attr<bool>("keep_dim");
     auto dims_vector = vectorize(x_dims);
     if (keep_dim || x_rank == 1) {
       dims_vector[dim] = 1;
@@ -45,7 +46,7 @@ class ReduceOp : public framework::OperatorWithKernel {
       dims_vector.erase(dims_vector.begin() + dim);
     }
     auto out_dims = framework::make_ddim(dims_vector);
-    ctx.Output<Tensor>("Out")->Resize(out_dims);
+    ctx.Output<framework::LoDTensor>("Out")->Resize(out_dims);
   }
 };
 
@@ -55,119 +56,101 @@ class ReduceGradOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null.");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
-                            "Input(Out@GRAD) should not be null");
+                            "Input(Out@GRAD) should not be null.");
     auto x_dims = ctx.Input<Tensor>("X")->dims();
     auto x_rank = x_dims.size();
-    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported");
+    PADDLE_ENFORCE_LE(x_rank, 6, "Tensors with rank at most 6 are supported.");
supported."); int dim = ctx.Attr("dim"); if (dim < 0) dim = x_rank + dim; PADDLE_ENFORCE_LT( dim, x_rank, - "The dim should be in the range [-rank(input), rank(input))"); - auto *x_grad = ctx.Output(framework::GradVarName("X")); + "The dim should be in the range [-rank(input), rank(input))."); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); if (x_grad) x_grad->Resize(x_dims); } }; -class ReduceSumOpMaker : public framework::OpProtoAndCheckerMaker { +class ReduceOpMaker : public framework::OpProtoAndCheckerMaker { public: - ReduceSumOpMaker(framework::OpProto *proto, - framework::OpAttrChecker *op_checker) + ReduceOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput( "X", "(Tensor) The input tensor. Tensors with rank at most 6 are supported"); AddOutput("Out", "(Tensor) The result tensor."); - AddComment(R"DOC( -ReduceMean operator computes the sum of input tensor along the given dimension. -The result tensor has 1 fewer dimension than the input unless `keep_dim` is true. -)DOC"); AddAttr("dim", "(int, default 0) The dimension to reduce. " "Must be in the range [-rank(input), rank(input))") .SetDefault(0); - AddAttr( - "keep_dim", - "(int, default 0) " - "Must be 0 or 1. If 1, retain the reduced dimension with length 1.") - .SetDefault(0); + AddAttr("keep_dim", + "(bool, default false) " + "If true, retain the reduced dimension with length 1.") + .SetDefault(false); + comment_ = R"DOC( +{ReduceOP} operator computes the {reduce} of input tensor along the given dimension. +The result tensor has 1 fewer dimension than the input unless `keep_dim` is true. +)DOC"; + AddComment(comment_); + } + + protected: + std::string comment_; + + void Replace(std::string &src, std::string from, std::string to) { + std::size_t len_from = std::strlen(from.c_str()); + std::size_t len_to = std::strlen(to.c_str()); + for (std::size_t pos = src.find(from); pos != std::string::npos; + pos = src.find(from, pos + len_to)) { + src.replace(pos, len_from, to); + } + } + + void SetComment(std::string name, std::string op) { + Replace(comment_, "{ReduceOP}", name); + Replace(comment_, "{reduce}", op); } }; -class ReduceMeanOpMaker : public framework::OpProtoAndCheckerMaker { +class ReduceSumOpMaker : public ReduceOpMaker { + public: + ReduceSumOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : ReduceOpMaker(proto, op_checker) { + SetComment("ReduceSum", "sum"); + AddComment(comment_); + } +}; + +class ReduceMeanOpMaker : public ReduceOpMaker { public: ReduceMeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "X", - "(Tensor) The input tensor. Tensors with rank at most 6 are supported"); - AddOutput("Out", "(Tensor) The result tensor."); - AddComment(R"DOC( -ReduceMean operator computes the mean of input tensor along the given dimension. -The result tensor has 1 fewer dimension than the input unless `keep_dim` is true. -)DOC"); - AddAttr("dim", - "(int, default 0) The dimension to reduce. " - "Must be in the range [-rank(input), rank(input))") - .SetDefault(0); - AddAttr( - "keep_dim", - "(int, default 0) " - "Must be 0 or 1. 
-        .SetDefault(0);
+      : ReduceOpMaker(proto, op_checker) {
+    SetComment("ReduceMean", "mean");
+    AddComment(comment_);
   }
 };
 
-class ReduceMaxOpMaker : public framework::OpProtoAndCheckerMaker {
+class ReduceMaxOpMaker : public ReduceOpMaker {
  public:
   ReduceMaxOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput(
-        "X",
-        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
-    AddOutput("Out", "(Tensor) The result tensor.");
-    AddComment(R"DOC(
-ReduceMax operator computes the maximum of input tensor along the given dimension.
-The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
-)DOC");
-    AddAttr<int>("dim",
-                 "(int, default 0) The dimension to reduce. "
-                 "Must be in the range [-rank(input), rank(input))")
-        .SetDefault(0);
-    AddAttr<int>(
-        "keep_dim",
-        "(int, default 0) "
-        "Must be 0 or 1. If 1, retain the reduced dimension with length 1.")
-        .SetDefault(0);
+      : ReduceOpMaker(proto, op_checker) {
+    SetComment("ReduceMax", "max");
+    AddComment(comment_);
   }
 };
 
-class ReduceMinOpMaker : public framework::OpProtoAndCheckerMaker {
+class ReduceMinOpMaker : public ReduceOpMaker {
  public:
   ReduceMinOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
-      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput(
-        "X",
-        "(Tensor) The input tensor. Tensors with rank at most 6 are supported");
-    AddOutput("Out", "(Tensor) The result tensor.");
-    AddComment(R"DOC(
-ReduceMin operator computes the minimum of input tensor along the given dimension.
-The result tensor has 1 fewer dimension than the input unless `keep_dim` is true.
-)DOC");
-    AddAttr<int>("dim",
-                 "(int, default 0) The dimension to reduce. "
-                 "Must be in the range [-rank(input), rank(input))")
-        .SetDefault(0);
-    AddAttr<int>(
-        "keep_dim",
-        "(int, default 0) "
-        "Must be 0 or 1. If 1, retain the reduced dimension with length 1.")
-        .SetDefault(0);
+      : ReduceOpMaker(proto, op_checker) {
+    SetComment("ReduceMin", "min");
+    AddComment(comment_);
   }
 };
 
diff --git a/paddle/operators/reduce_op.h b/paddle/operators/reduce_op.h
index f0d4e1f95c2..972bd7bd464 100644
--- a/paddle/operators/reduce_op.h
+++ b/paddle/operators/reduce_op.h
@@ -27,61 +27,60 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
 
 struct SumFunctor {
-  template <typename Place, typename In, typename Out, typename Dim>
-  void operator()(const Place& place, In& in, Out& out, const Dim& dim) {
-    out.device(place) = in.sum(dim);
+  template <typename Place, typename X, typename Y, typename Dim>
+  void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
+    y.device(place) = x.sum(dim);
   }
 };
 
 struct SumGradFunctor {
-  template <typename Place, typename In_Const, typename In, typename Out,
-            typename Dim>
-  void operator()(const Place& place, In_Const& in, In& in_grad, Out& out,
-                  Out& out_grad, const Dim& dim, int size) {
-    in_grad.device(place) = out_grad.broadcast(dim);
+  template <typename Place, typename X, typename Y, typename DX, typename DY,
+            typename Dim>
+  void operator()(const Place& place, X& x, Y& y, DX& dx, DY& dy,
+                  const Dim& dim, int size) {
+    dx.device(place) = dy.broadcast(dim);
   }
 };
 
 struct MeanFunctor {
-  template <typename Place, typename In, typename Out, typename Dim>
-  void operator()(const Place& place, In& in, Out& out, const Dim& dim) {
-    out.device(place) = in.mean(dim);
+  template <typename Place, typename X, typename Y, typename Dim>
+  void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
+    y.device(place) = x.mean(dim);
  }
 };
 
 struct MeanGradFunctor {
-  template <typename Place, typename In_Const, typename In, typename Out,
-            typename Dim>
-  void operator()(const Place& place, In_Const& in, In& in_grad, Out& out,
-                  Out& out_grad, const Dim& dim, int size) {
-    in_grad.device(place) = out_grad.broadcast(dim) / in_grad.constant(size);
+  template <typename Place, typename X, typename Y, typename DX, typename DY,
+            typename Dim>
+  void operator()(const Place& place, X& x, Y& y, DX& dx, DY& dy,
+                  const Dim& dim, int size) {
+    dx.device(place) = dy.broadcast(dim) / dx.constant(size);
   }
 };
 
 struct MaxFunctor {
-  template <typename Place, typename In, typename Out, typename Dim>
-  void operator()(const Place& place, In& in, Out& out, const Dim& dim) {
-    out.device(place) = in.maximum(dim);
+  template <typename Place, typename X, typename Y, typename Dim>
+  void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
+    y.device(place) = x.maximum(dim);
   }
 };
 
 struct MinFunctor {
-  template <typename Place, typename In, typename Out, typename Dim>
-  void operator()(const Place& place, In& in, Out& out, const Dim& dim) {
-    out.device(place) = in.minimum(dim);
+  template <typename Place, typename X, typename Y, typename Dim>
+  void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
+    y.device(place) = x.minimum(dim);
   }
 };
 
 struct MaxOrMinGradFunctor {
-  template <typename Place, typename In_Const, typename In, typename Out,
-            typename Dim>
-  void operator()(const Place& place, In_Const& in, In& in_grad, Out& out,
-                  Out& out_grad, const Dim& dim, int size) {
-    auto equals = in == out.broadcast(dim);
-    auto ones = in_grad.constant(1);
-    auto zeros = in_grad.constant(0);
-    in_grad.device(place) =
-        out_grad.broadcast(dim) * equals.select(ones, zeros);
+  template <typename Place, typename X, typename Y, typename DX, typename DY,
+            typename Dim>
+  void operator()(const Place& place, X& x, Y& y, DX& dx, DY& dy,
+                  const Dim& dim, int size) {
+    auto equals = x == y.broadcast(dim);
+    auto ones = dx.constant(1);
+    auto zeros = dx.constant(0);
+    dx.device(place) = dy.broadcast(dim) * equals.select(ones, zeros);
   }
 };
 
@@ -125,7 +124,7 @@ class ReduceKernel : public framework::OpKernel {
     if (dim < 0) dim = x_rank + dim;
     auto reduce_dim = Eigen::array<int, 1>({{dim}});
     // construct the squeezed output tensor
-    bool keep_dim = context.Attr<int>("keep_dim") == 1;
+    bool keep_dim = context.Attr<bool>("keep_dim");
     DDim dims = output->dims();
     auto dims_vector = vectorize(dims);
     if (keep_dim && x_rank > 1) {
@@ -191,7 +190,7 @@ class ReduceGradKernel : public framework::OpKernel {
       braodcast_dim[dim] = input0->dims()[dim];
       auto& place = context.GetEigenDevice<Place>();
       Functor functor;
-      functor(place, x, x_grad, x_reduce, x_reduce_grad, braodcast_dim,
+      functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim,
              braodcast_dim[dim]);
     }
   }
@@ -235,8 +234,8 @@ class ReduceGradEigenFreeKernel : public framework::OpKernel {
         out_offset = inner_count * i + j;
         for (int k = 0; k < mid_count; ++k) {
           x_offset = (inner_count * mid_count) * i + inner_count * k + j;
-          functor(x_data + x_offset, x_grad_data + x_offset,
-                  out_data + out_offset, out_grad_data + out_offset,
+          functor(x_data + x_offset, out_data + out_offset,
+                  x_grad_data + x_offset, out_grad_data + out_offset,
                   mid_count);
         }
       }
--
GitLab
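
Review note: the shape arithmetic in ReduceOp::InferShape (the `keep_dim || x_rank == 1` branch) can be checked in isolation. The sketch below is not part of the patch; a plain std::vector<int> stands in for framework::DDim, and `dim` is assumed to already be normalized to a non-negative index, as the operator does before this point.

    #include <cassert>
    #include <vector>

    // Mirrors ReduceOp::InferShape's output-shape computation: keep the reduced
    // axis with length 1 when keep_dim is set (or the input is rank 1),
    // otherwise drop the axis entirely.
    std::vector<int> ReducedDims(std::vector<int> dims, int dim, bool keep_dim) {
      if (keep_dim || dims.size() == 1) {
        dims[dim] = 1;
      } else {
        dims.erase(dims.begin() + dim);
      }
      return dims;
    }

    int main() {
      assert((ReducedDims({2, 3, 4}, 1, false) == std::vector<int>{2, 4}));
      assert((ReducedDims({2, 3, 4}, 1, true) == std::vector<int>{2, 1, 4}));
      assert((ReducedDims({5}, 0, false) == std::vector<int>{1}));  // rank 1 is kept
      return 0;
    }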
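
Review note: the Replace/SetComment pair added to ReduceOpMaker is the new mechanism that lets the four makers share one doc template. Below is a minimal standalone sketch of the same substitution; it is not part of the patch, and it uses std::string::size() in place of std::strlen(from.c_str()), which is equivalent for these strings.

    #include <cstddef>
    #include <iostream>
    #include <string>

    // Substitute every occurrence of `from` in `src` with `to`, resuming the
    // search just past each replacement, as ReduceOpMaker::Replace does.
    void Replace(std::string &src, const std::string &from, const std::string &to) {
      for (std::size_t pos = src.find(from); pos != std::string::npos;
           pos = src.find(from, pos + to.size())) {
        src.replace(pos, from.size(), to);
      }
    }

    int main() {
      const char *ops[][2] = {{"ReduceSum", "sum"},
                              {"ReduceMean", "mean"},
                              {"ReduceMax", "max"},
                              {"ReduceMin", "min"}};
      for (auto &op : ops) {
        std::string comment =
            "{ReduceOP} operator computes the {reduce} of input tensor along "
            "the given dimension.\n";
        Replace(comment, "{ReduceOP}", op[0]);  // what SetComment(name, op) does
        Replace(comment, "{reduce}", op[1]);
        std::cout << comment;
      }
      return 0;
    }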
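
Review note: the renamed functor signatures in reduce_op.h can be exercised against plain Eigen, outside the framework. A minimal sketch, assuming Eigen's unsupported Tensor module is available; Eigen::DefaultDevice stands in for the device that ReduceKernel passes as `place`.

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    // Same shape as the patch's SumFunctor: reduce x over `dim` into y.
    struct SumFunctor {
      template <typename Place, typename X, typename Y, typename Dim>
      void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
        y.device(place) = x.sum(dim);
      }
    };

    int main() {
      Eigen::Tensor<float, 2> x(2, 3);
      x.setValues({{1, 2, 3}, {4, 5, 6}});
      Eigen::Tensor<float, 1> y(2);     // squeezed output: rank reduced by one
      Eigen::array<int, 1> dim({{1}});  // reduce along dimension 1
      Eigen::DefaultDevice place;
      SumFunctor()(place, x, y, dim);
      std::cout << y << "\n";           // prints the two row sums, 6 and 15
      return 0;
    }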