Commit 76fc1a82 authored by wanghaox

for code review 4

Parent 52f2366d
@@ -18,10 +18,7 @@ namespace paddle {
 namespace operators {
 namespace math {
-/*
- * All tensors are in NCHW format.
- * groups mustbe > 1
- */
+// All tensors are in NCHW format, and the groups must be greater than 1
 template <typename T>
 class MaxOutFunctor<platform::CPUPlace, T> {
  public:
@@ -44,7 +41,6 @@ class MaxOutFunctor<platform::CPUPlace, T> {
     for (int c = 0; c < output_channels; ++c) {
       int new_cindex = fea_size * c;
       for (int f = 0; f < fea_size; ++f) {
-        // T ele = maxout_process.initial();
         T ele = static_cast<T>(-FLT_MAX);
         for (int ph = 0; ph < groups; ++ph) {
           T x = input_data[(new_bindex + new_cindex) * groups
@@ -65,7 +61,7 @@ class MaxOutGradFunctor<platform::CPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
                   const framework::Tensor& input,
-                  framework::Tensor& input_grad,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad,
                   int groups) {
@@ -77,7 +73,7 @@ public:
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
+    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
     for (int i = 0; i < batch_size; ++i) {
       int blen = fea_size * output_channels * i;
......
@@ -112,7 +112,8 @@ template <typename T>
 class MaxOutGradFunctor<platform::GPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
-                  const framework::Tensor& input, framework::Tensor& input_grad,
+                  const framework::Tensor& input,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad,
                   int groups) {
@@ -127,7 +128,7 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
+    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
     int nthreads = output.numel();
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
......
@@ -38,7 +38,7 @@ class MaxOutGradFunctor {
  public:
   void operator()(const platform::DeviceContext& context,
                   const framework::Tensor& input,
-                  framework::Tensor& input_grad,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad, int groups);
 };
......
@@ -34,14 +34,13 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
              "width of feature.");
     AddAttr<int>(
         "groups",
-        R"DOC(The group number of input layer.
+        R"DOC(Specifies how many groups the input tensor will be split into
+              along the channel dimension. The number of output channels is
+              the number of input channels divided by groups.
         )DOC");
     AddComment(R"DOC(
-        - Input: NCHW.
-        - Output: The feature map size of output is the same as the input.
-          The output_channel is (input channel) / groups
-          So groups should be larger than 1, and the num of channels should be able
-          to be devided by groups.
+        Assume the input shape is (N, Ci, H, W). The output shape is
+        (N, Co, H, W), where Co = Ci / groups.
       math:
         y_{si+j} = \max_k x_{gsi + sk + j}
@@ -65,10 +64,10 @@ class MaxOutOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of maxoutOp"
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp "
                    "should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
-                   "Output(Out) of maxoutOp should not be null.");
+                   "Output(Out) of MaxoutOp should not be null.");
     auto in_x_dims = ctx->GetInputDim("X");
     int groups = ctx->Attrs().Get<int>("groups");
     // check groups > 1
......
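The attribute description and the formula y_{si+j} = \max_k x_{gsi + sk + j} above amount to taking a max over each group of `groups` consecutive channels, so the channel count shrinks by a factor of `groups`. A minimal NumPy sketch of that forward computation (the function name and sample shapes below are illustrative only and are not part of this patch; the test file's maxout_forward_naive is the authoritative reference):

import numpy as np

def maxout_naive(x, groups):
    # x: (N, C, H, W); C must be divisible by groups (illustrative helper).
    n, c, h, w = x.shape
    assert c % groups == 0
    # Group consecutive channels and take the max inside each group,
    # producing an output of shape (N, C // groups, H, W).
    return x.reshape(n, c // groups, groups, h, w).max(axis=2)

x = np.arange(2 * 6 * 1 * 1, dtype=np.float32).reshape(2, 6, 1, 1)
print(maxout_naive(x, groups=2).shape)  # (2, 3, 1, 1)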
@@ -12,7 +12,6 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
-#define EIGEN_USE_GPU
 #include "paddle/operators/maxout_op.h"
 namespace ops = paddle::operators;
......
@@ -31,9 +31,7 @@ class MaxOutKernel : public framework::OpKernel<T> {
     Tensor* out = context.Output<Tensor>("Out");
     int groups = context.template Attr<int>("groups");
-    paddle::operators::math::MaxOutFunctor<
-        Place, T>
-        maxout_forward;
+    math::MaxOutFunctor<Place, T> maxout_forward;
     maxout_forward(context.device_context(), *in_x, out, groups);
   }
 };
@@ -53,10 +51,9 @@ class MaxOutGradKernel : public framework::OpKernel<T> {
     if (in_x_grad) {
       in_x_grad->mutable_data<T>(context.GetPlace());
       zero(device_ctx, in_x_grad, static_cast<T>(0.0));
-      paddle::operators::math::MaxOutGradFunctor<Place, T>
-          maxout_backward;
-      maxout_backward(context.device_context(), *in_x, *in_x_grad, *out,
-                      *out_grad, groups);
+      math::MaxOutGradFunctor<Place, T> maxout_backward;
+      maxout_backward(context.device_context(), *in_x, in_x_grad, *out,
+                      *out_grad, groups);
     }
   }
 };
......
@@ -3,7 +3,7 @@ import numpy as np
 from op_test import OpTest
-def maxout_forward_naive(input, groups,num_channels):
+def maxout_forward_naive(input, groups):
     s0, s1, s2, s3 = input.shape
     return np.ndarray([s0, s1 / groups, groups, s2, s3], \
         buffer = input, dtype=input.dtype).max(axis=(2))
@@ -18,7 +18,7 @@ class TestMaxOutOp(OpTest):
             self.num_channels).astype("float32")
         self.inputs = {'X': input}
-        self.attrs = {'groups': self.groups, 'num_channels': self.num_channels}
+        self.attrs = {'groups': self.groups}
         self.outputs = {'Out': output.astype('float32')}
@@ -32,7 +32,6 @@ class TestMaxOutOp(OpTest):
         self.MaxOut_forward_naive = maxout_forward_naive
         self.shape = [100, 6, 2, 2]
         self.groups=2
-        self.num_channels=6
......