diff --git a/paddle/operators/math/maxouting.cc b/paddle/operators/math/maxouting.cc
index a4d46ccc98779986210942dfbfc0a43e123ef1bd..c8c1974f79a94563e930caebe63790bb1d5adc17 100644
--- a/paddle/operators/math/maxouting.cc
+++ b/paddle/operators/math/maxouting.cc
@@ -18,10 +18,7 @@ namespace paddle {
 namespace operators {
 namespace math {
 
-/*
- * All tensors are in NCHW format.
- * groups mustbe > 1
- */
+// All tensors are in NCHW format, and the groups must be greater than 1
 template <typename T>
 class MaxOutFunctor<platform::CPUPlace, T> {
  public:
@@ -44,7 +41,6 @@ class MaxOutFunctor<platform::CPUPlace, T> {
     for (int c = 0; c < output_channels; ++c) {
       int new_cindex = fea_size * c;
       for (int f = 0; f < fea_size; ++f) {
-        // T ele = maxout_process.initial();
         T ele = static_cast<T>(-FLT_MAX);
         for (int ph = 0; ph < groups; ++ph) {
           T x = input_data[(new_bindex + new_cindex) * groups
@@ -65,7 +61,7 @@ class MaxOutGradFunctor<platform::CPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
                   const framework::Tensor& input,
-                  framework::Tensor& input_grad,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad,
                   int groups) {
@@ -77,7 +73,7 @@ public:
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
+    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
 
     for (int i = 0; i < batch_size; ++i) {
       int blen = fea_size * output_channels * i;
diff --git a/paddle/operators/math/maxouting.cu b/paddle/operators/math/maxouting.cu.cc
similarity index 97%
rename from paddle/operators/math/maxouting.cu
rename to paddle/operators/math/maxouting.cu.cc
index 336a1bd8b5b0c93bab2c7cbe0e50ac0aa6005b23..3a0600fd8465e5183ded3775afb459f27e3da0bf 100644
--- a/paddle/operators/math/maxouting.cu
+++ b/paddle/operators/math/maxouting.cu.cc
@@ -112,7 +112,8 @@ template <typename T>
 class MaxOutGradFunctor<platform::GPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
-                  const framework::Tensor& input, framework::Tensor& input_grad,
+                  const framework::Tensor& input,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad,
                   int groups) {
@@ -127,7 +128,7 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
     const T* input_data = input.data<T>();
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
-    T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
+    T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
     int nthreads = output.numel();
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
diff --git a/paddle/operators/math/maxouting.h b/paddle/operators/math/maxouting.h
index 76a256add9ad89744c8d5c3cf8e6f264c5bb87d5..d4c9da38ab8f8d88ed461d805ae64a015db968c4 100644
--- a/paddle/operators/math/maxouting.h
+++ b/paddle/operators/math/maxouting.h
@@ -38,7 +38,7 @@ class MaxOutGradFunctor {
  public:
   void operator()(const platform::DeviceContext& context,
                   const framework::Tensor& input,
-                  framework::Tensor& input_grad,
+                  framework::Tensor * input_grad,
                   const framework::Tensor& output,
                   const framework::Tensor& output_grad, int groups);
 };
diff --git a/paddle/operators/maxout_op.cc b/paddle/operators/maxout_op.cc
index f9277518cc49c31861232b1eea80ae9b2fc80be3..95467f2e69093906980d075b6a41c5d2934dd5a2 100644
--- a/paddle/operators/maxout_op.cc
+++ b/paddle/operators/maxout_op.cc
@@ -34,14 +34,13 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker {
              "width of feature.");
     AddAttr<int>(
         "groups",
-        R"DOC(The group number of input layer.
+        R"DOC(Specifies how many groups the input tensor will be split into
+        in the channel dimension, so the number of output channels is the
+        number of input channels divided by groups.
         )DOC");
     AddComment(R"DOC(
-      - Input: NCHW.
-      - Output: The feature map size of output is the same as the input.
-        The output_channel is (input channel) / groups
-        So groups should be larger than 1, and the num of channels should be able
-        to be devided by groups.
+      Assume the input shape is (N, Ci, H, W).
+      The output shape is (N, Co, H, W). Then `Co = Ci / groups`.
 
       math:
         y_{si+j} = \max_k x_{gsi + sk + j}
@@ -65,10 +64,10 @@ class MaxOutOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of maxoutOp"
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of MaxoutOp"
                    "should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
-                   "Output(Out) of maxoutOp should not be null.");
+                   "Output(Out) of MaxoutOp should not be null.");
     auto in_x_dims = ctx->GetInputDim("X");
     int groups = ctx->Attrs().Get<int>("groups");
     // check groups > 1
diff --git a/paddle/operators/maxout_op.cu b/paddle/operators/maxout_op.cu.cc
similarity index 97%
rename from paddle/operators/maxout_op.cu
rename to paddle/operators/maxout_op.cu.cc
index 44a149b065d890af75997138fe602b1496b6527a..3e6debf69960be1a7ebd2c00505b31ec7484c40a 100644
--- a/paddle/operators/maxout_op.cu
+++ b/paddle/operators/maxout_op.cu.cc
@@ -12,7 +12,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
-#define EIGEN_USE_GPU
 #include "paddle/operators/maxout_op.h"
 
 namespace ops = paddle::operators;
 
diff --git a/paddle/operators/maxout_op.h b/paddle/operators/maxout_op.h
index 6c769838c35024c6d0a13c722c226dece5150897..c404cd16a9b2372ea4c6a17eb5ac82cf8f3bf27c 100644
--- a/paddle/operators/maxout_op.h
+++ b/paddle/operators/maxout_op.h
@@ -31,9 +31,7 @@ class MaxOutKernel : public framework::OpKernel<T> {
     Tensor* out = context.Output<Tensor>("Out");
     int groups = context.template Attr<int>("groups");
 
-    paddle::operators::math::MaxOutFunctor<
-        Place, T>
-        maxout_forward;
+    math::MaxOutFunctor<Place, T> maxout_forward;
     maxout_forward(context.device_context(), *in_x, out, groups);
   }
 };
@@ -53,10 +51,9 @@ class MaxOutGradKernel : public framework::OpKernel<T> {
     if (in_x_grad) {
       in_x_grad->mutable_data<T>(context.GetPlace());
       zero(device_ctx, in_x_grad, static_cast<T>(0.0));
-      paddle::operators::math::MaxOutGradFunctor<Place, T>
-          maxout_backward;
-      maxout_backward(context.device_context(), *in_x, *in_x_grad, *out,
-                      *out_grad, groups);
+      math::MaxOutGradFunctor<Place, T> maxout_backward;
+      maxout_backward(context.device_context(), *in_x, in_x_grad, *out,
+                      *out_grad, groups);
     }
   }
 };
diff --git a/python/paddle/v2/fluid/tests/test_maxout_op.py b/python/paddle/v2/fluid/tests/test_maxout_op.py
index a7c47108f114fd9dc45eeae64a0dd17d11ce2c0d..1416e13feb9371ca7ebe28af5c33a99d493858b9 100644
--- a/python/paddle/v2/fluid/tests/test_maxout_op.py
+++ b/python/paddle/v2/fluid/tests/test_maxout_op.py
@@ -3,7 +3,7 @@ import numpy as np
 from op_test import OpTest
 
 
-def maxout_forward_naive(input, groups,num_channels):
+def maxout_forward_naive(input, groups):
     s0, s1, s2, s3 = input.shape
     return np.ndarray([s0, s1 / groups, groups, s2, s3], \
         buffer = input, dtype=input.dtype).max(axis=(2))
@@ -18,7 +18,7 @@ class TestMaxOutOp(OpTest):
             self.num_channels).astype("float32")
 
         self.inputs = {'X': input}
-        self.attrs = {'groups': self.groups, 'num_channels': self.num_channels}
+        self.attrs = {'groups': self.groups}
 
         self.outputs = {'Out': output.astype('float32')}
@@ -32,7 +32,6 @@ class TestMaxOutOp(OpTest):
         self.MaxOut_forward_naive = maxout_forward_naive
         self.shape = [100, 6, 2, 2]
         self.groups=2
-        self.num_channels=6
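
For reference, here is a minimal standalone sketch (not part of the commit) of the semantics the updated attribute doc and test encode: the channel dimension is split into `groups` slices and reduced with an element-wise max, so `Co = Ci / groups`. It mirrors the updated `maxout_forward_naive` from `test_maxout_op.py`; the floor division and the shape assertion are illustrative additions.

```python
import numpy as np


def maxout_forward_naive(input, groups):
    # View (N, Ci, H, W) as (N, Ci // groups, groups, H, W) over the same
    # buffer, then take the max over the `groups` axis.
    s0, s1, s2, s3 = input.shape
    return np.ndarray([s0, s1 // groups, groups, s2, s3],
                      buffer=input, dtype=input.dtype).max(axis=2)


x = np.random.random([100, 6, 2, 2]).astype("float32")  # shape used by TestMaxOutOp
y = maxout_forward_naive(x, groups=2)
assert y.shape == (100, 3, 2, 2)  # Co = Ci / groups = 6 / 2 = 3
```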