diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 0a5dd49bb48c25f268aa273314f92c092305664a..b0524a507bacec6768424045e58bf91305de2d08 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1253,9 +1253,9 @@ def pooling_layer(input,
 
     If stride > 0, this layer slides a window whose size is determined by stride,
     and return the pooling value of the window as the output. Thus, a long sequence
-    will be shorten. 
-    
-    The parameter stride specifies the intervals at which to apply the pooling 
+    will be shortened.
+
+    The parameter stride specifies the intervals at which to apply the pooling
     operation. Note that for sequence with sub-sequence, the default value
     of stride is -1.
 
@@ -4805,6 +4805,20 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
     So groups should be larger than 1, and the num of channels should be able
     to devided by groups.
 
+    .. math::
+
+       y_{si+j} = \max_k x_{gsi + sk + j}
+
+       g = groups
+
+       s = input.size / \mathrm{num\_channels}
+
+       0 \le i < \mathrm{num\_channels} / groups
+
+       0 \le j < s
+
+       0 \le k < groups
+
     Please refer to Paper:
       - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
       - Multi-digit Number Recognition from Street View \