diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 0a5dd49bb48c25f268aa273314f92c092305664a..b0524a507bacec6768424045e58bf91305de2d08 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1253,9 +1253,9 @@ def pooling_layer(input, If stride > 0, this layer slides a window whose size is determined by stride, and return the pooling value of the window as the output. Thus, a long sequence - will be shorten. - - The parameter stride specifies the intervals at which to apply the pooling + will be shortened. + + The parameter stride specifies the intervals at which to apply the pooling operation. Note that for sequence with sub-sequence, the default value of stride is -1. @@ -4805,6 +4805,14 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): So groups should be larger than 1, and the num of channels should be able to devided by groups. + .. math:: + y_{si+j} = \max_k x_{gsi + sk + j} + g = groups + s = input.size / num_channels + 0 \le i < num_channels / groups + 0 \le j < s + 0 \le k < groups + Please refer to Paper: - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf - Multi-digit Number Recognition from Street View \