pooling.py 48.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ...fluid.dygraph import layers
from ...fluid.layer_helper import LayerHelper
from .. import functional as F

# Public layer classes exported by this module.
__all__ = [
    'AvgPool1D',
    'AvgPool2D',
    'AvgPool3D',
    'MaxPool1D',
    'MaxPool2D',
    'MaxPool3D',
    'AdaptiveAvgPool1D',
    'AdaptiveAvgPool2D',
    'AdaptiveAvgPool3D',
    'AdaptiveMaxPool1D',
    'AdaptiveMaxPool2D',
    'AdaptiveMaxPool3D',
]


C
cnn 已提交
35
class AvgPool1D(layers.Layer):
    r"""
    This operation applies a 1D average pooling over an input signal composed
    of several input planes, based on the input, output_size, return_mask parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    The output value of the layer with input size (N, C, L),
    output (N, C, :math:`L_{out}`) and kernel_size ksize can be precisely described as
    For average pool1d:

    ..  math::

        Output(N_i, C_i, l) = \frac{Input[N_i, C_i, stride \times l:stride \times l+k]}{ksize}

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain an integer.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain an integer. Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
            4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        exclusive(bool, optional): Whether to exclude padding points in average pooling mode, default is `True`.
        ceil_mode(bool, optional): Whether to use the ceil function to calculate output height
            and width. If it is set to False, the floor function will be used. The default value is False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Returns:
        A callable object of AvgPool1D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ValueError: If `padding` is a list or tuple but its length greater than 1.
        ShapeError: If the input is not a 3-D tensor.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x(Tensor): The input tensor of avg pool1d operator, which is a 3-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of avg pool1d  operator, which is a 3-D tensor.
          The data type is same as input x.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
            AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
            pool_out = AvgPool1D(data)
            # pool_out shape: [1, 3, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 exclusive=True,
                 ceil_mode=False,
                 name=None):
        super(AvgPool1D, self).__init__()
        # Store the configuration; actual pooling is delegated to the
        # functional API at forward time.
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.name = name

    def forward(self, x):
        # Delegate to the functional implementation with the stored config.
        out = F.avg_pool1d(x, self.kernel_size, self.stride, self.padding,
                           self.exclusive, self.ceil_mode, self.name)
        return out

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={kernel_size}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

124

C
cnn 已提交
125
class AvgPool2D(layers.Layer):
    r"""
    This operation applies 2D average pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCHW format, where N is batch size, C is the number of channels,
    H is the height of the feature, and W is the width of the feature.

    Example:
        Input:
            X shape: :math:`(N, C, :math:`H_{in}`, :math:`W_{in}`)`
        Attr:
            kernel_size: ksize

        Output:
            Out shape: :math:`(N, C, :math:`H_{out}`, :math:`W_{out}`)`

        ..  math::

            Output(N_i, C_j, h, w)  = \frac{\sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
                Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)}{ksize[0] * ksize[1]}

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain two integers, (pool_size_Height, pool_size_Width).
            Otherwise, the pool kernel size will be a square of an int.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain two integers, (pool_stride_Height, pool_stride_Width).
            Otherwise, the pool stride size will be a square of an int.
            Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode(bool, optional): When True, will use `ceil` instead of `floor` to compute the output shape.
        exclusive(bool, optional): Whether to exclude padding points in average pooling
            mode, default is `True`.
        divisor_override(float, optional): If specified, it will be used as divisor, otherwise kernel_size will be
            used. Default None.
        data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`,
            `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of avg pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of avg pool2d  operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AvgPool2D.
    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.
    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            # avg pool2d
            input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
            AvgPool2D = nn.AvgPool2D(kernel_size=2,
                                stride=2, padding=0)
            output = AvgPool2D(input)
            # output.shape [1, 3, 16, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 ceil_mode=False,
                 exclusive=True,
                 divisor_override=None,
                 data_format="NCHW",
                 name=None):
        super(AvgPool2D, self).__init__()
        # Store the configuration; the pooling itself is performed by the
        # functional API in forward().
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.divisor = divisor_override
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        # Delegate to the functional implementation with the stored config.
        return F.avg_pool2d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            ceil_mode=self.ceil_mode,
            exclusive=self.exclusive,
            divisor_override=self.divisor,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

235

C
cnn 已提交
236
class AvgPool3D(layers.Layer):
    """
    This operation applies 3D average pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCDHW format, where N is batch size, C is the number of channels,
    H is the height of the feature,  D is the depth of the feature, and W is the width of the feature.

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If pool kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
            Otherwise, the pool stride size will be a cube of an int.
            Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode(bool, optional): When True, will use `ceil` instead of `floor` to compute the
            output shape. The default value is False.
        exclusive(bool, optional): Whether to exclude padding points in average pooling mode, default is True.
        divisor_override(int|float, optional): if specified, it will be used as divisor, otherwise kernel_size will
            be used. Default None.
        data_format(str, optional): The data format of the input and output data. An optional string from: `"NCDHW"`,
             `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
             `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.

    Returns:
        A callable object of AvgPool3D.
    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x(Tensor): The input tensor of avg pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of avg pool3d  operator, which is a 5-D tensor.
          The data type is same as input x.
    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            # avg pool3d
            input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
            AvgPool3D = nn.AvgPool3D(kernel_size=2,
                                   stride=2, padding=0)
            output = AvgPool3D(input)
            # output.shape [1, 2, 3, 16, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 ceil_mode=False,
                 exclusive=True,
                 divisor_override=None,
                 data_format="NCDHW",
                 name=None):
        super(AvgPool3D, self).__init__()
        # Store the configuration; the pooling itself is performed by the
        # functional API in forward().
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.divisor = divisor_override
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        # Delegate to the functional implementation with the stored config.
        return F.avg_pool3d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            ceil_mode=self.ceil_mode,
            exclusive=self.exclusive,
            divisor_override=self.divisor,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

333

C
cnn 已提交
334
class MaxPool1D(layers.Layer):
    r"""
    This operation applies 1D max pooling over input signal
    composed of several input planes based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCL format, where N is batch size, C is the number of channels,
    L is the length of the feature.

    The output value of the layer with input size (N, C, L),
    output (N, C, L_{out}) and kernel_size k can be precisely described as
    For max pool1d:

    ..  math::

        Output(N_i, C_i, l) =  max(Input[N_i, C_i, stride \times l:stride \times l+k])

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain an integer.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain an integer. Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An integer, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
            4. A list[int] or tuple(int) whose length is 2, It has the form [pad_before, pad_after].
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or(0,0).
            The default value is 0.
        return_mask(bool, optional): Whether return the max indices along with the outputs. default is `False`.
        ceil_mode(bool, optional): Whether to use the ceil function to calculate output height and width.
            False is the default. If it is set to False, the floor function will be used. Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.
    Returns:
        A callable object of MaxPool1D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ValueError: If `padding` is a list or tuple but its length greater than 1.
        ShapeError: If the input is not a 3-D.
        ShapeError: If the output's shape calculated is not greater than 0.


    Shape:
        - x(Tensor): The input tensor of max pool1d operator, which is a 3-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of max pool1d  operator, which is a 3-D tensor.
          The data type is same as input x.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
            MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
            pool_out = MaxPool1D(data)
            # pool_out shape: [1, 3, 16]

            MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
            pool_out, indices = MaxPool1D(data)
            # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 name=None):
        super(MaxPool1D, self).__init__()
        # Store the configuration; actual pooling is delegated to the
        # functional API at forward time.
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.return_mask = return_mask
        self.name = name

    def forward(self, input):
        # Delegate to the functional implementation; returns (out, indices)
        # when return_mask is True, otherwise just out.
        out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
                           self.return_mask, self.ceil_mode, self.name)
        return out

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={kernel_size}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

427

C
cnn 已提交
428
class MaxPool2D(layers.Layer):
    r"""
    This operation applies 2D max pooling over input feature based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCHW format, where N is batch size, C is the number of channels,
    H is the height of the feature, and W is the width of the feature.

    Example:
        - Input:
            X shape: :math:`(N, C, H_{in}, W_{in})`
        - Attr:
            kernel_size: ksize

        - Output:
            Out shape: :math:`(N, C, H_{out}, W_{out})`

        ..  math::

            Output(N_i, C_j, h, w) = \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1}
                Input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain two integers, (pool_size_Height, pool_size_Width).
            Otherwise, the pool kernel size will be a square of an int.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain two integers, (pool_stride_Height, pool_stride_Width).
            Otherwise, the pool stride size will be a square of an int.
            Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode(bool, optional): when True, will use `ceil` instead of `floor` to compute the output shape
        return_mask(bool, optional): Whether to return the max indices along with the outputs.
        data_format(str, optional): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Returns:
        A callable object of MaxPool2D.
    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x(Tensor): The input tensor of max pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of max pool2d  operator, which is a 4-D tensor.
          The data type is same as input x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            # max pool2d
            input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
            MaxPool2D = nn.MaxPool2D(kernel_size=2,
                                   stride=2, padding=0)
            output = MaxPool2D(input)
            # output.shape [1, 3, 16, 16]

            # for return_mask=True
            MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
            output, max_indices = MaxPool2D(input)
            # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 data_format="NCHW",
                 name=None):
        super(MaxPool2D, self).__init__()
        # Store the configuration; the pooling itself is performed by the
        # functional API in forward().
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.return_mask = return_mask
        self.ceil_mode = ceil_mode
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        # Delegate to the functional implementation; returns (out, indices)
        # when return_mask is True, otherwise just out.
        return F.max_pool2d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            return_mask=self.return_mask,
            ceil_mode=self.ceil_mode,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

537

C
cnn 已提交
538
class MaxPool3D(layers.Layer):
    """
    This operation applies 3D max pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCDHW format, where N is batch size, C is the number of channels,
    H is the height of the feature,  D is the depth of the feature, and W is the width of the feature.

    Parameters:
        kernel_size(int|list|tuple): The pool kernel size. If the kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride(int|list|tuple, optional): The pool stride size. If pool stride size is a tuple or list,
            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
            Otherwise, the pool stride size will be a cube of an int.
            Default None, then stride will be equal to the kernel_size.
        padding(str|int|list|tuple, optional): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode(bool, optional): When True, will use `ceil` instead of `floor` to compute the
            output shape. The default value is False.
        return_mask(bool, optional): Whether to return the max indices along with the outputs.
        data_format(str, optional): The data format of the input and output data. An optional string from: `"NCDHW"`,
            `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.


    Returns:
        A callable object of MaxPool3D.
    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x(Tensor): The input tensor of max pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of max pool3d  operator, which is a 5-D tensor.
          The data type is same as input x.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn
            import numpy as np

            # max pool3d
            input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
            MaxPool3D = nn.MaxPool3D(kernel_size=2,
                                   stride=2, padding=0)
            output = MaxPool3D(input)
            # output.shape [1, 2, 3, 16, 16]

            # for return_mask=True
            MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
            output, max_indices = MaxPool3D(input)
            # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 data_format="NCDHW",
                 name=None):
        super(MaxPool3D, self).__init__()
        # Store the configuration; the pooling itself is performed by the
        # functional API in forward().
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.return_mask = return_mask
        self.ceil_mode = ceil_mode
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        # Delegate to the functional implementation; returns (out, indices)
        # when return_mask is True, otherwise just out.
        return F.max_pool3d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            return_mask=self.return_mask,
            ceil_mode=self.ceil_mode,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        # Summary string shown in the layer's repr().
        return 'kernel_size={ksize}, stride={stride}, padding={padding}'.format(
            **self.__dict__)

635

C
cnn 已提交
636
class AdaptiveAvgPool1D(layers.Layer):
    r"""

    This operation applies a 1D adaptive average pooling over an input signal composed
    of several input planes, based on the input and output_size parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    For average adaptive pool1d:

    ..  math::

        lstart &= floor(i * L_{in} / L_{out})

        lend &= ceil((i + 1) * L_{in} / L_{out})

        Output(i) &= \frac{ \sum Input[lstart:lend]}{lend - lstart}

    Parameters:
        output_size(int): The target output size. It must be an integer.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Returns:
        A callable object of AdaptiveAvgPool1D.

    Raises:
        ValueError: 'output_size' should be an integer.

    Shape:
        - x(Tensor): 3-D tensor. The input tensor of adaptive avg pool1d operator, which is a 3-D tensor.
          The data type can be float32, float64.
        - output(Tensor): 3-D tensor. The output tensor of adaptive avg pool1d operator, which is a 3-D tensor.
          The data type is same as input x.

    Examples:
        .. code-block:: python

            # average adaptive pool1d
            # suppose input data in shape of [N, C, L], `output_size` is m or [m],
            # output shape is [N, C, m], adaptive pool divide L dimension
            # of input data into m grids averagely and performs poolings in each
            # grid to get output.
            # adaptive avg pool performs calculations as follow:
            #
            #     for i in range(m):
            #         lstart = floor(i * L / m)
            #         lend = ceil((i + 1) * L / m)
            #         output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
            #
            import paddle
            import paddle.nn as nn
            import numpy as np

            data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
            AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
            pool_out = AdaptiveAvgPool1D(data)
            # pool_out shape: [1, 3, 16]
    """

    def __init__(self, output_size, name=None):
        super(AdaptiveAvgPool1D, self).__init__()
        self.output_size = output_size
        self.name = name

    def forward(self, input):
        # Delegate to the functional API with the stored target length.
        return F.adaptive_avg_pool1d(input, self.output_size, self.name)

    def extra_repr(self):
        return 'output_size={}'.format(self.output_size)

708

C
cnn 已提交
709
class AdaptiveAvgPool2D(layers.Layer):
    r"""

    This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool2d:

    ..  math::

        hstart &= floor(i * H_{in} / H_{out})

        hend &= ceil((i + 1) * H_{in} / H_{out})

        wstart &= floor(j * W_{in} / W_{out})

        wend &= ceil((j + 1) * W_{in} / W_{out})

        Output(i ,j) &= \frac{\sum Input[hstart:hend, wstart:wend]}{(hend - hstart) * (wend - wstart)}


    Parameters:
        output_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain two element, (H, W). H and W can be either a int, or None which means
            the size will be the same as that of the input.
        data_format(str, optional): The data format of the input and output data. An optional string
            from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
            the order of: [batch_size, input_channels, input_height, input_width].
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool2D.

    Examples:
        .. code-block:: python

            # adaptive avg pool2d
            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            # of input data into m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive avg pool performs calculations as follow:
            #
            #     for i in range(m):
            #         for j in range(n):
            #             hstart = floor(i * H / m)
            #             hend = ceil((i + 1) * H / m)
            #             wstart = floor(j * W / n)
            #             wend = ceil((j + 1) * W / n)
            #             output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
            #
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 32, 32)
            x = paddle.to_tensor(input_data)
            # x.shape is [2, 3, 32, 32]
            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
            pool_out = adaptive_avg_pool(x = x)
            # pool_out.shape is [2, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCHW", name=None):
        super(AdaptiveAvgPool2D, self).__init__()
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        # Delegate to the functional API with the stored configuration.
        return F.adaptive_avg_pool2d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name)

    def extra_repr(self):
        return 'output_size={}'.format(self._output_size)

794

C
cnn 已提交
795
class AdaptiveAvgPool3D(layers.Layer):
    r"""

    This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool3d:

    ..  math::

        dstart &= floor(i * D_{in} / D_{out})

        dend &= ceil((i + 1) * D_{in} / D_{out})

        hstart &= floor(j * H_{in} / H_{out})

        hend &= ceil((j + 1) * H_{in} / H_{out})

        wstart &= floor(k * W_{in} / W_{out})

        wend &= ceil((k + 1) * W_{in} / W_{out})

        Output(i ,j, k) &= \frac{\sum Input[dstart:dend, hstart:hend, wstart:wend]}
            {(dend - dstart) * (hend - hstart) * (wend - wstart)}


    Parameters:
        output_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means
            the size will be the same as that of the input.
        data_format(str, optional): The data format of the input and output data. An optional string
            from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
            the order of: [batch_size, input_channels, input_depth, input_height, input_width].
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.
    Shape:
        - x(Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool3D.

    Examples:
        .. code-block:: python

            # adaptive avg pool3d
            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            # of input data into l * m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive avg pool performs calculations as follow:
            #
            #     for i in range(l):
            #         for j in range(m):
            #             for k in range(n):
            #                 dstart = floor(i * D / l)
            #                 dend = ceil((i + 1) * D / l)
            #                 hstart = floor(j * H / m)
            #                 hend = ceil((j + 1) * H / m)
            #                 wstart = floor(k * W / n)
            #                 wend = ceil((k + 1) * W / n)
            #                 output[:, :, i, j, k] =
            #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 8, 32, 32)
            x = paddle.to_tensor(input_data)
            # x.shape is [2, 3, 8, 32, 32]
            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
            pool_out = adaptive_avg_pool(x = x)
            # pool_out = [2, 3, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCDHW", name=None):
        super(AdaptiveAvgPool3D, self).__init__()
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        # Delegate to the functional API with the stored configuration.
        return F.adaptive_avg_pool3d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name)

    def extra_repr(self):
        return 'output_size={}'.format(self._output_size)

887

C
cnn 已提交
888
class AdaptiveMaxPool1D(layers.Layer):
    """

    This operation applies a 1D adaptive max pooling over an input signal composed
    of several input planes, based on the input, output_size, return_mask parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    For max adaptive pool1d:

    ..  math::

        lstart &= floor(i * L_{in} / L_{out})

        lend &= ceil((i + 1) * L_{in} / L_{out})

        Output(i) &= max(Input[lstart:lend])

    Parameters:
        output_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain one int.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along
            with outputs. It cannot be set in average pooling type. Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.
    Returns:
        A callable object of AdaptiveMaxPool1D.

    Raises:
        ValueError: 'output_size' should be an integer or list or tuple with length as 1.

    Shape:
        - x(Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor.
          The data type is same as input x.

    Examples:
        .. code-block:: python

            # max adaptive pool1d
            # suppose input data in shape of [N, C, L], `output_size` is m or [m],
            # output shape is [N, C, m], adaptive pool divide L dimension
            # of input data into m grids averagely and performs poolings in each
            # grid to get output.
            # adaptive max pool performs calculations as follow:
            #
            #     for i in range(m):
            #         lstart = floor(i * L / m)
            #         lend = ceil((i + 1) * L / m)
            #         output[:, :, i] = max(input[:, :, lstart: lend])
            #
            import paddle
            import paddle.nn as nn
            import numpy as np

            data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
            AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
            pool_out = AdaptiveMaxPool1D(data)
            # pool_out shape: [1, 3, 16]

            # for return_mask = true
            AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
            pool_out, indices = AdaptiveMaxPool1D(data)
            # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool1D, self).__init__()
        self.output_size = output_size
        self.return_mask = return_mask
        self.name = name

    def forward(self, input):
        # Delegate to the functional API; returns (out, indices) when
        # return_mask is True, otherwise just out.
        return F.adaptive_max_pool1d(input, self.output_size, self.return_mask,
                                     self.name)

    def extra_repr(self):
        return 'output_size={}, return_mask={}'.format(self.output_size,
                                                       self.return_mask)

971

C
cnn 已提交
972
class AdaptiveMaxPool2D(layers.Layer):
    """
    This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and
    pooling is adaptive one focus on the output size.

    For adaptive max pool2d:

    ..  math::

        hstart &= floor(i * H_{in} / H_{out})

        hend &= ceil((i + 1) * H_{in} / H_{out})

        wstart &= floor(j * W_{in} / W_{out})

        wend &= ceil((j + 1) * W_{in} / W_{out})

        Output(i ,j) &= max(Input[hstart:hend, wstart:wend])

    Parameters:
        output_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain
            two element, (H, W). H and W can be either a int, or None which means the size will be the same as that of
            the input.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs.
            It cannot be set in average pooling type. Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.

    Shape:
        - x(Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool2D.
    Examples:
        .. code-block:: python

            # adaptive max pool2d
            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            # of input data into m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive max pool performs calculations as follow:
            #
            #     for i in range(m):
            #         for j in range(n):
            #             hstart = floor(i * H / m)
            #             hend = ceil((i + 1) * H / m)
            #             wstart = floor(j * W / n)
            #             wend = ceil((j + 1) * W / n)
            #             output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
            #
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 32, 32)
            x = paddle.to_tensor(input_data)
            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
            pool_out, indices = adaptive_max_pool(x = x)
    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool2D, self).__init__()
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        # Delegate to the functional API; returns (out, indices) when
        # return_mask is True, otherwise just out.
        return F.adaptive_max_pool2d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name)

    def extra_repr(self):
        return 'output_size={}, return_mask={}'.format(self._output_size,
                                                       self._return_mask)

1052

C
cnn 已提交
1053
class AdaptiveMaxPool3D(layers.Layer):
    """
    This operation applies 3D adaptive max pooling on input tensor. The d, h and w dimensions of the output tensor are
    determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus
    on the output size.

    For adaptive max pool3d:

    ..  math::

        dstart &= floor(i * D_{in} / D_{out})

        dend &= ceil((i + 1) * D_{in} / D_{out})

        hstart &= floor(j * H_{in} / H_{out})

        hend &= ceil((j + 1) * H_{in} / H_{out})

        wstart &= floor(k * W_{in} / W_{out})

        wend &= ceil((k + 1) * W_{in} / W_{out})

        Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])

    Parameters:
        output_size(int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain
            three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as
            that of the input.
        return_mask(bool, optional): If true, the index of max pooling point will be returned along with outputs.
            Default False.
        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
            Usually name is no need to set and None by default.
    Shape:
        - x(Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor.
          The data type can be float32, float64.
        - output(Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor.
          The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool3D.
    Examples:
        .. code-block:: python

            # adaptive max pool3d
            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            # of input data into l * m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive max pool performs calculations as follow:
            #
            #     for i in range(l):
            #         for j in range(m):
            #             for k in range(n):
            #                 dstart = floor(i * D / l)
            #                 dend = ceil((i + 1) * D / l)
            #                 hstart = floor(j * H / m)
            #                 hend = ceil((j + 1) * H / m)
            #                 wstart = floor(k * W / n)
            #                 wend = ceil((k + 1) * W / n)
            #                 output[:, :, i, j, k] =
            #                     max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 8, 32, 32)
            x = paddle.to_tensor(input_data)
            pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
            out = pool(x)
            # out shape: [2, 3, 4, 4, 4]
            pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
            out, indices = pool(x)
            # out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool3D, self).__init__()
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        # Delegate to the functional API; returns (out, indices) when
        # return_mask is True, otherwise just out.
        return F.adaptive_max_pool3d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name)

    def extra_repr(self):
        return 'output_size={}, return_mask={}'.format(self._output_size,
                                                       self._return_mask)