#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ...fluid.dygraph import layers
from ...fluid.layer_helper import LayerHelper
from .. import functional as F

# Public pooling layers exported by this module.
__all__ = [
    'AvgPool1D',
    'AvgPool2D',
    'AvgPool3D',
    'MaxPool1D',
    'MaxPool2D',
    'MaxPool3D',
    'AdaptiveAvgPool1D',
    'AdaptiveAvgPool2D',
    'AdaptiveAvgPool3D',
    'AdaptiveMaxPool1D',
    'AdaptiveMaxPool2D',
    'AdaptiveMaxPool3D',
]


class AvgPool1D(layers.Layer):
    r"""
    This operation applies a 1D average pooling over an input signal composed
    of several input planes, based on the kernel_size, stride and padding parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, L_{out}].

    The output value of the layer with input size (N, C, L),
    output (N, C, L_{out}) and kernel_size k can be precisely described as
    For average pool1d:

    ..  math::

       Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k])


    Args:
        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain an integer.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain an integer. Default None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
            4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        exclusive (bool): Whether to exclude padding points in average pooling
            mode, default is `True`.
        ceil_mode (bool): Whether to use the ceil function to calculate the output length.
            If it is set to False, the floor function will be used. The default value is False.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of AvgPool1D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ValueError: If `padding` is a list or tuple but its length greater than 1.
        ShapeError: If the input is not a 3-D tensor.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - input: 3-D tensor.
        - output: 3-D tensor.

    Examples:

        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
          avg_pool = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
          pool_out = avg_pool(data)
          # pool_out shape: [1, 3, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 exclusive=True,
                 ceil_mode=False,
                 name=None):
        super(AvgPool1D, self).__init__()
        # The configuration is only stored here; it is forwarded unchanged
        # to F.avg_pool1d on every call.
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.name = name

    def forward(self, x):
        """Apply 1D average pooling to ``x`` with the stored configuration."""
        return F.avg_pool1d(x, self.kernel_size, self.stride, self.padding,
                            self.exclusive, self.ceil_mode, self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.kernel_size, self.stride, self.padding)

class AvgPool2D(layers.Layer):
    r"""
    This operation applies 2D average pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCHW format, where N is batch size, C is the number of channels,
    H is the height of the feature, and W is the width of the feature.

    Example:
      Input:
           X shape: :math:`(N, C, H_{in}, W_{in})`
      Attr:
           kernel_size: ksize

      Output:
           Out shape: :math:`(N, C, H_{out}, W_{out})`

    ..  math::

       out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
                             input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)

    Args:
        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain two integers, (pool_size_Height, pool_size_Width).
            Otherwise, the pool kernel size will be a square of an int.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain two integers, (pool_stride_Height, pool_stride_Width).
            Otherwise, the pool stride size will be a square of an int. Default None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
            Default False.
        exclusive (bool): Whether to exclude padding points in average pooling
            mode, default is `True`.
        divisor_override (float): If specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
        data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Shape:
        - x: 4-D tensor.
        - out: 4-D tensor.

    Returns:
        A callable object of AvgPool2D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Examples:
        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          # avg pool2d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
          avg_pool = nn.AvgPool2D(kernel_size=2,
                                  stride=2, padding=0)
          output = avg_pool(input)
          # output.shape [1, 3, 16, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 ceil_mode=False,
                 exclusive=True,
                 divisor_override=None,
                 data_format="NCHW",
                 name=None):
        super(AvgPool2D, self).__init__()
        # Note the shortened attribute names: kernel_size -> ksize and
        # divisor_override -> divisor.
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.divisor = divisor_override
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        """Apply 2D average pooling to ``x`` with the stored configuration."""
        return F.avg_pool2d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            ceil_mode=self.ceil_mode,
            exclusive=self.exclusive,
            divisor_override=self.divisor,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.ksize, self.stride, self.padding)

class AvgPool3D(layers.Layer):
    """
    This operation applies 3D average pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCDHW format, where N is batch size, C is the number of channels,
    D is the depth of the feature, H is the height of the feature, and W is the width of the feature.

    Args:
        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
            Otherwise, the pool stride size will be a cube of an int.
            Default None, matching the other pooling layers in this module;
            the value is forwarded to ``F.avg_pool3d`` unchanged.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
            Default False.
        exclusive (bool): Whether to exclude padding points in average pooling
            mode, default is True.
        divisor_override (int|float): If specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of AvgPool3D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x: 5-D tensor.
        - out: 5-D tensor.

    Examples:
        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          # avg pool3d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
          avg_pool = nn.AvgPool3D(kernel_size=2,
                                  stride=2, padding=0)
          output = avg_pool(input)
          # output.shape [1, 2, 1, 16, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 ceil_mode=False,
                 exclusive=True,
                 divisor_override=None,
                 data_format="NCDHW",
                 name=None):
        super(AvgPool3D, self).__init__()
        # Note the shortened attribute names: kernel_size -> ksize and
        # divisor_override -> divisor.
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.exclusive = exclusive
        self.divisor = divisor_override
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        """Apply 3D average pooling to ``x`` with the stored configuration."""
        return F.avg_pool3d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            ceil_mode=self.ceil_mode,
            exclusive=self.exclusive,
            divisor_override=self.divisor,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.ksize, self.stride, self.padding)

class MaxPool1D(layers.Layer):
    r"""
    Applies a 1D max pooling over an input signal composed of several input planes based
    on the kernel_size, stride and padding parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.

    The output value of the layer with input size (N, C, L),
    output (N, C, L_{out}) and kernel_size k can be precisely described as
    For max pool1d:

    ..  math::

       Output(N_i, C_i, l) &=  max(Input[N_i, C_i, stride \times l:stride \times l+k])

    Args:
        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain an integer.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain an integer. Default None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An integer, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides.
            4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        return_mask (bool): Whether return the max indices along with the outputs. default is `False`.
        ceil_mode (bool): Whether to use the ceil function to calculate the output length.
            If it is set to False, the floor function will be used. Default False.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of MaxPool1D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ValueError: If `padding` is a list or tuple but its length greater than 1.
        ShapeError: If the input is not a 3-D tensor.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x: 3-D tensor.
        - out: 3-D tensor.

    Examples:

        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
          max_pool = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
          pool_out = max_pool(data)
          # pool_out shape: [1, 3, 16]

          max_pool = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
          pool_out, indices = max_pool(data)
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 name=None):
        super(MaxPool1D, self).__init__()
        # The configuration is only stored here; it is forwarded unchanged
        # to F.max_pool1d on every call.
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.ceil_mode = ceil_mode
        self.return_mask = return_mask
        self.name = name

    def forward(self, input):
        """Apply 1D max pooling to ``input`` with the stored configuration."""
        return F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
                            self.return_mask, self.ceil_mode, self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.kernel_size, self.stride, self.padding)

class MaxPool2D(layers.Layer):
    r"""
    This operation applies 2D max pooling over input feature based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCHW format, where N is batch size, C is the number of channels,
    H is the height of the feature, and W is the width of the feature.

    Example:
      Input:
           X shape: :math:`(N, C, H_{in}, W_{in})`
      Attr:
           kernel_size: ksize

      Output:
           Out shape: :math:`(N, C, H_{out}, W_{out})`

    ..  math::

       out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
                                & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
                                               \text{stride[1]} \times w + n)

    Args:
        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
            it must contain two integers, (pool_size_Height, pool_size_Width).
            Otherwise, the pool kernel size will be a square of an int.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain two integers, (pool_stride_Height, pool_stride_Width).
            Otherwise, the pool stride size will be a square of an int. Default None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        return_mask (bool): Whether to return the max indices along with the outputs. Default False.
        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
            Default False.
        data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of MaxPool2D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x: 4-D tensor.
        - out: 4-D tensor.

    Examples:
        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          # max pool2d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
          max_pool = nn.MaxPool2D(kernel_size=2,
                                  stride=2, padding=0)
          output = max_pool(input)
          # output.shape [1, 3, 16, 16]

          # for return_mask=True
          max_pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
          output, max_indices = max_pool(input)
          # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16]
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 data_format="NCHW",
                 name=None):
        super(MaxPool2D, self).__init__()
        # Note the shortened attribute name: kernel_size -> ksize.
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.return_mask = return_mask
        self.ceil_mode = ceil_mode
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        """Apply 2D max pooling to ``x`` with the stored configuration."""
        return F.max_pool2d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            return_mask=self.return_mask,
            ceil_mode=self.ceil_mode,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.ksize, self.stride, self.padding)

class MaxPool3D(layers.Layer):
    """
    This operation applies 3D max pooling over input features based on the input,
    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
    in NCDHW format, where N is batch size, C is the number of channels,
    D is the depth of the feature, H is the height of the feature, and W is the width of the feature.

    Args:
        kernel_size (int|list|tuple): The pool kernel size. If the kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
            Otherwise, the pool stride size will be a cube of an int. Default None.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        return_mask (bool): Whether to return the max indices along with the outputs. Default False.
        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
            Default False.
        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of MaxPool3D.

    Raises:
        ValueError: If `padding` is a string, but not "SAME" or "VALID".
        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
        ShapeError: If the output's shape calculated is not greater than 0.

    Shape:
        - x: 5-D tensor.
        - out: 5-D tensor.

    Examples:
        .. code-block:: python

          import paddle
          import paddle.nn as nn
          import numpy as np

          # max pool3d
          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
          max_pool = nn.MaxPool3D(kernel_size=2,
                                  stride=2, padding=0)
          output = max_pool(input)
          # output.shape [1, 2, 1, 16, 16]

          # for return_mask=True
          max_pool = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
          output, max_indices = max_pool(input)
          # output.shape [1, 2, 1, 16, 16], max_indices.shape [1, 2, 1, 16, 16]
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 return_mask=False,
                 ceil_mode=False,
                 data_format="NCDHW",
                 name=None):
        super(MaxPool3D, self).__init__()
        # Note the shortened attribute name: kernel_size -> ksize.
        self.ksize = kernel_size
        self.stride = stride
        self.padding = padding
        self.return_mask = return_mask
        self.ceil_mode = ceil_mode
        self.data_format = data_format
        self.name = name

    def forward(self, x):
        """Apply 3D max pooling to ``x`` with the stored configuration."""
        return F.max_pool3d(
            x,
            kernel_size=self.ksize,
            stride=self.stride,
            padding=self.padding,
            return_mask=self.return_mask,
            ceil_mode=self.ceil_mode,
            data_format=self.data_format,
            name=self.name)

    def extra_repr(self):
        """Return the kernel size, stride and padding as a summary string."""
        return 'kernel_size={}, stride={}, padding={}'.format(
            self.ksize, self.stride, self.padding)

class AdaptiveAvgPool1D(layers.Layer):
    r"""

    This operation applies a 1D adaptive average pooling over an input signal composed
    of several input planes, based on the input and output_size parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    For average adaptive pool1d:

    ..  math::

       lstart &= floor(i * L_{in} / L_{out})

       lend &= ceil((i + 1) * L_{in} / L_{out})

       Output(i) &= \frac{sum(Input[lstart:lend])}{(lend - lstart)}

    Args:
        output_size (int): The target output size. It must be an integer.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        A callable object of AdaptiveAvgPool1D.

    Raises:
        ValueError: 'output_size' should be an integer.

    Shape:
        - x: 3-D tensor.
        - out: 3-D tensor.

    Examples:
        .. code-block:: python

          # average adaptive pool1d
          # suppose input data in shape of [N, C, L], `output_size` is m,
          # output shape is [N, C, m], adaptive pool divide L dimension
          # of input data into m grids averagely and performs poolings in each
          # grid to get output.
          # adaptive avg pool performs calculations as follow:
          #
          #     for i in range(m):
          #         lstart = floor(i * L / m)
          #         lend = ceil((i + 1) * L / m)
          #         output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
          #
          import paddle
          import paddle.nn as nn
          import numpy as np

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
          adaptive_avg_pool = nn.AdaptiveAvgPool1D(output_size=16)
          pool_out = adaptive_avg_pool(data)
          # pool_out shape: [1, 3, 16]
    """

    def __init__(self, output_size, name=None):
        super(AdaptiveAvgPool1D, self).__init__()
        # Stored as given and forwarded to F.adaptive_avg_pool1d on each call.
        self.output_size = output_size
        self.name = name

    def forward(self, input):
        """Apply 1D adaptive average pooling to ``input``."""
        return F.adaptive_avg_pool1d(input, self.output_size, self.name)

    def extra_repr(self):
        """Return the configured output size as a summary string."""
        return 'output_size={}'.format(self.output_size)

class AdaptiveAvgPool2D(layers.Layer):
    r"""

    This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool2d:

    ..  math::

       hstart &= floor(i * H_{in} / H_{out})

       hend &= ceil((i + 1) * H_{in} / H_{out})

       wstart &= floor(j * W_{in} / W_{out})

       wend &= ceil((j + 1) * W_{in} / W_{out})

       Output(i ,j) &= \frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}


    Parameters:
        output_size (int|list|tuple): The target output size. If it is a tuple or list,
            it must contain two elements, (H, W). H and W can be either an int, or None which means
            the size will be the same as that of the input.
        data_format (str): The data format of the input and output data. An optional string
            from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
            the order of: [batch_size, input_channels, input_height, input_width].
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Shape:
        x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
        output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool2D.

    Examples:
        .. code-block:: python

            # adaptive avg pool2d
            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            # of input data into m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive avg pool performs calculations as follow:
            #
            #     for i in range(m):
            #         for j in range(n):
            #             hstart = floor(i * H / m)
            #             hend = ceil((i + 1) * H / m)
            #             wstart = floor(j * W / n)
            #             wend = ceil((j + 1) * W / n)
            #             output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
            #
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 32, 32)
            x = paddle.to_tensor(input_data)
            # x.shape is [2, 3, 32, 32]
            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
            pool_out = adaptive_avg_pool(x=x)
            # pool_out.shape is [2, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCHW", name=None):
        super(AdaptiveAvgPool2D, self).__init__()
        # Configuration is only stored here; it is forwarded untouched to the
        # functional API on every call to ``forward``.
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        """Run ``F.adaptive_avg_pool2d`` on ``x`` with the stored settings."""
        return F.adaptive_avg_pool2d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name)

    def extra_repr(self):
        """Return the ``output_size`` setting formatted for the layer's repr."""
        return 'output_size={}'.format(self._output_size)

779

C
cnn 已提交
780
class AdaptiveAvgPool3D(layers.Layer):
    r"""

    This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions
    of the output tensor are determined by the parameter output_size.

    For avg adaptive pool3d:

    ..  math::

      dstart &= floor(i * D_{in} / D_{out})

      dend &= ceil((i + 1) * D_{in} / D_{out})

      hstart &= floor(j * H_{in} / H_{out})

      hend &= ceil((j + 1) * H_{in} / H_{out})

      wstart &= floor(k * W_{in} / W_{out})

      wend &= ceil((k + 1) * W_{in} / W_{out})

      Output(i ,j, k) &= \frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}


    Parameters:
        output_size (int|list|tuple): The target output size. If it is a tuple or list,
            it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
            the size will be the same as that of the input.
        data_format (str): The data format of the input and output data. An optional string
            from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
            the order of: [batch_size, input_channels, input_depth, input_height, input_width].
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Shape:
        x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
        output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.

    Returns:
        A callable object of AdaptiveAvgPool3D.

    Examples:
        .. code-block:: python

            # adaptive avg pool3d
            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            # of input data into l * m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive avg pool performs calculations as follow:
            #
            #     for i in range(l):
            #         for j in range(m):
            #             for k in range(n):
            #                 dstart = floor(i * D / l)
            #                 dend = ceil((i + 1) * D / l)
            #                 hstart = floor(j * H / m)
            #                 hend = ceil((j + 1) * H / m)
            #                 wstart = floor(k * W / n)
            #                 wend = ceil((k + 1) * W / n)
            #                 output[:, :, i, j, k] =
            #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 8, 32, 32)
            x = paddle.to_tensor(input_data)
            # x.shape is [2, 3, 8, 32, 32]
            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
            pool_out = adaptive_avg_pool(x=x)
            # pool_out.shape is [2, 3, 3, 3, 3]
    """

    def __init__(self, output_size, data_format="NCDHW", name=None):
        super(AdaptiveAvgPool3D, self).__init__()
        # Configuration is only stored here; it is forwarded untouched to the
        # functional API on every call to ``forward``.
        self._output_size = output_size
        self._data_format = data_format
        self._name = name

    def forward(self, x):
        """Run ``F.adaptive_avg_pool3d`` on ``x`` with the stored settings."""
        return F.adaptive_avg_pool3d(
            x,
            output_size=self._output_size,
            data_format=self._data_format,
            name=self._name)

    def extra_repr(self):
        """Return the ``output_size`` setting formatted for the layer's repr."""
        return 'output_size={}'.format(self._output_size)

870

C
cnn 已提交
871
class AdaptiveMaxPool1D(layers.Layer):
    """

    This operation applies a 1D adaptive max pooling over an input signal composed
    of several input planes, based on the input, output_size, return_mask parameters.
    Input(X) and output(Out) are in NCL format, where N is batch
    size, C is the number of channels, L is the length of the feature.
    The output tensor shape will be [N, C, output_size].

    For max adaptive pool1d:

    ..  math::

       lstart &= floor(i * L_{in} / L_{out})

       lend &= ceil((i + 1) * L_{in} / L_{out})

       Output(i) &= max(Input[lstart:lend])

    Args:
        output_size (int|list|tuple): The target output size. If it is a tuple or list,
             it must contain one int.
        return_mask (bool): If true, the index of max pooling point will be returned along
            with outputs. Default False.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Returns:
        A callable object of AdaptiveMaxPool1D.

    Raises:
        ValueError: 'output_size' should be an integer or list or tuple with length as 1.

    Shape:
        x (Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type can be float32, float64.
        output (Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type is same as input x.

    Examples:
        .. code-block:: python

          # max adaptive pool1d
          # suppose input data in shape of [N, C, L], `output_size` is m or [m],
          # output shape is [N, C, m], adaptive pool divide L dimension
          # of input data into m grids averagely and performs poolings in each
          # grid to get output.
          # adaptive max pool performs calculations as follow:
          #
          #     for i in range(m):
          #         lstart = floor(i * L / m)
          #         lend = ceil((i + 1) * L / m)
          #         output[:, :, i] = max(input[:, :, lstart: lend])
          #
          import paddle
          import paddle.nn as nn
          import numpy as np

          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
          adaptive_max_pool = nn.AdaptiveMaxPool1D(output_size=16)
          pool_out = adaptive_max_pool(data)
          # pool_out shape: [1, 3, 16]

          # for return_mask = true
          adaptive_max_pool = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
          pool_out, indices = adaptive_max_pool(data)
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool1D, self).__init__()
        # Configuration is only stored here; it is forwarded untouched to the
        # functional API on every call to ``forward``.
        self.output_size = output_size
        self.return_mask = return_mask
        self.name = name

    def forward(self, input):
        """Run ``F.adaptive_max_pool1d`` on ``input`` with the stored settings."""
        return F.adaptive_max_pool1d(input, self.output_size, self.return_mask,
                                     self.name)

    def extra_repr(self):
        """Return ``output_size`` and ``return_mask`` formatted for the layer's repr."""
        return 'output_size={}, return_mask={}'.format(self.output_size,
                                                       self.return_mask)

953

C
cnn 已提交
954
class AdaptiveMaxPool2D(layers.Layer):
    """
    This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
    of the output tensor are determined by the parameter output_size. The difference between
    adaptive pooling and pooling is that the adaptive one focuses on the output size.

    For adaptive max pool2d:

    ..  math::

       hstart &= floor(i * H_{in} / H_{out})

       hend &= ceil((i + 1) * H_{in} / H_{out})

       wstart &= floor(j * W_{in} / W_{out})

       wend &= ceil((j + 1) * W_{in} / W_{out})

       Output(i ,j) &= max(Input[hstart:hend, wstart:wend])

    Parameters:
        output_size (int|list|tuple): The target output size. If it is a tuple or list, it must
            contain two elements, (H, W). H and W can be either an int, or None which means the
            size will be the same as that of the input.
        return_mask (bool): If true, the index of max pooling point will be returned along
            with outputs. Default False.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Shape:
        x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
        output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool2D.

    Examples:
        .. code-block:: python

            # adaptive max pool2d
            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
            # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
            # of input data into m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive max pool performs calculations as follow:
            #
            #     for i in range(m):
            #         for j in range(n):
            #             hstart = floor(i * H / m)
            #             hend = ceil((i + 1) * H / m)
            #             wstart = floor(j * W / n)
            #             wend = ceil((j + 1) * W / n)
            #             output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
            #
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 32, 32)
            x = paddle.to_tensor(input_data)
            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
            pool_out, indices = adaptive_max_pool(x=x)
    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool2D, self).__init__()
        # Configuration is only stored here; it is forwarded untouched to the
        # functional API on every call to ``forward``.
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        """Run ``F.adaptive_max_pool2d`` on ``x`` with the stored settings."""
        return F.adaptive_max_pool2d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name)

    def extra_repr(self):
        """Return ``output_size`` and ``return_mask`` formatted for the layer's repr."""
        return 'output_size={}, return_mask={}'.format(self._output_size,
                                                       self._return_mask)

1029

C
cnn 已提交
1030
class AdaptiveMaxPool3D(layers.Layer):
    """
    This operation applies 3D adaptive max pooling on input tensor. The d, h and w dimensions
    of the output tensor are determined by the parameter output_size. The difference between
    adaptive pooling and pooling is that the adaptive one focuses on the output size.

    For adaptive max pool3d:

    ..  math::

      dstart &= floor(i * D_{in} / D_{out})

      dend &= ceil((i + 1) * D_{in} / D_{out})

      hstart &= floor(j * H_{in} / H_{out})

      hend &= ceil((j + 1) * H_{in} / H_{out})

      wstart &= floor(k * W_{in} / W_{out})

      wend &= ceil((k + 1) * W_{in} / W_{out})

      Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])

    Parameters:
        output_size (int|list|tuple): The target output size. If it is a tuple or list, it must
            contain three elements, (D, H, W). D, H and W can be either an int, or None which
            means the size will be the same as that of the input.
        return_mask (bool): If true, the index of max pooling point will be returned along
            with outputs. Default False.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Shape:
        x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
        output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x.

    Returns:
        A callable object of AdaptiveMaxPool3D.

    Examples:
        .. code-block:: python

            # adaptive max pool3d
            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
            # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
            # of input data into l * m * n grids averagely and performs poolings in each
            # grid to get output.
            # adaptive max pool performs calculations as follow:
            #
            #     for i in range(l):
            #         for j in range(m):
            #             for k in range(n):
            #                 dstart = floor(i * D / l)
            #                 dend = ceil((i + 1) * D / l)
            #                 hstart = floor(j * H / m)
            #                 hend = ceil((j + 1) * H / m)
            #                 wstart = floor(k * W / n)
            #                 wend = ceil((k + 1) * W / n)
            #                 output[:, :, i, j, k] =
            #                     max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
            import paddle
            import numpy as np

            input_data = np.random.rand(2, 3, 8, 32, 32)
            x = paddle.to_tensor(input_data)
            pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
            out = pool(x)
            # out shape: [2, 3, 4, 4, 4]
            pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
            out, indices = pool(x)
            # out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3]

    """

    def __init__(self, output_size, return_mask=False, name=None):
        super(AdaptiveMaxPool3D, self).__init__()
        # Configuration is only stored here; it is forwarded untouched to the
        # functional API on every call to ``forward``.
        self._output_size = output_size
        self._return_mask = return_mask
        self._name = name

    def forward(self, x):
        """Run ``F.adaptive_max_pool3d`` on ``x`` with the stored settings."""
        return F.adaptive_max_pool3d(
            x,
            output_size=self._output_size,
            return_mask=self._return_mask,
            name=self._name)

    def extra_repr(self):
        """Return ``output_size`` and ``return_mask`` formatted for the layer's repr."""
        return 'output_size={}, return_mask={}'.format(self._output_size,
                                                       self._return_mask)