From db68e085899665cadcb6231ec1b55b048d00cb0a Mon Sep 17 00:00:00 2001 From: ruri Date: Sat, 29 Aug 2020 16:46:18 +0800 Subject: [PATCH] [API2.0]Unify pooling function and add adaptive max pooling function (#26483) --- python/paddle/fluid/layers/nn.py | 4 + .../unittests/test_adaptive_avg_pool1d.py | 119 ++ .../unittests/test_adaptive_max_pool1d.py | 110 ++ .../unittests/test_adaptive_max_pool2d.py | 274 +++ .../unittests/test_adaptive_max_pool3d.py | 293 ++++ .../fluid/tests/unittests/test_pool1d_api.py | 64 - python/paddle/nn/__init__.py | 12 + python/paddle/nn/functional/__init__.py | 20 +- python/paddle/nn/functional/conv.py | 24 +- python/paddle/nn/functional/pooling.py | 1533 ++++++++--------- python/paddle/nn/layer/__init__.py | 14 +- python/paddle/nn/layer/pooling.py | 1246 ++++++++------ 12 files changed, 2273 insertions(+), 1440 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py create mode 100644 python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py create mode 100644 python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py create mode 100755 python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 592b6a00951..39c4df00657 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1858,6 +1858,7 @@ def conv3d(input, return helper.append_activation(pre_act) +@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool2d") @templatedoc() def pool2d(input, pool_size=-1, @@ -2075,6 +2076,7 @@ def pool2d(input, return pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool3d") @templatedoc() def pool3d(input, pool_size=-1, @@ -2303,6 +2305,7 @@ def pool3d(input, return pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool2d") @templatedoc(op_type="pool2d") def adaptive_pool2d(input, pool_size, @@ -2450,6 +2453,7 @@ def 
adaptive_pool2d(input, return (pool_out, mask) if require_index else pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool3d") @templatedoc(op_type="pool3d") def adaptive_pool3d(input, pool_size, diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py new file mode 100644 index 00000000000..5a135cea529 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py @@ -0,0 +1,119 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard +import paddle +import paddle.nn.functional as F +import paddle.fluid as fluid + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def avg_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + field_size = (r_end - r_start) \ + if (exclusive or adaptive) else (ksize[0]) + if data_type == np.int8 or data_type == np.uint8: + out[:, :, i] = (np.rint( + np.sum(x_masked, axis=(2, 3)) / field_size)).astype(data_type) + else: + out[:, :, i] = (np.sum(x_masked, axis=(2)) / + field_size).astype(data_type) + return out + + +class TestPool1d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_adaptive_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = 
fluid.dygraph.to_variable(input_np) + result = F.adaptive_avg_pool1d(input, output_size=16) + result_np = avg_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d( + output_size=16) + result = ada_max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_avg_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = avg_pool1D_forward_naive( + input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def test_adaptive_avg_pool1d(self): + for place in self.places: + self.check_adaptive_avg_dygraph_results(place) + self.check_adaptive_avg_static_results(place) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py new file mode 100644 index 00000000000..875fdf9e9c3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import unittest +from op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid import compiler, Program, program_guard +import paddle +import paddle.nn.functional as F +import paddle.fluid as fluid + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def max_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + out[:, :, i] = np.max(x_masked, axis=(2)) + return out + + +class TestPool1d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_adaptive_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + 
input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.adaptive_max_pool1d(input, output_size=16) + + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d( + output_size=16) + result = ada_max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_max_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def test_adaptive_max_pool1d(self): + for place in self.places: + self.check_adaptive_max_dygraph_results(place) + self.check_adaptive_max_static_results(place) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py new file mode 100644 index 00000000000..d78788eb1e7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -0,0 +1,274 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool2d_forward(x, output_size, data_format='NCHW', + pool_type="max"): + + N = x.shape[0] + C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \ + else [x.shape[3], x.shape[1], x.shape[2]] + + if (isinstance(output_size, int) or output_size == None): + H_out = output_size + W_out = output_size + output_size = [H_out, W_out] + else: + H_out, W_out = output_size + + if output_size[0] == None: + output_size[0] = H + H_out = H + if output_size[1] == None: + output_size[1] = W + W_out = W + + out = np.zeros((N, C, H_out, W_out)) if data_format=='NCHW' \ + else np.zeros((N, H_out, W_out, C)) + + for i in range(H_out): + in_h_start = adaptive_start_index(i, H, output_size[0]) + in_h_end = adaptive_end_index(i, H, output_size[0]) + + for j in range(W_out): + in_w_start = adaptive_start_index(j, W, output_size[1]) + in_w_end = adaptive_end_index(j, W, output_size[1]) + + if data_format == 'NCHW': + x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] + if pool_type == 'avg': + field_size = ( + (in_h_end - 
in_h_start) * (in_w_end - in_w_start)) + out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size + elif pool_type == 'max': + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + elif data_format == 'NHWC': + x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] + if pool_type == 'avg': + field_size = ( + (in_h_end - in_h_start) * (in_w_end - in_w_start)) + out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size + elif pool_type == 'max': + out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) + return out + + +class TestAdaptiveMaxPool2dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="max") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="max") + """ + self.res_4_np = adaptive_pool2d_forward( + x=self.x_np, + output_size=[3, 3], + pool_type="max", + data_format="NHWC") + """ + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[2, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool2d( + # x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[None, 3]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": 
self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_max_pool2d( + x=x, return_indices=False, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[2, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool2d( + # x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[None, 3]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="max") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="max") + + #self.res_4_np = adaptive_pool2d_forward( + # x=self.x_np, + # output_size=[3, 3], + # pool_type="max", + # data_format="NHWC") + + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if 
core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + # output_size=[3, 3], data_format="NHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + output_size=[None, 3]) + out_5 = adaptive_max_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5]) + out_3 = adaptive_max_pool(x=x) + + #adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + # output_size=[3, 3], data_format="NHWC") + #out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + output_size=[None, 3]) + out_5 = adaptive_max_pool(x=x) + + assert 
np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py new file mode 100755 index 00000000000..a7de0a5c6a7 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py @@ -0,0 +1,293 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool3d_forward(x, + output_size, + adaptive=True, + data_format='NCDHW', + pool_type='max'): + + N = x.shape[0] + C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \ + if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2],x.shape[3]] + + if (isinstance(output_size, int) or output_size == None): + H_out = output_size + W_out = output_size + D_out = output_size + output_size = [D_out, H_out, W_out] + else: + D_out, H_out, W_out = output_size + + if output_size[0] == None: + output_size[0] = D + D_out = D + if output_size[1] == None: + output_size[1] = H + H_out = H + if output_size[2] == None: + output_size[2] = W + W_out = W + + out = np.zeros((N, C, D_out, H_out, W_out)) if data_format=='NCDHW' \ + else np.zeros((N, D_out, H_out, W_out, C)) + for k in range(D_out): + d_start = adaptive_start_index(k, D, output_size[0]) + d_end = adaptive_end_index(k, D, output_size[0]) + + for i in range(H_out): + h_start = adaptive_start_index(i, H, output_size[1]) + h_end = adaptive_end_index(i, H, output_size[1]) + + for j in range(W_out): + w_start = adaptive_start_index(j, W, output_size[2]) + w_end = adaptive_end_index(j, W, output_size[2]) + + if data_format == 'NCDHW': + x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: + w_end] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, :, k, i, j] = np.sum(x_masked, + axis=(2, 3, 4)) / field_size + elif pool_type == 
'max': + out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + + elif data_format == 'NDHWC': + x_masked = x[:, d_start:d_end, h_start:h_end, w_start: + w_end, :] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, k, i, j, :] = np.sum(x_masked, + axis=(1, 2, 3)) / field_size + elif pool_type == 'max': + out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3)) + return out + + +class TestAdaptiveMaxPool3dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[3, 3, 3], pool_type="max") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="max") + + self.res_4_np = adaptive_pool3d_forward( + x=self.x_np, + output_size=[3, 3, 3], + pool_type="max", + data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[2, 3, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool3d( + # x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[None, 3, None]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + 
+ assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[2, 3, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool3d( + # x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[None, 3, None]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[3, 3, 3], pool_type="max") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="max") + + # self.res_4_np = adaptive_pool3d_forward( + # x=self.x_np, + # output_size=[3, 3, 3], + # pool_type="max", + # data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = 
paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + # output_size=[3, 3, 3], data_format="NDHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_max_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + # assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + # output_size=[3, 3, 3], data_format="NDHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_max_pool(x=x) + + assert 
np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + # assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index b1a25ad3529..1c05b96f1fc 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -174,66 +174,6 @@ class TestPool1d_API(unittest.TestCase): result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) - def check_adaptive_max_dygraph_results(self, place): - with fluid.dygraph.guard(place): - input_np = np.random.random([2, 3, 32]).astype("float32") - input = fluid.dygraph.to_variable(input_np) - result = F.adaptive_max_pool1d(input, output_size=16) - - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d( - output_size=16) - result = ada_max_pool1d_dg(input) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - def check_adaptive_avg_dygraph_results(self, place): - with fluid.dygraph.guard(place): - input_np = np.random.random([2, 3, 32]).astype("float32") - input = fluid.dygraph.to_variable(input_np) - result = F.adaptive_avg_pool1d(input, output_size=16) - result_np = avg_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) - - self.assertTrue(np.allclose(result.numpy(), result_np)) - - ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d( - output_size=16) - result = ada_max_pool1d_dg(input) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - def check_adaptive_max_static_results(self, place): - with 
fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") - result = F.adaptive_max_pool1d(input, output_size=16) - - input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) - - exe = fluid.Executor(place) - fetches = exe.run(fluid.default_main_program(), - feed={"input": input_np}, - fetch_list=[result]) - self.assertTrue(np.allclose(fetches[0], result_np)) - - def check_adaptive_avg_static_results(self, place): - with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") - result = F.adaptive_avg_pool1d(input, output_size=16) - - input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = avg_pool1D_forward_naive( - input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) - - exe = fluid.Executor(place) - fetches = exe.run(fluid.default_main_program(), - feed={"input": input_np}, - fetch_list=[result]) - self.assertTrue(np.allclose(fetches[0], result_np)) - def check_max_dygraph_padding_same(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") @@ -265,10 +205,6 @@ class TestPool1d_API(unittest.TestCase): self.check_avg_dygraph_results(place) self.check_max_static_results(place) self.check_avg_static_results(place) - self.check_adaptive_max_dygraph_results(place) - self.check_adaptive_avg_dygraph_results(place) - self.check_adaptive_max_static_results(place) - self.check_adaptive_avg_static_results(place) self.check_max_dygraph_padding_same(place) self.check_avg_dygraph_padding_same(place) diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 76063458d44..5cc9f6d32f9 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -97,8 +97,20 @@ from .layer.common import Dropout #DEFINE_ALIAS from .layer.common 
import Dropout2D #DEFINE_ALIAS from .layer.common import Dropout3D #DEFINE_ALIAS from .layer.common import AlphaDropout #DEFINE_ALIAS + +from .layer.pooling import AvgPool1d #DEFINE_ALIAS +from .layer.pooling import AvgPool2d #DEFINE_ALIAS +from .layer.pooling import AvgPool3d #DEFINE_ALIAS +from .layer.pooling import MaxPool1d #DEFINE_ALIAS +from .layer.pooling import MaxPool2d #DEFINE_ALIAS +from .layer.pooling import MaxPool3d #DEFINE_ALIAS +from .layer.pooling import AdaptiveAvgPool1d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS + +from .layer.pooling import AdaptiveMaxPool1d #DEFINE_ALIAS +from .layer.pooling import AdaptiveMaxPool2d #DEFINE_ALIAS +from .layer.pooling import AdaptiveMaxPool3d #DEFINE_ALIAS from .layer.conv import Conv1d #DEFINE_ALIAS from .layer.conv import Conv2d #DEFINE_ALIAS from .layer.conv import Conv3d #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 414e70853eb..3c0aa9c5c99 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -170,22 +170,28 @@ from .norm import layer_norm #DEFINE_ALIAS from .norm import lrn #DEFINE_ALIAS from .norm import normalize #DEFINE_ALIAS # from .norm import spectral_norm #DEFINE_ALIAS -from .pooling import max_pool1d #DEFINE_ALIAS -from .pooling import avg_pool1d #DEFINE_ALIAS -from .pooling import adaptive_max_pool1d #DEFINE_ALIAS -from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS from .pooling import pool2d #DEFINE_ALIAS from .pooling import pool3d #DEFINE_ALIAS +from .pooling import avg_pool1d #DEFINE_ALIAS from .pooling import adaptive_pool2d #DEFINE_ALIAS from .pooling import adaptive_pool3d #DEFINE_ALIAS -from .rnn import rnn #DEFINE_ALIAS -from .rnn import birnn #DEFINE_ALIAS from .pooling import avg_pool2d #DEFINE_ALIAS -from .pooling import max_pool2d #DEFINE_ALIAS from .pooling import avg_pool3d 
#DEFINE_ALIAS +from .pooling import max_pool1d #DEFINE_ALIAS +from .pooling import max_pool2d #DEFINE_ALIAS from .pooling import max_pool3d #DEFINE_ALIAS + +from .pooling import adaptive_pool2d #DEFINE_ALIAS +from .pooling import adaptive_pool3d #DEFINE_ALIAS +from .pooling import adaptive_max_pool1d #DEFINE_ALIAS +from .pooling import adaptive_max_pool2d #DEFINE_ALIAS +from .pooling import adaptive_max_pool3d #DEFINE_ALIAS +from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS + +from .rnn import rnn #DEFINE_ALIAS +from .rnn import birnn #DEFINE_ALIAS # from .rnn import gru_unit #DEFINE_ALIAS # from .rnn import lstm #DEFINE_ALIAS # from .rnn import lstm_unit #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index efe001a7d4a..42d7d98aefc 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -158,7 +158,7 @@ def conv1d(x, bias (Tensor, optional): The bias with shape [M,]. Default: None. stride (int or tuple, optional): The stride size. If stride is a tuple, it must contain one integers, (stride_size). Default: 1. - padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. + padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means the feature map is zero paded by size of `padding` on both sides. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. @@ -185,7 +185,7 @@ def conv1d(x, same with input. Raises: - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `data_format` is not "NCL" or "NLC". 
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 @@ -238,7 +238,7 @@ def conv1d(x, num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -260,7 +260,7 @@ def conv1d(x, padding = padding + [0] else: raise ValueError( - "The size of padding's dimmention should 1 or 2. But got padding={}". + "The size of padding's dimension should be 1 or 2. But got padding={}". format(padding)) stride = utils.convert_to_list(stride, 1, 'stride') + [1] @@ -424,7 +424,7 @@ def conv2d(x, Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. @@ -465,7 +465,7 @@ def conv2d(x, num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -710,7 +710,7 @@ def conv_transpose1d(x, num_channels = x.shape[channel_dim] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. 
Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -728,7 +728,7 @@ def conv_transpose1d(x, padding = padding + [0] else: raise ValueError( - "The size of padding's dimmention should 1 or 2. But got padding={}". + "The size of padding's dimension should 1 or 2. But got padding={}". format(padding)) stride = utils.convert_to_list(stride, 1, 'stride') + [1] @@ -965,7 +965,7 @@ def conv_transpose2d(x, channel_dim = -1 if channel_last else 1 num_channels = x.shape[channel_dim] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -1146,7 +1146,7 @@ def conv3d(x, Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. @@ -1187,7 +1187,7 @@ def conv3d(x, num_filters = weight.shape[0] if num_channels < 0: raise ValueError( - "The channel dimmention of the input({}) should be defined. " + "The channel dimension of the input({}) should be defined. " "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( @@ -1422,7 +1422,7 @@ def conv_transpose3d(x, num_filters = weight.shape[1] if num_channels < 0: raise ValueError( - "The channel dimmention of the input({}) should be defined. " + "The channel dimension of the input({}) should be defined. 
" "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index ca657b8be3e..c8790a75901 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -18,124 +18,146 @@ from ...fluid.layers import pool3d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS from ...fluid import core -from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ -from ...fluid.layers import utils, LayerHelper -from ...fluid.data_feeder import check_type, check_variable_and_dtype, check_type, check_dtype, convert_dtype -from ...fluid.layers import unsqueeze, squeeze +from ...fluid.framework import in_dygraph_mode +from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze +from ...fluid.data_feeder import check_type, check_variable_and_dtype __all__ = [ 'pool2d', 'pool3d', + 'adaptive_pool2d', + 'adaptive_pool3d', 'avg_pool1d', + 'avg_pool2d', + 'avg_pool3d', 'max_pool1d', + 'max_pool2d', + 'max_pool3d', 'adaptive_avg_pool1d', - 'adaptive_max_pool1d', 'adaptive_avg_pool2d', 'adaptive_avg_pool3d', - 'adaptive_pool2d', - 'adaptive_pool3d', - 'max_pool2d', - 'avg_pool2d', - 'max_pool3d', - 'avg_pool3d', + 'adaptive_max_pool1d', + 'adaptive_max_pool2d', + 'adaptive_max_pool3d', ] -def check_input(x, dimension): +def _is_list_or_tuple(input): + return isinstance(input, (list, tuple)) + + +def _check_input(x, dimension): if len(x.shape) != dimension: - raise ValueError("Excepted Input X is 3-D tensor, but received {}-D {}". 
- format(len(x.shape), type(x))) + raise ValueError( + "Excepted Input X is {}-D tensor, but received {}-D {}".format( + dimension, len(x.shape), type(x))) -def check_instance(x, x_name, types=(int, float)): +def _check_instance(x, x_name, types=(int, float)): if not isinstance(x, types): raise ValueError("Excepted {} type for {} but received type: {}. ". format(types, x_name, type(x))) -def update_padding1d(padding, pool_type='avg'): - def is_list_or_tuple(ele): - if isinstance(ele, list) or isinstance(ele, tuple): - return True - return False - - if is_list_or_tuple(padding): - if padding.__len__() == 1 and not is_list_or_tuple(padding[0]): - return [0, padding[0]] - else: - raise ValueError( - "{}_pool1d() argument 'padding' should contain one int (got {})". - format(pool_type, padding.__len__())) +def _zero_padding_in_batch_and_channel(padding, channel_last): + if channel_last: + return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0] else: - padding = [0, padding] + return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0] - return padding +def _exclude_padding_in_batch_and_channel(padding, channel_last): + padding_ = padding[1:-1] if channel_last else padding[2:] + padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim] + return padding_ -def update_padding2d(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, list) or isinstance(ele, tuple): - return True - return False - - if is_list_or_tuple(padding) and len(padding) == 4: - if is_list_or_tuple(padding[0]) and (data_format == "NCHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." 
% str(padding)) - padding = padding[2:4] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"): - if not (padding[0] == [0, 0] and padding[3] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding)) - padding = padding[1:3] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 4, 'padding') - - if utils._is_symmetric_padding(padding, 2): - padding = [padding[0], padding[2]] - else: - padding = utils.convert_to_list(padding, 2, 'padding') - - return padding +def _channel_last(data_format, num_dims): + if num_dims == 1: + if data_format not in ['NCL', 'NLC']: + raise ValueError( + "Attr(data_format) should be 'NCL' or 'NLC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NLC" else False + if num_dims == 2: + if data_format not in ['NCHW', 'NHWC']: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NHWC" else False + if num_dims == 3: + if data_format not in ['NCDHW', 'NDHWC']: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NDHWC" else False -def update_padding3d(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, (list, tuple)): - return True - return False - if is_list_or_tuple(padding) and len(padding) == 5: - if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): +def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False): + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown padding: '{}'. 
It can only be 'SAME' or 'VALID'.". + format(padding)) + if padding == "VALID": + if ceil_mode != False: raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding)) - padding = padding[2:5] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): - if not (padding[0] == [0, 0] and padding[4] == [0, 0]): + "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " + "Received ceil_mode: True.") + + padding_algorithm = "VALID" + padding = [0] * num_dims + else: + padding_algorithm = "SAME" + padding = [0] * num_dims + elif _is_list_or_tuple(padding): + # for padding like + # [(pad_before, pad_after), (pad_before, pad_after), ...] + # padding for batch_dim and channel_dim included + if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]): + if not _zero_padding_in_batch_and_channel(padding, channel_last): raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding)) - padding = padding[1:4] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - - elif is_list_or_tuple(padding) and len(padding) == 6: - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] + "Non-zero padding({}) in the batch or channel dimensions " + "is not supported.".format(padding)) + padding_algorithm = "EXPLICIT" + padding = _exclude_padding_in_batch_and_channel(padding, + channel_last) + if utils._is_symmetric_padding(padding, num_dims): + padding = padding[0::2] + # for padding like [pad_before, pad_after, pad_before, pad_after, ...] 
+ elif len(padding) == 2 * num_dims and isinstance(padding[0], int): + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, 2 * num_dims, 'padding') + if utils._is_symmetric_padding(padding, num_dims): + padding = padding[0::2] + # for padding like [pad_d1, pad_d2, ...] + elif len(padding) == num_dims and isinstance(padding[0], int): + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, num_dims, 'padding') + else: + raise ValueError("Invalid padding: {}".format(padding)) + # for integer padding else: - padding = utils.convert_to_list(padding, 3, 'padding') + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, num_dims, 'padding') + return padding, padding_algorithm + +def _expand_low_nd_padding(padding): + #1d to 2d fake input + if len(padding) == 2: + padding = [0] * 2 + padding + elif len(padding) == 1: + padding = [0] + padding + else: + raise ValueError( + "The size of padding's dimmention should be 1 or 2. But got padding={}". + format(padding)) return padding @@ -146,73 +168,57 @@ def avg_pool1d(x, count_include_pad=True, ceil_mode=False, name=None): - """ - - This operation applies a 1D average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - The output value of the layer with input size (N, C, L), - output (N, C, L_{out}) and kernel_size k can be precisely described as - For average pool1d: - - .. math:: - - Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k]) - + """ + This API implements average pooling 1d operation, + See more details in :ref:`api_nn_pooling_AvgPool1d` . Args: x (Tensor): The input tensor of pooling operator which is a 3-D tensor with shape [N, C, L]. 
where `N` is batch size, `C` is the number of channels, - `L` is the length of the feature. The data type if float32 or float64. + `L` is the length of the feature. The data type is float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. + it must contain an integer. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, - then the input is implicitly zero-padded on both sides for padding number of points. + it must contain an integer. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. + mode, default is `True`. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. - If it is set to False, the floor function will be used. Default False + If it is set to False, the floor function will be used. The default value is False. 
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. + ValueError: If `padding` is a list or tuple but its length is greater than 1. + ShapeError: If the input is not a 3-D tensor. ShapeError: If the output's shape calculated is not greater than 0. - Examples: - .. code-block:: python - import paddle import paddle.nn.functional as F paddle.disable_static() - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) - # pool_out shape: [1, 3, 16] - + out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) + # out shape: [1, 3, 16] """ """NCL to NCHW""" data_format = "NCHW" - check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'avg_pool1d') - check_input(x, 3) + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d') + _check_input(x, 3) x = unsqueeze(x, [2]) - kernel_size = utils.convert_to_list(kernel_size, 1, 'pool_size') + kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') kernel_size = [1] + kernel_size if stride is None: stride = kernel_size @@ -220,33 +226,20 @@ def avg_pool1d(x, stride = utils.convert_to_list(stride, 1, 'pool_stride') stride = [1] + stride - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." 
- % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0] + channel_last = _channel_last("NCL", 1) + padding, padding_algorithm = _update_padding_nd( + padding, 1, channel_last=channel_last, ceil_mode=ceil_mode) - padding = update_padding1d(padding, "avg") + # use 2d to implenment 1d should expand padding in advance. + padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): output = core.ops.pool2d( x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', - padding_algorithm, 'use_cudnn', not count_include_pad, 'ceil_mode', - ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', - data_format) + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) return squeeze(output, [2]) op_type = 'pool2d' @@ -275,126 +268,103 @@ def avg_pool1d(x, return squeeze(pool_out, [2]) -def max_pool1d(x, +def avg_pool2d(x, kernel_size, stride=None, padding=0, - return_indices=False, ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCHW", name=None): """ - - Applies a 1D max pooling over an input signal composed of several input planes based - on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - - The output value of the layer with input size (N, C, L), - output (N, C, L_{out}) and kernel_size k can be precisely described as - For average pool1d: - - .. 
math:: - - Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} - + This API implements average pooling 2d operation. + See more details in :ref:`api_nn_pooling_AvgPool2d` . + Args: - x (Tensor): The input tensor of pooling operator which is a 3-D tensor with - shape [N, C, L], where `N` is batch size, `C` is the number of channels, - `L` is the length of the feature. The data type if float32 or float64. - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. - stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`. - return_indices (bool): Whether return the max indices along with the outputs. default is `False`. - ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. - If it is set to False, the floor function will be used. Default False. + x (Tensor): The input tensor of pooling operator which is a 4-D tensor with + shape [N, C, H, W]. The format of input tensor is `"NCHW"` or + `"NHWC"`, where `N` is batch size, `C` is the number of channels, + `H` is the height of the feature, and `W` is the width of the + feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list, + it must contain two integers, (kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The stride size. If it is a tuple or list, + it must contain two integers, (stride_Height, stride_Width). + Otherwise, the stride size will be a square of an int. + + padding (string|int|list|tuple): The padding size. 
Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is `true`. + divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. - Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. ShapeError: If the output's shape calculated is not greater than 0. - - Examples: - .. 
code-block:: python - import paddle import paddle.nn.functional as F + import numpy as np paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) - # pool_out shape: [1, 3, 16] - - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] - + # avg pool2d + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + out = F.avg_pool2d(x, + kernel_size=2, + stride=2, padding=0) + # out.shape [1, 3, 16, 16] """ - """NCL to NCHW""" - data_format = "NCHW" - check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'max_pool1d') - check_input(x, 3) - x = unsqueeze(x, [2]) - kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') + kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') if stride is None: stride = kernel_size else: - stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') - - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. 
" - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0] + stride = utils.convert_to_list(stride, 2, 'pool_stride') - padding = update_padding1d(padding, 'max') + channel_last = _channel_last(data_format, 2) + padding, padding_algorithm = _update_padding_nd( + padding, 2, channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): - pool_out = core.ops.max_pool2d_with_index( - x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, - 'paddings', padding, 'padding_algorithm', padding_algorithm, - 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, - 'exclusive', True, 'data_format', data_format) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + output = core.ops.pool2d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', + False, 'padding_algorithm', padding_algorithm, 'strides', stride, + 'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + _check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1]) / divisor_override - op_type = 'max_pool2d_with_index' + op_type = 'pool2d' helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - mask = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out, "Mask": mask} helper.append_op( type=op_type, inputs={"X": x}, - outputs=outputs, + outputs={"Out": pool_out}, attrs={ - "pooling_type": 'max', + "pooling_type": "avg", "ksize": kernel_size, "global_pooling": False, "strides": stride, @@ -403,335 +373,211 @@ def max_pool1d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": True, + "exclusive": not count_include_pad, "data_format": data_format, }) - 
return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) - - -def adaptive_avg_pool1d(x, output_size, name=None): - """ - - This operation applies a 1D adaptive average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - For average adaptive pool1d: - - .. math:: - - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} - - Args: - x (Tensor): The input tensor of pooling operator, which is a 3-D tensor - with shape [N, C, L]. The format of input tensor is NCL, - where N is batch size, C is the number of channels, L is the - length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Tensor: The output tensor of adaptive average pooling result. The data type is same - as input tensor. - - Raises: - ValueError: 'output_size' should be a integer or list or tuple with length as 1. - - Examples: - .. code-block:: python - - # average adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. 
- # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) - # - import paddle - import paddle.nn.functional as F - paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.adaptive_average_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) - """ - pool_type = 'avg' - check_variable_and_dtype(x, 'input', ['float32', 'float64'], - 'adaptive_pool2d') - check_input(x, 3) - check_type(output_size, 'pool_size', (int), 'adaptive_pool1d') - - pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') - - l_type = "pool2d" - x = unsqueeze(x, [2]) - if in_dygraph_mode(): - pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', - pool_size, 'adaptive', True) - return squeeze(pool_out, [2]) - - helper = LayerHelper(l_type, **locals()) - dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - - outputs = {"Out": pool_out} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) - - return squeeze(pool_out, [2]) + if divisor_override is None: + return pool_out + else: + _check_instance(divisor_override, "divisor_override") + return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override -def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): +def avg_pool3d(x, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=False, + divisor_override=None, + data_format="NCDHW", + name=None): """ - This operation applies a 1D adaptive max pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. 
- Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - For max adaptive pool1d: - - .. math:: - - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= max(Input[lstart:lend])} + This API implements average pooling 3d operation. + See more details in :ref:`api_nn_pooling_AvgPool3d` . Args: - x (Tensor): The input tensor of pooling operator, which is a 3-D tensor - with shape [N, C, L]. The format of input tensor is NCL, - where N is batch size, C is the number of channels, L is the - length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. - return_indices (bool): If true, the index of max pooling point will be returned along - with outputs. It cannot be set in average pooling type. Default False. + x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with + shape [N, C, D, H, W], where `N` represents the batch size, `C` represents + the number of channels, `D`, `H` and `W` represent the depth, height and width of the feature respectively. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, [stride_Depth, stride_Height, stride_Width). + Otherwise, the pool stride size will be a cube of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. 
A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): ${ceil_mode_comment} + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is True. + divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Tensor: The output tensor of adaptive pooling result. The data type is same - as input tensor. - + Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: - ValueError: 'output_size' should be a integer or list or tuple with length as 1. - + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. Examples: .. 
code-block:: python - - # max adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = max(input[:, :, lstart: lend]) - # - import paddle - import paddle.nn.functional as F - paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.adaptive_max_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) - - pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) - # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] - + import paddle.fluid as fluid + import paddle + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + # avg pool3d + out = paddle.nn.functional.avg_pool3d( + x, + kernel_size = 2, + stride = 2, + padding=0) + # out.shape: [1, 3, 16, 16, 16] """ - pool_type = 'max' - check_variable_and_dtype(x, 'input', ['float32', 'float64'], - 'adaptive_max_pool1d') - check_input(x, 3) - check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') - check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') - - pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') + kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 3, 'pool_stride') - l_type = 'max_pool2d_with_index' + channel_last = _channel_last(data_format, 3) + padding, padding_algorithm = _update_padding_nd( + padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) - x = unsqueeze(x, [2]) if in_dygraph_mode(): - 
pool_out = core.ops.max_pool2d_with_index( - x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + output = core.ops.pool3d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, + 'paddings', padding, 'global_pooling', False, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + _check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override - helper = LayerHelper(l_type, **locals()) + op_type = "pool3d" + helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - - mask = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out, "Mask": mask} + pool_out = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out} helper.append_op( - type=l_type, + type=op_type, inputs={"X": x}, outputs=outputs, attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": not count_include_pad, + "data_format": data_format, }) - return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + if divisor_override is None: + return pool_out + else: + _check_instance(divisor_override, "divisor_override") + return pool_out * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override -def max_pool2d(x, +def max_pool1d(x, kernel_size, stride=None, padding=0, 
return_indices=False, ceil_mode=False, - data_format="NCHW", name=None): """ - This operation applies 2D max pooling over input feature based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - stride: stride - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, - \text{stride[1]} \times w + n) - $$ + This API implements max pooling 1d operation. + See more details in :ref:`api_nn_pooling_MaxPool1d` . Args: - x (Tensor): The input tensor of pooling operator which is a 4-D tensor with - shape [N, C, H, W]. The format of input tensor is `"NCHW"` or - `"NHWC"`, where `N` is batch size, `C` is the number of channels, - `H` is the height of the feature, and `W` is the width of the - feature. The data type if float32 or float64. + x (Tensor): The input tensor of pooling operator which is a 3-D tensor with + shape [N, C, L], where `N` is batch size, `C` is the number of channels, + `L` is the length of the feature. The data type is float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be a square of an int. + it must contain an integer. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). - Otherwise, the pool stride size will be a square of an int. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. 
If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. - ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. + it must contain an integer. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An integer, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + return_indices (bool): Whether return the max indices along with the outputs. default is `False`. + ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. + If it is set to False, the floor function will be used. Default False. 
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. + Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the input is not a 3-D tensor. ShapeError: If the output's shape calculated is not greater than 0. + Examples: .. code-block:: python import paddle import paddle.nn.functional as F - import numpy as np paddle.disable_static() - - # max pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - output = F.max_pool2d(input, - kernel_size=2, - stride=2, padding=0) - # output.shape [1, 3, 16, 16] - - # for return_indices=True - output, max_indices = F.max_pool2d(input, - kernel_size=2, - stride=2, - padding=0, - return_indices=True) - # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) + # pool_out shape: [1, 3, 16] + pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') - kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') + """NCL to NCHW""" + data_format = "NCHW" + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d') + _check_input(x, 3) + x = unsqueeze(x, [2]) + kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size') if stride is None: stride = kernel_size else: - stride = utils.convert_to_list(stride, 2, 'pool_stride') + stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') - if data_format not in ["NCHW", "NHWC"]: - raise ValueError( - "Attr(data_format) 
should be 'NCHW' or 'NHWC'. Received " - "Attr(data_format): %s." % str(data_format)) - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0] + padding, padding_algorithm = _update_padding_nd( + padding, 1, ceil_mode=ceil_mode) - padding = update_padding2d(padding, data_format) + # use 2d to implenment 1d should expand padding in advance. + padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): - output = core.ops.max_pool2d_with_index( + pool_out = core.ops.max_pool2d_with_index( x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) op_type = 'max_pool2d_with_index' helper = LayerHelper(op_type, **locals()) @@ -758,36 +604,21 @@ def max_pool2d(x, "data_format": data_format, }) - return (pool_out, mask) if return_indices else pool_out + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) -def avg_pool2d(x, +def max_pool2d(x, kernel_size, stride=None, padding=0, + return_indices=False, ceil_mode=False, - count_include_pad=True, - divisor_override=None, data_format="NCHW", name=None): """ - This operation applies 2D average pooling over input features based on the input, - and kernel_size, stride, 
padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} - input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) - $$ + This API implements max pooling 2d operation. + See more details in :ref:`api_nn_pooling_MaxPool2d` . Args: x (Tensor): The input tensor of pooling operator which is a 4-D tensor with @@ -796,30 +627,26 @@ def avg_pool2d(x, `H` is the height of the feature, and `W` is the width of the feature. The data type if float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). + it must contain two integers, (kernel_size_Height, kernel_size_Width). Otherwise, the pool kernel size will be a square of an int. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). + it must contain two integers, (stride_Height, stride_Width). Otherwise, the pool stride size will be a square of an int. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 
- when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. - divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + return_indices (bool): Whether to return the max indices along with the outputs. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. 
Raises: @@ -832,87 +659,71 @@ def avg_pool2d(x, import paddle.nn.functional as F import numpy as np paddle.disable_static() - - # avg pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - output = F.avg_pool2d(input, + # max pool2d + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0) # output.shape [1, 3, 16, 16] - + # for return_indices=True + out, max_indices = F.max_pool2d(x, + kernel_size=2, + stride=2, + padding=0, + return_indices=True) + # out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') if stride is None: stride = kernel_size else: stride = utils.convert_to_list(stride, 2, 'pool_stride') - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0] - if data_format not in ["NCHW", "NHWC"]: raise ValueError( "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " "Attr(data_format): %s." 
% str(data_format)) - pool_padding = update_padding2d(padding, data_format) + + channel_last = True if data_format == "NHWC" else False + + padding, padding_algorithm = _update_padding_nd( + padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): - output = core.ops.pool2d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', - False, 'padding_algorithm', padding_algorithm, 'strides', stride, - 'paddings', pool_padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, - 'data_format', data_format) - if divisor_override is None: - return output - else: - check_instance(divisor_override, "divisor_override") - return output * (kernel_size[0] * kernel_size[1]) / divisor_override + output = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, + 'paddings', padding, 'padding_algorithm', padding_algorithm, + 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, + 'exclusive', True, 'data_format', data_format) + return output if return_indices else output[0] - op_type = 'pool2d' + op_type = 'max_pool2d_with_index' helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} helper.append_op( type=op_type, inputs={"X": x}, - outputs={"Out": pool_out}, + outputs=outputs, attrs={ - "pooling_type": "avg", + "pooling_type": 'max', "ksize": kernel_size, "global_pooling": False, "strides": stride, - "paddings": pool_padding, + "paddings": padding, "padding_algorithm": padding_algorithm, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not count_include_pad, + "exclusive": True, "data_format": data_format, }) - if divisor_override is None: - return pool_out - else: - check_instance(divisor_override, "divisor_override") - 
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override + return (pool_out, mask) if return_indices else pool_out def max_pool3d(x, @@ -924,47 +735,25 @@ def max_pool3d(x, data_format="NCDHW", name=None): """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, D_{in}, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ - $$ - \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, ksize[0]-1} \max_{m=0, \ldots, ksize[1]-1} \max_{n=0, \ldots, ksize[2]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times d + k, - \text{stride[1]} \times h + m, \text{stride[2]} \times w + n) - $$ - + This API implements max pooling 3d operation. + See more details in :ref:`api_nn_pooling_MaxPool3d` . Args: x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W]. The format of - input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is - the number of channels, `D` is the depth of the feature, - `H` is the height of the feature, and `W` is the width - of the feature. - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + shape [N, C, D, H, W]. The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively. + kernel_size (int|list|tuple): The pool kernel size. If the kernel size is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). 
Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, [stride_Depth, stride_Height, stride_Width). Otherwise, the pool stride size will be a cube of an int. - padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). 
+ The default value is 0. ceil_mode (bool): ${ceil_mode_comment} return_indices (bool): Whether to return the max indices along with the outputs. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. @@ -973,7 +762,6 @@ def max_pool3d(x, name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: @@ -986,23 +774,20 @@ def max_pool3d(x, import paddle.nn.functional as F import numpy as np paddle.disable_static() - # max pool3d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - output = F.max_pool2d(input, + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + output = F.max_pool3d(x, kernel_size=2, stride=2, padding=0) output.shape [1, 3, 16, 16, 16] - # for return_indices=True - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - output, max_indices = paddle.nn.functional.max_pool3d(input, + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + output, max_indices = paddle.nn.functional.max_pool3d(x, kernel_size = 2, stride = 2, padding=0, return_indices=True) # output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16], - """ check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') @@ -1011,29 +796,10 @@ def max_pool3d(x, else: stride = utils.convert_to_list(stride, 3, 'pool_stride') - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." 
- % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0, 0] + channel_last = _channel_last(data_format, 3) - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " - "Attr(data_format): %s" % str(data_format)) - padding = update_padding3d(padding, data_format) + padding, padding_algorithm = _update_padding_nd( + padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): output = core.ops.max_pool3d_with_index( @@ -1071,170 +837,83 @@ def max_pool3d(x, return (pool_out, mask) if return_indices else pool_out -def avg_pool3d(x, - kernel_size, - stride=None, - padding=0, - ceil_mode=False, - count_include_pad=False, - divisor_override=None, - data_format="NCDHW", - name=None): +def adaptive_avg_pool1d(x, output_size, name=None): """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. - + This API implements adaptive average pooling 1d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool1d` . + Args: - input (Tensor): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W], where `N` is batch size, `C` is - the number of channels, `D` is the depth of the feature, - `H` is the height of the feature, and `W` is the width - of the feature. - kernel_size (int|list|tuple): The pool kernel size. 
If pool kernel size - is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. - padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - ceil_mode (bool): ${ceil_mode_comment} - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is True. - divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. - The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_depth, input_height, input_width]`. + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. 
+ output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Tensor: The output tensor of pooling result. The data type is same as input tensor. + Tensor: The output tensor of adaptive average pooling result. The data type is same + as input tensor. Raises: - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ShapeError: If the output's shape calculated is not greater than 0. + ValueError: 'output_size' should be an integer or list or tuple with length as 1. Examples: .. code-block:: python - import paddle.fluid as fluid - import paddle - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - # avg pool3d - pool3d = paddle.nn.functional.avg_pool3d( - input, - kernel_size = 2, - stride = 2, - padding=0) - # pool3d.shape: [1, 3, 16, 16, 16] - """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') - kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') - if stride is None: - stride = kernel_size - else: - stride = utils.convert_to_list(stride, 3, 'pool_stride') - - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. 
" - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0, 0] + # average adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart) + # + import paddle + import paddle.nn.functional as F + paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_avg_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16] + """ + pool_type = 'avg' + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'adaptive_pool2d') + _check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_pool1d') - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. 
Received " - "Attr(data_format): %s" % str(data_format)) - padding = update_padding3d(padding, data_format) + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + l_type = "pool2d" + x = unsqueeze(x, [2]) if in_dygraph_mode(): - output = core.ops.pool3d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, - 'paddings', padding, 'global_pooling', False, 'padding_algorithm', - padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, - 'data_format', data_format) - if divisor_override is None: - return output - else: - check_instance(divisor_override, "divisor_override") - return output * (kernel_size[0] * kernel_size[1] * - kernel_size[2]) / divisor_override + pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', + pool_size, 'adaptive', True) + return squeeze(pool_out, [2]) - op_type = "pool3d" - helper = LayerHelper(op_type, **locals()) + helper = LayerHelper(l_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out} + outputs = {"Out": pool_out} helper.append_op( - type=op_type, + type=l_type, inputs={"X": x}, outputs=outputs, attrs={ - "pooling_type": 'avg', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": not count_include_pad, - "data_format": data_format, + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, }) - if divisor_override is None: - return pool_out - else: - check_instance(divisor_override, "divisor_override") - return pool_out * (kernel_size[0] * kernel_size[1] * - kernel_size[2]) / divisor_override + return squeeze(pool_out, [2]) def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): """ - - This operation applies 2D adaptive avg pooling on input tensor. 
The h and w dimensions - of the output tensor are determined by the parameter output_size. - See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool2d` . - - For avg adaptive pool2d: - - .. math:: - - hstart &= floor(i * H_{in} / H_{out}) - - hend &= ceil((i + 1) * H_{in} / H_{out}) - - wstart &= floor(j * W_{in} / W_{out}) - - wend &= ceil((j + 1) * W_{in} / W_{out}) - - Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} + This API implements adaptive average pooling 2d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool2d` . Args: x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. @@ -1248,16 +927,12 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor. - Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". - Examples: .. 
code-block:: python - # adaptive avg pool2d # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], # output shape is [N, C, m, n], adaptive pool divide H and W dimensions @@ -1279,10 +954,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): input_data = np.random.rand(2, 3, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 32, 32] - pool_out = paddle.nn.functional.adaptive_avg_pool2d( + out = paddle.nn.functional.adaptive_avg_pool2d( x = x, output_size=[3, 3]) - # pool_out.shape is [2, 3, 3, 3] + # out.shape is [2, 3, 3, 3] """ if not in_dygraph_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], @@ -1337,28 +1012,8 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): """ - - This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool3d` . - - For avg adaptive pool3d: - - .. math:: - - dstart &= floor(i * D_{in} / D_{out}) - - dend &= ceil((i + 1) * D_{in} / D_{out}) - - hstart &= floor(j * H_{in} / H_{out}) - - hend &= ceil((j + 1) * H_{in} / H_{out}) - - wstart &= floor(k * W_{in} / W_{out}) - - wend &= ceil((k + 1) * W_{in} / W_{out}) - - Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + This API implements adaptive average pooling 3d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool3d` . Args: x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. @@ -1372,16 +1027,12 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. 
- Returns: Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor. - Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". - Examples: .. code-block:: python - # adaptive avg pool3d # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions @@ -1406,10 +1057,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): input_data = np.random.rand(2, 3, 8, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 8, 32, 32] - pool_out = paddle.nn.functional.adaptive_avg_pool3d( + out = paddle.nn.functional.adaptive_avg_pool3d( x = x, output_size=[3, 3, 3]) - # pool_out.shape is [2, 3, 3, 3, 3] + # out.shape is [2, 3, 3, 3, 3] """ if not in_dygraph_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], @@ -1461,3 +1112,257 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): }) return pool_out + + +def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): + """ + This API implements adaptive max pooling 1d operation. + See more details in :ref:`api_nn_pooling_AdaptiveMaxPool1d` . + + Args: + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + return_indices (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: + Tensor: The output tensor of adaptive pooling result. 
The data type is same + as input tensor. + Raises: + ValueError: 'output_size' should be a integer or list or tuple with length as 1. + Examples: + .. code-block:: python + # max adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = max(input[:, :, lstart: lend]) + # + import paddle + import paddle.nn.functional as F + paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_max_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16]) + pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) + # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] + """ + pool_type = 'max' + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool1d') + _check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') + + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + + l_type = 'max_pool2d_with_index' + + x = unsqueeze(x, [2]) + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + 
outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) + + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + + +def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): + """ + This operation applies a 2D adaptive max pooling on input tensor. + See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` . + Args: + x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either a int, or None which means the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: + Tensor: The output tensor of adaptive max pool2d result. The data type is same as input tensor. + Examples: + .. code-block:: python + # max adaptive pool2d + # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m*n grids averagely and performs poolings in each + # grid to get output. 
+ # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(j * W / n) + # wend = ceil((j + 1) * W / n) + # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 32, 32] + out = paddle.nn.functional.adaptive_max_pool2d( + x = x, + output_size=[3, 3]) + # out.shape is [2, 3, 3, 3] + """ + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool2d') + _check_input(x, 4) + #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool2d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool2d') + + in_h, in_w = x.shape[2:4] + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 2, 'output_size') + else: + if output_size[0] == None: + output_size[0] = in_h + if output_size[1] == None: + output_size[1] = in_w + + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + return pool_out if return_indices else pool_out[0] + + l_type = 'max_pool2d_with_index' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) + #return (pool_out, mask) if return_indices else pool_out + return pool_out + + +def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): + """ + This operation applies a 3D adaptive max pooling on input tensor. 
+ See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` . + Args: + x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: + Tensor: The output tensor of adaptive max pool3d result. The data type is same as input tensor. + Examples: + .. code-block:: python + # adaptive max pool3d + # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l*m*n grids averagely and performs poolings in each + # grid to get output. 
+ # adaptive max pool performs calculations as follow: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 8, 32, 32] + out = paddle.nn.functional.adaptive_max_pool3d( + x = x, + output_size=[3, 3, 3]) + # out.shape is [2, 3, 3, 3, 3] + """ + + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool3d') + _check_input(x, 5) + #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool3d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool3d') + + in_l, in_h, in_w = x.shape[2:5] + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 3, 'output_size') + else: + if output_size[0] == None: + output_size[0] = in_l + if output_size[1] == None: + output_size[1] = in_h + if output_size[2] == None: + output_size[2] = in_w + + if in_dygraph_mode(): + pool_out = core.ops.max_pool3d_with_index( + x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + return pool_out if return_indices else pool_out[0] + + l_type = 'max_pool3d_with_index' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) + + return (pool_out, mask) if return_indices else pool_out diff --git 
a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 3399e4e34c9..6eac15cd694 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -66,16 +66,18 @@ from .common import Dropout #DEFINE_ALIAS from .common import Dropout2D #DEFINE_ALIAS from .common import Dropout3D #DEFINE_ALIAS from .common import AlphaDropout #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS from .pooling import AvgPool1d #DEFINE_ALIAS -from .pooling import MaxPool1d #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS -from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS from .pooling import AvgPool2d #DEFINE_ALIAS -from .pooling import MaxPool2d #DEFINE_ALIAS from .pooling import AvgPool3d #DEFINE_ALIAS +from .pooling import MaxPool1d #DEFINE_ALIAS +from .pooling import MaxPool2d #DEFINE_ALIAS from .pooling import MaxPool3d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool2d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool3d #DEFINE_ALIAS from .conv import Conv1d #DEFINE_ALIAS from .conv import Conv2d #DEFINE_ALIAS from .conv import Conv3d #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 87fa0caec9e..6f6b5678497 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -12,198 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle - -from ...fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype -from ...fluid.layers import utils from ...fluid.dygraph import layers from ...fluid.layer_helper import LayerHelper from .. 
import functional as F __all__ = [ - 'AdaptiveAvgPool2d', - 'AdaptiveAvgPool3d', 'AvgPool1d', - 'maxPool1d', - 'AdaptiveMaxPool1d', - 'AdaptiveAvgPool1d', 'AvgPool2d', - 'MaxPool2d', 'AvgPool3d', + 'MaxPool1d', + 'MaxPool2d', 'MaxPool3d', + 'AdaptiveAvgPool1d', + 'AdaptiveAvgPool2d', + 'AdaptiveAvgPool3d', + 'AdaptiveMaxPool1d', + 'AdaptiveMaxPool2d', + 'AdaptiveMaxPool3d', ] -class AdaptiveAvgPool2d(layers.Layer): - """ - - This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - - For avg adaptive pool2d: - - .. math:: - - hstart &= floor(i * H_{in} / H_{out}) - - hend &= ceil((i + 1) * H_{in} / H_{out}) - - wstart &= floor(j * W_{in} / W_{out}) - - wend &= ceil((j + 1) * W_{in} / W_{out}) - - Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} - - - Parameters: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two element, (H, W). H and W can be either a int, or None which means - the size will be the same as that of the input. - data_format (str): The data format of the input and output data. An optional string - from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in - the order of: [batch_size, input_channels, input_height, input_width]. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Shape: - x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32 or float64. - output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x. - - Returns: - A callable object of AdaptiveAvgPool2d. - - Examples: - .. 
code-block:: python - - # adaptive avg pool2d - # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m * n grids averagely and performs poolings in each - # grid to get output. - # adaptive avg pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - import numpy as np - paddle.disable_static() - input_data = np.random.rand(2, 3, 32, 32) - x = paddle.to_tensor(input_data) - # x.shape is [2, 3, 32, 32] - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3) - pool_out = adaptive_avg_pool(x = x) - # pool_out.shape is [2, 3, 3, 3] - """ - - def __init__(self, output_size, data_format="NCHW", name=None): - super(AdaptiveAvgPool2d, self).__init__() - self._output_size = output_size - self._data_format = data_format - self._name = name - - def forward(self, x): - return F.adaptive_avg_pool2d( - x, - output_size=self._output_size, - data_format=self._data_format, - name=self._name) - - -class AdaptiveAvgPool3d(layers.Layer): - """ - - This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - - For avg adaptive pool3d: - - .. math:: - - dstart &= floor(i * D_{in} / D_{out}) - - dend &= ceil((i + 1) * D_{in} / D_{out}) - - hstart &= floor(j * H_{in} / H_{out}) - - hend &= ceil((j + 1) * H_{in} / H_{out}) - - wstart &= floor(k * W_{in} / W_{out}) - - wend &= ceil((k + 1) * W_{in} / W_{out}) - - Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - - - Parameters: - output_size (int|list|tuple): The pool kernel size. 
If pool kernel size is a tuple or list, - it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means - the size will be the same as that of the input. - data_format (str): The data format of the input and output data. An optional string - from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in - the order of: [batch_size, input_channels, input_depth, input_height, input_width]. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - Shape: - x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32 or float64. - output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x. - - Returns: - A callable object of AdaptiveAvgPool3d. - - Examples: - .. code-block:: python - - # adaptive avg pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into l * m * n grids averagely and performs poolings in each - # grid to get output. 
- # adaptive avg pool performs calculations as follow: - # - # for i in range(l): - # for j in range(m): - # for k in range(n): - # dstart = floor(i * D / l) - # dend = ceil((i + 1) * D / l) - # hstart = floor(j * H / m) - # hend = ceil((j + 1) * H / m) - # wstart = floor(k * W / n) - # wend = ceil((k + 1) * W / n) - # output[:, :, i, j, k] = - # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) - import paddle - import numpy as np - paddle.disable_static() - input_data = np.random.rand(2, 3, 8, 32, 32) - x = paddle.to_tensor(input_data) - # x.shape is [2, 3, 8, 32, 32] - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3) - pool_out = adaptive_avg_pool(x = x) - # pool_out = [2, 3, 3, 3, 3] - """ - - def __init__(self, output_size, data_format="NCDHW", name=None): - super(AdaptiveAvgPool3d, self).__init__() - self._output_size = output_size - self._data_format = data_format - self._name = name - - def forward(self, x): - return F.adaptive_avg_pool3d( - x, - output_size=self._output_size, - data_format=self._data_format, - name=self._name) - - class AvgPool1d(layers.Layer): """ This operation applies a 1D average pooling over an input signal composed @@ -223,17 +51,20 @@ class AvgPool1d(layers.Layer): Args: kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. + it must contain an integer. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, - then the input is implicitly zero-padded on both sides for padding number of points. + it must contain an integer. + padding (string|int|list|tuple): The padding size. 
Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every side. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. + mode, default is `True`. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. - If it is set to False, the floor function will be used. Default False + If it is set to False, the floor function will be used. The default value is False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -245,10 +76,14 @@ class AvgPool1d(layers.Layer): ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. + ShapeError: If the input is not a 3-D tensor. ShapeError: If the output's shape calculated is not greater than 0. + Shape: + - input: 3-D tensor. + - output: 3-D tensor + Examples: .. code-block:: python @@ -284,63 +119,74 @@ class AvgPool1d(layers.Layer): return out -class MaxPool1d(layers.Layer): +class AvgPool2d(layers.Layer): """ - Applies a 1D max pooling over an input signal composed of several input planes based - on the input, output_size, return_indices parameters. 
- Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - - The output value of the layer with input size (N, C, L), - output (N, C, L_{out}) and kernel_size k can be precisely described as - For average pool1d: + This operation applies 2D average pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCHW format, where N is batch size, C is the number of channels, + H is the height of the feature, and W is the width of the feature. - .. math:: + Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Attr: + kernel_size: ksize - Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + $$ + out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} + input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) + $$ Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two integers, (pool_size_Height, pool_size_Width). + Otherwise, the pool kernel size will be a square of an int. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`. - return_indices (bool): Whether return the max indices along with the outputs. default is `False`. - ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. - If it is set to False, the floor function will be used. 
Default False + it must contain two integers, (pool_stride_Height, pool_stride_Width). + Otherwise, the pool stride size will be a square of an int. + + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every side. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape + count_include_pad (bool): Whether to include padding points in average pooling + mode, default is `True`. + divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: - None. + Shape: + - x: 4-D tensor. + - out: 4-D tensor. + Returns: None. Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. 
ShapeError: If the output's shape calculated is not greater than 0. - - Examples: - .. code-block:: python - import paddle import paddle.nn as nn + import numpy as np paddle.disable_static() - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0) - pool_out = MaxPool1d(data) - # pool_out shape: [1, 3, 16] - - MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True) - pool_out, indices = MaxPool1d(data) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + # avg pool2d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + AvgPool2d = nn.AvgPool2d(kernel_size=2, + stride=2, padding=0) + output = AvgPool2d(input) + # output.shape [1, 3, 16, 16] """ @@ -348,113 +194,155 @@ class MaxPool1d(layers.Layer): kernel_size, stride=None, padding=0, - return_indices=False, ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCHW", name=None): - super(MaxPool1d, self).__init__() - self.kernel_size = kernel_size + super(AvgPool2d, self).__init__() + self.ksize = kernel_size self.stride = stride self.padding = padding self.ceil_mode = ceil_mode - self.return_indices = return_indices + self.count_include_pad = count_include_pad + self.divisor = divisor_override + self.data_format = data_format self.name = name - def forward(self, input): - out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding, - self.return_indices, self.ceil_mode, self.name) - return out + def forward(self, x): + return F.avg_pool2d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + count_include_pad=self.count_include_pad, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) -class AdaptiveAvgPool1d(layers.Layer): +class AvgPool3d(layers.Layer): """ - - This operation applies a 1D adaptive average pooling over an input signal 
composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - For average adaptive pool1d: - - .. math:: - - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} + This operation applies 3D max pooling over input features based on the input, + and kernel_size, stride, padding parameters. Input(X) and Output(Out) are + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. Args: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + is a tuple or list, it must contain three integers, + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, [stride_Depth, stride_Height, stride_Width). + Otherwise, the pool stride size will be a cube of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. 
A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): ${ceil_mode_comment} + count_include_pad (bool): Whether to exclude padding points in average pooling + mode, default is True. + divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: - None. - + Returns: None. Raises: - ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. + + Shape: + - x: 5-D tensor. + - out: 5-D tensor. Examples: .. code-block:: python - - # average adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. 
- # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) - # import paddle import paddle.nn as nn + import numpy as np paddle.disable_static() - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16) - pool_out = AdaptiveAvgPool1d(data) - # pool_out shape: [1, 3, 16] + # avg pool3d + input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32)) + AvgPool3d = nn.AvgPool3d(kernel_size=2, + stride=2, padding=0) + output = AvgPool3d(input) + # output.shape [1, 2, 3, 16, 16] + """ - def __init__(self, output_size, name=None): - super(AdaptiveAvgPool1d, self).__init__() - self.output_size = output_size + def __init__(self, + kernel_size, + stride, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCDHW", + name=None): + super(AvgPool3d, self).__init__() + self.ksize = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode + self.count_include_pad = count_include_pad + self.divisor = divisor_override + self.data_format = data_format self.name = name - def forward(self, input): - return F.adaptive_avg_pool1d(input, self.output_size, self.name) + def forward(self, x): + return F.avg_pool3d( + x, + kernel_size=self.ksize, + stride=self.stride, + padding=self.padding, + ceil_mode=self.ceil_mode, + count_include_pad=self.count_include_pad, + divisor_override=self.divisor, + data_format=self.data_format, + name=self.name) -class AdaptiveMaxPool1d(layers.Layer): +class MaxPool1d(layers.Layer): """ - - This operation applies a 1D adaptive max pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. 
+ Applies a 1D max pooling over an input signal composed of several input planes based + on the input, output_size, return_indices parameters. Input(X) and output(Out) are in NCL format, where N is batch size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - For max adaptive pool1d: + The output value of the layer with input size (N, C, L), + output (N, C, L_{out}) and kernel_size k can be precisely described as + For average pool1d: .. math:: - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= max(Input[lstart:lend])} + Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} Args: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. - return_indices (bool): If true, the index of max pooling point will be returned along - with outputs. It cannot be set in average pooling type. Default False. + kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain an integer. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain an integer. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An integer, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every sides. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. 
+ return_indices (bool): Whether return the max indices along with the outputs. default is `False`. + ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. + If it is set to False, the floor function will be used. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -462,53 +350,60 @@ class AdaptiveMaxPool1d(layers.Layer): None. Raises: - ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ValueError: If `padding` is a list or tuple but its length greater than 1. + ShapeError: If the input is not a 3-D. + ShapeError: If the output's shape calculated is not greater than 0. + + + Shape: + - x: 3-D tensor. + - out: 3-D tensor. Examples: + .. code-block:: python - # max adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. 
- # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = max(input[:, :, lstart: lend]) - # - import paddle + import paddle import paddle.nn as nn paddle.disable_static() data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16) - pool_out = AdaptiveMaxPool1d(data) + MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0) + pool_out = MaxPool1d(data) # pool_out shape: [1, 3, 16] - # for return_indices = true - AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True) - pool_out, indices = AdaptiveMaxPool1d(data) + MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True) + pool_out, indices = MaxPool1d(data) # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] """ - def __init__(self, output_size, return_indices=False, name=None): - super(AdaptiveMaxPool1d, self).__init__() - self.output_size = output_size + def __init__(self, + kernel_size, + stride=None, + padding=0, + return_indices=False, + ceil_mode=False, + name=None): + super(MaxPool1d, self).__init__() + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.ceil_mode = ceil_mode self.return_indices = return_indices self.name = name def forward(self, input): - return F.adaptive_max_pool1d(input, self.output_size, - self.return_indices, self.name) + out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding, + self.return_indices, self.ceil_mode, self.name) + return out -class AvgPool2d(layers.Layer): +class MaxPool2d(layers.Layer): """ - This operation applies 2D average pooling over input features based on the input, + This operation applies 2D max pooling over input feature based on the input, and kernel_size, stride, padding parameters. 
Input(X) and Output(Out) are in NCHW format, where N is batch size, C is the number of channels, H is the height of the feature, and W is the width of the feature. @@ -522,8 +417,9 @@ class AvgPool2d(layers.Layer): Output: Out shape: $(N, C, H_{out}, W_{out})$ $$ - out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} - input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) + out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ + & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, + \text{stride[1]} \times w + n) $$ Args: @@ -532,31 +428,33 @@ class AvgPool2d(layers.Layer): Otherwise, the pool kernel size will be a square of an int. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, it must contain two integers, (pool_stride_Height, pool_stride_Width). - Otherwise, the pool stride size will be a square of an int. Default: kernel_size. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. + Otherwise, the pool stride size will be a square of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. 
A list[int] or tuple(int) whose length is 2, [pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. - divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. + return_indices (bool): Whether to return the max indices along with the outputs. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. - Returns: None. + Returns: None Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. ShapeError: If the output's shape calculated is not greater than 0. + + Shape: + - x: 4-D tensor. + - out: 4-D tensor. + Examples: .. 
code-block:: python import paddle @@ -566,172 +464,72 @@ class AvgPool2d(layers.Layer): # max pool2d input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - AvgPool2d = nn.AvgPool2d(kernel_size=2, - stride=2, padding=0) - output = AvgPoo2d(input) + MaxPool2d = nn.MaxPool2d(kernel_size=2, + stride=2, padding=0) + output = MaxPool2d(input) # output.shape [1, 3, 16, 16] + # for return_indices=True + MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) + output, max_indices = MaxPool2d(input) + # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], """ def __init__(self, kernel_size, stride=None, padding=0, + return_indices=False, ceil_mode=False, - count_include_pad=True, - divisor_override=None, data_format="NCHW", name=None): - super(AvgPool2d, self).__init__() + super(MaxPool2d, self).__init__() self.ksize = kernel_size self.stride = stride self.padding = padding + self.return_indices = return_indices self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad - self.divisor = divisor_override self.data_format = data_format self.name = name def forward(self, x): - return F.avg_pool2d( + return F.max_pool2d( x, kernel_size=self.ksize, stride=self.stride, padding=self.padding, - ceil_mode=self.ceil_mode, - count_include_pad=self.count_include_pad, - divisor_override=self.divisor, + return_indices=self.return_indices, data_format=self.data_format, name=self.name) -class MaxPool2d(layers.Layer): +class MaxPool3d(layers.Layer): """ - This operation applies 2D max pooling over input feature based on the input, + This operation applies 3D max pooling over input features based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. 
- - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, - \text{stride[1]} \times w + n) - $$ - - Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be a square of an int. - stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). - Otherwise, the pool stride size will be a square of an int. Default: kernel_size. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. - ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. 
- name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: None - Raises: - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ShapeError: If the output's shape calculated is not greater than 0. - Examples: - .. code-block:: python - import paddle - import paddle.nn as nn - import numpy as np - paddle.disable_static() - - # max pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - MaxPool2d = nn.MaxPool2d(kernel_size=2, - stride=2, padding=0) - output = MaxPool2d(input) - # output.shape [1, 3, 16, 16] - - # for return_indices=True - MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) - output, max_indices = MaxPool2d(input) - # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], - """ - - def __init__(self, - kernel_size, - stride=None, - padding=0, - return_indices=False, - ceil_mode=False, - data_format="NCHW", - name=None): - super(MaxPool2d, self).__init__() - self.ksize = kernel_size - self.stride = stride - self.padding = padding - self.return_indices = return_indices - self.ceil_mode = ceil_mode - self.data_format = data_format - self.name = name - - def forward(self, x): - return F.max_pool2d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - return_indices=self.return_indices, - data_format=self.data_format, - name=self.name) - - -class MaxPool3d(layers.Layer): - """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. 
+ in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + kernel_size (int|list|tuple): The pool kernel size. If the kernel size is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. Default kernel_size. - padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - ceil_mode (bool): when True, will use ceil instead of floor to compute the output shape. - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is True. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. 
When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, [stride_Depth, stride_Height, stride_Width). + Otherwise, the pool stride size will be a cube of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every sides. + 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): ${ceil_mode_comment} + return_indices (bool): Whether to return the max indices along with the outputs. + data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -742,6 +540,11 @@ class MaxPool3d(layers.Layer): ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. ShapeError: If the output's shape calculated is not greater than 0. + + Shape: + - x: 5-D tensor. + - out: 5-D tensor. + Examples: .. 
code-block:: python import paddle @@ -790,88 +593,457 @@ class MaxPool3d(layers.Layer): name=self.name) -class AvgPool3d(layers.Layer): +class AdaptiveAvgPool1d(layers.Layer): """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + + This operation applies a 1D adaptive average pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For average adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size - is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. - padding (int|list|tuple): The pool padding size. 
If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - ceil_mode (bool): ${ceil_mode_comment} - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is True. - divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: None. + Returns: + None. + Raises: - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ShapeError: If the output's shape calculated is not greater than 0. + ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + + Shape: + - x: 3-D tensor. + - out: 3-D tensor. + Examples: .. 
code-block:: python + + # average adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follow: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) + # import paddle import paddle.nn as nn - import numpy as np paddle.disable_static() - # avg pool3d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32)) - AvgPool3d = nn.AvgPool3d(kernel_size=2, - stride=2, padding=0) - output = AvgPool3d(input) - # output.shape [1, 2, 3, 16, 16] - + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16) + pool_out = AdaptiveAvgPool1d(data) + # pool_out shape: [1, 3, 16] """ - def __init__(self, - kernel_size, - stride, - padding=0, - ceil_mode=False, - count_include_pad=True, - divisor_override=None, - data_format="NCDHW", - name=None): - super(AvgPool3d, self).__init__() - self.ksize = kernel_size - self.stride = stride - self.padding = padding - self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad - self.divisor = divisor_override - self.data_format = data_format + def __init__(self, output_size, name=None): + super(AdaptiveAvgPool1d, self).__init__() + self.output_size = output_size self.name = name - def forward(self, x): - return F.avg_pool3d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - ceil_mode=self.ceil_mode, - count_include_pad=self.count_include_pad, - divisor_override=self.divisor, - data_format=self.data_format, - name=self.name) + def forward(self, input): + return F.adaptive_avg_pool1d(input, self.output_size, self.name) + + +class 
AdaptiveAvgPool2d(layers.Layer): + """ + + This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions + of the output tensor are determined by the parameter output_size. + + For avg adaptive pool2d: + + .. math:: + + hstart &= floor(i * H_{in} / H_{out}) + + hend &= ceil((i + 1) * H_{in} / H_{out}) + + wstart &= floor(j * W_{in} / W_{out}) + + wend &= ceil((j + 1) * W_{in} / W_{out}) + + Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} + + + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain two element, (H, W). H and W can be either a int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in + the order of: [batch_size, input_channels, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Shape: + x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32, float64. + output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x. + + Returns: + A callable object of AdaptiveAvgPool2d. + + Examples: + .. code-block:: python + + # adaptive avg pool2d + # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m * n grids averagely and performs poolings in each + # grid to get output. 
+ # adaptive avg pool performs calculations as follows: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(j * W / n) + # wend = ceil((j + 1) * W / n) + # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 32, 32] + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3) + pool_out = adaptive_avg_pool(x = x) + # pool_out.shape is [2, 3, 3, 3] + """ + + def __init__(self, output_size, data_format="NCHW", name=None): + super(AdaptiveAvgPool2d, self).__init__() + self._output_size = output_size + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.adaptive_avg_pool2d( + x, + output_size=self._output_size, + data_format=self._data_format, + name=self._name) + + +class AdaptiveAvgPool3d(layers.Layer): + """ + + This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions + of the output tensor are determined by the parameter output_size. + + For avg adaptive pool3d: + + .. math:: + + dstart &= floor(i * D_{in} / D_{out}) + + dend &= ceil((i + 1) * D_{in} / D_{out}) + + hstart &= floor(j * H_{in} / H_{out}) + + hend &= ceil((j + 1) * H_{in} / H_{out}) + + wstart &= floor(k * W_{in} / W_{out}) + + wend &= ceil((k + 1) * W_{in} / W_{out}) + + Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + + + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means + the size will be the same as that of the input. + data_format (str): The data format of the input and output data. An optional string + from: "NCDHW", "NDHWC".
The default is "NCDHW". When it is "NCDHW", the data is stored in + the order of: [batch_size, input_channels, input_depth, input_height, input_width]. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Shape: + x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32, float64. + output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x. + + Returns: + A callable object of AdaptiveAvgPool3d. + + Examples: + .. code-block:: python + + # adaptive avg pool3d + # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l * m * n grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follows: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = + # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 8, 32, 32] + adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3) + pool_out = adaptive_avg_pool(x = x) + # pool_out.shape is [2, 3, 3, 3, 3] + """ + + def __init__(self, output_size, data_format="NCDHW", name=None): + super(AdaptiveAvgPool3d, self).__init__() + self._output_size = output_size + self._data_format = data_format + self._name = name + + def forward(self, x): + return F.adaptive_avg_pool3d( + x, + output_size=self._output_size, + data_format=self._data_format, +
name=self._name) + + +class AdaptiveMaxPool1d(layers.Layer): + """ + + This operation applies a 1D adaptive max pooling over an input signal composed + of several input planes, based on the input, output_size, return_indices parameters. + Input(X) and output(Out) are in NCL format, where N is batch + size, C is the number of channels, L is the length of the feature. + The output tensor shape will be [N, C, output_size]. + + For max adaptive pool1d: + + .. math:: + + lstart &= floor(i * L_{in} / L_{out}) + + lend &= ceil((i + 1) * L_{in} / L_{out}) + + Output(i) &= max(Input[lstart:lend]) + + Args: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + return_indices (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: + A callable object of AdaptiveMaxPool1d. + + Raises: + ValueError: 'output_size' should be an integer or list or tuple with length as 1. + + Shape: + x (Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type can be float32, float64. + output (Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type is same as input x. + + Examples: + .. code-block:: python + + # max adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output.
+ # adaptive max pool performs calculations as follows: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = max(input[:, :, lstart: lend]) + import numpy as np + import paddle + import paddle.nn as nn + paddle.disable_static() + + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16) + pool_out = AdaptiveMaxPool1d(data) + # pool_out shape: [1, 3, 16] + + # for return_indices = true + AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True) + pool_out, indices = AdaptiveMaxPool1d(data) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + + """ + + def __init__(self, output_size, return_indices=False, name=None): + super(AdaptiveMaxPool1d, self).__init__() + self.output_size = output_size + self.return_indices = return_indices + self.name = name + + def forward(self, input): + return F.adaptive_max_pool1d(input, self.output_size, + self.return_indices, self.name) + + +class AdaptiveMaxPool2d(layers.Layer): + """ + This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions + of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is that the adaptive one focuses on the output size. + For adaptive max pool2d: + .. math:: + hstart &= floor(i * H_{in} / H_{out}) + hend &= ceil((i + 1) * H_{in} / H_{out}) + wstart &= floor(j * W_{in} / W_{out}) + wend &= ceil((j + 1) * W_{in} / W_{out}) + Output(i ,j) &= max(Input[hstart:hend, wstart:wend]) + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False.
+ name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Shape: + x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32, float64. + output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x. + + Returns: + A callable object of AdaptiveMaxPool2d. + Examples: + .. code-block:: python + # adaptive max pool2d + # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m * n grids averagely and performs poolings in each + # grid to get output. + # adaptive max pool performs calculations as follows: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(j * W / n) + # wend = ceil((j + 1) * W / n) + # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_indices=True) + pool_out, indices = adaptive_max_pool(x = x) + """ + + def __init__(self, output_size, return_indices=False, name=None): + super(AdaptiveMaxPool2d, self).__init__() + self._output_size = output_size + self._return_indices = return_indices + self._name = name + + def forward(self, x): + return F.adaptive_max_pool2d( + x, + output_size=self._output_size, + return_indices=self._return_indices, + name=self._name) + + +class AdaptiveMaxPool3d(layers.Layer): + """ + This operation applies 3D adaptive max pooling on input tensor. The d, h and w dimensions + of the output tensor are determined by the parameter output_size.
The difference between adaptive pooling and pooling is that the adaptive one focuses on the output size. + For adaptive max pool3d: + .. math:: + dstart &= floor(i * D_{in} / D_{out}) + dend &= ceil((i + 1) * D_{in} / D_{out}) + hstart &= floor(j * H_{in} / H_{out}) + hend &= ceil((j + 1) * H_{in} / H_{out}) + wstart &= floor(k * W_{in} / W_{out}) + wend &= ceil((k + 1) * W_{in} / W_{out}) + Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend]) + Parameters: + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means + the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Shape: + x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64. + output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x. + Returns: + A callable object of AdaptiveMaxPool3d. + Examples: + .. code-block:: python + # adaptive max pool3d + # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l * m * n grids averagely and performs poolings in each + # grid to get output.
+ # adaptive max pool performs calculations as follows: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = + # max(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + pool = paddle.nn.AdaptiveMaxPool3d(output_size=4) + out = pool(x) + # out shape: [2, 3, 4, 4, 4] + pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True) + out, indices = pool(x) + # out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3] + + """ + + def __init__(self, output_size, return_indices=False, name=None): + super(AdaptiveMaxPool3d, self).__init__() + self._output_size = output_size + self._return_indices = return_indices + self._name = name + + def forward(self, x): + return F.adaptive_max_pool3d( + x, + output_size=self._output_size, + return_indices=self._return_indices, + name=self._name) -- GitLab