diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 592b6a009513ee9d6f9a409e76d843f455626f25..39c4df00657daccb88ae1ad95781891c4c6ec11e 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1858,6 +1858,7 @@ def conv3d(input, return helper.append_activation(pre_act) +@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool2d") @templatedoc() def pool2d(input, pool_size=-1, @@ -2075,6 +2076,7 @@ def pool2d(input, return pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool3d") @templatedoc() def pool3d(input, pool_size=-1, @@ -2303,6 +2305,7 @@ def pool3d(input, return pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool2d") @templatedoc(op_type="pool2d") def adaptive_pool2d(input, pool_size, @@ -2450,6 +2453,7 @@ def adaptive_pool2d(input, return (pool_out, mask) if require_index else pool_out +@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool3d") @templatedoc(op_type="pool3d") def adaptive_pool3d(input, pool_size, diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py new file mode 100644 index 0000000000000000000000000000000000000000..5a135cea52903a0d896df2d446b58d99e5a18993 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool1d.py @@ -0,0 +1,117 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
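For context on the `@deprecated` hunks above: they only attach a warning to the legacy `fluid.layers` pooling entry points, which keep working. Below is a minimal sketch of the decorator pattern involved; it is an illustration only, not Paddle's actual `deprecated` helper (whose import is assumed to already exist in `nn.py`).

```python
# Illustrative sketch only -- not Paddle's real `deprecated` helper. It shows
# the behavior the decorator hunks above rely on: each call to a deprecated
# API emits a DeprecationWarning that names the replacement.
import functools
import warnings


def deprecated(since, update_to):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                "'{}' is deprecated since {}; use '{}' instead.".format(
                    func.__name__, since, update_to),
                category=DeprecationWarning,
                stacklevel=2)
            return func(*args, **kwargs)

        return wrapper

    return decorator


@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool2d")
def pool2d(*args, **kwargs):
    pass  # stand-in body; the real pool2d lives in fluid/layers/nn.py


pool2d()  # warns: 'pool2d' is deprecated since 2.0.0; use 'paddle.nn.functional.pool2d' instead.
```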
+ +import numpy as np +import unittest +from op_test import OpTest +import paddle.fluid.core as core +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard +import paddle +import paddle.nn.functional as F + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def avg_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + field_size = (r_end - r_start) \ + if (exclusive or adaptive) else (ksize[0]) + if data_type == np.int8 or data_type == np.uint8: + out[:, :, i] = (np.rint( + np.sum(x_masked, axis=2) / field_size)).astype(data_type) + else: + out[:, :, i] = (np.sum(x_masked, axis=2) / + field_size).astype(data_type) + return out + + +class TestPool1d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_adaptive_avg_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.adaptive_avg_pool1d(input, output_size=16) + result_np = avg_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_avg_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d( + output_size=16) + result = ada_avg_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_avg_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_avg_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = avg_pool1D_forward_naive( + input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def test_adaptive_avg_pool1d(self): + for place in self.places: + self.check_adaptive_avg_dygraph_results(place) + self.check_adaptive_avg_static_results(place) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py new file mode 100644 index 0000000000000000000000000000000000000000..875fdf9e9c3f9a9b891ecc6911dfeda788eee271 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool1d.py @@ -0,0
+1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import unittest +from op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid import compiler, Program, program_guard +import paddle +import paddle.nn.functional as F +import paddle.fluid as fluid + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def max_pool1D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=False, + adaptive=False, + data_type=np.float64): + N, C, L = x.shape + if global_pool == 1: + ksize = [L] + if adaptive: + L_out = ksize[0] + else: + L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1 + ) // strides[0] + 1 if ceil_mode else ( + L - ksize[0] + 2 * paddings[0]) // strides[0] + 1 + + out = np.zeros((N, C, L_out)) + for i in range(L_out): + if adaptive: + r_start = adaptive_start_index(i, L, ksize[0]) + r_end = adaptive_end_index(i, L, ksize[0]) + else: + r_start = np.max((i * strides[0] - paddings[0], 0)) + r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L)) + x_masked = x[:, :, r_start:r_end] + + out[:, :, i] = np.max(x_masked, axis=(2)) + return out + + +class TestPool1d_API(unittest.TestCase): + def setUp(self): + np.random.seed(123) + self.places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + self.places.append(fluid.CUDAPlace(0)) + + def check_adaptive_max_dygraph_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.adaptive_max_pool1d(input, output_size=16) + + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d( + output_size=16) + result = ada_max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_adaptive_max_static_results(self, place): + with fluid.program_guard(fluid.Program(), fluid.Program()): + input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") + result = F.adaptive_max_pool1d(input, output_size=16) + + input_np = np.random.random([2, 3, 32]).astype("float32") + result_np = max_pool1D_forward_naive( + input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) + + exe = fluid.Executor(place) + fetches = exe.run(fluid.default_main_program(), + feed={"input": input_np}, + fetch_list=[result]) + self.assertTrue(np.allclose(fetches[0], result_np)) + + def test_adaptive_max_pool1d(self): + for place in self.places: + self.check_adaptive_max_dygraph_results(place) + self.check_adaptive_max_static_results(place) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py new file mode 100644 index 0000000000000000000000000000000000000000..d78788eb1e7c63be485210780db25e1de6fd84b4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -0,0 +1,274 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool2d_forward(x, output_size, data_format='NCHW', + pool_type="max"): + + N = x.shape[0] + C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \ + else [x.shape[3], x.shape[1], x.shape[2]] + + if isinstance(output_size, int) or output_size is None: + H_out = output_size + W_out = output_size + output_size = [H_out, W_out] + else: + H_out, W_out = output_size + + if output_size[0] is None: + output_size[0] = H + H_out = H + if output_size[1] is None: + output_size[1] = W + W_out = W + + out = np.zeros((N, C, H_out, W_out)) if data_format == 'NCHW' \ + else np.zeros((N, H_out, W_out, C)) + + for i in range(H_out): + in_h_start = adaptive_start_index(i, H, output_size[0]) + in_h_end = adaptive_end_index(i, H, output_size[0]) + + for j in range(W_out): + in_w_start = adaptive_start_index(j, W, output_size[1]) + in_w_end = adaptive_end_index(j, W, output_size[1]) + + if data_format == 'NCHW': + x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] + if pool_type == 'avg': + field_size = ( + (in_h_end - in_h_start) * (in_w_end - in_w_start)) + out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size + elif pool_type == 'max': + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + elif data_format == 'NHWC': + x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] + if pool_type == 'avg': + field_size = ( + (in_h_end - in_h_start) * (in_w_end - in_w_start)) + out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size + elif pool_type == 'max': + out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) + return out + + +class TestAdaptiveMaxPool2dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="max") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="max") + """ + self.res_4_np = adaptive_pool2d_forward( + x=self.x_np, +
output_size=[3, 3], + pool_type="max", + data_format="NHWC") + """ + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[2, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool2d( + # x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[None, 3]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_max_pool2d( + x=x, return_indices=False, output_size=[3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[2, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool2d( + # x=x, output_size=[3, 3], data_format="NHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool2d( + x=x, output_size=[None, 3]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[3, 3], pool_type="max") + + self.res_2_np = adaptive_pool2d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[2, 5], pool_type="max") + + #self.res_4_np = adaptive_pool2d_forward( + # x=self.x_np, + # output_size=[3, 3], + # pool_type="max", + # data_format="NHWC") + + self.res_5_np = adaptive_pool2d_forward( + x=self.x_np, output_size=[None, 3], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32") + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + # output_size=[3, 3], 
data_format="NHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + output_size=[None, 3]) + out_5 = adaptive_max_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5]) + out_3 = adaptive_max_pool(x=x) + + #adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + # output_size=[3, 3], data_format="NHWC") + #out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d( + output_size=[None, 3]) + out_5 = adaptive_max_pool(x=x) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py new file mode 100755 index 0000000000000000000000000000000000000000..a7de0a5c6a7017617124b893313e0f9830cc09f9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py @@ -0,0 +1,293 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function +from __future__ import division + +import unittest +import numpy as np + +import paddle.fluid.core as core +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def adaptive_start_index(index, input_size, output_size): + return int(np.floor(index * input_size / output_size)) + + +def adaptive_end_index(index, input_size, output_size): + return int(np.ceil((index + 1) * input_size / output_size)) + + +def adaptive_pool3d_forward(x, + output_size, + adaptive=True, + data_format='NCDHW', + pool_type='max'): + + N = x.shape[0] + C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \ + if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2], x.shape[3]] + + if isinstance(output_size, int) or output_size is None: + H_out = output_size + W_out = output_size + D_out = output_size + output_size = [D_out, H_out, W_out] + else: + D_out, H_out, W_out = output_size + + if output_size[0] is None: + output_size[0] = D + D_out = D + if output_size[1] is None: + output_size[1] = H + H_out = H + if output_size[2] is None: + output_size[2] = W + W_out = W + + out = np.zeros((N, C, D_out, H_out, W_out)) if data_format == 'NCDHW' \ + else np.zeros((N, D_out, H_out, W_out, C)) + for k in range(D_out): + d_start = adaptive_start_index(k, D, output_size[0]) + d_end = adaptive_end_index(k, D, output_size[0]) + + for i in range(H_out): + h_start = adaptive_start_index(i, H, output_size[1]) + h_end = adaptive_end_index(i, H, output_size[1]) + + for j in range(W_out): + w_start = adaptive_start_index(j, W, output_size[2]) + w_end = adaptive_end_index(j, W, output_size[2]) + + if data_format == 'NCDHW': + x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: + w_end] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, :, k, i, j] = np.sum(x_masked, + axis=(2, 3, 4)) / field_size + elif pool_type == 'max': + out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + + elif data_format == 'NDHWC': + x_masked = x[:, d_start:d_end, h_start:h_end, w_start: + w_end, :] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * ( + w_end - w_start) + out[:, k, i, j, :] = np.sum(x_masked, + axis=(1, 2, 3)) / field_size + elif pool_type == 'max': + out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3)) + return out + + +class TestAdaptiveMaxPool3dAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[3, 3, 3], pool_type="max") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="max") + + self.res_4_np = adaptive_pool3d_forward( + x=self.x_np, + output_size=[3, 3, 3], + pool_type="max", + data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + out_1 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5) + + out_3 =
paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[2, 3, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool3d( + # x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[None, 3, None]) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + #assert np.allclose(res_4, self.res_4_np) + + assert np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + out_1 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[3, 3, 3]) + + out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5) + + out_3 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[2, 3, 5]) + + #out_4 = paddle.nn.functional.adaptive_max_pool3d( + # x=x, output_size=[3, 3, 3], data_format="NDHWC") + + out_5 = paddle.nn.functional.adaptive_max_pool3d( + x=x, output_size=[None, 3, None]) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + #assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase): + def setUp(self): + self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32") + self.res_1_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[3, 3, 3], pool_type="max") + + self.res_2_np = adaptive_pool3d_forward( + x=self.x_np, output_size=5, pool_type="max") + + self.res_3_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[2, 3, 5], pool_type="max") + + # self.res_4_np = adaptive_pool3d_forward( + # x=self.x_np, + # output_size=[3, 3, 3], + # pool_type="max", + # data_format="NDHWC") + + self.res_5_np = adaptive_pool3d_forward( + x=self.x_np, output_size=[None, 3, None], pool_type="max") + + def test_static_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32") + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + # output_size=[3, 3, 3], data_format="NDHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_max_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + fluid.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5]) + + assert np.allclose(res_1, self.res_1_np) + + assert np.allclose(res_2, self.res_2_np) + + assert np.allclose(res_3, self.res_3_np) + + # assert np.allclose(res_4, self.res_4_np) + + assert 
np.allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_variable(self.x_np) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[3, 3, 3]) + out_1 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5) + out_2 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[2, 3, 5]) + out_3 = adaptive_max_pool(x=x) + + # adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + # output_size=[3, 3, 3], data_format="NDHWC") + # out_4 = adaptive_max_pool(x=x) + + adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( + output_size=[None, 3, None]) + out_5 = adaptive_max_pool(x=x) + + assert np.allclose(out_1.numpy(), self.res_1_np) + + assert np.allclose(out_2.numpy(), self.res_2_np) + + assert np.allclose(out_3.numpy(), self.res_3_np) + + # assert np.allclose(out_4.numpy(), self.res_4_np) + + assert np.allclose(out_5.numpy(), self.res_5_np) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index b1a25ad3529e8b0a4126bc458838ecd876e5af30..1c05b96f1fc61234028e940f6403ae08a0186027 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -174,66 +174,6 @@ class TestPool1d_API(unittest.TestCase): result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) - def check_adaptive_max_dygraph_results(self, place): - with fluid.dygraph.guard(place): - input_np = np.random.random([2, 3, 32]).astype("float32") - input = fluid.dygraph.to_variable(input_np) - result = F.adaptive_max_pool1d(input, output_size=16) - - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d( - output_size=16) - result = ada_max_pool1d_dg(input) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - def check_adaptive_avg_dygraph_results(self, place): - with fluid.dygraph.guard(place): - input_np = np.random.random([2, 3, 32]).astype("float32") - input = fluid.dygraph.to_variable(input_np) - result = F.adaptive_avg_pool1d(input, output_size=16) - result_np = avg_pool1D_forward_naive( - input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True) - - self.assertTrue(np.allclose(result.numpy(), result_np)) - - ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d( - output_size=16) - result = ada_max_pool1d_dg(input) - self.assertTrue(np.allclose(result.numpy(), result_np)) - - def check_adaptive_max_static_results(self, place): - with fluid.program_guard(fluid.Program(), fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") - result = F.adaptive_max_pool1d(input, output_size=16) - - input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = max_pool1D_forward_naive( - input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) - - exe = fluid.Executor(place) - fetches = exe.run(fluid.default_main_program(), - feed={"input": input_np}, - fetch_list=[result]) - self.assertTrue(np.allclose(fetches[0], result_np)) - - def check_adaptive_avg_static_results(self, place): - with fluid.program_guard(fluid.Program(), 
fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") - result = F.adaptive_avg_pool1d(input, output_size=16) - - input_np = np.random.random([2, 3, 32]).astype("float32") - result_np = avg_pool1D_forward_naive( - input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True) - - exe = fluid.Executor(place) - fetches = exe.run(fluid.default_main_program(), - feed={"input": input_np}, - fetch_list=[result]) - self.assertTrue(np.allclose(fetches[0], result_np)) - def check_max_dygraph_padding_same(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") @@ -265,10 +205,6 @@ class TestPool1d_API(unittest.TestCase): self.check_avg_dygraph_results(place) self.check_max_static_results(place) self.check_avg_static_results(place) - self.check_adaptive_max_dygraph_results(place) - self.check_adaptive_avg_dygraph_results(place) - self.check_adaptive_max_static_results(place) - self.check_adaptive_avg_static_results(place) self.check_max_dygraph_padding_same(place) self.check_avg_dygraph_padding_same(place) diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 76063458d44de3000ad7c1af08376c07e0209c27..5cc9f6d32f9d7ef3dafd73badd0ea88bed372968 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -97,8 +97,20 @@ from .layer.common import Dropout #DEFINE_ALIAS from .layer.common import Dropout2D #DEFINE_ALIAS from .layer.common import Dropout3D #DEFINE_ALIAS from .layer.common import AlphaDropout #DEFINE_ALIAS + +from .layer.pooling import AvgPool1d #DEFINE_ALIAS +from .layer.pooling import AvgPool2d #DEFINE_ALIAS +from .layer.pooling import AvgPool3d #DEFINE_ALIAS +from .layer.pooling import MaxPool1d #DEFINE_ALIAS +from .layer.pooling import MaxPool2d #DEFINE_ALIAS +from .layer.pooling import MaxPool3d #DEFINE_ALIAS +from .layer.pooling import AdaptiveAvgPool1d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS + +from .layer.pooling import AdaptiveMaxPool1d #DEFINE_ALIAS +from .layer.pooling import AdaptiveMaxPool2d #DEFINE_ALIAS +from .layer.pooling import AdaptiveMaxPool3d #DEFINE_ALIAS from .layer.conv import Conv1d #DEFINE_ALIAS from .layer.conv import Conv2d #DEFINE_ALIAS from .layer.conv import Conv3d #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 414e70853eb7163230ab2db987fc19c58e168f19..3c0aa9c5c99e545b657559c30fcde46a69781231 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -170,22 +170,26 @@ from .norm import layer_norm #DEFINE_ALIAS from .norm import lrn #DEFINE_ALIAS from .norm import normalize #DEFINE_ALIAS # from .norm import spectral_norm #DEFINE_ALIAS -from .pooling import max_pool1d #DEFINE_ALIAS -from .pooling import avg_pool1d #DEFINE_ALIAS -from .pooling import adaptive_max_pool1d #DEFINE_ALIAS -from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS from .pooling import pool2d #DEFINE_ALIAS from .pooling import pool3d #DEFINE_ALIAS +from .pooling import avg_pool1d #DEFINE_ALIAS from .pooling import adaptive_pool2d #DEFINE_ALIAS from .pooling import adaptive_pool3d #DEFINE_ALIAS -from .rnn import rnn #DEFINE_ALIAS -from .rnn import birnn #DEFINE_ALIAS from .pooling import avg_pool2d #DEFINE_ALIAS -from .pooling import max_pool2d #DEFINE_ALIAS from .pooling import avg_pool3d #DEFINE_ALIAS +from .pooling import max_pool1d #DEFINE_ALIAS +from .pooling
import max_pool2d #DEFINE_ALIAS from .pooling import max_pool3d #DEFINE_ALIAS + +from .pooling import adaptive_max_pool1d #DEFINE_ALIAS +from .pooling import adaptive_max_pool2d #DEFINE_ALIAS +from .pooling import adaptive_max_pool3d #DEFINE_ALIAS +from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS + +from .rnn import rnn #DEFINE_ALIAS +from .rnn import birnn #DEFINE_ALIAS # from .rnn import gru_unit #DEFINE_ALIAS # from .rnn import lstm #DEFINE_ALIAS # from .rnn import lstm_unit #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index efe001a7d4a560d97a85b959749d68b79be430f0..42d7d98aefcbbf51f562b98c4c494aeccfe20cf2 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -158,7 +158,7 @@ def conv1d(x, bias (Tensor, optional): The bias with shape [M,]. Default: None. stride (int or tuple, optional): The stride size. If stride is a tuple, it must contain one integers, (stride_size). Default: 1. - padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. + padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. 1. a string in ['valid', 'same']. 2. an int, which means the feature map is zero paded by size of `padding` on both sides. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. @@ -185,7 +185,7 @@ def conv1d(x, same with input. Raises: - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `data_format` is not "NCL" or "NLC". ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 @@ -238,7 +238,7 @@ def conv1d(x, num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -260,7 +260,7 @@ def conv1d(x, padding = padding + [0] else: raise ValueError( - "The size of padding's dimmention should 1 or 2. But got padding={}". + "The size of padding's dimension should be 1 or 2. But got padding={}". format(padding)) stride = utils.convert_to_list(stride, 1, 'stride') + [1] @@ -424,7 +424,7 @@ def conv2d(x, Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. @@ -465,7 +465,7 @@ def conv2d(x, num_channels = x.shape[channel_dim] num_filters = weight.shape[0] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined.
Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -710,7 +710,7 @@ def conv_transpose1d(x, num_channels = x.shape[channel_dim] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -728,7 +728,7 @@ def conv_transpose1d(x, padding = padding + [0] else: raise ValueError( - "The size of padding's dimmention should 1 or 2. But got padding={}". + "The size of padding's dimension should 1 or 2. But got padding={}". format(padding)) stride = utils.convert_to_list(stride, 1, 'stride') + [1] @@ -965,7 +965,7 @@ def conv_transpose2d(x, channel_dim = -1 if channel_last else 1 num_channels = x.shape[channel_dim] if num_channels < 0: - raise ValueError("The channel dimmention of the input({}) " + raise ValueError("The channel dimension of the input({}) " "should be defined. Received: {}.".format( x.shape, num_channels)) if num_channels % groups != 0: @@ -1146,7 +1146,7 @@ def conv3d(x, Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". - ValueError: If the channel dimmention of the input is less than or equal to zero. + ValueError: If the channel dimension of the input is less than or equal to zero. ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 or the element corresponding to the input's channel is not 0. @@ -1187,7 +1187,7 @@ def conv3d(x, num_filters = weight.shape[0] if num_channels < 0: raise ValueError( - "The channel dimmention of the input({}) should be defined. " + "The channel dimension of the input({}) should be defined. " "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( @@ -1422,7 +1422,7 @@ def conv_transpose3d(x, num_filters = weight.shape[1] if num_channels < 0: raise ValueError( - "The channel dimmention of the input({}) should be defined. " + "The channel dimension of the input({}) should be defined. 
" "Received: {}.".format(x.shape, num_channels)) if num_channels % groups != 0: raise ValueError( diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index ca657b8be3e67c7acb795a0f427ca5fe2c57b1f2..c8790a75901fd5d9a38862158246e3756dc575c4 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -18,124 +18,146 @@ from ...fluid.layers import pool3d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS from ...fluid import core -from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ -from ...fluid.layers import utils, LayerHelper -from ...fluid.data_feeder import check_type, check_variable_and_dtype, check_type, check_dtype, convert_dtype -from ...fluid.layers import unsqueeze, squeeze +from ...fluid.framework import in_dygraph_mode +from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze +from ...fluid.data_feeder import check_type, check_variable_and_dtype __all__ = [ 'pool2d', 'pool3d', + 'adaptive_pool2d', + 'adaptive_pool3d', 'avg_pool1d', + 'avg_pool2d', + 'avg_pool3d', 'max_pool1d', + 'max_pool2d', + 'max_pool3d', 'adaptive_avg_pool1d', - 'adaptive_max_pool1d', 'adaptive_avg_pool2d', 'adaptive_avg_pool3d', - 'adaptive_pool2d', - 'adaptive_pool3d', - 'max_pool2d', - 'avg_pool2d', - 'max_pool3d', - 'avg_pool3d', + 'adaptive_max_pool1d', + 'adaptive_max_pool2d', + 'adaptive_max_pool3d', ] -def check_input(x, dimension): +def _is_list_or_tuple(input): + return isinstance(input, (list, tuple)) + + +def _check_input(x, dimension): if len(x.shape) != dimension: - raise ValueError("Excepted Input X is 3-D tensor, but received {}-D {}". - format(len(x.shape), type(x))) + raise ValueError( + "Excepted Input X is {}-D tensor, but received {}-D {}".format( + dimension, len(x.shape), type(x))) -def check_instance(x, x_name, types=(int, float)): +def _check_instance(x, x_name, types=(int, float)): if not isinstance(x, types): raise ValueError("Excepted {} type for {} but received type: {}. ". format(types, x_name, type(x))) -def update_padding1d(padding, pool_type='avg'): - def is_list_or_tuple(ele): - if isinstance(ele, list) or isinstance(ele, tuple): - return True - return False - - if is_list_or_tuple(padding): - if padding.__len__() == 1 and not is_list_or_tuple(padding[0]): - return [0, padding[0]] - else: - raise ValueError( - "{}_pool1d() argument 'padding' should contain one int (got {})". - format(pool_type, padding.__len__())) +def _zero_padding_in_batch_and_channel(padding, channel_last): + if channel_last: + return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0] else: - padding = [0, padding] + return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0] - return padding +def _exclude_padding_in_batch_and_channel(padding, channel_last): + padding_ = padding[1:-1] if channel_last else padding[2:] + padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim] + return padding_ -def update_padding2d(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, list) or isinstance(ele, tuple): - return True - return False - - if is_list_or_tuple(padding) and len(padding) == 4: - if is_list_or_tuple(padding[0]) and (data_format == "NCHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." 
% str(padding)) - padding = padding[2:4] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"): - if not (padding[0] == [0, 0] and padding[3] == [0, 0]): - raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding)) - padding = padding[1:3] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 4, 'padding') - - if utils._is_symmetric_padding(padding, 2): - padding = [padding[0], padding[2]] - else: - padding = utils.convert_to_list(padding, 2, 'padding') - - return padding +def _channel_last(data_format, num_dims): + if num_dims == 1: + if data_format not in ['NCL', 'NLC']: + raise ValueError( + "Attr(data_format) should be 'NCL' or 'NLC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NLC" else False + if num_dims == 2: + if data_format not in ['NCHW', 'NHWC']: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NHWC" else False + if num_dims == 3: + if data_format not in ['NCDHW', 'NDHWC']: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " + "Attr(data_format): %s" % str(data_format)) + else: + return True if data_format == "NDHWC" else False -def update_padding3d(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, (list, tuple)): - return True - return False - if is_list_or_tuple(padding) and len(padding) == 5: - if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): +def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False): + if isinstance(padding, str): + padding = padding.upper() + if padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown padding: '{}'. It can only be 'SAME' or 'VALID'.". + format(padding)) + if padding == "VALID": + if ceil_mode != False: raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding)) - padding = padding[2:5] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): - if not (padding[0] == [0, 0] and padding[4] == [0, 0]): + "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " + "Received ceil_mode: True.") + + padding_algorithm = "VALID" + padding = [0] * num_dims + else: + padding_algorithm = "SAME" + padding = [0] * num_dims + elif _is_list_or_tuple(padding): + # for padding like + # [(pad_before, pad_after), (pad_before, pad_after), ...] + # padding for batch_dim and channel_dim included + if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]): + if not _zero_padding_in_batch_and_channel(padding, channel_last): raise ValueError( - "Non-zero pool_padding(%s) in the batch or channel dimensions " - "is not supported." 
% str(padding)) - padding = padding[1:4] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - - elif is_list_or_tuple(padding) and len(padding) == 6: - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] + "Non-zero padding({}) in the batch or channel dimensions " + "is not supported.".format(padding)) + padding_algorithm = "EXPLICIT" + padding = _exclude_padding_in_batch_and_channel(padding, + channel_last) + if utils._is_symmetric_padding(padding, num_dims): + padding = padding[0::2] + # for padding like [pad_before, pad_after, pad_before, pad_after, ...] + elif len(padding) == 2 * num_dims and isinstance(padding[0], int): + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, 2 * num_dims, 'padding') + if utils._is_symmetric_padding(padding, num_dims): + padding = padding[0::2] + # for padding like [pad_d1, pad_d2, ...] + elif len(padding) == num_dims and isinstance(padding[0], int): + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, num_dims, 'padding') + else: + raise ValueError("Invalid padding: {}".format(padding)) + # for integer padding else: + padding_algorithm = "EXPLICIT" + padding = utils.convert_to_list(padding, num_dims, 'padding') + return padding, padding_algorithm + +def _expand_low_nd_padding(padding): + # expand 1d padding to the fake 2d input used by the underlying 2d op + if len(padding) == 2: + padding = [0] * 2 + padding + elif len(padding) == 1: + padding = [0] + padding + else: + raise ValueError( + "The size of padding's dimension should be 1 or 2. But got padding={}". + format(padding)) return padding @@ -146,73 +168,57 @@ def avg_pool1d(x, count_include_pad=True, ceil_mode=False, name=None): - """ - - This operation applies a 1D average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - The output value of the layer with input size (N, C, L), - output (N, C, L_{out}) and kernel_size k can be precisely described as - For average pool1d: - - .. math:: - - Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k]) - + """ + This API implements average pooling 1d operation. + See more details in :ref:`api_nn_pooling_AvgPool1d`. Args: x (Tensor): The input tensor of pooling operator which is a 3-D tensor with shape [N, C, L]. where `N` is batch size, `C` is the number of channels, - `L` is the length of the feature. The data type if float32 or float64. + `L` is the length of the feature. The data type is float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. + it must contain an integer. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`.
If padding is non-zero, - then the input is implicitly zero-padded on both sides for padding number of points. + it must contain an integer. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on both sides. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on both sides. + 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. + mode, default is `True`. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. - If it is set to False, the floor function will be used. Default False + If it is set to False, the floor function will be used. The default value is False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. + ValueError: If `padding` is a list or tuple but its length is greater than 1. + ShapeError: If the input is not a 3-D tensor. ShapeError: If the output's shape calculated is not greater than 0. - Examples: - .. code-block:: python - import paddle import paddle.nn.functional as F paddle.disable_static() - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) - # pool_out shape: [1, 3, 16] - + out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) + # out shape: [1, 3, 16] """ """NCL to NCHW""" data_format = "NCHW" - check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'avg_pool1d') - check_input(x, 3) + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d') + _check_input(x, 3) x = unsqueeze(x, [2]) - kernel_size = utils.convert_to_list(kernel_size, 1, 'pool_size') + kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') kernel_size = [1] + kernel_size if stride is None: stride = kernel_size @@ -220,33 +226,20 @@ def avg_pool1d(x, stride = utils.convert_to_list(stride, 1, 'pool_stride') stride = [1] + stride - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False.
" - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0] + channel_last = _channel_last("NCL", 1) + padding, padding_algorithm = _update_padding_nd( + padding, 1, channel_last=channel_last, ceil_mode=ceil_mode) - padding = update_padding1d(padding, "avg") + # use 2d to implenment 1d should expand padding in advance. + padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): output = core.ops.pool2d( x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', - padding_algorithm, 'use_cudnn', not count_include_pad, 'ceil_mode', - ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', - data_format) + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) return squeeze(output, [2]) op_type = 'pool2d' @@ -275,126 +268,103 @@ def avg_pool1d(x, return squeeze(pool_out, [2]) -def max_pool1d(x, +def avg_pool2d(x, kernel_size, stride=None, padding=0, - return_indices=False, ceil_mode=False, + count_include_pad=True, + divisor_override=None, + data_format="NCHW", name=None): """ - - Applies a 1D max pooling over an input signal composed of several input planes based - on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - - The output value of the layer with input size (N, C, L), - output (N, C, L_{out}) and kernel_size k can be precisely described as - For average pool1d: - - .. math:: - - Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} - + This API implements average pooling 2d operation. + See more details in :ref:`api_nn_pooling_AvgPool2d` . + Args: - x (Tensor): The input tensor of pooling operator which is a 3-D tensor with - shape [N, C, L], where `N` is batch size, `C` is the number of channels, - `L` is the length of the feature. The data type if float32 or float64. - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one integers. - stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain one integers. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be the following forms: `[pad_left, pad_right]`. - return_indices (bool): Whether return the max indices along with the outputs. default is `False`. - ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. - If it is set to False, the floor function will be used. Default False. + x (Tensor): The input tensor of pooling operator which is a 4-D tensor with + shape [N, C, H, W]. The format of input tensor is `"NCHW"` or + `"NHWC"`, where `N` is batch size, `C` is the number of channels, + `H` is the height of the feature, and `W` is the width of the + feature. The data type if float32 or float64. + kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list, + it must contain two integers, (kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be a square of an int. + stride (int|list|tuple): The stride size. If it is a tuple or list, + it must contain two integers, (stride_Height, stride_Width). 
+ Otherwise, the stride size will be a square of an int. + + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on every side. + 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): When True, `ceil` will be used instead of `floor` to compute the output shape. + count_include_pad (bool): Whether to include padding points in the average + calculation, default is `True`. + divisor_override (float): If specified, it will be used as the divisor; otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. - Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ValueError: If `padding` is a list or tuple but its length greater than 1. - ShapeError: If the input is not a 3-D. ShapeError: If the output's shape calculated is not greater than 0. - - Examples: - .. code-block:: python - import paddle import paddle.nn.functional as F + import numpy as np paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) - # pool_out shape: [1, 3, 16] - - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] - + # avg pool2d + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + out = F.avg_pool2d(x, + kernel_size=2, + stride=2, padding=0) + # out.shape [1, 3, 16, 16] """ - """NCL to NCHW""" - data_format = "NCHW" - check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'max_pool1d') - check_input(x, 3) - x = unsqueeze(x, [2]) - kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') + kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') if stride is None: stride = kernel_size else: - stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') - - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False.
" - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0] + stride = utils.convert_to_list(stride, 2, 'pool_stride') - padding = update_padding1d(padding, 'max') + channel_last = _channel_last(data_format, 2) + padding, padding_algorithm = _update_padding_nd( + padding, 2, channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): - pool_out = core.ops.max_pool2d_with_index( - x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, - 'paddings', padding, 'padding_algorithm', padding_algorithm, - 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, - 'exclusive', True, 'data_format', data_format) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + output = core.ops.pool2d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', + False, 'padding_algorithm', padding_algorithm, 'strides', stride, + 'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + _check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1]) / divisor_override - op_type = 'max_pool2d_with_index' + op_type = 'pool2d' helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - mask = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out, "Mask": mask} helper.append_op( type=op_type, inputs={"X": x}, - outputs=outputs, + outputs={"Out": pool_out}, attrs={ - "pooling_type": 'max', + "pooling_type": "avg", "ksize": kernel_size, "global_pooling": False, "strides": stride, @@ -403,335 +373,211 @@ def max_pool1d(x, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": True, + "exclusive": not count_include_pad, "data_format": data_format, }) - return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) - - -def adaptive_avg_pool1d(x, output_size, name=None): - """ - - This operation applies a 1D adaptive average pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - For average adaptive pool1d: - - .. math:: - - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} - - Args: - x (Tensor): The input tensor of pooling operator, which is a 3-D tensor - with shape [N, C, L]. The format of input tensor is NCL, - where N is batch size, C is the number of channels, L is the - length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: - Tensor: The output tensor of adaptive average pooling result. The data type is same - as input tensor. - - Raises: - ValueError: 'output_size' should be a integer or list or tuple with length as 1. - - Examples: - .. 
code-block:: python - - # average adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) - # - import paddle - import paddle.nn.functional as F - paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.adaptive_average_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) - """ - pool_type = 'avg' - check_variable_and_dtype(x, 'input', ['float32', 'float64'], - 'adaptive_pool2d') - check_input(x, 3) - check_type(output_size, 'pool_size', (int), 'adaptive_pool1d') - - pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') - - l_type = "pool2d" - x = unsqueeze(x, [2]) - if in_dygraph_mode(): - pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', - pool_size, 'adaptive', True) - return squeeze(pool_out, [2]) - - helper = LayerHelper(l_type, **locals()) - dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - - outputs = {"Out": pool_out} - helper.append_op( - type=l_type, - inputs={"X": x}, - outputs=outputs, - attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, - }) - - return squeeze(pool_out, [2]) + if divisor_override is None: + return pool_out + else: + _check_instance(divisor_override, "divisor_override") + return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override -def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): +def avg_pool3d(x, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=False, + divisor_override=None, + data_format="NCDHW", + name=None): """ - This operation applies a 1D adaptive max pooling over an input signal composed - of several input planes, based on the input, output_size, return_indices parameters. - Input(X) and output(Out) are in NCL format, where N is batch - size, C is the number of channels, L is the length of the feature. - The output tensor shape will be [N, C, output_size]. - - For max adaptive pool1d: - - .. math:: - - lstart &= floor(i * L_{in} / L_{out}) - - lend &= ceil((i + 1) * L_{in} / L_{out}) - - Output(i) &= max(Input[lstart:lend])} + This API implements average pooling 3d operation. + See more details in :ref:`api_nn_pooling_AvgPool3d` . Args: - x (Tensor): The input tensor of pooling operator, which is a 3-D tensor - with shape [N, C, L]. The format of input tensor is NCL, - where N is batch size, C is the number of channels, L is the - length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. - return_indices (bool): If true, the index of max pooling point will be returned along - with outputs. It cannot be set in average pooling type. Default False. + x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with + shape [N, C, D, H, W], where `N` represents the batch size, `C` represents + the number of channels, `D`, `H` and `W` represent the depth, height and width of the feature respectively. + kernel_size (int|list|tuple): The pool kernel size. 
If pool kernel size + is a tuple or list, it must contain three integers, + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). + Otherwise, the pool kernel size will be the cube of an int. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, (stride_Depth, stride_Height, stride_Width). + Otherwise, the pool stride size will be a cube of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on each side. + 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + ceil_mode (bool): ${ceil_mode_comment} + count_include_pad (bool): Whether to include the padding points in the average calculation, + default is False. + divisor_override (int|float): if specified, it will be used as the divisor, otherwise kernel_size will be used. Default None. + data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Tensor: The output tensor of adaptive pooling result. The data type is same - as input tensor. - + Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: - ValueError: 'output_size' should be a integer or list or tuple with length as 1. - + ValueError: If `padding` is a string, but not "SAME" or "VALID". + ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the output's shape calculated is not greater than 0. Examples: .. code-block:: python - - # max adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output.
- # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = max(input[:, :, lstart: lend]) - # - import paddle - import paddle.nn.functional as F - paddle.disable_static() - - data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) - pool_out = F.adaptive_max_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) - - pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) - # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] - + import paddle + import numpy as np + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + # avg pool3d + out = paddle.nn.functional.avg_pool3d( + x, + kernel_size = 2, + stride = 2, + padding=0) + # out.shape: [1, 3, 16, 16, 16] """ - pool_type = 'max' - check_variable_and_dtype(x, 'input', ['float32', 'float64'], - 'adaptive_max_pool1d') - check_input(x, 3) - check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') - check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') - - pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool3d') + kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') + if stride is None: + stride = kernel_size + else: + stride = utils.convert_to_list(stride, 3, 'pool_stride') - l_type = 'max_pool2d_with_index' + channel_last = _channel_last(data_format, 3) + padding, padding_algorithm = _update_padding_nd( + padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) - x = unsqueeze(x, [2]) if in_dygraph_mode(): - pool_out = core.ops.max_pool2d_with_index( - x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + output = core.ops.pool3d( + x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, + 'paddings', padding, 'global_pooling', False, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', not count_include_pad, + 'data_format', data_format) + if divisor_override is None: + return output + else: + _check_instance(divisor_override, "divisor_override") + return output * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override - helper = LayerHelper(l_type, **locals()) + op_type = "pool3d" + helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() - pool_out = helper.create_variable_for_type_inference(dtype) - - mask = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out, "Mask": mask} + pool_out = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out} helper.append_op( - type=l_type, + type=op_type, inputs={"X": x}, outputs=outputs, attrs={ - "pooling_type": pool_type, - "ksize": pool_size, - "adaptive": True, + "pooling_type": 'avg', + "ksize": kernel_size, + "global_pooling": False, + "strides": stride, + "paddings": padding, + "padding_algorithm": padding_algorithm, + "use_cudnn": True, + "ceil_mode": ceil_mode, + "use_mkldnn": False, + "exclusive": not count_include_pad, + "data_format": data_format, }) - return (squeeze(pool_out, [2]), - squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + if divisor_override is None: + return pool_out + else: + _check_instance(divisor_override, "divisor_override") + return
pool_out * (kernel_size[0] * kernel_size[1] * + kernel_size[2]) / divisor_override -def max_pool2d(x, +def max_pool1d(x, kernel_size, stride=None, padding=0, return_indices=False, ceil_mode=False, - data_format="NCHW", name=None): """ - This operation applies 2D max pooling over input feature based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - stride: stride - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, - \text{stride[1]} \times w + n) - $$ + This API implements max pooling 1d operation. + See more details in :ref:`api_nn_pooling_MaxPool1d` . Args: - x (Tensor): The input tensor of pooling operator which is a 4-D tensor with - shape [N, C, H, W]. The format of input tensor is `"NCHW"` or - `"NHWC"`, where `N` is batch size, `C` is the number of channels, - `H` is the height of the feature, and `W` is the width of the - feature. The data type if float32 or float64. + x (Tensor): The input tensor of pooling operator which is a 3-D tensor with + shape [N, C, L], where `N` is batch size, `C` is the number of channels, + `L` is the length of the feature. The data type is float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be a square of an int. + it must contain an integer. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). - Otherwise, the pool stride size will be a square of an int. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. - ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. + it must contain an integer. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An integer, which means the feature map is zero padded by size of `padding` on each side. + 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on each side.
+ 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after]. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. + return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`. + ceil_mode (bool): Whether to use the ceil function to calculate the output length. + If it is set to False, the floor function will be used. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. + Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. + ShapeError: If the input is not a 3-D tensor. ShapeError: If the output's shape calculated is not greater than 0. + Examples: .. code-block:: python import paddle import paddle.nn.functional as F import numpy as np paddle.disable_static() - - # max pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - output = F.max_pool2d(input, - kernel_size=2, - stride=2, padding=0) - # output.shape [1, 3, 16, 16] - - # for return_indices=True - output, max_indices = F.max_pool2d(input, - kernel_size=2, - stride=2, - padding=0, - return_indices=True) - # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) + # pool_out shape: [1, 3, 16] + pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) + # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') - kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') + """NCL to NCHW""" + data_format = "NCHW" + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d') + _check_input(x, 3) + x = unsqueeze(x, [2]) + kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size') if stride is None: stride = kernel_size else: - stride = utils.convert_to_list(stride, 2, 'pool_stride') + stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') - if data_format not in ["NCHW", "NHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " - "Attr(data_format): %s." % str(data_format)) - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0] + padding, padding_algorithm = _update_padding_nd( + padding, 1, ceil_mode=ceil_mode) - padding = update_padding2d(padding, data_format) + # using 2d ops to implement 1d pooling requires expanding the padding in advance.
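+ # for example, an explicit 1d padding [p] is assumed to come back here as [0, p]: + # a zero pad for the dummy height axis introduced by unsqueeze above, plus the + # original pad for L (this describes the assumed contract of the + # _expand_low_nd_padding helper, not behavior verified here).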
+ padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): - pool_out = core.ops.max_pool2d_with_index( + pool_out = core.ops.max_pool2d_with_index( x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) op_type = 'max_pool2d_with_index' helper = LayerHelper(op_type, **locals()) @@ -758,36 +604,21 @@ def max_pool2d(x, "data_format": data_format, }) - return (pool_out, mask) if return_indices else pool_out + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) -def avg_pool2d(x, +def max_pool2d(x, kernel_size, stride=None, padding=0, + return_indices=False, ceil_mode=False, - count_include_pad=True, - divisor_override=None, data_format="NCHW", name=None): """ - This operation applies 2D average pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} - input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) - $$ + This API implements max pooling 2d operation. + See more details in :ref:`api_nn_pooling_MaxPool2d` . Args: x (Tensor): The input tensor of pooling operator which is a 4-D tensor with @@ -796,30 +627,26 @@ def avg_pool2d(x, `H` is the height of the feature, and `W` is the width of the feature. The data type if float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). + it must contain two integers, (kernel_size_Height, kernel_size_Width). Otherwise, the pool kernel size will be a square of an int. stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). + it must contain two integers, (stride_Height, stride_Width). Otherwise, the pool stride size will be a square of an int. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on each side. + 3.
A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is `true`. - divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. + return_indices (bool): Whether to return the max indices along with the outputs. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor. Raises: @@ -832,87 +659,71 @@ import paddle.nn.functional as F import numpy as np paddle.disable_static() - - # avg pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - output = F.avg_pool2d(input, + # max pool2d + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) + out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0) # output.shape [1, 3, 16, 16] - + # for return_indices=True + out, max_indices = F.max_pool2d(x, + kernel_size=2, + stride=2, + padding=0, + return_indices=True) + # out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16] """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') if stride is None: stride = kernel_size else: stride = utils.convert_to_list(stride, 2, 'pool_stride') - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0] - if data_format not in ["NCHW", "NHWC"]: raise ValueError( "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " "Attr(data_format): %s."
% str(data_format)) - pool_padding = update_padding2d(padding, data_format) + + channel_last = True if data_format == "NHWC" else False + + padding, padding_algorithm = _update_padding_nd( + padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): - output = core.ops.pool2d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', - False, 'padding_algorithm', padding_algorithm, 'strides', stride, - 'paddings', pool_padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, - 'data_format', data_format) - if divisor_override is None: - return output - else: - check_instance(divisor_override, "divisor_override") - return output * (kernel_size[0] * kernel_size[1]) / divisor_override + output = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, + 'paddings', padding, 'padding_algorithm', padding_algorithm, + 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, + 'exclusive', True, 'data_format', data_format) + return output if return_indices else output[0] - op_type = 'pool2d' + op_type = 'max_pool2d_with_index' helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} helper.append_op( type=op_type, inputs={"X": x}, - outputs={"Out": pool_out}, + outputs=outputs, attrs={ - "pooling_type": "avg", + "pooling_type": 'max', "ksize": kernel_size, "global_pooling": False, "strides": stride, - "paddings": pool_padding, + "paddings": padding, "padding_algorithm": padding_algorithm, "use_cudnn": True, "ceil_mode": ceil_mode, "use_mkldnn": False, - "exclusive": not count_include_pad, + "exclusive": True, "data_format": data_format, }) - if divisor_override is None: - return pool_out - else: - check_instance(divisor_override, "divisor_override") - return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override + return (pool_out, mask) if return_indices else pool_out def max_pool3d(x, @@ -924,47 +735,25 @@ def max_pool3d(x, data_format="NCDHW", name=None): """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, D_{in}, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ - $$ - \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, ksize[0]-1} \max_{m=0, \ldots, ksize[1]-1} \max_{n=0, \ldots, ksize[2]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times d + k, - \text{stride[1]} \times h + m, \text{stride[2]} \times w + n) - $$ - + This API implements max pooling 3d operation. + See more details in :ref:`api_nn_pooling_MaxPool3d` . Args: x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W]. The format of - input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is - the number of channels, `D` is the depth of the feature, - `H` is the height of the feature, and `W` is the width - of the feature. - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + shape [N, C, D, H, W].
The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively. + kernel_size (int|list|tuple): The pool kernel size. If the kernel size is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, (stride_Depth, stride_Height, stride_Width). Otherwise, the pool stride size will be a cube of an int. - padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. + padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms. + 1. A string in ['valid', 'same']. + 2. An int, which means the feature map is zero padded by size of `padding` on each side. + 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension. + 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side. + 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). + The default value is 0. ceil_mode (bool): ${ceil_mode_comment} return_indices (bool): Whether to return the max indices along with the outputs. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. @@ -973,7 +762,6 @@ def max_pool3d(x, name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: @@ -986,23 +774,20 @@ def max_pool3d(x, import paddle.nn.functional as F import numpy as np paddle.disable_static() - # max pool3d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - output = F.max_pool2d(input, + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + output = F.max_pool3d(x, kernel_size=2, stride=2, padding=0) - output.shape [1, 3, 16, 16, 16] + # output.shape [1, 3, 16, 16, 16] - # for return_indices=True - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - output, max_indices = paddle.nn.functional.max_pool3d(input, + x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) + output, max_indices = paddle.nn.functional.max_pool3d(x, kernel_size = 2, stride = 2, padding=0, return_indices=True) - # output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16], + # output.shape [1, 3, 16, 16, 16], max_indices.shape [1, 3, 16, 16, 16] - """ check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') @@ -1011,29 +796,10 @@ def max_pool3d(x, else: stride = utils.convert_to_list(stride, 3, 'pool_stride') - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." - % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0, 0] + channel_last = _channel_last(data_format, 3) - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " - "Attr(data_format): %s" % str(data_format)) - padding = update_padding3d(padding, data_format) + padding, padding_algorithm = _update_padding_nd( + padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) if in_dygraph_mode(): output = core.ops.max_pool3d_with_index( @@ -1071,170 +837,83 @@ def max_pool3d(x, return (pool_out, mask) if return_indices else pool_out -def avg_pool3d(x, - kernel_size, - stride=None, - padding=0, - ceil_mode=False, - count_include_pad=False, - divisor_override=None, - data_format="NCDHW", - name=None): +def adaptive_avg_pool1d(x, output_size, name=None): """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. - + This API implements adaptive average pooling 1d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool1d` . + Args: - input (Tensor): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W], where `N` is batch size, `C` is - the number of channels, `D` is the depth of the feature, - `H` is the height of the feature, and `W` is the width - of the feature. - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size - is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding.
If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. - padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - ceil_mode (bool): ${ceil_mode_comment} - count_include_pad (bool): Whether to exclude padding points in average pooling - mode, default is True. - divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. - data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. - The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_depth, input_height, input_width]`. + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Tensor: The output tensor of pooling result. The data type is same as input tensor. + Tensor: The output tensor of adaptive average pooling result. The data type is same + as input tensor. Raises: - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ShapeError: If the output's shape calculated is not greater than 0. + ValueError: 'output_size' should be an integer or list or tuple with length as 1. Examples: .. code-block:: python - import paddle.fluid as fluid - import paddle - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) - # avg pool3d - pool3d = paddle.nn.functional.avg_pool3d( - input, - kernel_size = 2, - stride = 2, - padding=0) - # pool3d.shape: [1, 3, 16, 16, 16] - """ - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') - kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') - if stride is None: - stride = kernel_size - else: - stride = utils.convert_to_list(stride, 3, 'pool_stride') - - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." 
- % str(padding)) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0, 0] - if ceil_mode != False: - raise ValueError( - "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " - "Received ceil_mode: True.") - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0, 0] + # average adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output. + # adaptive avg pool performs calculations as follows: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart) + # + import paddle + import paddle.nn.functional as F + import numpy as np + paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_avg_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16] + """ + pool_type = 'avg' + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'adaptive_avg_pool1d') + _check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_avg_pool1d') - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " - "Attr(data_format): %s" % str(data_format)) - padding = update_padding3d(padding, data_format) + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + l_type = "pool2d" + x = unsqueeze(x, [2]) if in_dygraph_mode(): - output = core.ops.pool3d( - x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, - 'paddings', padding, 'global_pooling', False, 'padding_algorithm', - padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', not count_include_pad, - 'data_format', data_format) - if divisor_override is None: - return output - else: - check_instance(divisor_override, "divisor_override") - return output * (kernel_size[0] * kernel_size[1] * - kernel_size[2]) / divisor_override + pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', + pool_size, 'adaptive', True) + return squeeze(pool_out, [2]) - op_type = "pool3d" - helper = LayerHelper(op_type, **locals()) + helper = LayerHelper(l_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) - outputs = {"Out": pool_out} + outputs = {"Out": pool_out} helper.append_op( - type=op_type, + type=l_type, inputs={"X": x}, outputs=outputs, attrs={ - "pooling_type": 'avg', - "ksize": kernel_size, - "global_pooling": False, - "strides": stride, - "paddings": padding, - "padding_algorithm": padding_algorithm, - "use_cudnn": True, - "ceil_mode": ceil_mode, - "use_mkldnn": False, - "exclusive": not count_include_pad, - "data_format": data_format, + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, }) - if divisor_override is None: - return pool_out - else: - check_instance(divisor_override, "divisor_override") - return pool_out * (kernel_size[0] * kernel_size[1] * - kernel_size[2]) / divisor_override + return squeeze(pool_out, [2]) def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): """ - - This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool2d` . - - For avg adaptive pool2d: - - ..
math:: - - hstart &= floor(i * H_{in} / H_{out}) - - hend &= ceil((i + 1) * H_{in} / H_{out}) - - wstart &= floor(j * W_{in} / W_{out}) - - wend &= ceil((j + 1) * W_{in} / W_{out}) - - Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} + This API implements adaptive average pooling 2d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool2d` . Args: x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. @@ -1248,16 +927,12 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor. - Raises: ValueError: If `data_format` is not "NCHW" or "NHWC". - Examples: .. code-block:: python - # adaptive avg pool2d # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], # output shape is [N, C, m, n], adaptive pool divide H and W dimensions @@ -1279,10 +954,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): input_data = np.random.rand(2, 3, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 32, 32] - pool_out = paddle.nn.functional.adaptive_avg_pool2d( + out = paddle.nn.functional.adaptive_avg_pool2d( x = x, output_size=[3, 3]) - # pool_out.shape is [2, 3, 3, 3] + # out.shape is [2, 3, 3, 3] """ if not in_dygraph_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], @@ -1337,28 +1012,8 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): """ - - This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool3d` . - - For avg adaptive pool3d: - - .. math:: - - dstart &= floor(i * D_{in} / D_{out}) - - dend &= ceil((i + 1) * D_{in} / D_{out}) - - hstart &= floor(j * H_{in} / H_{out}) - - hend &= ceil((j + 1) * H_{in} / H_{out}) - - wstart &= floor(k * W_{in} / W_{out}) - - wend &= ceil((k + 1) * W_{in} / W_{out}) - - Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + This API implements adaptive average pooling 3d operation. + See more details in :ref:`api_nn_pooling_AdaptiveAvgPool3d` . Args: x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. @@ -1372,16 +1027,12 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor. - Raises: ValueError: If `data_format` is not "NCDHW" or "NDHWC". - Examples: .. 
code-block:: python - # adaptive avg pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into m*n grids averagely and performs poolings in each - # grid to get output. @@ -1406,10 +1057,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): input_data = np.random.rand(2, 3, 8, 32, 32) x = paddle.to_tensor(input_data) # x.shape is [2, 3, 8, 32, 32] - pool_out = paddle.nn.functional.adaptive_avg_pool3d( + out = paddle.nn.functional.adaptive_avg_pool3d( x = x, output_size=[3, 3, 3]) - # pool_out.shape is [2, 3, 3, 3, 3] + # out.shape is [2, 3, 3, 3, 3] """ if not in_dygraph_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], @@ -1461,3 +1112,257 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): }) return pool_out + + +def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): + """ + This API implements adaptive max pooling 1d operation. + See more details in :ref:`api_nn_pooling_AdaptiveMaxPool1d` . + + Args: + x (Tensor): The input tensor of pooling operator, which is a 3-D tensor + with shape [N, C, L]. The format of input tensor is NCL, + where N is batch size, C is the number of channels, L is the + length of the feature. The data type is float32 or float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, + it must contain one int. + return_indices (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. Default False. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: + Tensor: The output tensor of adaptive pooling result. The data type is same + as input tensor. + Raises: + ValueError: 'output_size' should be an integer or list or tuple with length as 1. + Examples: + .. code-block:: python + # max adaptive pool1d + # suppose input data in shape of [N, C, L], `output_size` is m or [m], + # output shape is [N, C, m], adaptive pool divide L dimension + # of input data into m grids averagely and performs poolings in each + # grid to get output.
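+ # (grid boundaries follow the floor/ceil rule shown below, so adjacent + # grids may overlap by at most one element)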
+ # adaptive max pool performs calculations as follows: + # + # for i in range(m): + # lstart = floor(i * L / m) + # lend = ceil((i + 1) * L / m) + # output[:, :, i] = max(input[:, :, lstart: lend]) + # + import paddle + import paddle.nn.functional as F + import numpy as np + paddle.disable_static() + data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) + pool_out = F.adaptive_max_pool1d(data, output_size=16) + # pool_out shape: [1, 3, 16] + pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True) + # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] + """ + pool_type = 'max' + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool1d') + _check_input(x, 3) + check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') + + pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') + + l_type = 'max_pool2d_with_index' + + x = unsqueeze(x, [2]) + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True) + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": pool_type, + "ksize": pool_size, + "adaptive": True, + }) + + return (squeeze(pool_out, [2]), + squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) + + +def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): + """ + This operation applies a 2D adaptive max pooling on input tensor. + See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` . + Args: + x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: + Tensor: The output tensor of adaptive max pool2d result. The data type is same as input tensor. + Examples: + .. code-block:: python + # max adaptive pool2d + # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] + # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + # of input data into m*n grids averagely and performs poolings in each + # grid to get output.
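+ # (H is split into m adaptive rows and W into n adaptive columns; each + # output element is the max over its own grid)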
+ # adaptive max pool performs calculations as follows: + # + # for i in range(m): + # for j in range(n): + # hstart = floor(i * H / m) + # hend = ceil((i + 1) * H / m) + # wstart = floor(j * W / n) + # wend = ceil((j + 1) * W / n) + # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 32, 32] + out = paddle.nn.functional.adaptive_max_pool2d( + x = x, + output_size=[3, 3]) + # out.shape is [2, 3, 3, 3] + """ + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool2d') + _check_input(x, 4) + #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool2d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool2d') + + in_h, in_w = x.shape[2:4] + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 2, 'output_size') + else: + if output_size[0] is None: + output_size[0] = in_h + if output_size[1] is None: + output_size[1] = in_w + + if in_dygraph_mode(): + pool_out = core.ops.max_pool2d_with_index( + x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + return pool_out if return_indices else pool_out[0] + + l_type = 'max_pool2d_with_index' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) + return (pool_out, mask) if return_indices else pool_out + + +def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): + """ + This operation applies a 3D adaptive max pooling on input tensor. + See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` . + Args: + x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means the size will be the same as that of the input. + return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: + Tensor: The output tensor of adaptive max pool3d result. The data type is same as input tensor. + Examples: + .. code-block:: python + # adaptive max pool3d + # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] + # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + # of input data into l*m*n grids averagely and performs poolings in each + # grid to get output.
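+ # (D, H and W are split into l, m and n adaptive segments respectively; + # each output element is the max over its own grid)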
+ # adaptive max pool performs calculations as follows: + # + # for i in range(l): + # for j in range(m): + # for k in range(n): + # dstart = floor(i * D / l) + # dend = ceil((i + 1) * D / l) + # hstart = floor(j * H / m) + # hend = ceil((j + 1) * H / m) + # wstart = floor(k * W / n) + # wend = ceil((k + 1) * W / n) + # output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend]) + # + import paddle + import numpy as np + paddle.disable_static() + input_data = np.random.rand(2, 3, 8, 32, 32) + x = paddle.to_tensor(input_data) + # x.shape is [2, 3, 8, 32, 32] + out = paddle.nn.functional.adaptive_max_pool3d( + x = x, + output_size=[3, 3, 3]) + # out.shape is [2, 3, 3, 3, 3] + """ + + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'adaptive_max_pool3d') + _check_input(x, 5) + #check_type(output_size, 'pool_size', (int), 'adaptive_max_pool3d') + check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool3d') + + in_l, in_h, in_w = x.shape[2:5] + if isinstance(output_size, int): + output_size = utils.convert_to_list(output_size, 3, 'output_size') + else: + if output_size[0] is None: + output_size[0] = in_l + if output_size[1] is None: + output_size[1] = in_h + if output_size[2] is None: + output_size[2] = in_w + + if in_dygraph_mode(): + pool_out = core.ops.max_pool3d_with_index( + x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True) + return pool_out if return_indices else pool_out[0] + + l_type = 'max_pool3d_with_index' + + helper = LayerHelper(l_type, **locals()) + dtype = helper.input_dtype() + pool_out = helper.create_variable_for_type_inference(dtype) + + mask = helper.create_variable_for_type_inference(dtype) + outputs = {"Out": pool_out, "Mask": mask} + + helper.append_op( + type=l_type, + inputs={"X": x}, + outputs=outputs, + attrs={ + "pooling_type": 'max', + "ksize": output_size, + "adaptive": True, + }) + + return (pool_out, mask) if return_indices else pool_out diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index 3399e4e34c9e3bc61fde515fc1917deb213f3d0b..6eac15cd694e51c24f94f7686b6e63fa7c6cbf09 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -66,16 +66,18 @@ from .common import Dropout #DEFINE_ALIAS from .common import Dropout2D #DEFINE_ALIAS from .common import Dropout3D #DEFINE_ALIAS from .common import AlphaDropout #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS from .pooling import AvgPool1d #DEFINE_ALIAS -from .pooling import MaxPool1d #DEFINE_ALIAS -from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS -from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS from .pooling import AvgPool2d #DEFINE_ALIAS -from .pooling import MaxPool2d #DEFINE_ALIAS from .pooling import AvgPool3d #DEFINE_ALIAS +from .pooling import MaxPool1d #DEFINE_ALIAS +from .pooling import MaxPool2d #DEFINE_ALIAS from .pooling import MaxPool3d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS +from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool2d #DEFINE_ALIAS +from .pooling import AdaptiveMaxPool3d #DEFINE_ALIAS from .conv import Conv1d #DEFINE_ALIAS from .conv import Conv2d #DEFINE_ALIAS from .conv import Conv3d #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 87fa0caec9ee287c42d8308d9da25c6d2fc9b911..6f6b567849732ff889db4507708758cd8eeab2a8 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -12,198 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle - -from ...fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype -from ...fluid.layers import utils from ...fluid.dygraph import layers from ...fluid.layer_helper import LayerHelper from .. import functional as F __all__ = [ - 'AdaptiveAvgPool2d', - 'AdaptiveAvgPool3d', 'AvgPool1d', - 'maxPool1d', - 'AdaptiveMaxPool1d', - 'AdaptiveAvgPool1d', 'AvgPool2d', - 'MaxPool2d', 'AvgPool3d', + 'MaxPool1d', + 'MaxPool2d', 'MaxPool3d', + 'AdaptiveAvgPool1d', + 'AdaptiveAvgPool2d', + 'AdaptiveAvgPool3d', + 'AdaptiveMaxPool1d', + 'AdaptiveMaxPool2d', + 'AdaptiveMaxPool3d', ] -class AdaptiveAvgPool2d(layers.Layer): - """ - - This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - - For avg adaptive pool2d: - - .. math:: - - hstart &= floor(i * H_{in} / H_{out}) - - hend &= ceil((i + 1) * H_{in} / H_{out}) - - wstart &= floor(j * W_{in} / W_{out}) - - wend &= ceil((j + 1) * W_{in} / W_{out}) - - Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} - - - Parameters: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two element, (H, W). H and W can be either a int, or None which means - the size will be the same as that of the input. - data_format (str): The data format of the input and output data. An optional string - from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in - the order of: [batch_size, input_channels, input_height, input_width]. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Shape: - x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32 or float64. - output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x. - - Returns: - A callable object of AdaptiveAvgPool2d. - - Examples: - .. code-block:: python - - # adaptive avg pool2d - # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m * n grids averagely and performs poolings in each - # grid to get output. 
- # adaptive avg pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - import numpy as np - paddle.disable_static() - input_data = np.random.rand(2, 3, 32, 32) - x = paddle.to_tensor(input_data) - # x.shape is [2, 3, 32, 32] - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3) - pool_out = adaptive_avg_pool(x = x) - # pool_out.shape is [2, 3, 3, 3] - """ - - def __init__(self, output_size, data_format="NCHW", name=None): - super(AdaptiveAvgPool2d, self).__init__() - self._output_size = output_size - self._data_format = data_format - self._name = name - - def forward(self, x): - return F.adaptive_avg_pool2d( - x, - output_size=self._output_size, - data_format=self._data_format, - name=self._name) - - -class AdaptiveAvgPool3d(layers.Layer): - """ - - This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions - of the output tensor are determined by the parameter output_size. - - For avg adaptive pool3d: - - .. math:: - - dstart &= floor(i * D_{in} / D_{out}) - - dend &= ceil((i + 1) * D_{in} / D_{out}) - - hstart &= floor(j * H_{in} / H_{out}) - - hend &= ceil((j + 1) * H_{in} / H_{out}) - - wstart &= floor(k * W_{in} / W_{out}) - - wend &= ceil((k + 1) * W_{in} / W_{out}) - - Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - - - Parameters: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means - the size will be the same as that of the input. - data_format (str): The data format of the input and output data. An optional string - from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in - the order of: [batch_size, input_channels, input_depth, input_height, input_width]. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - Shape: - x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32 or float64. - output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x. - - Returns: - A callable object of AdaptiveAvgPool3d. - - Examples: - .. code-block:: python - - # adaptive avg pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into l * m * n grids averagely and performs poolings in each - # grid to get output. 
-          # adaptive avg pool performs calculations as follow:
-          #
-          #     for i in range(l):
-          #         for j in range(m):
-          #             for k in range(n):
-          #                 dstart = floor(i * D / l)
-          #                 dend = ceil((i + 1) * D / l)
-          #                 hstart = floor(j * H / m)
-          #                 hend = ceil((j + 1) * H / m)
-          #                 wstart = floor(k * W / n)
-          #                 wend = ceil((k + 1) * W / n)
-          #                 output[:, :, i, j, k] =
-          #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
-          import paddle
-          import numpy as np
-          paddle.disable_static()
-          input_data = np.random.rand(2, 3, 8, 32, 32)
-          x = paddle.to_tensor(input_data)
-          # x.shape is [2, 3, 8, 32, 32]
-          adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
-          pool_out = adaptive_avg_pool(x = x)
-          # pool_out = [2, 3, 3, 3, 3]
-    """
-
-    def __init__(self, output_size, data_format="NCDHW", name=None):
-        super(AdaptiveAvgPool3d, self).__init__()
-        self._output_size = output_size
-        self._data_format = data_format
-        self._name = name
-
-    def forward(self, x):
-        return F.adaptive_avg_pool3d(
-            x,
-            output_size=self._output_size,
-            data_format=self._data_format,
-            name=self._name)
-
-
 class AvgPool1d(layers.Layer):
     """
     This operation applies a 1D average pooling over an input signal composed
@@ -223,17 +51,20 @@ class AvgPool1d(layers.Layer):
     Args:
         kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
-            it must contain one integers.
+            it must contain an integer.
         stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
-            it must contain one integers.
-        padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
-            'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
-            it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero,
-            then the input is implicitly zero-padded on both sides for padding number of points.
+            it must contain an integer.
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An int, which means the feature map is zero padded by size of `padding` on both sides.
+            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on both sides.
+            4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
-        count_include_pad (bool): Whether to exclude padding points in average pooling
-            mode, default is `true`.
+        count_include_pad (bool): Whether to include the padding points in the average
+            calculation. The default value is `True`.
-        ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width.
-            If it is set to False, the floor function will be used. Default False
+        ceil_mode (bool): Whether to use the ceil function to calculate the output length.
+            If it is set to False, the floor function will be used. The default value is False.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
@@ -245,10 +76,14 @@ class AvgPool1d(layers.Layer):
         ValueError: If `padding` is a string, but not "SAME" or "VALID".
         ValueError: If `padding` is "VALID", but `ceil_mode` is True.
         ValueError: If `padding` is a list or tuple but its length is greater than 1.
-        ShapeError: If the input is not a 3-D.
+        ShapeError: If the input is not a 3-D tensor.
         ShapeError: If the output's shape calculated is not greater than 0.
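+
+    Note:
+        As a sketch of the shape inference (with floor replaced by ceil when
+        `ceil_mode` is True):
+        L_out = floor((L_in + 2 * padding - kernel_size) / stride) + 1.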
+    Shape:
+        - input: 3-D tensor.
+        - output: 3-D tensor.
+
     Examples:
         .. code-block:: python
@@ -284,63 +119,74 @@ class AvgPool1d(layers.Layer):
         return out
 
 
-class MaxPool1d(layers.Layer):
+class AvgPool2d(layers.Layer):
     """
-    Applies a 1D max pooling over an input signal composed of several input planes based
-    on the input, output_size, return_indices parameters.
-    Input(X) and output(Out) are in NCL format, where N is batch
-    size, C is the number of channels, L is the length of the feature.
-
-    The output value of the layer with input size (N, C, L),
-    output (N, C, L_{out}) and kernel_size k can be precisely described as
-    For average pool1d:
+    This operation applies 2D average pooling over input features based on the input,
+    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
+    in NCHW format, where N is batch size, C is the number of channels,
+    H is the height of the feature, and W is the width of the feature.
 
-    .. math::
+    Example:
+        Input:
+            X shape: $(N, C, H_{in}, W_{in})$
+        Attr:
+            kernel_size: ksize
 
-        Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])}
+        Output:
+            Out shape: $(N, C, H_{out}, W_{out})$
+            $$
+            out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
+                input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
+            $$
 
     Args:
-        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
-            it must contain one integers.
+        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain two integers, (pool_size_Height, pool_size_Width).
+            Otherwise, the pool kernel size will be a square of an int.
         stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
-            it must contain one integers.
-        padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
-            'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
-            it could be the following forms: `[pad_left, pad_right]`.
-        return_indices (bool): Whether return the max indices along with the outputs. default is `False`.
-        ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
-            If it is set to False, the floor function will be used. Default False
+            it must contain two integers, (pool_stride_Height, pool_stride_Width).
+            Otherwise, the pool stride size will be a square of an int.
+
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
+            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
+            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
+        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
+        count_include_pad (bool): Whether to include the padding points in the average
+            calculation. The default value is `True`.
+        divisor_override (int|float): If specified, it will be used as the divisor, otherwise kernel_size will be used. Default None.
+        data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
+            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_height, input_width]`.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
 
-    Returns:
-        None.
+    Shape:
+        - x: 4-D tensor.
+        - out: 4-D tensor.
 
+    Returns: None.
     Raises:
         ValueError: If `padding` is a string, but not "SAME" or "VALID".
         ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ValueError: If `padding` is a list or tuple but its length greater than 1.
-        ShapeError: If the input is not a 3-D.
         ShapeError: If the output's shape calculated is not greater than 0.
-
-    Examples:
-        .. code-block:: python
-
         import paddle
         import paddle.nn as nn
+        import numpy as np
         paddle.disable_static()
 
-        data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-        MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
-        pool_out = MaxPool1d(data)
-        # pool_out shape: [1, 3, 16]
-
-        MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True)
-        pool_out, indices = MaxPool1d(data)
-        # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
+        # avg pool2d
+        input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
+        AvgPool2d = nn.AvgPool2d(kernel_size=2,
+                                 stride=2, padding=0)
+        output = AvgPool2d(input)
+        # output.shape [1, 3, 16, 16]
 
     """
 
@@ -348,113 +194,155 @@
     def __init__(self,
                  kernel_size,
                  stride=None,
                  padding=0,
-                 return_indices=False,
                  ceil_mode=False,
+                 count_include_pad=True,
+                 divisor_override=None,
+                 data_format="NCHW",
                  name=None):
-        super(MaxPool1d, self).__init__()
-        self.kernel_size = kernel_size
+        super(AvgPool2d, self).__init__()
+        self.ksize = kernel_size
         self.stride = stride
         self.padding = padding
         self.ceil_mode = ceil_mode
-        self.return_indices = return_indices
+        self.count_include_pad = count_include_pad
+        self.divisor = divisor_override
+        self.data_format = data_format
        self.name = name
 
-    def forward(self, input):
-        out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
-                           self.return_indices, self.ceil_mode, self.name)
-        return out
+    def forward(self, x):
+        return F.avg_pool2d(
+            x,
+            kernel_size=self.ksize,
+            stride=self.stride,
+            padding=self.padding,
+            ceil_mode=self.ceil_mode,
+            count_include_pad=self.count_include_pad,
+            divisor_override=self.divisor,
+            data_format=self.data_format,
+            name=self.name)
 
 
-class AdaptiveAvgPool1d(layers.Layer):
+class AvgPool3d(layers.Layer):
     """
-
-    This operation applies a 1D adaptive average pooling over an input signal composed
-    of several input planes, based on the input, output_size, return_indices parameters.
-    Input(X) and output(Out) are in NCL format, where N is batch
-    size, C is the number of channels, L is the length of the feature.
-    The output tensor shape will be [N, C, output_size].
-
-    For average adaptive pool1d:
-
-    .. math::
-
-        lstart &= floor(i * L_{in} / L_{out})
-
-        lend &= ceil((i + 1) * L_{in} / L_{out})
-
-        Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)}
+    This operation applies 3D average pooling over input features based on the input,
+    and kernel_size, stride, padding parameters.
 Input(X) and Output(Out) are
+    in NCDHW format, where N is batch size, C is the number of channels,
+    H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
 
     Args:
-        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
-            it must contain one int.
+        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
+            is a tuple or list, it must contain three integers,
+            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
+            Otherwise, the pool kernel size will be the cube of an int.
+        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
+            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
+            Otherwise, the pool stride size will be a cube of an int.
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
+            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
+            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
+        ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape.
+        count_include_pad (bool): Whether to include the padding points in the average
+            calculation. The default value is True.
+        divisor_override (int|float): If specified, it will be used as the divisor, otherwise kernel_size will be used. Default None.
+        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
 
-    Returns:
-        None.
-
+    Returns: None.
     Raises:
-        ValueError: 'pool_size' should be a integer or list or tuple with length as 1.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
+        ShapeError: If the output's shape calculated is not greater than 0.
+
+    Shape:
+        - x: 5-D tensor.
+        - out: 5-D tensor.
 
     Examples:
         .. code-block:: python
-
-          # average adaptive pool1d
-          # suppose input data in shape of [N, C, L], `output_size` is m or [m],
-          # output shape is [N, C, m], adaptive pool divide L dimension
-          # of input data into m grids averagely and performs poolings in each
-          # grid to get output.
-          # adaptive max pool performs calculations as follow:
-          #
-          #     for i in range(m):
-          #         lstart = floor(i * L / m)
-          #         lend = ceil((i + 1) * L / m)
-          #         output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
-          #
          import paddle
          import paddle.nn as nn
+         import numpy as np
          paddle.disable_static()
 
-         data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-         AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
-         pool_out = AdaptiveAvgPool1d(data)
-         # pool_out shape: [1, 3, 16]
+         # avg pool3d
+         input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
+         AvgPool3d = nn.AvgPool3d(kernel_size=2,
+                                  stride=2, padding=0)
+         output = AvgPool3d(input)
+         # output.shape [1, 2, 3, 16, 16]
+
    """
 
-    def __init__(self, output_size, name=None):
-        super(AdaptiveAvgPool1d, self).__init__()
-        self.output_size = output_size
+    def __init__(self,
+                 kernel_size,
+                 stride,
+                 padding=0,
+                 ceil_mode=False,
+                 count_include_pad=True,
+                 divisor_override=None,
+                 data_format="NCDHW",
+                 name=None):
+        super(AvgPool3d, self).__init__()
+        self.ksize = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.ceil_mode = ceil_mode
+        self.count_include_pad = count_include_pad
+        self.divisor = divisor_override
+        self.data_format = data_format
         self.name = name
 
-    def forward(self, input):
-        return F.adaptive_avg_pool1d(input, self.output_size, self.name)
+    def forward(self, x):
+        return F.avg_pool3d(
+            x,
+            kernel_size=self.ksize,
+            stride=self.stride,
+            padding=self.padding,
+            ceil_mode=self.ceil_mode,
+            count_include_pad=self.count_include_pad,
+            divisor_override=self.divisor,
+            data_format=self.data_format,
+            name=self.name)
 
 
-class AdaptiveMaxPool1d(layers.Layer):
+class MaxPool1d(layers.Layer):
    """
-
-    This operation applies a 1D adaptive max pooling over an input signal composed
-    of several input planes, based on the input, output_size, return_indices parameters.
+    Applies a 1D max pooling over an input signal composed of several input planes,
+    based on the input and the kernel_size, stride, padding parameters.
     Input(X) and output(Out) are in NCL format, where N is batch
     size, C is the number of channels, L is the length of the feature.
 
-    The output tensor shape will be [N, C, output_size].
-    For max adaptive pool1d:
+    The output value of the layer with input size (N, C, L),
+    output (N, C, L_{out}) and kernel_size k can be precisely described as
+    For max pool1d:
 
    .. math::
 
-        lstart &= floor(i * L_{in} / L_{out})
-
-        lend &= ceil((i + 1) * L_{in} / L_{out})
-
-        Output(i) &= max(Input[lstart:lend])}
+        Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])
 
    Args:
-        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
-            it must contain one int.
-        return_indices (bool): If true, the index of max pooling point will be returned along
-            with outputs. It cannot be set in average pooling type. Default False.
+        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain an integer.
+        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
+            it must contain an integer.
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An integer, which means the feature map is zero padded by size of `padding` on both sides.
+            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on both sides.
+            4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
+        return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`.
+        ceil_mode (bool): Whether to use the ceil function to calculate output height and width.
+            If it is set to False, the floor function will be used. Default False.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
@@ -462,53 +350,60 @@ class AdaptiveMaxPool1d(layers.Layer):
        None.
 
    Raises:
-        ValueError: 'pool_size' should be a integer or list or tuple with length as 1.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
+        ValueError: If `padding` is a list or tuple but its length is greater than 1.
+        ShapeError: If the input is not a 3-D tensor.
+        ShapeError: If the output's shape calculated is not greater than 0.
+
+
+    Shape:
+        - x: 3-D tensor.
+        - out: 3-D tensor.
 
    Examples:
+        .. code-block:: python
 
-          # max adaptive pool1d
-          # suppose input data in shape of [N, C, L], `output_size` is m or [m],
-          # output shape is [N, C, m], adaptive pool divide L dimension
-          # of input data into m grids averagely and performs poolings in each
-          # grid to get output.
-          # adaptive max pool performs calculations as follow:
-          #
-          #     for i in range(m):
-          #         lstart = floor(i * L / m)
-          #         lend = ceil((i + 1) * L / m)
-          #         output[:, :, i] = max(input[:, :, lstart: lend])
-          #
-          import paddle
+          import paddle
          import paddle.nn as nn
+          import numpy as np
          paddle.disable_static()
 
          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
-          AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
-          pool_out = AdaptiveMaxPool1d(data)
+          MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
+          pool_out = MaxPool1d(data)
          # pool_out shape: [1, 3, 16]
 
-          # for return_indices = true
-          AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True)
-          pool_out, indices = AdaptiveMaxPool1d(data)
+          MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True)
+          pool_out, indices = MaxPool1d(data)
          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
 
    """
 
-    def __init__(self, output_size, return_indices=False, name=None):
-        super(AdaptiveMaxPool1d, self).__init__()
-        self.output_size = output_size
+    def __init__(self,
+                 kernel_size,
+                 stride=None,
+                 padding=0,
+                 return_indices=False,
+                 ceil_mode=False,
+                 name=None):
+        super(MaxPool1d, self).__init__()
+        self.kernel_size = kernel_size
+        self.stride = stride
+        self.padding = padding
+        self.ceil_mode = ceil_mode
        self.return_indices = return_indices
        self.name = name
 
    def forward(self, input):
-        return F.adaptive_max_pool1d(input, self.output_size,
-                                     self.return_indices, self.name)
+        out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
+                           self.return_indices, self.ceil_mode, self.name)
+        return out
 
 
-class AvgPool2d(layers.Layer):
+class MaxPool2d(layers.Layer):
    """
-    This operation applies 2D average pooling over input features based on the input,
+    This operation applies 2D max pooling over input features based on the input,
     and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
     in NCHW format, where N is batch size, C is the number of channels,
     H is the height of the feature, and W is the width of the feature.
@@ -522,8 +417,9 @@ class AvgPool2d(layers.Layer):
         Output:
             Out shape: $(N, C, H_{out}, W_{out})$
             $$
-            out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
-                input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
+            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
+                & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
+                    \text{stride[1]} \times w + n)
             $$
 
     Args:
@@ -532,31 +428,33 @@
             Otherwise, the pool kernel size will be a square of an int.
         stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
             it must contain two integers, (pool_stride_Height, pool_stride_Width).
-            Otherwise, the pool stride size will be a square of an int. Default: kernel_size.
-        padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
-            'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
-            it could be in three forms: `[pad_height, pad_width]` or
-            `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`,
-            `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
-            when `data_format` is `"NHWC"`, `pool_padding` can be in the form
-            `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
-            Otherwise, the pool padding size will be a square of an int.
+            Otherwise, the pool stride size will be a square of an int.
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
+            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
+            4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
         ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
-        count_include_pad (bool): Whether to exclude padding points in average pooling
-            mode, default is `true`.
-        divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
-        name(str, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
+        return_indices (bool): Whether to return the max indices along with the outputs.
         data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
             The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
             `[batch_size, input_channels, input_height, input_width]`.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
 
-    Returns: None.
+ Returns: None Raises: ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is "VALID", but `ceil_mode` is True. ShapeError: If the output's shape calculated is not greater than 0. + + Shape: + - x: 4-D tensor. + - out: 4-D tensor. + Examples: .. code-block:: python import paddle @@ -566,172 +464,72 @@ class AvgPool2d(layers.Layer): # max pool2d input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - AvgPool2d = nn.AvgPool2d(kernel_size=2, - stride=2, padding=0) - output = AvgPoo2d(input) + MaxPool2d = nn.MaxPool2d(kernel_size=2, + stride=2, padding=0) + output = MaxPool2d(input) # output.shape [1, 3, 16, 16] + # for return_indices=True + MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) + output, max_indices = MaxPool2d(input) + # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], """ def __init__(self, kernel_size, stride=None, padding=0, + return_indices=False, ceil_mode=False, - count_include_pad=True, - divisor_override=None, data_format="NCHW", name=None): - super(AvgPool2d, self).__init__() + super(MaxPool2d, self).__init__() self.ksize = kernel_size self.stride = stride self.padding = padding + self.return_indices = return_indices self.ceil_mode = ceil_mode - self.count_include_pad = count_include_pad - self.divisor = divisor_override self.data_format = data_format self.name = name def forward(self, x): - return F.avg_pool2d( + return F.max_pool2d( x, kernel_size=self.ksize, stride=self.stride, padding=self.padding, - ceil_mode=self.ceil_mode, - count_include_pad=self.count_include_pad, - divisor_override=self.divisor, + return_indices=self.return_indices, data_format=self.data_format, name=self.name) -class MaxPool2d(layers.Layer): +class MaxPool3d(layers.Layer): """ - This operation applies 2D max pooling over input feature based on the input, + This operation applies 3D max pooling over input features based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCHW format, where N is batch size, C is the number of channels, - H is the height of the feature, and W is the width of the feature. - - Example: - Input: - X shape: $(N, C, H_{in}, W_{in})$ - Attr: - kernel_size: ksize - - Output: - Out shape: $(N, C, H_{out}, W_{out})$ - $$ - out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\ - & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, - \text{stride[1]} \times w + n) - $$ - - Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (pool_size_Height, pool_size_Width). - Otherwise, the pool kernel size will be a square of an int. - stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, - it must contain two integers, (pool_stride_Height, pool_stride_Width). - Otherwise, the pool stride size will be a square of an int. Default: kernel_size. - padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, - it could be in three forms: `[pad_height, pad_width]` or - `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, - `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 
- when `data_format` is `"NHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Otherwise, the pool padding size will be a square of an int. - ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. - data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. - name(str, optional): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - - Returns: None - Raises: - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is "VALID", but `ceil_mode` is True. - ShapeError: If the output's shape calculated is not greater than 0. - Examples: - .. code-block:: python - import paddle - import paddle.nn as nn - import numpy as np - paddle.disable_static() - - # max pool2d - input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) - MaxPool2d = nn.MaxPool2d(kernel_size=2, - stride=2, padding=0) - output = MaxPool2d(input) - # output.shape [1, 3, 16, 16] - - # for return_indices=True - MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) - output, max_indices = MaxPool2d(input) - # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], - """ - - def __init__(self, - kernel_size, - stride=None, - padding=0, - return_indices=False, - ceil_mode=False, - data_format="NCHW", - name=None): - super(MaxPool2d, self).__init__() - self.ksize = kernel_size - self.stride = stride - self.padding = padding - self.return_indices = return_indices - self.ceil_mode = ceil_mode - self.data_format = data_format - self.name = name - - def forward(self, x): - return F.max_pool2d( - x, - kernel_size=self.ksize, - stride=self.stride, - padding=self.padding, - return_indices=self.return_indices, - data_format=self.data_format, - name=self.name) - - -class MaxPool3d(layers.Layer): - """ - This operation applies 3D max pooling over input features based on the input, - and kernel_size, stride, padding parameters. Input(X) and Output(Out) are - in NCDHW format, where N is batch size, C is the number of channels, - H is the height of the feature, D is the depth of the feature, and W is the width of the feature. + in NCDHW format, where N is batch size, C is the number of channels, + H is the height of the feature, D is the depth of the feature, and W is the width of the feature. Args: - kernel_size (int|list|tuple): The pool kernel size. If pool kernel size + kernel_size (int|list|tuple): The pool kernel size. If the kernel size is a tuple or list, it must contain three integers, - (pool_size_Depth, pool_size_Height, pool_size_Width). + (kernel_size_Depth, kernel_size_Height, kernel_size_Width). Otherwise, the pool kernel size will be the cube of an int. - stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. If pool stride size is a tuple or list, - it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. - Otherwise, the pool stride size will be a cube of an int. Default kernel_size. - padding (int|list|tuple): The pool padding size. 
-            If pool padding size is a tuple or list,
-            it could be in three forms: `[pad_depth, pad_height, pad_width]` or
-            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
-            and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
-            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
-            when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
-            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
-        ceil_mode (bool): when True, will use ceil instead of floor to compute the output shape.
-        count_include_pad (bool): Whether to exclude padding points in average pooling
-            mode, default is True.
-        data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
-            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
-            `[batch_size, input_channels, input_height, input_width]`.
+        stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
+            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
+            Otherwise, the pool stride size will be a cube of an int.
+        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
+            1. A string in ['valid', 'same'].
+            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
+            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
+            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
+            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
+            The default value is 0.
+        ceil_mode (bool): When True, will use ceil instead of floor to compute the output shape.
+        return_indices (bool): Whether to return the max indices along with the outputs.
+        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
@@ -742,6 +540,11 @@ class MaxPool3d(layers.Layer):
         ValueError: If `padding` is a string, but not "SAME" or "VALID".
         ValueError: If `padding` is "VALID", but `ceil_mode` is True.
         ShapeError: If the output's shape calculated is not greater than 0.
+
+    Shape:
+        - x: 5-D tensor.
+        - out: 5-D tensor.
+
     Examples:
         .. code-block:: python
           import paddle
@@ -790,88 +593,457 @@ class MaxPool3d(layers.Layer):
             name=self.name)
 
 
-class AvgPool3d(layers.Layer):
+class AdaptiveAvgPool1d(layers.Layer):
     """
-    This operation applies 3D max pooling over input features based on the input,
-    and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
-    in NCDHW format, where N is batch size, C is the number of channels,
-    H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
+
+    This operation applies a 1D adaptive average pooling over an input signal composed
+    of several input planes, based on the input and output_size parameters.
+    Input(X) and output(Out) are in NCL format, where N is batch
+    size, C is the number of channels, L is the length of the feature.
+    The output tensor shape will be [N, C, output_size].
+
+    For average adaptive pool1d:
+
+    .. math::
+
+        lstart &= floor(i * L_{in} / L_{out})
+
+        lend &= ceil((i + 1) * L_{in} / L_{out})
+
+        Output(i) &= \\frac{sum(Input[lstart:lend])}{(lend - lstart)}
 
     Args:
-        kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
-            is a tuple or list, it must contain three integers,
-            (pool_size_Depth, pool_size_Height, pool_size_Width).
-            Otherwise, the pool kernel size will be the cube of an int.
-        stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
-            'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
-            it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
-            Otherwise, the pool stride size will be a cube of an int.
-        padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
-            it could be in three forms: `[pad_depth, pad_height, pad_width]` or
-            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
-            and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
-            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
-            when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
-            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
-        ceil_mode (bool): ${ceil_mode_comment}
-        count_include_pad (bool): Whether to exclude padding points in average pooling
-            mode, default is True.
-        divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
-        data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
-            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
-            `[batch_size, input_channels, input_height, input_width]`.
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain one int.
         name(str, optional): For detailed information, please refer
             to :ref:`api_guide_Name`. Usually name is no need to set and
             None by default.
 
-    Returns: None.
+    Returns:
+        None.
+
     Raises:
-        ValueError: If `padding` is a string, but not "SAME" or "VALID".
-        ValueError: If `padding` is "VALID", but `ceil_mode` is True.
-        ShapeError: If the output's shape calculated is not greater than 0.
+        ValueError: 'output_size' should be an integer or a list or tuple with length 1.
+
+    Shape:
+        - x: 3-D tensor.
+        - out: 3-D tensor.
+
     Examples:
         .. code-block:: python
+
+          # average adaptive pool1d
+          # suppose input data in shape of [N, C, L], `output_size` is m or [m],
+          # output shape is [N, C, m], adaptive pool divides the L dimension
+          # of input data into m grids evenly and performs pooling in each
+          # grid to get output.
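+          # (illustrative note: each window averages (lend - lstart) inputs;
+          #  for L=32 and m=16 every window is exactly [2*i, 2*i+2), matching
+          #  a plain average pool with kernel 2 and stride 2.)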
+          # adaptive avg pool performs calculations as follows:
+          #
+          #     for i in range(m):
+          #         lstart = floor(i * L / m)
+          #         lend = ceil((i + 1) * L / m)
+          #         output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
+          #
          import paddle
          import paddle.nn as nn
          import numpy as np
          paddle.disable_static()
 
-          # avg pool3d
-          input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
-          AvgPool3d = nn.AvgPool3d(kernel_size=2,
-                                   stride=2, padding=0)
-          output = AvgPool3d(input)
-          # output.shape [1, 2, 3, 16, 16]
-
+          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
+          AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
+          pool_out = AdaptiveAvgPool1d(data)
+          # pool_out shape: [1, 3, 16]
    """
 
-    def __init__(self,
-                 kernel_size,
-                 stride,
-                 padding=0,
-                 ceil_mode=False,
-                 count_include_pad=True,
-                 divisor_override=None,
-                 data_format="NCDHW",
-                 name=None):
-        super(AvgPool3d, self).__init__()
-        self.ksize = kernel_size
-        self.stride = stride
-        self.padding = padding
-        self.ceil_mode = ceil_mode
-        self.count_include_pad = count_include_pad
-        self.divisor = divisor_override
-        self.data_format = data_format
+    def __init__(self, output_size, name=None):
+        super(AdaptiveAvgPool1d, self).__init__()
+        self.output_size = output_size
        self.name = name
 
-    def forward(self, x):
-        return F.avg_pool3d(
-            x,
-            kernel_size=self.ksize,
-            stride=self.stride,
-            padding=self.padding,
-            ceil_mode=self.ceil_mode,
-            count_include_pad=self.count_include_pad,
-            divisor_override=self.divisor,
-            data_format=self.data_format,
-            name=self.name)
+    def forward(self, input):
+        return F.adaptive_avg_pool1d(input, self.output_size, self.name)
+
+
+class AdaptiveAvgPool2d(layers.Layer):
+    """
+
+    This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
+    of the output tensor are determined by the parameter output_size.
+
+    For avg adaptive pool2d:
+
+    .. math::
+
+        hstart &= floor(i * H_{in} / H_{out})
+
+        hend &= ceil((i + 1) * H_{in} / H_{out})
+
+        wstart &= floor(j * W_{in} / W_{out})
+
+        wend &= ceil((j + 1) * W_{in} / W_{out})
+
+        Output(i, j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
+
+
+    Parameters:
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain two elements, (H, W). H and W can be either an int, or None which means
+            the size will be the same as that of the input.
+        data_format (str): The data format of the input and output data. An optional string
+            from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
+            the order of: [batch_size, input_channels, input_height, input_width].
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+
+    Shape:
+        x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32 or float64.
+        output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.
+
+    Returns:
+        A callable object of AdaptiveAvgPool2d.
+
+    Examples:
+        .. code-block:: python
+
+            # adaptive avg pool2d
+            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
+            # output shape is [N, C, m, n], adaptive pool divides the H and W dimensions
+            # of input data into m * n grids evenly and performs pooling in each
+            # grid to get output.
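+            # (illustrative note: for H=W=32 and output_size=3 the floor/ceil
+            #  rule yields row/column windows [0,11), [10,22), [21,32), i.e.
+            #  window sizes 11, 12, 11 that overlap by one element.)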
+            # adaptive avg pool performs calculations as follows:
+            #
+            #     for i in range(m):
+            #         for j in range(n):
+            #             hstart = floor(i * H / m)
+            #             hend = ceil((i + 1) * H / m)
+            #             wstart = floor(j * W / n)
+            #             wend = ceil((j + 1) * W / n)
+            #             output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
+            #
+            import paddle
+            import numpy as np
+            paddle.disable_static()
+            input_data = np.random.rand(2, 3, 32, 32)
+            x = paddle.to_tensor(input_data)
+            # x.shape is [2, 3, 32, 32]
+            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
+            pool_out = adaptive_avg_pool(x = x)
+            # pool_out.shape is [2, 3, 3, 3]
+    """
+
+    def __init__(self, output_size, data_format="NCHW", name=None):
+        super(AdaptiveAvgPool2d, self).__init__()
+        self._output_size = output_size
+        self._data_format = data_format
+        self._name = name
+
+    def forward(self, x):
+        return F.adaptive_avg_pool2d(
+            x,
+            output_size=self._output_size,
+            data_format=self._data_format,
+            name=self._name)
+
+
+class AdaptiveAvgPool3d(layers.Layer):
+    """
+
+    This operation applies 3D adaptive avg pooling on input tensor. The d, h and w dimensions
+    of the output tensor are determined by the parameter output_size.
+
+    For avg adaptive pool3d:
+
+    .. math::
+
+        dstart &= floor(i * D_{in} / D_{out})
+
+        dend &= ceil((i + 1) * D_{in} / D_{out})
+
+        hstart &= floor(j * H_{in} / H_{out})
+
+        hend &= ceil((j + 1) * H_{in} / H_{out})
+
+        wstart &= floor(k * W_{in} / W_{out})
+
+        wend &= ceil((k + 1) * W_{in} / W_{out})
+
+        Output(i, j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
+
+
+    Parameters:
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
+            the size will be the same as that of the input.
+        data_format (str): The data format of the input and output data. An optional string
+            from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
+            the order of: [batch_size, input_channels, input_depth, input_height, input_width].
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+    Shape:
+        x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32 or float64.
+        output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.
+
+    Returns:
+        A callable object of AdaptiveAvgPool3d.
+
+    Examples:
+        .. code-block:: python
+
+            # adaptive avg pool3d
+            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
+            # output shape is [N, C, l, m, n], adaptive pool divides the D, H and W dimensions
+            # of input data into l * m * n grids evenly and performs pooling in each
+            # grid to get output.
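+            # (illustrative note: an int `output_size` is treated as a cube,
+            #  so output_size=3 is equivalent to (3, 3, 3); a None entry such
+            #  as (None, 3, 3) keeps that dimension at the input size.)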
+            # adaptive avg pool performs calculations as follows:
+            #
+            #     for i in range(l):
+            #         for j in range(m):
+            #             for k in range(n):
+            #                 dstart = floor(i * D / l)
+            #                 dend = ceil((i + 1) * D / l)
+            #                 hstart = floor(j * H / m)
+            #                 hend = ceil((j + 1) * H / m)
+            #                 wstart = floor(k * W / n)
+            #                 wend = ceil((k + 1) * W / n)
+            #                 output[:, :, i, j, k] =
+            #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
+            import paddle
+            import numpy as np
+            paddle.disable_static()
+            input_data = np.random.rand(2, 3, 8, 32, 32)
+            x = paddle.to_tensor(input_data)
+            # x.shape is [2, 3, 8, 32, 32]
+            adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
+            pool_out = adaptive_avg_pool(x = x)
+            # pool_out.shape is [2, 3, 3, 3, 3]
+    """
+
+    def __init__(self, output_size, data_format="NCDHW", name=None):
+        super(AdaptiveAvgPool3d, self).__init__()
+        self._output_size = output_size
+        self._data_format = data_format
+        self._name = name
+
+    def forward(self, x):
+        return F.adaptive_avg_pool3d(
+            x,
+            output_size=self._output_size,
+            data_format=self._data_format,
+            name=self._name)
+
+
+class AdaptiveMaxPool1d(layers.Layer):
+    """
+
+    This operation applies a 1D adaptive max pooling over an input signal composed
+    of several input planes, based on the input, output_size, return_indices parameters.
+    Input(X) and output(Out) are in NCL format, where N is batch
+    size, C is the number of channels, L is the length of the feature.
+    The output tensor shape will be [N, C, output_size].
+
+    For max adaptive pool1d:
+
+    .. math::
+
+        lstart &= floor(i * L_{in} / L_{out})
+
+        lend &= ceil((i + 1) * L_{in} / L_{out})
+
+        Output(i) &= max(Input[lstart:lend])
+
+    Args:
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain one int.
+        return_indices (bool): If true, the index of max pooling point will be returned along
+            with outputs. It cannot be set in average pooling type. Default False.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+    Returns:
+        None.
+
+    Raises:
+        ValueError: 'output_size' should be an integer or a list or tuple with length 1.
+
+    Shape:
+        x (Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type can be float32 or float64.
+        output (Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type is same as input x.
+
+    Examples:
+        .. code-block:: python
+
+          # max adaptive pool1d
+          # suppose input data in shape of [N, C, L], `output_size` is m or [m],
+          # output shape is [N, C, m], adaptive pool divides the L dimension
+          # of input data into m grids evenly and performs max pooling in each
+          # grid to get output.
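+          # (illustrative note: e.g. input values [1, 3, 5, 7] with
+          #  output_size=2 give windows [0,2) and [2,4), hence outputs
+          #  [3, 7]; with return_indices=True the indices would be [1, 3].)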
+          # adaptive max pool performs calculations as follows:
+          #
+          #     for i in range(m):
+          #         lstart = floor(i * L / m)
+          #         lend = ceil((i + 1) * L / m)
+          #         output[:, :, i] = max(input[:, :, lstart: lend])
+          #
+          import paddle
+          import paddle.nn as nn
+          import numpy as np
+          paddle.disable_static()
+
+          data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
+          AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
+          pool_out = AdaptiveMaxPool1d(data)
+          # pool_out shape: [1, 3, 16]
+
+          # for return_indices = True
+          AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True)
+          pool_out, indices = AdaptiveMaxPool1d(data)
+          # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
+
+    """
+
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool1d, self).__init__()
+        self.output_size = output_size
+        self.return_indices = return_indices
+        self.name = name
+
+    def forward(self, input):
+        return F.adaptive_max_pool1d(input, self.output_size,
+                                     self.return_indices, self.name)
+
+
+class AdaptiveMaxPool2d(layers.Layer):
+    """
+    This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
+    of the output tensor are determined by the parameter output_size. The difference between
+    adaptive pooling and pooling is that the adaptive one focuses on the output size.
+    For adaptive max pool2d:
+    .. math::
+        hstart &= floor(i * H_{in} / H_{out})
+        hend &= ceil((i + 1) * H_{in} / H_{out})
+        wstart &= floor(j * W_{in} / W_{out})
+        wend &= ceil((j + 1) * W_{in} / W_{out})
+        Output(i, j) &= max(Input[hstart:hend, wstart:wend])
+    Parameters:
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of the input.
+        return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+    Shape:
+        x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32 or float64.
+        output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x.
+
+    Returns:
+        A callable object of AdaptiveMaxPool2d.
+    Examples:
+        .. code-block:: python
+            # adaptive max pool2d
+            # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
+            # output shape is [N, C, m, n], adaptive pool divides the H and W dimensions
+            # of input data into m * n grids evenly and performs max pooling in each
+            # grid to get output.
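+            # (illustrative note: unlike MaxPool2d there is no kernel_size,
+            #  stride or padding here; the window boundaries are derived
+            #  purely from the input and output sizes via floor/ceil.)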
+            # adaptive max pool performs calculations as follows:
+            #
+            #     for i in range(m):
+            #         for j in range(n):
+            #             hstart = floor(i * H / m)
+            #             hend = ceil((i + 1) * H / m)
+            #             wstart = floor(j * W / n)
+            #             wend = ceil((j + 1) * W / n)
+            #             output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
+            #
+            import paddle
+            import numpy as np
+            paddle.disable_static()
+            input_data = np.random.rand(2, 3, 32, 32)
+            x = paddle.to_tensor(input_data)
+            adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_indices=True)
+            pool_out, indices = adaptive_max_pool(x = x)
+    """
+
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool2d, self).__init__()
+        self._output_size = output_size
+        self._return_indices = return_indices
+        self._name = name
+
+    def forward(self, x):
+        return F.adaptive_max_pool2d(
+            x,
+            output_size=self._output_size,
+            return_indices=self._return_indices,
+            name=self._name)
+
+
+class AdaptiveMaxPool3d(layers.Layer):
+    """
+    This operation applies 3D adaptive max pooling on input tensor. The d, h and w dimensions
+    of the output tensor are determined by the parameter output_size. The difference between
+    adaptive pooling and pooling is that the adaptive one focuses on the output size.
+    For adaptive max pool3d:
+    .. math::
+        dstart &= floor(i * D_{in} / D_{out})
+        dend &= ceil((i + 1) * D_{in} / D_{out})
+        hstart &= floor(j * H_{in} / H_{out})
+        hend &= ceil((j + 1) * H_{in} / H_{out})
+        wstart &= floor(k * W_{in} / W_{out})
+        wend &= ceil((k + 1) * W_{in} / W_{out})
+        Output(i, j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])
+    Parameters:
+        output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
+            it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
+            the size will be the same as that of the input.
+        return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name is no need to set and
+            None by default.
+    Shape:
+        x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32 or float64.
+        output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x.
+    Returns:
+        A callable object of AdaptiveMaxPool3d.
+    Examples:
+        .. code-block:: python
+            # adaptive max pool3d
+            # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
+            # output shape is [N, C, l, m, n], adaptive pool divides the D, H and W dimensions
+            # of input data into l * m * n grids evenly and performs max pooling in each
+            # grid to get output.
+            # adaptive max pool performs calculations as follows:
+            #
+            #     for i in range(l):
+            #         for j in range(m):
+            #             for k in range(n):
+            #                 dstart = floor(i * D / l)
+            #                 dend = ceil((i + 1) * D / l)
+            #                 hstart = floor(j * H / m)
+            #                 hend = ceil((j + 1) * H / m)
+            #                 wstart = floor(k * W / n)
+            #                 wend = ceil((k + 1) * W / n)
+            #                 output[:, :, i, j, k] =
+            #                     max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
+            import paddle
+            import numpy as np
+            paddle.disable_static()
+            input_data = np.random.rand(2, 3, 8, 32, 32)
+            x = paddle.to_tensor(input_data)
+            pool = paddle.nn.AdaptiveMaxPool3d(output_size=4)
+            out = pool(x)
+            # out shape: [2, 3, 4, 4, 4]
+            pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True)
+            out, indices = pool(x)
+            # out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3]
+
+    """
+
+    def __init__(self, output_size, return_indices=False, name=None):
+        super(AdaptiveMaxPool3d, self).__init__()
+        self._output_size = output_size
+        self._return_indices = return_indices
+        self._name = name
+
+    def forward(self, x):
+        return F.adaptive_max_pool3d(
+            x,
+            output_size=self._output_size,
+            return_indices=self._return_indices,
+            name=self._name)
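
As a quick sanity sketch of the floor/ceil grid arithmetic used throughout the
adaptive-pooling docstrings above (illustrative only: `adaptive_max_pool1d_ref`
is a made-up helper name, and the paddle calls assume the layer API added in
this patch):

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()

    def adaptive_max_pool1d_ref(x, output_size):
        # NumPy reference: window i covers [floor(i*L/m), ceil((i+1)*L/m)).
        N, C, L = x.shape
        out = np.zeros((N, C, output_size), dtype=x.dtype)
        for i in range(output_size):
            start = int(np.floor(i * L / output_size))
            end = int(np.ceil((i + 1) * L / output_size))
            out[:, :, i] = x[:, :, start:end].max(axis=2)
        return out

    data_np = np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)
    data = paddle.to_tensor(data_np)
    pool = nn.AdaptiveMaxPool1d(output_size=16)
    # Should agree elementwise with the layer's output.
    np.testing.assert_allclose(pool(data).numpy(),
                               adaptive_max_pool1d_ref(data_np, 16))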