Unverified commit 4ff6999a, authored by cyberslack_lee, committed by GitHub

[xdoctest] reformat example code with google style No.80-85 (#55806)

* [Doctest]fix No.80-85, test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* fix

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent 128f5df8
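Note on the conversion pattern: the hunks below reformat each docstring example into the Google/xdoctest style. Statements gain a `>>> ` prompt (with `... ` on continuation lines), and shapes or values that previously lived in trailing `# ...` comments are now produced by an explicit `print(...)` whose result appears as literal doctest output, so the sample-code checker can execute and verify every example. A minimal sketch of the converted form, taken from the `avg_pool1d` hunk below:

    >>> import paddle
    >>> import paddle.nn as nn
    >>> data = paddle.uniform([1, 3, 32], paddle.float32)
    >>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
    >>> pool_out = AvgPool1D(data)
    >>> print(pool_out.shape)
    [1, 3, 16]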
...@@ -219,13 +219,14 @@ def avg_pool1d( ...@@ -219,13 +219,14 @@ def avg_pool1d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
data = paddle.uniform([1, 3, 32], paddle.float32) >>> data = paddle.uniform([1, 3, 32], paddle.float32)
AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) >>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
pool_out = AvgPool1D(data) >>> pool_out = AvgPool1D(data)
# pool_out shape: [1, 3, 16] >>> print(pool_out.shape)
[1, 3, 16]
""" """
"""NCL to NCHW""" """NCL to NCHW"""
data_format = "NCHW" data_format = "NCHW"
...@@ -350,15 +351,16 @@ def avg_pool2d( ...@@ -350,15 +351,16 @@ def avg_pool2d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
# avg pool2d >>> # avg pool2d
x = paddle.uniform([1, 3, 32, 32], paddle.float32) >>> x = paddle.uniform([1, 3, 32, 32], paddle.float32)
out = F.avg_pool2d(x, >>> out = F.avg_pool2d(x,
kernel_size=2, ... kernel_size=2,
stride=2, padding=0) ... stride=2, padding=0)
# out.shape [1, 3, 16, 16] >>> print(out.shape)
[1, 3, 16, 16]
""" """
kernel_size = convert_to_list(kernel_size, 2, 'pool_size') kernel_size = convert_to_list(kernel_size, 2, 'pool_size')
if stride is None: if stride is None:
...@@ -480,16 +482,16 @@ def avg_pool3d( ...@@ -480,16 +482,16 @@ def avg_pool3d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32) >>> x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32)
# avg pool3d >>> # avg pool3d
out = paddle.nn.functional.avg_pool3d( >>> out = paddle.nn.functional.avg_pool3d(x,
x, ... kernel_size = 2,
kernel_size = 2, ... stride = 2,
stride = 2, ... padding=0)
padding=0) >>> print(out.shape)
# out.shape: [1, 3, 16, 16, 16] [1, 3, 16, 16, 16]
""" """
kernel_size = convert_to_list(kernel_size, 3, 'pool_size') kernel_size = convert_to_list(kernel_size, 3, 'pool_size')
if stride is None: if stride is None:
...@@ -599,14 +601,18 @@ def max_pool1d( ...@@ -599,14 +601,18 @@ def max_pool1d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32], paddle.float32) >>> data = paddle.uniform([1, 3, 32], paddle.float32)
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) >>> pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16] >>> print(pool_out.shape)
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) [1, 3, 16]
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
""" """
"""NCL to NCHW""" """NCL to NCHW"""
data_format = "NCHW" data_format = "NCHW"
...@@ -789,14 +795,18 @@ def max_unpool1d( ...@@ -789,14 +795,18 @@ def max_unpool1d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.rand(shape=[1, 3, 16]) >>> data = paddle.rand(shape=[1, 3, 16])
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 3, 8], indices shape: [1, 3, 8] >>> print(pool_out.shape)
unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0) [1, 3, 8]
# unpool_out shape: [1, 3, 16] >>> print(indices.shape)
[1, 3, 8]
>>> unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 3, 16]
""" """
"""NCL to NCHW""" """NCL to NCHW"""
...@@ -926,18 +936,23 @@ def max_unpool2d( ...@@ -926,18 +936,23 @@ def max_unpool2d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.rand(shape=[1,1,6,6]) >>> data = paddle.rand(shape=[1, 1, 6, 6])
pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True) >>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 3, 3], indices shape: [1, 1, 3, 3] >>> print(pool_out.shape)
unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0) [1, 1, 3, 3]
# unpool_out shape: [1, 1, 6, 6] >>> print(indices.shape)
[1, 1, 3, 3]
>>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 1, 6, 6]
# specify a different output size than input size >>> # specify a different output size than input size
unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7,7]) >>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7, 7])
# unpool_out shape: [1, 1, 7, 7] >>> print(unpool_out.shape)
[1, 1, 7, 7]
""" """
if x.ndim != 4: if x.ndim != 4:
...@@ -1073,14 +1088,18 @@ def max_unpool3d( ...@@ -1073,14 +1088,18 @@ def max_unpool3d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.rand(shape=[1, 1, 4, 4, 6]) >>> data = paddle.rand(shape=[1, 1, 4, 4, 6])
pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True) >>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 2, 2, 3], indices shape: [1, 1, 2, 2, 3] >>> print(pool_out.shape)
unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0) [1, 1, 2, 2, 3]
# unpool_out shape: [1, 1, 4, 4, 6] >>> print(indices.shape)
[1, 1, 2, 2, 3]
>>> unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 1, 4, 4, 6]
""" """
if x.ndim != 5: if x.ndim != 5:
...@@ -1200,16 +1219,20 @@ def max_pool2d( ...@@ -1200,16 +1219,20 @@ def max_pool2d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
# max pool2d >>> # max pool2d
x = paddle.uniform([1, 3, 32, 32], paddle.float32) >>> x = paddle.uniform([1, 3, 32, 32], paddle.float32)
out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0) >>> out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0)
# output.shape [1, 3, 16, 16] >>> print(out.shape)
# for return_mask=True [1, 3, 16, 16]
out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True) >>> # for return_mask=True
# out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], >>> out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(out.shape)
[1, 3, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16]
""" """
kernel_size = convert_to_list(kernel_size, 2, 'pool_size') kernel_size = convert_to_list(kernel_size, 2, 'pool_size')
...@@ -1359,24 +1382,30 @@ def max_pool3d( ...@@ -1359,24 +1382,30 @@ def max_pool3d(
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
# max pool3d >>> # max pool3d
x = paddle.uniform([1, 3, 32, 32, 32]) >>> x = paddle.uniform([1, 3, 32, 32, 32])
output = F.max_pool3d(x, >>> output = F.max_pool3d(x,
kernel_size=2, ... kernel_size=2,
stride=2, padding=0) ... stride=2,
# output.shape [1, 3, 16, 16, 16] ... padding=0)
# for return_mask=True >>> print(output.shape)
x = paddle.uniform([1, 3, 32, 32, 32]) [1, 3, 16, 16, 16]
output, max_indices = paddle.nn.functional.max_pool3d(x,
kernel_size=2, >>> # for return_mask=True
stride=2, >>> x = paddle.uniform([1, 3, 32, 32, 32])
padding=0, >>> output, max_indices = paddle.nn.functional.max_pool3d(x,
return_mask=True) ... kernel_size=2,
... stride=2,
# output.shape [1, 3, 16, 16, 16], max_indices.shape [1, 3, 16, 16, 16] ... padding=0,
... return_mask=True)
...
>>> print(output.shape)
[1, 3, 16, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16, 16]
""" """
kernel_size = convert_to_list(kernel_size, 3, 'pool_size') kernel_size = convert_to_list(kernel_size, 3, 'pool_size')
...@@ -1468,24 +1497,25 @@ def adaptive_avg_pool1d(x, output_size, name=None): ...@@ -1468,24 +1497,25 @@ def adaptive_avg_pool1d(x, output_size, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# average adaptive pool1d >>> # average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m], >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension >>> # output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each >>> # of input data into m grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive max pool performs calculations as follow: >>> # adaptive max pool performs calculations as follow:
# >>> #
# for i in range(m): >>> # for i in range(m):
# lstart = floor(i * L / m) >>> # lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m) >>> # lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) >>> # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
# >>> #
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32]) >>> data = paddle.uniform([1, 3, 32])
pool_out = F.adaptive_avg_pool1d(data, output_size=16) >>> pool_out = F.adaptive_avg_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16]) >>> print(pool_out.shape)
[1, 3, 16]
""" """
pool_type = 'avg' pool_type = 'avg'
_check_input(x, 3) _check_input(x, 3)
...@@ -1567,29 +1597,29 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): ...@@ -1567,29 +1597,29 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive avg pool2d >>> # adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n], >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each >>> # of input data into m * n grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive avg pool performs calculations as follow: >>> # adaptive avg pool performs calculations as follow:
# >>> #
# for i in range(m): >>> # for i in range(m):
# for j in range(n): >>> # for j in range(n):
# hstart = floor(i * H / m) >>> # hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m) >>> # hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n) >>> # wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n) >>> # wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) >>> # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
# >>> #
import paddle >>> import paddle
x = paddle.rand([2, 3, 32, 32]) >>> x = paddle.rand([2, 3, 32, 32])
# x.shape is [2, 3, 32, 32] >>> # x.shape is [2, 3, 32, 32]
out = paddle.nn.functional.adaptive_avg_pool2d( >>> out = paddle.nn.functional.adaptive_avg_pool2d(x = x,
x = x, ... output_size=[3, 3])
output_size=[3, 3]) >>> print(out.shape)
# out.shape is [2, 3, 3, 3] [2, 3, 3, 3]
""" """
if data_format not in ["NCHW", "NHWC"]: if data_format not in ["NCHW", "NHWC"]:
...@@ -1700,31 +1730,31 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): ...@@ -1700,31 +1730,31 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive avg pool3d >>> # adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each >>> # of input data into l * m * n grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive avg pool performs calculations as follow: >>> # adaptive avg pool performs calculations as follow:
# >>> #
# for i in range(l): >>> # for i in range(l):
# for j in range(m): >>> # for j in range(m):
# for k in range(n): >>> # for k in range(n):
# dstart = floor(i * D / l) >>> # dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l) >>> # dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m) >>> # hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m) >>> # hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n) >>> # wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n) >>> # wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] = >>> # output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) >>> # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle >>> import paddle
input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) >>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
out = paddle.nn.functional.adaptive_avg_pool3d( >>> out = paddle.nn.functional.adaptive_avg_pool3d(x = input_data,
x = input_data, ... output_size=[3, 3, 3])
output_size=[3, 3, 3]) >>> print(out.shape)
# out.shape is [2, 3, 3, 3, 3] [2, 3, 3, 3, 3]
""" """
if data_format not in ["NCDHW", "NDHWC"]: if data_format not in ["NCDHW", "NDHWC"]:
...@@ -1815,26 +1845,30 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): ...@@ -1815,26 +1845,30 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# max adaptive pool1d >>> # max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m], >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension >>> # output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each >>> # of input data into m grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive max pool performs calculations as follow: >>> # adaptive max pool performs calculations as follow:
# >>> #
# for i in range(m): >>> # for i in range(m):
# lstart = floor(i * L / m) >>> # lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m) >>> # lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend]) >>> # output[:, :, i] = max(input[:, :, lstart: lend])
# >>> #
import paddle >>> import paddle
import paddle.nn.functional as F >>> import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32], paddle.float32) >>> data = paddle.uniform([1, 3, 32], paddle.float32)
pool_out = F.adaptive_max_pool1d(data, output_size=16) >>> pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16]) >>> print(pool_out.shape)
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True) [1, 3, 16]
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] >>> pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
""" """
_check_input(x, 3) _check_input(x, 3)
...@@ -1901,28 +1935,28 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): ...@@ -1901,28 +1935,28 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# max adaptive pool2d >>> # max adaptive pool2d
# suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] >>> # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n]
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m*n grids averagely and performs poolings in each >>> # of input data into m*n grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive max pool performs calculations as follow: >>> # adaptive max pool performs calculations as follow:
# >>> #
# for i in range(m): >>> # for i in range(m):
# for j in range(n): >>> # for j in range(n):
# hstart = floor(i * H / m) >>> # hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m) >>> # hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n) >>> # wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n) >>> # wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) >>> # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
# >>> #
import paddle >>> import paddle
input_data = paddle.randn(shape=(2, 3, 32, 32)) >>> input_data = paddle.randn(shape=(2, 3, 32, 32))
out = paddle.nn.functional.adaptive_max_pool2d( >>> out = paddle.nn.functional.adaptive_max_pool2d(x = input_data,
x = input_data, ... output_size=[3, 3])
output_size=[3, 3]) >>> print(out.shape)
# out.shape is [2, 3, 3, 3] [2, 3, 3, 3]
""" """
_check_input(x, 4) _check_input(x, 4)
...@@ -1987,31 +2021,31 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): ...@@ -1987,31 +2021,31 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive max pool3d >>> # adaptive max pool3d
# suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] >>> # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n]
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into m*n grids averagely and performs poolings in each >>> # of input data into m*n grids averagely and performs poolings in each
# grid to get output. >>> # grid to get output.
# adaptive max pool performs calculations as follow: >>> # adaptive max pool performs calculations as follow:
# >>> #
# for i in range(l): >>> # for i in range(l):
# for j in range(m): >>> # for j in range(m):
# for k in range(n): >>> # for k in range(n):
# dstart = floor(i * D / l) >>> # dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l) >>> # dend = ceil((i + 1) * D / l)
# hstart = floor(i * H / m) >>> # hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m) >>> # hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n) >>> # wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n) >>> # wend = ceil((i + 1) * W / n)
# output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend]) >>> # output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
# >>> #
import paddle >>> import paddle
input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) >>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
out = paddle.nn.functional.adaptive_max_pool3d( >>> out = paddle.nn.functional.adaptive_max_pool3d(x = input_data,
x = input_data, ... output_size=[3, 3, 3])
output_size=[3, 3, 3]) >>> print(out.shape)
# out.shape is [2, 3, 3, 3, 3] [2, 3, 3, 3, 3]
""" """
_check_input(x, 5) _check_input(x, 5)
......
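The sparse_attention hunk that follows additionally replaces the legacy `# required: skiptest` marker with an inline xdoctest directive, so the reason for skipping the example is recorded next to it. A minimal sketch of that pattern, using the reason string from the hunk below:

    >>> # doctest: +SKIP('This API is only used in CUDA11.3 and above.')
    >>> import paddle
    >>> paddle.disable_static()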
...@@ -88,50 +88,51 @@ def sparse_attention( ...@@ -88,50 +88,51 @@ def sparse_attention(
Examples: Examples:
.. code-block:: python .. code-block:: python
# required: skiptest >>> # doctest: +SKIP('This API is only used in CUDA11.3 and above.')
import paddle >>> import paddle
paddle.disable_static() >>> paddle.disable_static()
# `query`, `key` and `value` all have shape [1, 1, 4, 2] >>> # `query`, `key` and `value` all have shape [1, 1, 4, 2]
query = paddle.to_tensor([[[[0, 1, ], [2, 3], >>> query = paddle.to_tensor([[[[0, 1, ], [2, 3],
[0, 1], [2, 3]]]], dtype="float32") ... [0, 1], [2, 3]]]], dtype="float32")
key = paddle.to_tensor([[[[0, 1], [2, 3], >>> key = paddle.to_tensor([[[[0, 1], [2, 3],
[0, 1], [2, 3]]]], dtype="float32") ... [0, 1], [2, 3]]]], dtype="float32")
value = paddle.to_tensor([[[[0, 1], [2, 3], >>> value = paddle.to_tensor([[[[0, 1], [2, 3],
[0, 1], [2, 3]]]], dtype="float32") ... [0, 1], [2, 3]]]], dtype="float32")
...
>>> offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32")
offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32") >>> columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32")
columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32") ...
>>> print(offset.shape)
print(offset.shape) # (1, 1, 5) [1, 1, 5]
print(columns.shape) # (1, 1, 8) >>> print(columns.shape)
[1, 1, 8]
key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32") ...
attention_mask = paddle.to_tensor([[1, 0, 1, 1], >>> key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32")
[1, 1, 1, 1], >>> attention_mask = paddle.to_tensor([[1, 0, 1, 1],
[1, 1, 1, 1], ... [1, 1, 1, 1],
[1, 1, 1, 1]], dtype="float32") ... [1, 1, 1, 1],
output_mask = paddle.nn.functional.sparse_attention(query, key, ... [1, 1, 1, 1]], dtype="float32")
value, offset, columns, >>> output_mask = paddle.nn.functional.sparse_attention(query, key,
key_padding_mask=key_padding_mask, ... value, offset, columns,
attn_mask=attention_mask) ... key_padding_mask=key_padding_mask,
print(output_mask) ... attn_mask=attention_mask)
# Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, >>> print(output_mask)
# [[[[0. , 1. ], Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
# [1.99830270, 2.99830270], [[[[0. , 1. ],
# [0. , 1. ], [1.99830270, 2.99830270],
# [0. , 1. ]]]]) [0. , 1. ],
[0. , 1. ]]]])
output = paddle.nn.functional.sparse_attention(query, key,
value, offset, columns) >>> output = paddle.nn.functional.sparse_attention(query, key,
print(output) ... value, offset, columns)
# Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, >>> print(output)
# [[[[1.60885942, 2.60885954], Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
# [1.99830270, 2.99830270], [[[[1.60885942, 2.60885954],
# [1.60885942, 2.60885954], [1.99830270, 2.99830270],
# [1.99830270, 2.99830270]]]]) [1.60885942, 2.60885954],
[1.99830270, 2.99830270]]]])
""" """
if in_dynamic_mode(): if in_dynamic_mode():
( (
......
...@@ -43,13 +43,15 @@ class CELU(Layer): ...@@ -43,13 +43,15 @@ class CELU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) >>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
m = paddle.nn.CELU(0.2) >>> m = paddle.nn.CELU(0.2)
out = m(x) >>> out = m(x)
# [[-0.19865242, 6. ], >>> print(out)
# [ 1. , 15.60000038]] Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.19865242, 6. ],
[ 1. , 15.60000038]])
""" """
def __init__(self, alpha=1.0, name=None): def __init__(self, alpha=1.0, name=None):
...@@ -91,13 +93,15 @@ class ELU(Layer): ...@@ -91,13 +93,15 @@ class ELU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) >>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
m = paddle.nn.ELU(0.2) >>> m = paddle.nn.ELU(0.2)
out = m(x) >>> out = m(x)
# [[-0.12642411 6. ] >>> print(out)
# [ 1. 15.6 ]] Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.12642412, 6. ],
[ 1. , 15.60000038]])
""" """
def __init__(self, alpha=1.0, name=None): def __init__(self, alpha=1.0, name=None):
...@@ -141,15 +145,20 @@ class GELU(Layer): ...@@ -141,15 +145,20 @@ class GELU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> x = paddle.to_tensor([[-1, 0.5],[1, 1.5]])
x = paddle.to_tensor([[-1, 0.5],[1, 1.5]]) >>> m = paddle.nn.GELU()
>>> out = m(x)
m = paddle.nn.GELU() >>> print(out)
out = m(x) # [-0.158655 0.345731 0.841345 1.39979] Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
m = paddle.nn.GELU(True) [ 0.84134471, 1.39978933]])
out = m(x) # [-0.158808 0.345714 0.841192 1.39957] >>> m = paddle.nn.GELU(True)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
""" """
def __init__(self, approximate=False, name=None): def __init__(self, approximate=False, name=None):
...@@ -193,11 +202,14 @@ class Hardshrink(Layer): ...@@ -193,11 +202,14 @@ class Hardshrink(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-1, 0.3, 2.5]) >>> x = paddle.to_tensor([-1, 0.3, 2.5])
m = paddle.nn.Hardshrink() >>> m = paddle.nn.Hardshrink()
out = m(x) # [-1., 0., 2.5] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0. , 2.50000000])
""" """
def __init__(self, threshold=0.5, name=None): def __init__(self, threshold=0.5, name=None):
...@@ -244,11 +256,14 @@ class Hardswish(Layer): ...@@ -244,11 +256,14 @@ class Hardswish(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-4., 5., 1.]) >>> x = paddle.to_tensor([-4., 5., 1.])
m = paddle.nn.Hardswish() >>> m = paddle.nn.Hardswish()
out = m(x) # [0., 5., 0.666667] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0. , 5. , 0.66666669])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -282,14 +297,14 @@ class Tanh(Layer): ...@@ -282,14 +297,14 @@ class Tanh(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Tanh() >>> m = paddle.nn.Tanh()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.37994894, -0.19737533, 0.09966800, 0.29131261]) [-0.37994900, -0.19737528, 0.09966799, 0.29131261])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -333,11 +348,14 @@ class Hardtanh(Layer): ...@@ -333,11 +348,14 @@ class Hardtanh(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-1.5, 0.3, 2.5]) >>> x = paddle.to_tensor([-1.5, 0.3, 2.5])
m = paddle.nn.Hardtanh() >>> m = paddle.nn.Hardtanh()
out = m(x) # [-1., 0.3, 1.] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0.30000001, 1. ])
""" """
def __init__(self, min=-1.0, max=1.0, name=None): def __init__(self, min=-1.0, max=1.0, name=None):
...@@ -386,25 +404,25 @@ class PReLU(Layer): ...@@ -386,25 +404,25 @@ class PReLU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
paddle.set_default_dtype("float64")
>>> data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0], ... [ 3.0, -4.0, 5.0, -6.0],
[ 3.0, -4.0, 5.0, -6.0], ... [-7.0, -8.0, 8.0, 9.0]],
[-7.0, -8.0, 8.0, 9.0]], ... [[ 1.0, -2.0, -3.0, 4.0],
[[ 1.0, -2.0, -3.0, 4.0], ... [-5.0, 6.0, 7.0, -8.0],
[-5.0, 6.0, 7.0, -8.0], ... [ 6.0, 7.0, 8.0, 9.0]]]])
[ 6.0, 7.0, 8.0, 9.0]]]]) ...
>>> m = paddle.nn.PReLU(1, 0.25)
m = paddle.nn.PReLU(1, 0.25) >>> out = m(data)
out = m(data) >>> print(out)
print(out) Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
# [[[[-0.5 , 3. , -1. , 5. ], [[[[-0.50000000, 3. , -1. , 5. ],
# [ 3. , -1. , 5. , -1.5 ], [ 3. , -1. , 5. , -1.50000000],
# [-1.75, -2. , 8. , 9. ]], [-1.75000000, -2. , 8. , 9. ]],
# [[ 1. , -0.5 , -0.75, 4. ], [[ 1. , -0.50000000, -0.75000000, 4. ],
# [-1.25, 6. , 7. , -2. ], [-1.25000000, 6. , 7. , -2. ],
# [ 6. , 7. , 8. , 9. ]]]] [ 6. , 7. , 8. , 9. ]]]])
""" """
def __init__( def __init__(
...@@ -495,24 +513,26 @@ class RReLU(Layer): ...@@ -495,24 +513,26 @@ class RReLU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(2023)
input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0], >>> input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[-7.0, -8.0, 8.0, 9.0]], ... [ 3.0, -4.0, 5.0, -6.0],
[[ 1.0, -2.0, -3.0, 4.0], ... [-7.0, -8.0, 8.0, 9.0]],
[-5.0, 6.0, 7.0, -8.0], ... [[ 1.0, -2.0, -3.0, 4.0],
[ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32') ... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
rrelu_layer = paddle.nn.RReLU(0.1, 0.3) ...
out = rrelu_layer(input_tensor) >>> rrelu_layer = paddle.nn.RReLU(0.1, 0.3)
print(out) >>> out = rrelu_layer(input_tensor)
#[[[[-0.20000899 3. -0.88108218 5. ] >>> print(out)
# [ 3. -0.55175185 5. -1.07761011] Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-1.06806871 -1.98962009 8. 9. ]] [[[[-0.54633451, 3. , -0.81611776, 5. ],
# [[ 1. -0.52382672 -0.65515128 4. ] [ 3. , -0.60768753, 5. , -1.68630385],
# [-1.37663394 6. 7. -2.34657836] [-1.29360127, -1.45026064, 8. , 9. ]],
# [ 6. 7. 8. 9. ]]]] [[ 1. , -0.58808362, -0.74662417, 4. ],
[-1.01785135, 6. , 7. , -1.97268605],
[ 6. , 7. , 8. , 9. ]]]])
""" """
def __init__(self, lower=1.0 / 8.0, upper=1.0 / 3.0, name=None): def __init__(self, lower=1.0 / 8.0, upper=1.0 / 3.0, name=None):
...@@ -554,13 +574,14 @@ class ReLU(Layer): ...@@ -554,13 +574,14 @@ class ReLU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-2., 0., 1.]) >>> x = paddle.to_tensor([-2., 0., 1.])
m = paddle.nn.ReLU() >>> m = paddle.nn.ReLU()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# [0., 0., 1.] Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0., 0., 1.])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -596,13 +617,14 @@ class ReLU6(Layer): ...@@ -596,13 +617,14 @@ class ReLU6(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-1., 0.3, 6.5]) >>> x = paddle.to_tensor([-1., 0.3, 6.5])
m = paddle.nn.ReLU6() >>> m = paddle.nn.ReLU6()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# [0, 0.3, 6] Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 0.30000000, 6. ])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -644,13 +666,15 @@ class SELU(Layer): ...@@ -644,13 +666,15 @@ class SELU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]]) >>> x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]])
m = paddle.nn.SELU() >>> m = paddle.nn.SELU()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# [[0, 1.050701],[2.101402, 3.152103]] Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0. , 1.05070102],
[2.10140204, 3.15210295]])
""" """
def __init__( def __init__(
...@@ -703,11 +727,14 @@ class LeakyReLU(Layer): ...@@ -703,11 +727,14 @@ class LeakyReLU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
m = paddle.nn.LeakyReLU() >>> m = paddle.nn.LeakyReLU()
x = paddle.to_tensor([-2.0, 0, 1]) >>> x = paddle.to_tensor([-2.0, 0, 1])
out = m(x) # [-0.02, 0., 1.] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.02000000, 0. , 1. ])
""" """
def __init__(self, negative_slope=0.01, name=None): def __init__(self, negative_slope=0.01, name=None):
...@@ -744,11 +771,14 @@ class Sigmoid(Layer): ...@@ -744,11 +771,14 @@ class Sigmoid(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
m = paddle.nn.Sigmoid() >>> m = paddle.nn.Sigmoid()
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = m(x) # [0.7310586, 0.880797, 0.95257413, 0.98201376] >>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.73105860, 0.88079703, 0.95257413, 0.98201376])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -795,11 +825,14 @@ class Hardsigmoid(Layer): ...@@ -795,11 +825,14 @@ class Hardsigmoid(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
m = paddle.nn.Hardsigmoid() >>> m = paddle.nn.Hardsigmoid()
x = paddle.to_tensor([-4., 5., 1.]) >>> x = paddle.to_tensor([-4., 5., 1.])
out = m(x) # [0., 1, 0.666667] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 1. , 0.66666669])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -836,11 +869,14 @@ class Softplus(Layer): ...@@ -836,11 +869,14 @@ class Softplus(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32') >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
m = paddle.nn.Softplus() >>> m = paddle.nn.Softplus()
out = m(x) # [0.513015, 0.598139, 0.744397, 0.854355] >>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.51301527, 0.59813893, 0.74439669, 0.85435522])
""" """
def __init__(self, beta=1, threshold=20, name=None): def __init__(self, beta=1, threshold=20, name=None):
...@@ -887,14 +923,14 @@ class Softshrink(Layer): ...@@ -887,14 +923,14 @@ class Softshrink(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8]) >>> x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
m = paddle.nn.Softshrink() >>> m = paddle.nn.Softshrink()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.39999998, 0. , 0. , 0.30000001]) [-0.39999998, 0. , 0. , 0.30000001])
""" """
def __init__(self, threshold=0.5, name=None): def __init__(self, threshold=0.5, name=None):
...@@ -929,14 +965,14 @@ class Softsign(Layer): ...@@ -929,14 +965,14 @@ class Softsign(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Softsign() >>> m = paddle.nn.Softsign()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.28571430, -0.16666666, 0.09090909, 0.23076925]) [-0.28571430, -0.16666666, 0.09090909, 0.23076925])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -970,14 +1006,14 @@ class Swish(Layer): ...@@ -970,14 +1006,14 @@ class Swish(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-2., 0., 1.]) >>> x = paddle.to_tensor([-2., 0., 1.])
m = paddle.nn.Swish() >>> m = paddle.nn.Swish()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.23840584, 0. , 0.73105854]) [-0.23840584, 0. , 0.73105860])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -1017,11 +1053,14 @@ class Mish(Layer): ...@@ -1017,11 +1053,14 @@ class Mish(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-5., 0., 5.]) >>> x = paddle.to_tensor([-5., 0., 5.])
m = paddle.nn.Mish() >>> m = paddle.nn.Mish()
out = m(x) # [-0.03357624, 0., 4.99955208] >>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.03357624, 0. , 4.99955177])
""" """
...@@ -1056,14 +1095,14 @@ class Tanhshrink(Layer): ...@@ -1056,14 +1095,14 @@ class Tanhshrink(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Tanhshrink() >>> m = paddle.nn.Tanhshrink()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.02005106, -0.00262468, 0.00033200, 0.00868741]) [-0.02005100, -0.00262472, 0.00033201, 0.00868741])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -1105,14 +1144,14 @@ class ThresholdedReLU(Layer): ...@@ -1105,14 +1144,14 @@ class ThresholdedReLU(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([2., 0., 1.]) >>> x = paddle.to_tensor([2., 0., 1.])
m = paddle.nn.ThresholdedReLU() >>> m = paddle.nn.ThresholdedReLU()
out = m(x) >>> out = m(x)
print(out) >>> print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [2., 0., 0.]) [2., 0., 0.])
""" """
def __init__(self, threshold=1.0, name=None): def __init__(self, threshold=1.0, name=None):
...@@ -1148,11 +1187,14 @@ class Silu(Layer): ...@@ -1148,11 +1187,14 @@ class Silu(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
m = paddle.nn.Silu() >>> m = paddle.nn.Silu()
out = m(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ] >>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.73105860, 1.76159406, 2.85772228, 3.92805505])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -1187,11 +1229,14 @@ class LogSigmoid(Layer): ...@@ -1187,11 +1229,14 @@ class LogSigmoid(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
m = paddle.nn.LogSigmoid() >>> m = paddle.nn.LogSigmoid()
out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] >>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.31326166, -0.12692805, -0.04858733, -0.01814996])
""" """
def __init__(self, name=None): def __init__(self, name=None):
...@@ -1299,22 +1344,25 @@ class Softmax(Layer): ...@@ -1299,22 +1344,25 @@ class Softmax(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0], ... [3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]], ... [7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0], ... [[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0], ... [5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]], dtype='float32') ... [6.0, 7.0, 8.0, 9.0]]], dtype='float32')
m = paddle.nn.Softmax() >>> m = paddle.nn.Softmax()
out = m(x) >>> out = m(x)
# [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], >>> print(out)
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426], Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.07232949, 0.19661193, 0.19661193, 0.53444665]], [[[0.03205860, 0.08714432, 0.23688284, 0.64391428],
# [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], [0.03205860, 0.08714432, 0.23688284, 0.64391428],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426], [0.07232949, 0.19661194, 0.19661194, 0.53444666]],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] [[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428]]])
""" """
def __init__(self, axis=-1, name=None): def __init__(self, axis=-1, name=None):
...@@ -1357,23 +1405,26 @@ class LogSoftmax(Layer): ...@@ -1357,23 +1405,26 @@ class LogSoftmax(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = [[[-2.0, 3.0, -4.0, 5.0], >>> x = [[[-2.0, 3.0, -4.0, 5.0],
[3.0, -4.0, 5.0, -6.0], ... [ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]], ... [-7.0, -8.0, 8.0, 9.0]],
[[1.0, -2.0, -3.0, 4.0], ... [[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0], ... [-5.0, 6.0, 7.0, -8.0],
[6.0, 7.0, 8.0, 9.0]]] ... [ 6.0, 7.0, 8.0, 9.0]]]
m = paddle.nn.LogSoftmax() >>> m = paddle.nn.LogSoftmax()
x = paddle.to_tensor(x) >>> x = paddle.to_tensor(x)
out = m(x) >>> out = m(x)
# [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] >>> print(out)
# [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-16.313261 -17.313261 -1.3132617 -0.31326184]] [[[-7.12783957 , -2.12783957 , -9.12783909 , -0.12783945 ],
# [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] [-2.12705135 , -9.12705135 , -0.12705141 , -11.12705135],
# [-12.313267 -1.3132664 -0.3132665 -15.313267 ] [-16.31326103, -17.31326103, -1.31326187 , -0.31326184 ]],
# [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] [[-3.05181193 , -6.05181217 , -7.05181217 , -0.05181199 ],
[-12.31326675, -1.31326652 , -0.31326646 , -15.31326675],
[-3.44018984 , -2.44018984 , -1.44018972 , -0.44018975 ]]])
""" """
def __init__(self, axis=-1, name=None): def __init__(self, axis=-1, name=None):
...@@ -1426,20 +1477,17 @@ class Maxout(Layer): ...@@ -1426,20 +1477,17 @@ class Maxout(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
x = paddle.rand([1, 2, 3, 4])
# [[[[0.5002636 0.22272532 0.17402348 0.2874594 ] >>> x = paddle.rand([1, 2, 3, 4])
# [0.95313174 0.6228939 0.7129065 0.7087491 ] >>> m = paddle.nn.Maxout(groups=2)
# [0.02879342 0.88725346 0.61093384 0.38833922]] >>> out = m(x)
# [[0.5231306 0.03807496 0.91661984 0.15602879] >>> print(out)
# [0.666127 0.616567 0.30741522 0.24044901] Tensor(shape=[1, 1, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.7142536 0.7351477 0.31588817 0.23782359]]]] [[[[0.85139430, 0.95717543, 0.43864486, 0.51577556],
m = paddle.nn.Maxout(groups=2) [0.84765935, 0.45680618, 0.39412445, 0.72039396],
out = m(x) [0.59444654, 0.78120756, 0.78364515, 0.90572405]]]])
# [[[[0.5231306 0.22272532 0.91661984 0.2874594 ]
# [0.95313174 0.6228939 0.7129065 0.7087491 ]
# [0.7142536 0.88725346 0.61093384 0.38833922]]]]
""" """
def __init__(self, groups, axis=1, name=None): def __init__(self, groups, axis=1, name=None):
...@@ -1473,25 +1521,20 @@ class Softmax2D(Layer): ...@@ -1473,25 +1521,20 @@ class Softmax2D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
x = paddle.rand([1, 2, 3, 4])
# [[[[0.42496058 0.1172187 0.14664008 0.8151267 ] >>> x = paddle.rand([1, 2, 3, 4])
# [0.24430142 0.42052492 0.60372984 0.79307914] >>> m = paddle.nn.Softmax2D()
# [0.4539401 0.90458065 0.10235776 0.62009853]] >>> out = m(x)
>>> print(out)
# [[0.11731581 0.16053623 0.05667042 0.91876775] Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.9413854 0.30770817 0.6788164 0.9543593 ] [[[[0.42608523, 0.32081410, 0.39483935, 0.55642301],
# [0.4145064 0.75909156 0.11598814 0.73599935]]]] [0.38131708, 0.45118359, 0.44891062, 0.46053308],
m = paddle.nn.Softmax2D() [0.35746980, 0.60766530, 0.38638926, 0.70425135]],
out = m(x) [[0.57391477, 0.67918587, 0.60516071, 0.44357699],
# [[[[0.5763103 0.48917228 0.5224772 0.4741129 ] [0.61868292, 0.54881644, 0.55108935, 0.53946698],
# [0.3324591 0.5281743 0.48123717 0.45976716] [0.64253020, 0.39233473, 0.61361068, 0.29574865]]]])
# [0.5098571 0.5363083 0.49659243 0.4710572 ]]
# [[0.42368975 0.51082766 0.47752273 0.5258871 ]
# [0.66754097 0.47182566 0.5187628 0.5402329 ]
# [0.49014282 0.46369177 0.50340754 0.5289428 ]]]]
""" """
......
...@@ -50,18 +50,22 @@ class Identity(Layer): ...@@ -50,18 +50,22 @@ class Identity(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
input_tensor = paddle.randn(shape=[3, 2])
layer = paddle.nn.Identity() >>> input_tensor = paddle.randn(shape=[3, 2])
out = layer(input_tensor) >>> layer = paddle.nn.Identity()
# input_tensor: [[-0.32342386 -1.200079 ] >>> out = layer(input_tensor)
# [ 0.7979031 -0.90978354] >>> print(input_tensor)
# [ 0.40597573 1.8095392 ]] Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# out: [[-0.32342386 -1.200079 ] [[-1.41661501, 0.25904641],
# [ 0.7979031 -0.90978354] [ 0.00979547, -0.30324230],
# [ 0.40597573 1.8095392 ]] [-1.34256756, -0.76540256]])
>>> print(out)
Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1.41661501, 0.25904641],
[ 0.00979547, -0.30324230],
[-1.34256756, -0.76540256]])
""" """
...@@ -120,28 +124,35 @@ class Linear(Layer): ...@@ -120,28 +124,35 @@ class Linear(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
# Define the linear layer.
weight_attr = paddle.ParamAttr( >>> # Define the linear layer.
name="weight", >>> weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.5)) ... name="weight",
bias_attr = paddle.ParamAttr( ... initializer=paddle.nn.initializer.Constant(value=0.5))
name="bias", >>> bias_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=1.0)) ... name="bias",
linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr) ... initializer=paddle.nn.initializer.Constant(value=1.0))
# linear.weight: [[0.5 0.5 0.5 0.5] >>> linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
# [0.5 0.5 0.5 0.5]] >>> print(linear.weight)
# linear.bias: [1. 1. 1. 1.] Parameter containing:
Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
x = paddle.randn((3, 2), dtype="float32") [[0.50000000, 0.50000000, 0.50000000, 0.50000000],
# x: [[-0.32342386 -1.200079 ] [0.50000000, 0.50000000, 0.50000000, 0.50000000]])
# [ 0.7979031 -0.90978354]
# [ 0.40597573 1.8095392 ]] >>> print(linear.bias)
y = linear(x) Parameter containing:
# y: [[0.23824859 0.23824859 0.23824859 0.23824859] Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False,
# [0.9440598 0.9440598 0.9440598 0.9440598 ] [1., 1., 1., 1.])
# [2.1077576 2.1077576 2.1077576 2.1077576 ]]
>>> x = paddle.randn((3, 2), dtype="float32")
>>> y = linear(x)
>>> print(y)
Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.42121571, 0.42121571, 0.42121571, 0.42121571],
[ 0.85327661, 0.85327661, 0.85327661, 0.85327661],
[-0.05398512, -0.05398512, -0.05398512, -0.05398512]])
""" """
def __init__( def __init__(
...@@ -237,19 +248,22 @@ class LinearCompress(Layer): ...@@ -237,19 +248,22 @@ class LinearCompress(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
# Define the linear layer.
paddle.set_default_dtype('float16') >>> # Define the linear layer.
weight_attr = paddle.ParamAttr( >>> paddle.set_default_dtype('float16')
name="weight", >>> weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.5)) ... name="weight",
bias_attr = paddle.ParamAttr( ... initializer=paddle.nn.initializer.Constant(value=0.5))
name="bias",
initializer=paddle.nn.initializer.Constant(value=1.0)) >>> bias_attr = paddle.ParamAttr(
linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only') ... name="bias",
x = paddle.randn((3, 128), dtype="float16") ... initializer=paddle.nn.initializer.Constant(value=1.0))
y = linear(x)
>>> linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only')
>>> x = paddle.randn((3, 128), dtype="float16")
>>> y = linear(x)
""" """
def __init__( def __init__(
...@@ -527,14 +541,14 @@ class Upsample(Layer): ...@@ -527,14 +541,14 @@ class Upsample(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
input = paddle.rand([2,3,6,10], dtype="float32") >>> input = paddle.rand([2, 3, 6, 10], dtype="float32")
upsample_out = paddle.nn.Upsample(size=[12,12]) >>> upsample_out = paddle.nn.Upsample(size=[12, 12])
output = upsample_out(x=input) >>> output = upsample_out(x=input)
print(output.shape) >>> print(output.shape)
# [2, 3, 12, 12] [2, 3, 12, 12]
""" """
...@@ -627,15 +641,15 @@ class UpsamplingNearest2D(Layer): ...@@ -627,15 +641,15 @@ class UpsamplingNearest2D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_data = paddle.rand(shape=(2,3,6,10)).astype("float32") >>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32")
upsample_out = paddle.nn.UpsamplingNearest2D(size=[12,12]) >>> upsample_out = paddle.nn.UpsamplingNearest2D(size=[12, 12])
input = paddle.to_tensor(input_data) >>> input = paddle.to_tensor(input_data)
output = upsample_out(x=input) >>> output = upsample_out(x=input)
print(output.shape) >>> print(output.shape)
# [2L, 3L, 12L, 12L] [2, 3, 12, 12]
""" """
def __init__( def __init__(
...@@ -713,15 +727,15 @@ class UpsamplingBilinear2D(Layer): ...@@ -713,15 +727,15 @@ class UpsamplingBilinear2D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_data = paddle.rand(shape=(2,3,6,10)).astype("float32") >>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32")
upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12,12]) >>> upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12, 12])
input = paddle.to_tensor(input_data) >>> input = paddle.to_tensor(input_data)
output = upsample_out(x=input) >>> output = upsample_out(x=input)
print(output.shape) >>> print(output.shape)
# [2L, 3L, 12L, 12L] [2, 3, 12, 12]
""" """
def __init__( def __init__(
...@@ -798,15 +812,19 @@ class Bilinear(Layer): ...@@ -798,15 +812,19 @@ class Bilinear(Layer):
Tensor: A 2-D Tensor of shape [batch_size, out_features]. Tensor: A 2-D Tensor of shape [batch_size, out_features].
Examples: Examples:
.. code-block:: python .. code-block:: python
>>> import paddle
import paddle >>> layer1 = paddle.rand((5, 5)).astype('float32')
>>> layer2 = paddle.rand((5, 4)).astype('float32')
>>> bilinear = paddle.nn.Bilinear(in1_features=5,
... in2_features=4,
... out_features=1000)
layer1 = paddle.rand((5, 5)).astype('float32') >>> result = bilinear(layer1,layer2)
layer2 = paddle.rand((5, 4)).astype('float32') >>> print(result.shape)
bilinear = paddle.nn.Bilinear( [5, 1000]
in1_features=5, in2_features=4, out_features=1000)
result = bilinear(layer1,layer2) # result shape [5, 1000]
""" """
...@@ -897,23 +915,24 @@ class Dropout(Layer): ...@@ -897,23 +915,24 @@ class Dropout(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(2023)
x = paddle.to_tensor([[1,2,3], [4,5,6]], dtype="float32") >>> x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype="float32")
m = paddle.nn.Dropout(p=0.5) >>> m = paddle.nn.Dropout(p=0.5)
y_train = m(x) >>> y_train = m(x)
print(y_train) >>> print(y_train)
# Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[2., 0., 6.], [[2., 4., 0.],
# [0., 0., 0.]]) [8., 0., 0.]])
m.eval() # switch the model to test phase >>> m.eval() # switch the model to test phase
y_test = m(x) >>> y_test = m(x)
print(y_test) >>> print(y_test)
# Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[1., 2., 3.], [[1., 2., 3.],
# [4., 5., 6.]]) [4., 5., 6.]])
""" """
def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None): def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None):
...@@ -967,36 +986,33 @@ class Dropout2D(Layer): ...@@ -967,36 +986,33 @@ class Dropout2D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
x = paddle.rand([2, 2, 1, 3], dtype="float32") >>> x = paddle.rand([2, 2, 1, 3], dtype="float32")
print(x) >>> print(x)
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[[[0.10052059, 0.93890846, 0.45351565]], [[[[0.55355281, 0.20714243, 0.01162981]],
# [[0.47507706, 0.45021373, 0.11331241]]], [[0.51577556, 0.36369765, 0.26091650]]],
[[[0.18905126, 0.56219709, 0.00808361]],
# [[[0.53358698, 0.97375143, 0.34997326]], [[0.78120756, 0.32112977, 0.90572405]]]])
# [[0.24758087, 0.52628899, 0.17970420]]]])
>>> m = paddle.nn.Dropout2D(p=0.5)
m = paddle.nn.Dropout2D(p=0.5) >>> y_train = m(x)
y_train = m(x) >>> print(y_train)
print(y_train) Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, [[[[1.10710561, 0.41428486, 0.02325963]],
# [[[[0. , 0. , 0. ]], [[1.03155112, 0.72739530, 0.52183300]]],
# [[0.95015413, 0.90042746, 0.22662482]]], [[[0. , 0. , 0. ]],
[[0. , 0. , 0. ]]]])
# [[[1.06717396, 1.94750285, 0.69994652]],
# [[0. , 0. , 0. ]]]]) >>> m.eval() # switch the model to test phase
>>> y_test = m(x)
m.eval() # switch the model to test phase >>> print(y_test)
y_test = m(x) Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
print(y_test) [[[[0.55355281, 0.20714243, 0.01162981]],
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, [[0.51577556, 0.36369765, 0.26091650]]],
# [[[[0.10052059, 0.93890846, 0.45351565]], [[[0.18905126, 0.56219709, 0.00808361]],
# [[0.47507706, 0.45021373, 0.11331241]]], [[0.78120756, 0.32112977, 0.90572405]]]])
# [[[0.53358698, 0.97375143, 0.34997326]],
# [[0.24758087, 0.52628899, 0.17970420]]]])
""" """
def __init__(self, p=0.5, data_format='NCHW', name=None): def __init__(self, p=0.5, data_format='NCHW', name=None):
...@@ -1048,48 +1064,35 @@ class Dropout3D(Layer): ...@@ -1048,48 +1064,35 @@ class Dropout3D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3)) >>> x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3))
print(x) >>> print(x)
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[[[[0. , 1. , 2. ], [[[[[0. , 1. , 2. ],
# [3. , 4. , 5. ]], [3. , 4. , 5. ]],
# [[6. , 7. , 8. ], [[6. , 7. , 8. ],
# [9. , 10., 11.]]], [9. , 10., 11.]]],
[[[12., 13., 14.],
# [[[12., 13., 14.], [15., 16., 17.]],
# [15., 16., 17.]], [[18., 19., 20.],
# [[18., 19., 20.], [21., 22., 23.]]]]])
# [21., 22., 23.]]]]])
>>> m = paddle.nn.Dropout3D(p=0.5)
m = paddle.nn.Dropout3D(p=0.5) >>> y_train = m(x)
y_train = m(x)
print(y_train) >>> m.eval() # switch the model to test phase
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, >>> y_test = m(x)
# [[[[[0. , 2. , 4. ], >>> print(y_test)
# [6. , 8. , 10.]], Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [[12., 14., 16.], [[[[[0. , 1. , 2. ],
# [18., 20., 22.]]], [3. , 4. , 5. ]],
[[6. , 7. , 8. ],
# [[[0. , 0. , 0. ], [9. , 10., 11.]]],
# [0. , 0. , 0. ]], [[[12., 13., 14.],
# [[0. , 0. , 0. ], [15., 16., 17.]],
# [0. , 0. , 0. ]]]]]) [[18., 19., 20.],
[21., 22., 23.]]]]])
m.eval() # switch the model to test phase
y_test = m(x)
print(y_test)
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[[0. , 1. , 2. ],
# [3. , 4. , 5. ]],
# [[6. , 7. , 8. ],
# [9. , 10., 11.]]],
# [[[12., 13., 14.],
# [15., 16., 17.]],
# [[18., 19., 20.],
# [21., 22., 23.]]]]])
""" """
def __init__(self, p=0.5, data_format='NCDHW', name=None): def __init__(self, p=0.5, data_format='NCDHW', name=None):
...@@ -1139,22 +1142,23 @@ class AlphaDropout(Layer): ...@@ -1139,22 +1142,23 @@ class AlphaDropout(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(2023)
x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32")
m = paddle.nn.AlphaDropout(p=0.5) >>> x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32")
y_train = m(x) >>> m = paddle.nn.AlphaDropout(p=0.5)
print(y_train) >>> y_train = m(x)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, >>> print(y_train)
# [[-0.77919382, 1.66559887], Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-0.77919382, -0.77919382]]) [[-0.10721093, 1.66559887],
[-0.77919382, 1.66559887]])
m.eval() # switch the model to test phase
y_test = m(x) >>> m.eval() # switch the model to test phase
print(y_test) >>> y_test = m(x)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, >>> print(y_test)
# [[-1., 1.], Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
# [-1., 1.]]) [[-1., 1.],
[-1., 1.]])
""" """
def __init__(self, p=0.5, name=None): def __init__(self, p=0.5, name=None):
...@@ -1201,18 +1205,19 @@ class Pad1D(Layer): ...@@ -1201,18 +1205,19 @@ class Pad1D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_shape = (1, 2, 3) >>> input_shape = (1, 2, 3)
pad = [1, 2] >>> pad = [1, 2]
mode = "constant" >>> mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad1D(padding=pad, mode=mode) >>> my_pad = nn.Pad1D(padding=pad, mode=mode)
result = my_pad(data) >>> result = my_pad(data)
print(result) >>> print(result)
# [[[0. 1. 2. 3. 0. 0.] Tensor(shape=[1, 2, 6], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0. 4. 5. 6. 0. 0.]]] [[[0., 1., 2., 3., 0., 0.],
[0., 4., 5., 6., 0., 0.]]])
""" """
def __init__( def __init__(
...@@ -1271,21 +1276,22 @@ class Pad2D(Layer): ...@@ -1271,21 +1276,22 @@ class Pad2D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_shape = (1, 1, 2, 3) >>> input_shape = (1, 1, 2, 3)
pad = [1, 0, 1, 2] >>> pad = [1, 0, 1, 2]
mode = "constant" >>> mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad2D(padding=pad, mode=mode) >>> my_pad = nn.Pad2D(padding=pad, mode=mode)
result = my_pad(data) >>> result = my_pad(data)
print(result) >>> print(result)
# [[[[0. 0. 0. 0.] Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0. 1. 2. 3.] [[[[0., 0., 0., 0.],
# [0. 4. 5. 6.] [0., 1., 2., 3.],
# [0. 0. 0. 0.] [0., 4., 5., 6.],
# [0. 0. 0. 0.]]]] [0., 0., 0., 0.],
[0., 0., 0., 0.]]]])
""" """
def __init__( def __init__(
...@@ -1336,26 +1342,24 @@ class ZeroPad2D(Layer): ...@@ -1336,26 +1342,24 @@ class ZeroPad2D(Layer):
The data type is same as input x. The data type is same as input x.
Examples: Examples:
Examples are as follows.
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_shape = paddle.to_tensor([1, 1, 2, 3]) >>> input_shape = paddle.to_tensor([1, 1, 2, 3])
pad = [1, 0, 1, 2] >>> pad = [1, 0, 1, 2]
data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1 >>> data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1
>>> my_pad = nn.ZeroPad2D(padding=pad)
my_pad = nn.ZeroPad2D(padding=pad) >>> result = my_pad(data)
result = my_pad(data) >>> print(result)
Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
print(result) [[[[0., 0., 0., 0.],
# [[[[0. 0. 0. 0.] [0., 1., 2., 3.],
# [0. 1. 2. 3.] [0., 4., 5., 6.],
# [0. 4. 5. 6.] [0., 0., 0., 0.],
# [0. 0. 0. 0.] [0., 0., 0., 0.]]]])
# [0. 0. 0. 0.]]]]
""" """
def __init__(self, padding, data_format="NCHW", name=None): def __init__(self, padding, data_format="NCHW", name=None):
...@@ -1412,21 +1416,22 @@ class Pad3D(Layer): ...@@ -1412,21 +1416,22 @@ class Pad3D(Layer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
input_shape = (1, 1, 1, 2, 3) >>> input_shape = (1, 1, 1, 2, 3)
pad = [1, 0, 1, 2, 0, 0] >>> pad = [1, 0, 1, 2, 0, 0]
mode = "constant" >>> mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad3D(padding=pad, mode=mode) >>> my_pad = nn.Pad3D(padding=pad, mode=mode)
result = my_pad(data) >>> result = my_pad(data)
print(result) >>> print(result)
# [[[[[0. 0. 0. 0.] Tensor(shape=[1, 1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0. 1. 2. 3.] [[[[[0., 0., 0., 0.],
# [0. 4. 5. 6.] [0., 1., 2., 3.],
# [0. 0. 0. 0.] [0., 4., 5., 6.],
# [0. 0. 0. 0.]]]]] [0., 0., 0., 0.],
[0., 0., 0., 0.]]]]])
""" """
def __init__( def __init__(
...@@ -1476,13 +1481,13 @@ class CosineSimilarity(Layer): ...@@ -1476,13 +1481,13 @@ class CosineSimilarity(Layer):
Case 0: Case 0:
x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ] x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ]
[0.48949873 0.5797396 0.65444374 0.66510963] [0.48949873 0.5797396 0.65444374 0.66510963]
[0.1031398 0.9614342 0.08365563 0.6796464 ] [0.1031398 0.9614342 0.08365563 0.6796464 ]
[0.10760343 0.7461209 0.7726148 0.5801006 ]] [0.10760343 0.7461209 0.7726148 0.5801006 ]]
x2 = [[0.62913156 0.1536727 0.9847992 0.04591406] x2 = [[0.62913156 0.1536727 0.9847992 0.04591406]
[0.9098952 0.15715368 0.8671125 0.3156102 ] [0.9098952 0.15715368 0.8671125 0.3156102 ]
[0.4427798 0.54136837 0.5276275 0.32394758] [0.4427798 0.54136837 0.5276275 0.32394758]
[0.3769419 0.8535014 0.48041078 0.9256797 ]] [0.3769419 0.8535014 0.48041078 0.9256797 ]]
axis = 1 axis = 1
eps = 1e-8 eps = 1e-8
Out: [0.5275037 0.8368967 0.75037485 0.9245899] Out: [0.5275037 0.8368967 0.75037485 0.9245899]
...@@ -1490,19 +1495,19 @@ class CosineSimilarity(Layer): ...@@ -1490,19 +1495,19 @@ class CosineSimilarity(Layer):
Code Examples: Code Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
x1 = paddle.to_tensor([[1., 2., 3.], >>> x1 = paddle.to_tensor([[1., 2., 3.],
[2., 3., 4.]], dtype="float32") ... [2., 3., 4.]], dtype="float32")
x2 = paddle.to_tensor([[8., 3., 3.], >>> x2 = paddle.to_tensor([[8., 3., 3.],
[2., 3., 4.]], dtype="float32") ... [2., 3., 4.]], dtype="float32")
cos_sim_func = nn.CosineSimilarity(axis=0) >>> cos_sim_func = nn.CosineSimilarity(axis=0)
result = cos_sim_func(x1, x2) >>> result = cos_sim_func(x1, x2)
print(result) >>> print(result)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
# [0.65079135, 0.98058069, 1. ]) [0.65079135, 0.98058069, 1. ])
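>>> # A small sketch of the axis argument: with axis=1 (as in Case 0 above) the
>>> # similarity is computed per row rather than per column, giving one value per sample.
>>> cos_sim_func_row = nn.CosineSimilarity(axis=1)
>>> result_row = cos_sim_func_row(x1, x2)
>>> print(result_row.shape)
[2]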
""" """
def __init__(self, axis=1, eps=1e-8): def __init__(self, axis=1, eps=1e-8):
...@@ -1544,19 +1549,16 @@ class Embedding(Layer): ...@@ -1544,19 +1549,16 @@ class Embedding(Layer):
output is a Tensor: output is a Tensor:
out.shape = [3, 2, 16] out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]], [0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745], [[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]], [0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365], [[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data [0.0, 0.0, ..., 0.0 ]]] # padding data
The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
It will pad all-zero data when ids is 127. It will pad all-zero data when ids is 127.
Parameters: Parameters:
num_embeddings (int): Just one element which indicates the size num_embeddings (int): Just one element which indicates the size of the dictionary of embeddings.
of the dictionary of embeddings.
embedding_dim (int): Just one element which indicates the size of each embedding vector respectively. embedding_dim (int): Just one element which indicates the size of each embedding vector respectively.
padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-num_embeddings, num_embeddings). padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
...@@ -1574,9 +1576,8 @@ class Embedding(Layer): ...@@ -1574,9 +1576,8 @@ class Embedding(Layer):
The local word vector needs to be transformed into numpy format, and the shape of local word The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer` vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example for details. is used to load custom or pre-trained word vectors. See code example for details.
name(str|None, optional): For detailed information, please refer name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and
to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
None by default.
Attribute: Attribute:
**weight** (Parameter): the learnable weights of this layer. **weight** (Parameter): the learnable weights of this layer.
...@@ -1588,36 +1589,36 @@ class Embedding(Layer): ...@@ -1588,36 +1589,36 @@ class Embedding(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False) >>> x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False)
embedding = paddle.nn.Embedding(4, 3, sparse=True) >>> embedding = paddle.nn.Embedding(4, 3, sparse=True)
w0 = paddle.to_tensor([[0., 0., 0.], >>> w0 = paddle.to_tensor([[0., 0., 0.],
[1., 1., 1.], ... [1., 1., 1.],
[2., 2., 2.], ... [2., 2., 2.],
[3., 3., 3.]], dtype="float32") ... [3., 3., 3.]], dtype="float32")
embedding.weight.set_value(w0) >>> embedding.weight.set_value(w0)
print(embedding.weight) >>> print(embedding.weight)
# Tensor(shape=[4, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False, Parameter containing:
# [[0., 0., 0.], Tensor(shape=[4, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
# [1., 1., 1.], [[0., 0., 0.],
# [2., 2., 2.], [1., 1., 1.],
# [3., 3., 3.]]) [2., 2., 2.],
[3., 3., 3.]])
adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
adam.clear_grad() >>> adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
>>> adam.clear_grad()
out = embedding(x) >>> out = embedding(x)
print(out) >>> print(out)
# Tensor(shape=[3, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False, Tensor(shape=[3, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
# [[[0., 0., 0.]], [[[0., 0., 0.]],
# [[1., 1., 1.]], [[1., 1., 1.]],
# [[3., 3., 3.]]]) [[3., 3., 3.]]])
out.backward() >>> out.backward()
adam.step() >>> adam.step()
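>>> # A minimal sketch of the padding_idx behaviour described above, assuming
>>> # num_embeddings=128 and padding_idx=-1 (which is converted to 127):
>>> emb_pad = paddle.nn.Embedding(128, 16, padding_idx=-1)
>>> ids = paddle.to_tensor([[2, 4], [5, 127], [64, 127]], dtype="int64")
>>> out_pad = emb_pad(ids)
>>> print(out_pad.shape)
[3, 2, 16]
>>> print(out_pad[1, 1].abs().sum().item())  # rows looked up with id 127 are all zeros
0.0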
""" """
...@@ -1708,36 +1709,35 @@ class Unfold(Layer): ...@@ -1708,36 +1709,35 @@ class Unfold(Layer):
Parameters: Parameters:
kernel_sizes(int|list): The size of convolution kernel, should be [k_h, k_w] kernel_sizes(int|list): The size of convolution kernel, should be [k_h, k_w]
or an integer k treated as [k, k]. or an integer k treated as [k, k].
strides(int|list): The strides, should be [stride_h, stride_w] strides(int|list, optional): The strides, should be [stride_h, stride_w]
or an integer stride treated as [stride, stride]. or an integer stride treated as [stride, stride]. For default, strides will be [1, 1].
For default, strides will be [1, 1]. paddings(int|list, optional): The paddings of each dimension, should be
paddings(int|list): The paddings of each dimension, should be [padding_top, padding_left, padding_bottom, padding_right] or [padding_h, padding_w]
[padding_top, padding_left, padding_bottom, padding_right] or an integer padding. If [padding_h, padding_w] was given, it will be expanded to
or [padding_h, padding_w] or an integer padding. [padding_h, padding_w, padding_h, padding_w]. If an integer padding was given,
If [padding_h, padding_w] was given, it will be expanded to [padding, padding, padding, padding] will be used. For default,
[padding_h, padding_w, padding_h, padding_w]. If an integer paddings will be [0, 0, 0, 0].
padding was given, [padding, padding, padding, padding] will dilations(int|list, optional): The dilations of convolution kernel, should be
be used. For default, paddings will be [0, 0, 0, 0] [dilation_h, dilation_w], or an integer dilation treated as [dilation, dilation].
dilations(int|list): the dilations of convolution kernel, should be For default, it will be [1, 1].
[dilation_h, dilation_w], or an integer dilation treated as name(str, optional): The default value is None. Normally there is no need for user to
[dilation, dilation]. For default, it will be [1, 1]. set this property. For more information, please refer to :ref:`api_guide_Name`
name(str, optional): The default value is None.
Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name`
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
>>> x = paddle.randn((100, 3, 224, 224))
>>> unfold = nn.Unfold(kernel_sizes=[3, 3])
>>> result = unfold(x)
>>> print(result.shape)
[100, 27, 49284]
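>>> # Shape check, assuming the default strides=[1, 1], paddings=[0, 0, 0, 0] and
>>> # dilations=[1, 1]: 27 = C * k_h * k_w = 3 * 3 * 3 output channels, and
>>> # 49284 = 222 * 222 sliding positions, since (224 - 3) / 1 + 1 = 222 per spatial dim.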
x = paddle.randn((100,3,224,224))
unfold = nn.Unfold(kernel_sizes=[3, 3])
result = unfold(x)
print(result)
""" """
def __init__( def __init__(
...@@ -1790,21 +1790,21 @@ class Fold(Layer): ...@@ -1790,21 +1790,21 @@ class Fold(Layer):
C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]} \\ C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]} \\
Parameters: Parameters:
output_sizes(list): The size of output size, should be [output_size_h, output_size_w] output_sizes(list): The size of output size, should be [output_size_h, output_size_w]
or an integer o treated as [o, o]. or an integer o treated as [o, o].
kernel_sizes(int|list|tuple): The size of convolution kernel, should be [k_h, k_w] kernel_sizes(int|list|tuple): The size of convolution kernel, should be [k_h, k_w]
or an integer k treated as [k, k]. or an integer k treated as [k, k].
strides(int|list|tuple, optional): The strides, should be [stride_h, stride_w] strides(int|list|tuple, optional): The strides, should be [stride_h, stride_w]
or an integer stride treated as [stride, stride]. or an integer stride treated as [stride, stride].
For default, strides will be [1, 1]. For default, strides will be [1, 1].
paddings(int|list|tuple, optional): The paddings of each dimension, should be paddings(int|list|tuple, optional): The paddings of each dimension, should be
[padding_top, padding_left, padding_bottom, padding_right] [padding_top, padding_left, padding_bottom, padding_right]
or [padding_h, padding_w] or an integer padding. or [padding_h, padding_w] or an integer padding.
If [padding_h, padding_w] was given, it will be expanded to If [padding_h, padding_w] was given, it will be expanded to
[padding_h, padding_w, padding_h, padding_w]. If an integer [padding_h, padding_w, padding_h, padding_w]. If an integer
padding was given, [padding, padding, padding, padding] will padding was given, [padding, padding, padding, padding] will
be used. For default, paddings will be [0, 0, 0, 0] be used. For default, paddings will be [0, 0, 0, 0]
dilations(int|list|tuple, optional): the dilations of convolution kernel, should be dilations(int|list|tuple, optional): The dilations of convolution kernel, should be
[dilation_h, dilation_w], or an integer dilation treated as [dilation_h, dilation_w], or an integer dilation treated as
[dilation, dilation]. For default, it will be [1, 1]. [dilation, dilation]. For default, it will be [1, 1].
name(str, optional): The default value is None. name(str, optional): The default value is None.
...@@ -1820,13 +1820,14 @@ class Fold(Layer): ...@@ -1820,13 +1820,14 @@ class Fold(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
x = paddle.randn([2,3*2*2,12]) >>> x = paddle.randn([2, 3*2*2, 12])
fold = nn.Fold(output_sizes=[4, 5], kernel_sizes=2) >>> fold = nn.Fold(output_sizes=[4, 5], kernel_sizes=2)
y = fold(x) >>> y = fold(x)
# y.shape = [2,3,4,5] >>> print(y.shape)
[2, 3, 4, 5]
""" """
def __init__( def __init__(
...@@ -1886,12 +1887,13 @@ class Flatten(Layer): ...@@ -1886,12 +1887,13 @@ class Flatten(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
inp = paddle.ones([5, 2, 3, 4]).astype('float32') >>> inp = paddle.ones([5, 2, 3, 4]).astype('float32')
flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2) >>> flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
y = flatten(inp) >>> y = flatten(inp)
# y.shape = [5, 6, 4] >>> print(y.shape)
[5, 6, 4]
""" """
...@@ -1928,15 +1930,15 @@ class Unflatten(Layer): ...@@ -1928,15 +1930,15 @@ class Unflatten(Layer):
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
x = paddle.randn(shape=[4, 6, 8]) >>> x = paddle.randn(shape=[4, 6, 8])
shape = [2, 3] >>> shape = [2, 3]
axis = 1 >>> axis = 1
unflatten = paddle.nn.Unflatten(axis, shape) >>> unflatten = paddle.nn.Unflatten(axis, shape)
res = unflatten(x) >>> res = unflatten(x)
print(res.shape) >>> print(res.shape)
# [4, 2, 3, 8] [4, 2, 3, 8]
""" """
......
...@@ -357,22 +357,38 @@ class Layer: ...@@ -357,22 +357,38 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class MyLayer(paddle.nn.Layer): >>> paddle.seed(100)
def __init__(self):
super().__init__() >>> class MyLayer(paddle.nn.Layer):
self._linear = paddle.nn.Linear(1, 1) ... def __init__(self):
self._dropout = paddle.nn.Dropout(p=0.5) ... super().__init__()
def forward(self, input): ... self._linear = paddle.nn.Linear(1, 1)
temp = self._linear(input) ... self._dropout = paddle.nn.Dropout(p=0.5)
temp = self._dropout(temp) ...
return temp ... def forward(self, input):
x = paddle.randn([10, 1], 'float32') ... temp = self._linear(input)
mylayer = MyLayer() ... temp = self._dropout(temp)
mylayer.eval() # set mylayer._dropout to eval mode ... return temp
out = mylayer(x) ...
mylayer.train() # set mylayer._dropout to train mode >>> x = paddle.randn([10, 1], 'float32')
out = mylayer(x) >>> mylayer = MyLayer()
>>> mylayer.eval() # set mylayer._dropout to eval mode
>>> out = mylayer(x)
>>> mylayer.train() # set mylayer._dropout to train mode
>>> out = mylayer(x)
>>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-3.44879317],
[ 0. ],
[ 0. ],
[-0.73825276],
[ 0. ],
[ 0. ],
[ 0.64444798],
[-3.22185946],
[ 0. ],
[-0.68077987]])
""" """
def __init__(self, name_scope=None, dtype="float32"): def __init__(self, name_scope=None, dtype="float32"):
...@@ -419,25 +435,38 @@ class Layer: ...@@ -419,25 +435,38 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
class MyLayer(paddle.nn.Layer):
def __init__(self): >>> class MyLayer(paddle.nn.Layer):
super().__init__() ... def __init__(self):
self._linear = paddle.nn.Linear(1, 1) ... super().__init__()
self._dropout = paddle.nn.Dropout(p=0.5) ... self._linear = paddle.nn.Linear(1, 1)
... self._dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input): ...
temp = self._linear(input) ... def forward(self, input):
temp = self._dropout(temp) ... temp = self._linear(input)
return temp ... temp = self._dropout(temp)
... return temp
x = paddle.randn([10, 1], 'float32') ...
mylayer = MyLayer() >>> x = paddle.randn([10, 1], 'float32')
mylayer.eval() # set mylayer._dropout to eval mode >>> mylayer = MyLayer()
out = mylayer(x) >>> mylayer.eval() # set mylayer._dropout to eval mode
mylayer.train() # set mylayer._dropout to train mode >>> out = mylayer(x)
out = mylayer(x) >>> mylayer.train() # set mylayer._dropout to train mode
>>> out = mylayer(x)
>>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-3.44879317],
[ 0. ],
[ 0. ],
[-0.73825276],
[ 0. ],
[ 0. ],
[ 0.64444798],
[-3.22185946],
[ 0. ],
[-0.68077987]])
""" """
# global setting in dygraph # global setting in dygraph
...@@ -461,24 +490,35 @@ class Layer: ...@@ -461,24 +490,35 @@ class Layer:
Example:: Example::
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
class MyLayer(paddle.nn.Layer): >>> class MyLayer(paddle.nn.Layer):
def __init__(self): ... def __init__(self):
super().__init__() ... super().__init__()
self._linear = paddle.nn.Linear(1, 1) ... self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5) ... self._dropout = paddle.nn.Dropout(p=0.5)
...
def forward(self, input): ... def forward(self, input):
temp = self._linear(input) ... temp = self._linear(input)
temp = self._dropout(temp) ... temp = self._dropout(temp)
return temp ... return temp
...
x = paddle.randn([10, 1], 'float32') >>> x = paddle.randn([10, 1], 'float32')
mylayer = MyLayer() >>> mylayer = MyLayer()
mylayer.eval() # set mylayer._dropout to eval mode >>> mylayer.eval() # set mylayer._dropout to eval mode
out = mylayer(x) >>> out = mylayer(x)
print(out) >>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-1.72439659],
[ 0.31532824],
[ 0.01192369],
[-0.36912638],
[-1.63426113],
[-0.93169814],
[ 0.32222399],
[-1.61092973],
[ 0.77209264],
[-0.34038994]])
""" """
# global setting in dygraph # global setting in dygraph
...@@ -506,22 +546,41 @@ class Layer: ...@@ -506,22 +546,41 @@ class Layer:
Example:: Example::
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import paddle.nn as nn >>> import paddle.nn as nn
>>> paddle.seed(2023)
net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
>>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
def init_weights(layer):
if type(layer) == nn.Linear: >>> def init_weights(layer):
print('before init weight:', layer.weight.numpy()) ... if type(layer) == nn.Linear:
new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9) ... print('before init weight:', layer.weight.numpy())
layer.weight.set_value(new_weight) ... new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
print('after init weight:', layer.weight.numpy()) ... layer.weight.set_value(new_weight)
... print('after init weight:', layer.weight.numpy())
net.apply(init_weights) ...
>>> net.apply(init_weights)
print(net.state_dict())
>>> print(net.state_dict())
before init weight: [[ 0.89611185 0.04935038]
[-0.5888344 0.99266374]]
after init weight: [[0.9 0.9]
[0.9 0.9]]
before init weight: [[-0.18615901 -0.22924072]
[ 1.1517721 0.59859073]]
after init weight: [[0.9 0.9]
[0.9 0.9]]
OrderedDict([('0.weight', Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.89999998, 0.89999998],
[0.89999998, 0.89999998]])), ('0.bias', Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0.])), ('1.weight', Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.89999998, 0.89999998],
[0.89999998, 0.89999998]])), ('1.bias', Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0.]))])
""" """
for layer in self.children(): for layer in self.children():
layer.apply(fn) layer.apply(fn)
...@@ -541,18 +600,19 @@ class Layer: ...@@ -541,18 +600,19 @@ class Layer:
Example:: Example::
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class LinearNet(paddle.nn.Layer):
def __init__(self):
super().__init__(name_scope = "demo_linear_net")
self._linear = paddle.nn.Linear(1, 1)
def forward(self, x): >>> class LinearNet(paddle.nn.Layer):
return self._linear(x) ... def __init__(self):
... super().__init__(name_scope = "demo_linear_net")
linear_net = LinearNet() ... self._linear = paddle.nn.Linear(1, 1)
print(linear_net.full_name()) # demo_linear_net_0 ...
... def forward(self, x):
... return self._linear(x)
...
>>> linear_net = LinearNet()
>>> print(linear_net.full_name())
demo_linear_net_0
""" """
return self._full_name return self._full_name
...@@ -576,33 +636,33 @@ class Layer: ...@@ -576,33 +636,33 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import numpy as np >>> import numpy as np
# the forward_post_hook change the output of the layer: output = output * 2
def forward_post_hook(layer, input, output):
# user can use layer, input and output for information statistics tasks
# change the output >>> # the forward_post_hook change the output of the layer: output = output * 2
return output * 2 >>> def forward_post_hook(layer, input, output):
... # user can use layer, input and output for information statistics tasks
...
... # change the output
... return output * 2
...
>>> linear = paddle.nn.Linear(13, 5)
linear = paddle.nn.Linear(13, 5) >>> # register the hook
>>> forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)
# register the hook >>> value1 = np.arange(26).reshape(2, 13).astype("float32")
forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook) >>> in1 = paddle.to_tensor(value1)
value1 = np.arange(26).reshape(2, 13).astype("float32") >>> out0 = linear(in1)
in1 = paddle.to_tensor(value1)
out0 = linear(in1) >>> # remove the hook
>>> forward_post_hook_handle.remove()
# remove the hook >>> out1 = linear(in1)
forward_post_hook_handle.remove()
out1 = linear(in1) >>> # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
>>> assert (out0.numpy() == (out1.numpy()) * 2).any()
# hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
assert (out0.numpy() == (out1.numpy()) * 2).any()
""" """
hook_remove_helper = HookRemoveHelper(self._forward_post_hooks) hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
...@@ -630,35 +690,35 @@ class Layer: ...@@ -630,35 +690,35 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import numpy as np >>> import numpy as np
# the forward_pre_hook change the input of the layer: input = input * 2
def forward_pre_hook(layer, input):
# user can use layer and input for information statistics tasks
# change the input >>> # the forward_pre_hook change the input of the layer: input = input * 2
input_return = (input[0] * 2) >>> def forward_pre_hook(layer, input):
return input_return ... # user can use layer and input for information statistics tasks
...
... # change the input
... input_return = (input[0] * 2)
... return input_return
...
>>> linear = paddle.nn.Linear(13, 5)
linear = paddle.nn.Linear(13, 5) >>> # register the hook
>>> forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)
# register the hook >>> value0 = np.arange(26).reshape(2, 13).astype("float32")
forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook) >>> in0 = paddle.to_tensor(value0)
>>> out0 = linear(in0)
value0 = np.arange(26).reshape(2, 13).astype("float32") >>> # remove the hook
in0 = paddle.to_tensor(value0) >>> forward_pre_hook_handle.remove()
out0 = linear(in0)
# remove the hook >>> value1 = value0 * 2
forward_pre_hook_handle.remove() >>> in1 = paddle.to_tensor(value1)
>>> out1 = linear(in1)
value1 = value0 * 2 >>> # hook change the linear's input to input * 2, so out0 is equal to out1.
in1 = paddle.to_tensor(value1) >>> assert (out0.numpy() == out1.numpy()).any()
out1 = linear(in1)
# hook change the linear's input to input * 2, so out0 is equal to out1.
assert (out0.numpy() == out1.numpy()).any()
""" """
hook_remove_helper = HookRemoveHelper(self._forward_pre_hooks) hook_remove_helper = HookRemoveHelper(self._forward_pre_hooks)
self._forward_pre_hooks[hook_remove_helper._hook_id] = hook self._forward_pre_hooks[hook_remove_helper._hook_id] = hook
...@@ -691,22 +751,31 @@ class Layer: ...@@ -691,22 +751,31 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(2023)
class MyLayer(paddle.nn.Layer):
def __init__(self): >>> class MyLayer(paddle.nn.Layer):
super().__init__() ... def __init__(self):
self._linear = paddle.nn.Linear(1, 1) ... super().__init__()
w_tmp = self.create_parameter([1,1]) ... self._linear = paddle.nn.Linear(1, 1)
self.add_parameter("w_tmp", w_tmp) ... w_tmp = self.create_parameter([1,1])
... self.add_parameter("w_tmp", w_tmp)
def forward(self, input): ...
return self._linear(input) ... def forward(self, input):
... return self._linear(input)
mylayer = MyLayer() ...
for name, param in mylayer.named_parameters(): >>> mylayer = MyLayer()
print(name, param) # will print w_tmp,_linear.weight,_linear.bias >>> for name, param in mylayer.named_parameters():
... print(name, param) # will print w_tmp,_linear.weight,_linear.bias
w_tmp Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.06979191]])
_linear.weight Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[1.26729357]])
_linear.bias Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])
""" """
temp_attr = copy.deepcopy(attr) temp_attr = copy.deepcopy(attr)
if isinstance(temp_attr, str) and temp_attr == "": if isinstance(temp_attr, str) and temp_attr == "":
...@@ -738,22 +807,22 @@ class Layer: ...@@ -738,22 +807,22 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class MyLinear(paddle.nn.Layer): >>> class MyLinear(paddle.nn.Layer):
def __init__(self, ... def __init__(self,
in_features, ... in_features,
out_features): ... out_features):
super().__init__() ... super().__init__()
self.linear = paddle.nn.Linear( 10, 10) ... self.linear = paddle.nn.Linear( 10, 10)
...
self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype) ... self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
...
def forward(self, input): ... def forward(self, input):
out = self.linear(input) ... out = self.linear(input)
paddle.assign( out, self.back_var) ... paddle.assign( out, self.back_var)
...
return out ... return out
""" """
if name is not None: if name is not None:
...@@ -790,22 +859,22 @@ class Layer: ...@@ -790,22 +859,22 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class MyLinear(paddle.nn.Layer): >>> class MyLinear(paddle.nn.Layer):
def __init__(self, ... def __init__(self,
in_features, ... in_features,
out_features): ... out_features):
super().__init__() ... super().__init__()
self.linear = paddle.nn.Linear( 10, 10) ... self.linear = paddle.nn.Linear(10, 10)
...
self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype) ... self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
...
def forward(self, input): ... def forward(self, input):
out = self.linear(input) ... out = self.linear(input)
paddle.assign( out, self.back_var) ... paddle.assign(out, self.back_var)
...
return out ... return out
""" """
if name is not None: if name is not None:
...@@ -833,10 +902,16 @@ class Layer: ...@@ -833,10 +902,16 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
linear = paddle.nn.Linear(1,1) >>> linear = paddle.nn.Linear(1, 1)
print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0 >>> print(linear.parameters())
[Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.18551230]]), Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])]
""" """
ret = [ ret = [
...@@ -858,15 +933,16 @@ class Layer: ...@@ -858,15 +933,16 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
linear1 = paddle.nn.Linear(10, 3) >>> linear1 = paddle.nn.Linear(10, 3)
linear2 = paddle.nn.Linear(3, 10, bias_attr=False) >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(linear1, linear2) >>> model = paddle.nn.Sequential(linear1, linear2)
layer_list = list(model.children()) >>> layer_list = list(model.children())
print(layer_list) # [<paddle.nn.layer.common.Linear object at 0x7f7b8113f830>, <paddle.nn.layer.common.Linear object at 0x7f7b8113f950>] >>> print(layer_list)
[Linear(in_features=10, out_features=3, dtype=float32), Linear(in_features=3, out_features=10, dtype=float32)]
""" """
for _, layer in self.named_children(): for _, layer in self.named_children():
...@@ -882,16 +958,15 @@ class Layer: ...@@ -882,16 +958,15 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
linear1 = paddle.nn.Linear(10, 3)
linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(linear1, linear2)
for prefix, layer in model.named_children():
print(prefix, layer)
# ('0', <paddle.nn.layer.common.Linear object at 0x7fb61ed85830>)
# ('1', <paddle.nn.layer.common.Linear object at 0x7fb61ed85950>)
>>> linear1 = paddle.nn.Linear(10, 3)
>>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(linear1, linear2)
>>> for prefix, layer in model.named_children():
... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
""" """
memo = set() memo = set()
for name, layer in self._sub_layers.items(): for name, layer in self._sub_layers.items():
...@@ -913,21 +988,22 @@ class Layer: ...@@ -913,21 +988,22 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class MyLayer(paddle.nn.Layer): >>> class MyLayer(paddle.nn.Layer):
def __init__(self): ... def __init__(self):
super().__init__() ... super().__init__()
self._linear = paddle.nn.Linear(1, 1) ... self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5) ... self._dropout = paddle.nn.Dropout(p=0.5)
...
def forward(self, input): ... def forward(self, input):
temp = self._linear(input) ... temp = self._linear(input)
temp = self._dropout(temp) ... temp = self._dropout(temp)
return temp ... return temp
...
mylayer = MyLayer() >>> mylayer = MyLayer()
print(mylayer.sublayers()) # [<paddle.nn.layer.common.Linear object at 0x7f44b58977d0>, <paddle.nn.layer.common.Dropout object at 0x7f44b58978f0>] >>> print(mylayer.sublayers())
[Linear(in_features=1, out_features=1, dtype=float32), Dropout(p=0.5, axis=None, mode=upscale_in_train)]
""" """
ret = [ ret = [
...@@ -951,14 +1027,37 @@ class Layer: ...@@ -951,14 +1027,37 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False) >>> fc1 = paddle.nn.Linear(10, 3)
model = paddle.nn.Sequential(fc1, fc2) >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
for name, param in model.named_parameters(): >>> model = paddle.nn.Sequential(fc1, fc2)
print(name, param) >>> for name, param in model.named_parameters():
... print(name, param)
0.weight Parameter containing:
Tensor(shape=[10, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.07276392, -0.39791510, -0.66356444],
[ 0.02143478, -0.18519843, -0.32485050],
[-0.42249614, 0.08450919, -0.66838276],
[ 0.38208580, -0.24303678, 0.55127048],
[ 0.47745085, 0.62117910, -0.08336520],
[-0.28653207, 0.47237599, -0.05868882],
[-0.14385653, 0.29945642, 0.12832761],
[-0.21237159, 0.38539791, -0.62760031],
[ 0.02637231, 0.20621127, 0.43255770],
[-0.19984481, -0.26259184, -0.29696006]])
0.bias Parameter containing:
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0., 0.])
1.weight Parameter containing:
Tensor(shape=[3, 10], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.01985580, -0.40268910, 0.41172385, -0.47249708, -0.09002256,
-0.00533628, -0.52048630, 0.62360322, 0.20848787, -0.02033746],
[ 0.58281910, 0.12841827, 0.12907702, 0.02325618, -0.07746267,
0.31950659, -0.37924835, -0.59209681, -0.11732036, -0.58378261],
[-0.62100595, 0.22293305, 0.28229684, -0.03687060, -0.59323978,
0.08411229, 0.53275704, 0.40431368, 0.03171402, -0.17922515]])
""" """
params_set = set() params_set = set()
named_sublayers = ( named_sublayers = (
...@@ -991,14 +1090,15 @@ class Layer: ...@@ -991,14 +1090,15 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(fc1, fc2)
for prefix, layer in model.named_sublayers():
print(prefix, layer)
>>> fc1 = paddle.nn.Linear(10, 3)
>>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(fc1, fc2)
>>> for prefix, layer in model.named_sublayers():
... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
""" """
if layers_set is None: if layers_set is None:
layers_set = set() layers_set = set()
...@@ -1039,16 +1139,18 @@ class Layer: ...@@ -1039,16 +1139,18 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np >>> import numpy as np
import paddle >>> import paddle
linear = paddle.nn.Linear(10, 3) >>> linear = paddle.nn.Linear(10, 3)
value = np.array([0]).astype("float32") >>> value = np.array([0]).astype("float32")
buffer = paddle.to_tensor(value) >>> buffer = paddle.to_tensor(value)
linear.register_buffer("buf_name", buffer, persistable=True) >>> linear.register_buffer("buf_name", buffer, persistable=True)
# get the buffer by attribute. >>> # get the buffer by attribute.
print(linear.buf_name) >>> print(linear.buf_name)
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])
""" """
...@@ -1097,15 +1199,17 @@ class Layer: ...@@ -1097,15 +1199,17 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np >>> import numpy as np
import paddle >>> import paddle
linear = paddle.nn.Linear(10, 3) >>> linear = paddle.nn.Linear(10, 3)
value = np.array([0]).astype("float32") >>> value = np.array([0]).astype("float32")
buffer = paddle.to_tensor(value) >>> buffer = paddle.to_tensor(value)
linear.register_buffer("buf_name", buffer, persistable=True) >>> linear.register_buffer("buf_name", buffer, persistable=True)
print(linear.buffers()) # == print([linear.buf_name]) >>> print(linear.buffers())
[Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])]
""" """
ret = [ ret = [
...@@ -1131,26 +1235,29 @@ class Layer: ...@@ -1131,26 +1235,29 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np >>> import numpy as np
import paddle >>> import paddle
fc1 = paddle.nn.Linear(10, 3) >>> fc1 = paddle.nn.Linear(10, 3)
buffer1 = paddle.to_tensor(np.array([0]).astype("float32")) >>> buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
# register a tensor as buffer by specific `persistable` >>> # register a tensor as buffer by specific `persistable`
fc1.register_buffer("buf_name_1", buffer1, persistable=True) >>> fc1.register_buffer("buf_name_1", buffer1, persistable=True)
fc2 = paddle.nn.Linear(3, 10) >>> fc2 = paddle.nn.Linear(3, 10)
buffer2 = paddle.to_tensor(np.array([1]).astype("float32")) >>> buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
# register a buffer by assigning an attribute with Tensor. >>> # register a buffer by assigning an attribute with Tensor.
# The `persistable` can only be False by this way. >>> # The `persistable` can only be False by this way.
fc2.buf_name_2 = buffer2 >>> fc2.buf_name_2 = buffer2
model = paddle.nn.Sequential(fc1, fc2) >>> model = paddle.nn.Sequential(fc1, fc2)
# get all named buffers >>> # get all named buffers
for name, buffer in model.named_buffers(): >>> for name, buffer in model.named_buffers():
print(name, buffer) ... print(name, buffer)
0.buf_name_1 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])
1.buf_name_2 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[1.])
""" """
buffers_set = set() buffers_set = set()
named_sublayers = ( named_sublayers = (
...@@ -1177,18 +1284,18 @@ class Layer: ...@@ -1177,18 +1284,18 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import numpy as np >>> import numpy as np
value = np.arange(26).reshape(2, 13).astype("float32") >>> value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value) >>> a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5) >>> linear = paddle.nn.Linear(13, 5)
adam = paddle.optimizer.Adam(learning_rate=0.01, >>> adam = paddle.optimizer.Adam(learning_rate=0.01,
parameters=linear.parameters()) ... parameters=linear.parameters())
out = linear(a) >>> out = linear(a)
out.backward() >>> out.backward()
adam.step() >>> adam.step()
linear.clear_gradients() >>> linear.clear_gradients()
""" """
for p in self.parameters(): for p in self.parameters():
...@@ -1271,29 +1378,30 @@ class Layer: ...@@ -1271,29 +1378,30 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class MySequential(paddle.nn.Layer): >>> class MySequential(paddle.nn.Layer):
def __init__(self, *layers): ... def __init__(self, *layers):
super().__init__() ... super().__init__()
if len(layers) > 0 and isinstance(layers[0], tuple): ... if len(layers) > 0 and isinstance(layers[0], tuple):
for name, layer in layers: ... for name, layer in layers:
self.add_sublayer(name, layer) ... self.add_sublayer(name, layer)
else: ... else:
for idx, layer in enumerate(layers): ... for idx, layer in enumerate(layers):
self.add_sublayer(str(idx), layer) ... self.add_sublayer(str(idx), layer)
...
def forward(self, input): ... def forward(self, input):
for layer in self._sub_layers.values(): ... for layer in self._sub_layers.values():
input = layer(input) ... input = layer(input)
return input ... return input
...
fc1 = paddle.nn.Linear(10, 3) >>> fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False) >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = MySequential(fc1, fc2) >>> model = MySequential(fc1, fc2)
for prefix, layer in model.named_sublayers(): >>> for prefix, layer in model.named_sublayers():
print(prefix, layer) ... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
""" """
assert isinstance(sublayer, Layer) or sublayer is None assert isinstance(sublayer, Layer) or sublayer is None
...@@ -1313,22 +1421,31 @@ class Layer: ...@@ -1313,22 +1421,31 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
>>> paddle.seed(100)
class MyLayer(paddle.nn.Layer):
def __init__(self): >>> class MyLayer(paddle.nn.Layer):
super().__init__() ... def __init__(self):
self._linear = paddle.nn.Linear(1, 1) ... super().__init__()
w_tmp = self.create_parameter([1,1]) ... self._linear = paddle.nn.Linear(1, 1)
self.add_parameter("w_tmp", w_tmp) ... w_tmp = self.create_parameter([1,1])
... self.add_parameter("w_tmp", w_tmp)
def forward(self, input): ...
return self._linear(input) ... def forward(self, input):
... return self._linear(input)
mylayer = MyLayer() ...
for name, param in mylayer.named_parameters(): >>> mylayer = MyLayer()
print(name, param) # will print w_tmp,_linear.weight,_linear.bias >>> for name, param in mylayer.named_parameters():
... print(name, param)
w_tmp Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-1.01448846]])
_linear.weight Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.18551230]])
_linear.bias Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])
""" """
if '_parameters' not in self.__dict__: if '_parameters' not in self.__dict__:
raise RuntimeError("super().__init__() should be called firstly.") raise RuntimeError("super().__init__() should be called firstly.")
...@@ -1580,23 +1697,21 @@ class Layer: ...@@ -1580,23 +1697,21 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
import numpy as np >>> import numpy as np
class Mylayer(paddle.nn.Layer): >>> class Mylayer(paddle.nn.Layer):
def __init__(self): ... def __init__(self):
super().__init__() ... super().__init__()
self.linear1 = paddle.nn.Linear(10, 10) ... self.linear1 = paddle.nn.Linear(10, 10)
self.linear2 = paddle.nn.Linear(5, 5) ... self.linear2 = paddle.nn.Linear(5, 5)
self.conv2d = paddle.nn.Conv2D(3, 2, 3) ... self.conv2d = paddle.nn.Conv2D(3, 2, 3)
self.embedding = paddle.nn.Embedding(128, 16) ... self.embedding = paddle.nn.Embedding(128, 16)
self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32')) ... self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))
...
mylayer = Mylayer() >>> mylayer = Mylayer()
print(dir(mylayer)) >>> print(dir(mylayer))
# only parts are shown, because of list have too much content ['__call__', '__class__', '__delattr__', '__dict__', ..., 'training']
# ['__call__', '__class__', ... , 'conv2d', 'embedding', 'h_0', 'linear1', 'linear2', ... , 'sublayers', 'train']
""" """
method = dir(self.__class__) method = dir(self.__class__)
attrs = list(self.__dict__.keys()) attrs = list(self.__dict__.keys())
...@@ -1756,12 +1871,12 @@ class Layer: ...@@ -1756,12 +1871,12 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
emb = paddle.nn.Embedding(10, 10) >>> emb = paddle.nn.Embedding(10, 10)
state_dict = emb.to_static_state_dict() >>> state_dict = emb.to_static_state_dict()
paddle.save( state_dict, "paddle_dy.pdparams") >>> paddle.save( state_dict, "paddle_dy.pdparams")
''' '''
return self._state_dict_impl( return self._state_dict_impl(
...@@ -1793,12 +1908,12 @@ class Layer: ...@@ -1793,12 +1908,12 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
emb = paddle.nn.Embedding(10, 10) >>> emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict() >>> state_dict = emb.state_dict()
paddle.save( state_dict, "paddle_dy.pdparams") >>> paddle.save( state_dict, "paddle_dy.pdparams")
''' '''
return self._state_dict_impl( return self._state_dict_impl(
...@@ -1825,14 +1940,14 @@ class Layer: ...@@ -1825,14 +1940,14 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
emb = paddle.nn.Embedding(10, 10) >>> emb = paddle.nn.Embedding(10, 10)
state_dict = emb.state_dict() >>> state_dict = emb.state_dict()
paddle.save(state_dict, "paddle_dy.pdparams") >>> paddle.save(state_dict, "paddle_dy.pdparams")
para_state_dict = paddle.load("paddle_dy.pdparams") >>> para_state_dict = paddle.load("paddle_dy.pdparams")
emb.set_state_dict(para_state_dict) >>> emb.set_state_dict(para_state_dict)
''' '''
missing_keys = [] missing_keys = []
...@@ -1950,32 +2065,40 @@ class Layer: ...@@ -1950,32 +2065,40 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
# required: skip >>> import paddle
import paddle >>> paddle.seed(2023)
linear=paddle.nn.Linear(2, 2) >>> linear=paddle.nn.Linear(2, 2)
linear.weight >>> linear.weight
#Parameter containing: >>> print(linear.weight)
#Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False, Parameter containing:
# [[-0.32770029, 0.38653070], Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [ 0.46030545, 0.08158520]]) [[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
linear.to(dtype='float64')
linear.weight >>> linear.to(dtype='float64')
#Tenor(shape=[2, 2], dtype=float64, place=CUDAPlace(0), stop_gradient=False, >>> linear.weight
# [[-0.32770029, 0.38653070], >>> print(linear.weight)
# [ 0.46030545, 0.08158520]]) Parameter containing:
Tensor(shape=[2, 2], dtype=float64, place=Place(gpu:0), stop_gradient=False,
linear.to(device='cpu') [[ 0.89611185, 0.04935038],
linear.weight [-0.58883440, 0.99266374]])
#Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False,
# [[-0.32770029, 0.38653070], >>> linear.to(device='cpu')
# [ 0.46030545, 0.08158520]]) >>> linear.weight
linear.to(device=paddle.CUDAPinnedPlace(), blocking=False) >>> print(linear.weight)
linear.weight Parameter containing:
#Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False, Tensor(shape=[2, 2], dtype=float64, place=Place(cpu), stop_gradient=False,
# [[-0.04989364, -0.56889004], [[ 0.89611185, 0.04935038],
# [ 0.33960250, 0.96878713]]) [-0.58883440, 0.99266374]])
>>> # doctest: +REQUIRES(env:GPU)
>>> linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
>>> linear.weight
>>> print(linear.weight)
Tensor(shape=[2, 2], dtype=float64, place=Place(gpu_pinned), stop_gradient=False,
[[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
''' '''
return self._to_impl( return self._to_impl(
...@@ -2161,21 +2284,25 @@ class Layer: ...@@ -2161,21 +2284,25 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> import paddle
class Model(paddle.nn.Layer): >>> class Model(paddle.nn.Layer):
def __init__(self): ... def __init__(self):
super().__init__() ... super().__init__()
self.linear = paddle.nn.Linear(1, 1) ... self.linear = paddle.nn.Linear(1, 1)
self.dropout = paddle.nn.Dropout(p=0.5) ... self.dropout = paddle.nn.Dropout(p=0.5)
...
def forward(self, input): ... def forward(self, input):
out = self.linear(input) ... out = self.linear(input)
out = self.dropout(out) ... out = self.dropout(out)
return out ... return out
...
model = Model() >>> model = Model()
model.float() >>> model.float()
Model(
(linear): Linear(in_features=1, out_features=1, dtype=paddle.float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
''' '''
excluded_layers = [] if excluded_layers is None else excluded_layers excluded_layers = [] if excluded_layers is None else excluded_layers
...@@ -2213,21 +2340,26 @@ class Layer: ...@@ -2213,21 +2340,26 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> # doctest: +SKIP('Paddle compiled by the user does not support float16, so keep original data type.')
>>> import paddle
class Model(paddle.nn.Layer):
def __init__(self): >>> class Model(paddle.nn.Layer):
super().__init__() ... def __init__(self):
self.linear = paddle.nn.Linear(1, 1) ... super().__init__()
self.dropout = paddle.nn.Dropout(p=0.5) ... self.linear = paddle.nn.Linear(1, 1)
... self.dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input): ...
out = self.linear(input) ... def forward(self, input):
out = self.dropout(out) ... out = self.linear(input)
return out ... out = self.dropout(out)
... return out
model = Model() ...
model.float16() >>> model = Model()
>>> model.float16()
Model(
(linear): Linear(in_features=1, out_features=1, dtype=float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
''' '''
if paddle.amp.is_float16_supported() is False: if paddle.amp.is_float16_supported() is False:
...@@ -2273,21 +2405,27 @@ class Layer: ...@@ -2273,21 +2405,27 @@ class Layer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle >>> # doctest: +SKIP('bfloat need V100 compile')
>>> import paddle
class Model(paddle.nn.Layer):
def __init__(self): >>> class Model(paddle.nn.Layer):
super().__init__() ... def __init__(self):
self.linear = paddle.nn.Linear(1, 1) ... super().__init__()
self.dropout = paddle.nn.Dropout(p=0.5) ... self.linear = paddle.nn.Linear(1, 1)
... self.dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input): ...
out = self.linear(input) ... def forward(self, input):
out = self.dropout(out) ... out = self.linear(input)
return out ... out = self.dropout(out)
... return out
model = Model() ...
model.bfloat16() >>> model = Model()
>>> model.bfloat16()
>>> #UserWarning: Paddle compiled by the user does not support bfloat16, so keep original data type.
Model(
(linear): Linear(in_features=1, out_features=1, dtype=float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
''' '''
if paddle.amp.is_bfloat16_supported() is False: if paddle.amp.is_bfloat16_supported() is False:
......