Unverified commit 4ff6999a authored by: cyberslack_lee, committed by: GitHub

[xdoctest] reformat example code with google style No.80-85 (#55806)

* [Doctest]fix No.80-85, test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

* fix

* Apply suggestions from code review

* Apply suggestions from code review

* Apply suggestions from code review

* test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview

---------
Co-authored-by: Nyakku Shigure <sigure.qaq@gmail.com>
Parent 128f5df8
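For context, the "google style" in the title is the xdoctest example format: every statement in an `Examples:` block is prefixed with `>>>` (continuation lines with `...`), and the expected output follows on unprefixed lines, so the snippets can be executed and checked automatically. A minimal sketch of the before/after pattern (illustrative only, not taken from the diff below):

    # before
    import paddle
    x = paddle.uniform([1, 3, 32], paddle.float32)
    # x.shape: [1, 3, 32]

    # after
    >>> import paddle
    >>> x = paddle.uniform([1, 3, 32], paddle.float32)
    >>> print(x.shape)
    [1, 3, 32]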
......@@ -219,13 +219,14 @@ def avg_pool1d(
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
data = paddle.uniform([1, 3, 32], paddle.float32)
AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
pool_out = AvgPool1D(data)
# pool_out shape: [1, 3, 16]
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], paddle.float32)
>>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
>>> pool_out = AvgPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
"""
"""NCL to NCHW"""
data_format = "NCHW"
......@@ -350,15 +351,16 @@ def avg_pool2d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
# avg pool2d
x = paddle.uniform([1, 3, 32, 32], paddle.float32)
out = F.avg_pool2d(x,
kernel_size=2,
stride=2, padding=0)
# out.shape [1, 3, 16, 16]
>>> # avg pool2d
>>> x = paddle.uniform([1, 3, 32, 32], paddle.float32)
>>> out = F.avg_pool2d(x,
... kernel_size=2,
... stride=2, padding=0)
>>> print(out.shape)
[1, 3, 16, 16]
"""
kernel_size = convert_to_list(kernel_size, 2, 'pool_size')
if stride is None:
......@@ -480,16 +482,16 @@ def avg_pool3d(
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32)
# avg pool3d
out = paddle.nn.functional.avg_pool3d(
x,
kernel_size = 2,
stride = 2,
padding=0)
# out.shape: [1, 3, 16, 16, 16]
>>> x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32)
>>> # avg pool3d
>>> out = paddle.nn.functional.avg_pool3d(x,
... kernel_size = 2,
... stride = 2,
... padding=0)
>>> print(out.shape)
[1, 3, 16, 16, 16]
"""
kernel_size = convert_to_list(kernel_size, 3, 'pool_size')
if stride is None:
......@@ -599,14 +601,18 @@ def max_pool1d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32], paddle.float32)
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16]
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.uniform([1, 3, 32], paddle.float32)
>>> pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
>>> print(pool_out.shape)
[1, 3, 16]
>>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
"""
"""NCL to NCHW"""
data_format = "NCHW"
......@@ -789,14 +795,18 @@ def max_unpool1d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
data = paddle.rand(shape=[1, 3, 16])
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 3, 8], indices shape: [1, 3, 8]
unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0)
# unpool_out shape: [1, 3, 16]
>>> data = paddle.rand(shape=[1, 3, 16])
>>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 8]
>>> print(indices.shape)
[1, 3, 8]
>>> unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 3, 16]
"""
"""NCL to NCHW"""
......@@ -926,18 +936,23 @@ def max_unpool2d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
data = paddle.rand(shape=[1,1,6,6])
pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 3, 3], indices shape: [1, 1, 3, 3]
unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0)
# unpool_out shape: [1, 1, 6, 6]
>>> data = paddle.rand(shape=[1, 1, 6, 6])
>>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 3, 3]
>>> print(indices.shape)
[1, 1, 3, 3]
>>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 1, 6, 6]
# specify a different output size than input size
unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7,7])
# unpool_out shape: [1, 1, 7, 7]
>>> # specify a different output size than input size
>>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7, 7])
>>> print(unpool_out.shape)
[1, 1, 7, 7]
"""
if x.ndim != 4:
......@@ -1073,14 +1088,18 @@ def max_unpool3d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
data = paddle.rand(shape=[1, 1, 4, 4, 6])
pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 2, 2, 3], indices shape: [1, 1, 2, 2, 3]
unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0)
# unpool_out shape: [1, 1, 4, 4, 6]
>>> data = paddle.rand(shape=[1, 1, 4, 4, 6])
>>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 2, 2, 3]
>>> print(indices.shape)
[1, 1, 2, 2, 3]
>>> unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0)
>>> print(unpool_out.shape)
[1, 1, 4, 4, 6]
"""
if x.ndim != 5:
......@@ -1200,16 +1219,20 @@ def max_pool2d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
# max pool2d
x = paddle.uniform([1, 3, 32, 32], paddle.float32)
out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0)
# output.shape [1, 3, 16, 16]
# for return_mask=True
out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True)
# out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
>>> import paddle
>>> import paddle.nn.functional as F
>>> # max pool2d
>>> x = paddle.uniform([1, 3, 32, 32], paddle.float32)
>>> out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0)
>>> print(out.shape)
[1, 3, 16, 16]
>>> # for return_mask=True
>>> out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(out.shape)
[1, 3, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16]
"""
kernel_size = convert_to_list(kernel_size, 2, 'pool_size')
......@@ -1359,24 +1382,30 @@ def max_pool3d(
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
# max pool3d
x = paddle.uniform([1, 3, 32, 32, 32])
output = F.max_pool3d(x,
kernel_size=2,
stride=2, padding=0)
# output.shape [1, 3, 16, 16, 16]
# for return_mask=True
x = paddle.uniform([1, 3, 32, 32, 32])
output, max_indices = paddle.nn.functional.max_pool3d(x,
kernel_size=2,
stride=2,
padding=0,
return_mask=True)
# output.shape [1, 3, 16, 16, 16], max_indices.shape [1, 3, 16, 16, 16]
>>> import paddle
>>> import paddle.nn.functional as F
>>> # max pool3d
>>> x = paddle.uniform([1, 3, 32, 32, 32])
>>> output = F.max_pool3d(x,
... kernel_size=2,
... stride=2,
... padding=0)
>>> print(output.shape)
[1, 3, 16, 16, 16]
>>> # for return_mask=True
>>> x = paddle.uniform([1, 3, 32, 32, 32])
>>> output, max_indices = paddle.nn.functional.max_pool3d(x,
... kernel_size=2,
... stride=2,
... padding=0,
... return_mask=True)
...
>>> print(output.shape)
[1, 3, 16, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16, 16]
"""
kernel_size = convert_to_list(kernel_size, 3, 'pool_size')
......@@ -1468,24 +1497,25 @@ def adaptive_avg_pool1d(x, output_size, name=None):
Examples:
.. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
#
import paddle
import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32])
pool_out = F.adaptive_avg_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16])
>>> # average adaptive pool1d
>>> # suppose input data with shape [N, C, L], `output_size` is m or [m],
>>> # output shape is [N, C, m], adaptive pool divides the L dimension
>>> # of the input data into m grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = sum(input[:, :, lstart: lend]) / (lend - lstart)
>>> #
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.uniform([1, 3, 32])
>>> pool_out = F.adaptive_avg_pool1d(data, output_size=16)
>>> print(pool_out.shape)
[1, 3, 16]
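>>> # A minimal NumPy sketch of the rule spelled out in the comments above;
>>> # the helper name is made up for this illustration and is not a Paddle API.
>>> import numpy as np
>>> def ref_adaptive_avg_pool1d(x, m):
...     N, C, L = x.shape
...     out = np.zeros((N, C, m), dtype=x.dtype)
...     for i in range(m):
...         lstart = int(np.floor(i * L / m))
...         lend = int(np.ceil((i + 1) * L / m))
...         out[:, :, i] = x[:, :, lstart:lend].mean(axis=-1)
...     return out
...
>>> ref_adaptive_avg_pool1d(np.ones((1, 3, 32), dtype="float32"), 16).shape
(1, 3, 16)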
"""
pool_type = 'avg'
_check_input(x, 3)
......@@ -1567,29 +1597,29 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle
x = paddle.rand([2, 3, 32, 32])
# x.shape is [2, 3, 32, 32]
out = paddle.nn.functional.adaptive_avg_pool2d(
x = x,
output_size=[3, 3])
# out.shape is [2, 3, 3, 3]
>>> # adaptive avg pool2d
>>> # suppose input data with shape [N, C, H, W], `output_size` is [m, n],
>>> # output shape is [N, C, m, n], adaptive pool divides the H and W dimensions
>>> # of the input data into m * n grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> x = paddle.rand([2, 3, 32, 32])
>>> # x.shape is [2, 3, 32, 32]
>>> out = paddle.nn.functional.adaptive_avg_pool2d(x = x,
... output_size=[3, 3])
>>> print(out.shape)
[2, 3, 3, 3]
"""
if data_format not in ["NCHW", "NHWC"]:
......@@ -1700,31 +1730,31 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
out = paddle.nn.functional.adaptive_avg_pool3d(
x = input_data,
output_size=[3, 3, 3])
# out.shape is [2, 3, 3, 3, 3]
>>> # adaptive avg pool3d
>>> # suppose input data with shape [N, C, D, H, W], `output_size` is [l, m, n],
>>> # output shape is [N, C, l, m, n], adaptive pool divides the D, H and W dimensions
>>> # of the input data into l * m * n grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] =
>>> #                 avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
>>> import paddle
>>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
>>> out = paddle.nn.functional.adaptive_avg_pool3d(x = input_data,
... output_size=[3, 3, 3])
>>> print(out.shape)
[2, 3, 3, 3, 3]
"""
if data_format not in ["NCDHW", "NDHWC"]:
......@@ -1815,26 +1845,30 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn.functional as F
data = paddle.uniform([1, 3, 32], paddle.float32)
pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16])
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True)
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16]
>>> # max adaptive pool1d
>>> # suppose input data with shape [N, C, L], `output_size` is m or [m],
>>> # output shape is [N, C, m], adaptive pool divides the L dimension
>>> # of the input data into m grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = max(input[:, :, lstart: lend])
>>> #
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.uniform([1, 3, 32], paddle.float32)
>>> pool_out = F.adaptive_max_pool1d(data, output_size=16)
>>> print(pool_out.shape)
[1, 3, 16]
>>> pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
"""
_check_input(x, 3)
......@@ -1901,28 +1935,28 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
Examples:
.. code-block:: python
# max adaptive pool2d
# suppose input data in the shape of [N, C, H, W], `output_size` is [m, n]
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m*n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
input_data = paddle.randn(shape=(2, 3, 32, 32))
out = paddle.nn.functional.adaptive_max_pool2d(
x = input_data,
output_size=[3, 3])
# out.shape is [2, 3, 3, 3]
>>> # max adaptive pool2d
>>> # suppose input data with shape [N, C, H, W], `output_size` is [m, n],
>>> # output shape is [N, C, m, n], adaptive pool divides the H and W dimensions
>>> # of the input data into m * n grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> input_data = paddle.randn(shape=(2, 3, 32, 32))
>>> out = paddle.nn.functional.adaptive_max_pool2d(x = input_data,
... output_size=[3, 3])
>>> print(out.shape)
[2, 3, 3, 3]
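>>> # A NumPy sketch of the rule in the comments above, with hstart/hend driven
>>> # by i and wstart/wend by j; the helper name is made up for this
>>> # illustration and is not a Paddle API.
>>> import numpy as np
>>> def ref_adaptive_max_pool2d(x, m, n):
...     N, C, H, W = x.shape
...     out = np.zeros((N, C, m, n), dtype=x.dtype)
...     for i in range(m):
...         for j in range(n):
...             hstart, hend = int(np.floor(i * H / m)), int(np.ceil((i + 1) * H / m))
...             wstart, wend = int(np.floor(j * W / n)), int(np.ceil((j + 1) * W / n))
...             out[:, :, i, j] = x[:, :, hstart:hend, wstart:wend].max(axis=(-2, -1))
...     return out
...
>>> ref_adaptive_max_pool2d(np.ones((2, 3, 32, 32), dtype="float32"), 3, 3).shape
(2, 3, 3, 3)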
"""
_check_input(x, 4)
......@@ -1987,31 +2021,31 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n]
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into m*n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
#
import paddle
input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
out = paddle.nn.functional.adaptive_max_pool3d(
x = input_data,
output_size=[3, 3, 3])
# out.shape is [2, 3, 3, 3, 3]
>>> # adaptive max pool3d
>>> # suppose input data with shape [N, C, D, H, W], `output_size` is [l, m, n],
>>> # output shape is [N, C, l, m, n], adaptive pool divides the D, H and W dimensions
>>> # of the input data into l * m * n grids evenly and performs pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32))
>>> out = paddle.nn.functional.adaptive_max_pool3d(x = input_data,
... output_size=[3, 3, 3])
>>> print(out.shape)
[2, 3, 3, 3, 3]
"""
_check_input(x, 5)
......
......@@ -88,50 +88,51 @@ def sparse_attention(
Examples:
.. code-block:: python
# required: skiptest
import paddle
paddle.disable_static()
# `query`, `key` and `value` all have shape [1, 1, 4, 2]
query = paddle.to_tensor([[[[0, 1, ], [2, 3],
[0, 1], [2, 3]]]], dtype="float32")
key = paddle.to_tensor([[[[0, 1], [2, 3],
[0, 1], [2, 3]]]], dtype="float32")
value = paddle.to_tensor([[[[0, 1], [2, 3],
[0, 1], [2, 3]]]], dtype="float32")
offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32")
columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32")
print(offset.shape) # (1, 1, 5)
print(columns.shape) # (1, 1, 8)
key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32")
attention_mask = paddle.to_tensor([[1, 0, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1]], dtype="float32")
output_mask = paddle.nn.functional.sparse_attention(query, key,
value, offset, columns,
key_padding_mask=key_padding_mask,
attn_mask=attention_mask)
print(output_mask)
# Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[[[0. , 1. ],
# [1.99830270, 2.99830270],
# [0. , 1. ],
# [0. , 1. ]]]])
output = paddle.nn.functional.sparse_attention(query, key,
value, offset, columns)
print(output)
# Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[[[1.60885942, 2.60885954],
# [1.99830270, 2.99830270],
# [1.60885942, 2.60885954],
# [1.99830270, 2.99830270]]]])
>>> # doctest: +SKIP('This API is only used in CUDA11.3 and above.')
>>> import paddle
>>> paddle.disable_static()
>>> # `query`, `key` and `value` all have shape [1, 1, 4, 2]
>>> query = paddle.to_tensor([[[[0, 1, ], [2, 3],
... [0, 1], [2, 3]]]], dtype="float32")
>>> key = paddle.to_tensor([[[[0, 1], [2, 3],
... [0, 1], [2, 3]]]], dtype="float32")
>>> value = paddle.to_tensor([[[[0, 1], [2, 3],
... [0, 1], [2, 3]]]], dtype="float32")
...
>>> offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32")
>>> columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32")
...
>>> print(offset.shape)
[1, 1, 5]
>>> print(columns.shape)
[1, 1, 8]
...
>>> key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32")
>>> attention_mask = paddle.to_tensor([[1, 0, 1, 1],
... [1, 1, 1, 1],
... [1, 1, 1, 1],
... [1, 1, 1, 1]], dtype="float32")
>>> output_mask = paddle.nn.functional.sparse_attention(query, key,
... value, offset, columns,
... key_padding_mask=key_padding_mask,
... attn_mask=attention_mask)
>>> print(output_mask)
Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[0. , 1. ],
[1.99830270, 2.99830270],
[0. , 1. ],
[0. , 1. ]]]])
>>> output = paddle.nn.functional.sparse_attention(query, key,
... value, offset, columns)
>>> print(output)
Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[1.60885942, 2.60885954],
[1.99830270, 2.99830270],
[1.60885942, 2.60885954],
[1.99830270, 2.99830270]]]])
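>>> # The (offset, columns) pair above reads like CSR row pointers and column
>>> # indices: decoding it per query row recovers the sparse attention pattern
>>> # used in this example (a plain-Python illustration, not a Paddle API).
>>> off = [0, 2, 4, 6, 8]
>>> cols = [0, 1, 0, 1, 2, 3, 2, 3]
>>> [cols[off[i]:off[i + 1]] for i in range(len(off) - 1)]
[[0, 1], [0, 1], [2, 3], [2, 3]]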
"""
if in_dynamic_mode():
(
......
......@@ -43,13 +43,15 @@ class CELU(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
m = paddle.nn.CELU(0.2)
out = m(x)
# [[-0.19865242, 6. ],
# [ 1. , 15.60000038]]
>>> import paddle
>>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
>>> m = paddle.nn.CELU(0.2)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.19865242, 6. ],
[ 1. , 15.60000038]])
"""
def __init__(self, alpha=1.0, name=None):
......@@ -91,13 +93,15 @@ class ELU(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
m = paddle.nn.ELU(0.2)
out = m(x)
# [[-0.12642411 6. ]
# [ 1. 15.6 ]]
>>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]])
>>> m = paddle.nn.ELU(0.2)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.12642412, 6. ],
[ 1. , 15.60000038]])
"""
def __init__(self, alpha=1.0, name=None):
......@@ -141,15 +145,20 @@ class GELU(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[-1, 0.5],[1, 1.5]])
m = paddle.nn.GELU()
out = m(x) # [-0.158655 0.345731 0.841345 1.39979]
m = paddle.nn.GELU(True)
out = m(x) # [-0.158808 0.345714 0.841192 1.39957]
>>> import paddle
>>> x = paddle.to_tensor([[-1, 0.5],[1, 1.5]])
>>> m = paddle.nn.GELU()
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> m = paddle.nn.GELU(True)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
"""
def __init__(self, approximate=False, name=None):
......@@ -193,11 +202,14 @@ class Hardshrink(Layer):
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-1, 0.3, 2.5])
m = paddle.nn.Hardshrink()
out = m(x) # [-1., 0., 2.5]
>>> x = paddle.to_tensor([-1, 0.3, 2.5])
>>> m = paddle.nn.Hardshrink()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0. , 2.50000000])
"""
def __init__(self, threshold=0.5, name=None):
......@@ -244,11 +256,14 @@ class Hardswish(Layer):
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-4., 5., 1.])
m = paddle.nn.Hardswish()
out = m(x) # [0., 5., 0.666667]
>>> x = paddle.to_tensor([-4., 5., 1.])
>>> m = paddle.nn.Hardswish()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0. , 5. , 0.66666669])
"""
def __init__(self, name=None):
......@@ -282,14 +297,14 @@ class Tanh(Layer):
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Tanh()
out = m(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.37994894, -0.19737533, 0.09966800, 0.29131261])
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
>>> m = paddle.nn.Tanh()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.37994900, -0.19737528, 0.09966799, 0.29131261])
"""
def __init__(self, name=None):
......@@ -333,11 +348,14 @@ class Hardtanh(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-1.5, 0.3, 2.5])
m = paddle.nn.Hardtanh()
out = m(x) # [-1., 0.3, 1.]
>>> x = paddle.to_tensor([-1.5, 0.3, 2.5])
>>> m = paddle.nn.Hardtanh()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-1. , 0.30000001, 1. ])
"""
def __init__(self, min=-1.0, max=1.0, name=None):
......@@ -386,25 +404,25 @@ class PReLU(Layer):
Examples:
.. code-block:: python
import paddle
paddle.set_default_dtype("float64")
data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]])
m = paddle.nn.PReLU(1, 0.25)
out = m(data)
print(out)
# [[[[-0.5 , 3. , -1. , 5. ],
# [ 3. , -1. , 5. , -1.5 ],
# [-1.75, -2. , 8. , 9. ]],
# [[ 1. , -0.5 , -0.75, 4. ],
# [-1.25, 6. , 7. , -2. ],
# [ 6. , 7. , 8. , 9. ]]]]
>>> import paddle
>>> data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
... [ 3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[ 1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]])
...
>>> m = paddle.nn.PReLU(1, 0.25)
>>> out = m(data)
>>> print(out)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[-0.50000000, 3. , -1. , 5. ],
[ 3. , -1. , 5. , -1.50000000],
[-1.75000000, -2. , 8. , 9. ]],
[[ 1. , -0.50000000, -0.75000000, 4. ],
[-1.25000000, 6. , 7. , -2. ],
[ 6. , 7. , 8. , 9. ]]]])
"""
def __init__(
......@@ -495,24 +513,26 @@ class RReLU(Layer):
Examples:
.. code-block:: python
import paddle
input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
[ 3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[ 1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
rrelu_layer = paddle.nn.RReLU(0.1, 0.3)
out = rrelu_layer(input_tensor)
print(out)
#[[[[-0.20000899 3. -0.88108218 5. ]
# [ 3. -0.55175185 5. -1.07761011]
# [-1.06806871 -1.98962009 8. 9. ]]
# [[ 1. -0.52382672 -0.65515128 4. ]
# [-1.37663394 6. 7. -2.34657836]
# [ 6. 7. 8. 9. ]]]]
>>> import paddle
>>> paddle.seed(2023)
>>> input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0],
... [ 3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[ 1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32')
...
>>> rrelu_layer = paddle.nn.RReLU(0.1, 0.3)
>>> out = rrelu_layer(input_tensor)
>>> print(out)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[-0.54633451, 3. , -0.81611776, 5. ],
[ 3. , -0.60768753, 5. , -1.68630385],
[-1.29360127, -1.45026064, 8. , 9. ]],
[[ 1. , -0.58808362, -0.74662417, 4. ],
[-1.01785135, 6. , 7. , -1.97268605],
[ 6. , 7. , 8. , 9. ]]]])
"""
def __init__(self, lower=1.0 / 8.0, upper=1.0 / 3.0, name=None):
......@@ -554,13 +574,14 @@ class ReLU(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-2., 0., 1.])
m = paddle.nn.ReLU()
out = m(x)
print(out)
# [0., 0., 1.]
>>> x = paddle.to_tensor([-2., 0., 1.])
>>> m = paddle.nn.ReLU()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0., 0., 1.])
"""
def __init__(self, name=None):
......@@ -596,13 +617,14 @@ class ReLU6(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-1., 0.3, 6.5])
m = paddle.nn.ReLU6()
out = m(x)
print(out)
# [0, 0.3, 6]
>>> x = paddle.to_tensor([-1., 0.3, 6.5])
>>> m = paddle.nn.ReLU6()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 0.30000000, 6. ])
"""
def __init__(self, name=None):
......@@ -644,13 +666,15 @@ class SELU(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]])
m = paddle.nn.SELU()
out = m(x)
print(out)
# [[0, 1.050701],[2.101402, 3.152103]]
>>> x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]])
>>> m = paddle.nn.SELU()
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[0. , 1.05070102],
[2.10140204, 3.15210295]])
"""
def __init__(
......@@ -703,11 +727,14 @@ class LeakyReLU(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
m = paddle.nn.LeakyReLU()
x = paddle.to_tensor([-2.0, 0, 1])
out = m(x) # [-0.02, 0., 1.]
>>> m = paddle.nn.LeakyReLU()
>>> x = paddle.to_tensor([-2.0, 0, 1])
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.02000000, 0. , 1. ])
"""
def __init__(self, negative_slope=0.01, name=None):
......@@ -744,11 +771,14 @@ class Sigmoid(Layer):
.. code-block:: python
import paddle
>>> import paddle
m = paddle.nn.Sigmoid()
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
out = m(x) # [0.7310586, 0.880797, 0.95257413, 0.98201376]
>>> m = paddle.nn.Sigmoid()
>>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.73105860, 0.88079703, 0.95257413, 0.98201376])
"""
def __init__(self, name=None):
......@@ -795,11 +825,14 @@ class Hardsigmoid(Layer):
.. code-block:: python
import paddle
>>> import paddle
m = paddle.nn.Hardsigmoid()
x = paddle.to_tensor([-4., 5., 1.])
out = m(x) # [0., 1, 0.666667]
>>> m = paddle.nn.Hardsigmoid()
>>> x = paddle.to_tensor([-4., 5., 1.])
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0. , 1. , 0.66666669])
"""
def __init__(self, name=None):
......@@ -836,11 +869,14 @@ class Softplus(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
m = paddle.nn.Softplus()
out = m(x) # [0.513015, 0.598139, 0.744397, 0.854355]
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32')
>>> m = paddle.nn.Softplus()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.51301527, 0.59813893, 0.74439669, 0.85435522])
"""
def __init__(self, beta=1, threshold=20, name=None):
......@@ -887,14 +923,14 @@ class Softshrink(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
m = paddle.nn.Softshrink()
out = m(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.39999998, 0. , 0. , 0.30000001])
>>> x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8])
>>> m = paddle.nn.Softshrink()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.39999998, 0. , 0. , 0.30000001])
"""
def __init__(self, threshold=0.5, name=None):
......@@ -929,14 +965,14 @@ class Softsign(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Softsign()
out = m(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.28571430, -0.16666666, 0.09090909, 0.23076925])
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
>>> m = paddle.nn.Softsign()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.28571430, -0.16666666, 0.09090909, 0.23076925])
"""
def __init__(self, name=None):
......@@ -970,14 +1006,14 @@ class Swish(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-2., 0., 1.])
m = paddle.nn.Swish()
out = m(x)
print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.23840584, 0. , 0.73105854])
>>> x = paddle.to_tensor([-2., 0., 1.])
>>> m = paddle.nn.Swish()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.23840584, 0. , 0.73105860])
"""
def __init__(self, name=None):
......@@ -1017,11 +1053,14 @@ class Mish(Layer):
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-5., 0., 5.])
m = paddle.nn.Mish()
out = m(x) # [-0.03357624, 0., 4.99955208]
>>> x = paddle.to_tensor([-5., 0., 5.])
>>> m = paddle.nn.Mish()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.03357624, 0. , 4.99955177])
"""
......@@ -1056,14 +1095,14 @@ class Tanhshrink(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
m = paddle.nn.Tanhshrink()
out = m(x)
print(out)
# Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [-0.02005106, -0.00262468, 0.00033200, 0.00868741])
>>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3])
>>> m = paddle.nn.Tanhshrink()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.02005100, -0.00262472, 0.00033201, 0.00868741])
"""
def __init__(self, name=None):
......@@ -1105,14 +1144,14 @@ class ThresholdedReLU(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([2., 0., 1.])
m = paddle.nn.ThresholdedReLU()
out = m(x)
print(out)
# Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [2., 0., 0.])
>>> x = paddle.to_tensor([2., 0., 1.])
>>> m = paddle.nn.ThresholdedReLU()
>>> out = m(x)
>>> print(out)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[2., 0., 0.])
"""
def __init__(self, threshold=1.0, name=None):
......@@ -1148,11 +1187,14 @@ class Silu(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
m = paddle.nn.Silu()
out = m(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ]
>>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
>>> m = paddle.nn.Silu()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.73105860, 1.76159406, 2.85772228, 3.92805505])
"""
def __init__(self, name=None):
......@@ -1187,11 +1229,14 @@ class LogSigmoid(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
m = paddle.nn.LogSigmoid()
out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
>>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0])
>>> m = paddle.nn.LogSigmoid()
>>> out = m(x)
>>> print(out)
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True,
[-0.31326166, -0.12692805, -0.04858733, -0.01814996])
"""
def __init__(self, name=None):
......@@ -1299,22 +1344,25 @@ class Softmax(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]], dtype='float32')
m = paddle.nn.Softmax()
out = m(x)
# [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
# [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
# [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
>>> import paddle
>>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
... [3.0, 4.0, 5.0, 6.0],
... [7.0, 8.0, 8.0, 9.0]],
... [[1.0, 2.0, 3.0, 4.0],
... [5.0, 6.0, 7.0, 8.0],
... [6.0, 7.0, 8.0, 9.0]]], dtype='float32')
>>> m = paddle.nn.Softmax()
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.07232949, 0.19661194, 0.19661194, 0.53444666]],
[[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428],
[0.03205860, 0.08714432, 0.23688284, 0.64391428]]])
"""
def __init__(self, axis=-1, name=None):
......@@ -1357,23 +1405,26 @@ class LogSoftmax(Layer):
Examples:
.. code-block:: python
import paddle
x = [[[-2.0, 3.0, -4.0, 5.0],
[3.0, -4.0, 5.0, -6.0],
[-7.0, -8.0, 8.0, 9.0]],
[[1.0, -2.0, -3.0, 4.0],
[-5.0, 6.0, 7.0, -8.0],
[6.0, 7.0, 8.0, 9.0]]]
m = paddle.nn.LogSoftmax()
x = paddle.to_tensor(x)
out = m(x)
# [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948]
# [ -2.1270514 -9.127051 -0.12705144 -11.127051 ]
# [-16.313261 -17.313261 -1.3132617 -0.31326184]]
# [[ -3.0518122 -6.051812 -7.051812 -0.051812 ]
# [-12.313267 -1.3132664 -0.3132665 -15.313267 ]
# [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]]
>>> import paddle
>>> x = [[[-2.0, 3.0, -4.0, 5.0],
... [ 3.0, -4.0, 5.0, -6.0],
... [-7.0, -8.0, 8.0, 9.0]],
... [[ 1.0, -2.0, -3.0, 4.0],
... [-5.0, 6.0, 7.0, -8.0],
... [ 6.0, 7.0, 8.0, 9.0]]]
>>> m = paddle.nn.LogSoftmax()
>>> x = paddle.to_tensor(x)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[-7.12783957 , -2.12783957 , -9.12783909 , -0.12783945 ],
[-2.12705135 , -9.12705135 , -0.12705141 , -11.12705135],
[-16.31326103, -17.31326103, -1.31326187 , -0.31326184 ]],
[[-3.05181193 , -6.05181217 , -7.05181217 , -0.05181199 ],
[-12.31326675, -1.31326652 , -0.31326646 , -15.31326675],
[-3.44018984 , -2.44018984 , -1.44018972 , -0.44018975 ]]])
"""
def __init__(self, axis=-1, name=None):
......@@ -1426,20 +1477,17 @@ class Maxout(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.rand([1, 2, 3, 4])
# [[[[0.5002636 0.22272532 0.17402348 0.2874594 ]
# [0.95313174 0.6228939 0.7129065 0.7087491 ]
# [0.02879342 0.88725346 0.61093384 0.38833922]]
# [[0.5231306 0.03807496 0.91661984 0.15602879]
# [0.666127 0.616567 0.30741522 0.24044901]
# [0.7142536 0.7351477 0.31588817 0.23782359]]]]
m = paddle.nn.Maxout(groups=2)
out = m(x)
# [[[[0.5231306 0.22272532 0.91661984 0.2874594 ]
# [0.95313174 0.6228939 0.7129065 0.7087491 ]
# [0.7142536 0.88725346 0.61093384 0.38833922]]]]
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand([1, 2, 3, 4])
>>> m = paddle.nn.Maxout(groups=2)
>>> out = m(x)
>>> print(out)
Tensor(shape=[1, 1, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.85139430, 0.95717543, 0.43864486, 0.51577556],
[0.84765935, 0.45680618, 0.39412445, 0.72039396],
[0.59444654, 0.78120756, 0.78364515, 0.90572405]]]])
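>>> # With groups=2, Maxout reduces the two input channels to a single output
>>> # channel by an element-wise maximum, so the result above should equal the
>>> # channel-wise max of x (an illustrative check under the same seed):
>>> bool(paddle.equal_all(out, paddle.maximum(x[:, 0:1], x[:, 1:2])))
True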
"""
def __init__(self, groups, axis=1, name=None):
......@@ -1473,25 +1521,20 @@ class Softmax2D(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.rand([1, 2, 3, 4])
# [[[[0.42496058 0.1172187 0.14664008 0.8151267 ]
# [0.24430142 0.42052492 0.60372984 0.79307914]
# [0.4539401 0.90458065 0.10235776 0.62009853]]
# [[0.11731581 0.16053623 0.05667042 0.91876775]
# [0.9413854 0.30770817 0.6788164 0.9543593 ]
# [0.4145064 0.75909156 0.11598814 0.73599935]]]]
m = paddle.nn.Softmax2D()
out = m(x)
# [[[[0.5763103 0.48917228 0.5224772 0.4741129 ]
# [0.3324591 0.5281743 0.48123717 0.45976716]
# [0.5098571 0.5363083 0.49659243 0.4710572 ]]
# [[0.42368975 0.51082766 0.47752273 0.5258871 ]
# [0.66754097 0.47182566 0.5187628 0.5402329 ]
# [0.49014282 0.46369177 0.50340754 0.5289428 ]]]]
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand([1, 2, 3, 4])
>>> m = paddle.nn.Softmax2D()
>>> out = m(x)
>>> print(out)
Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.42608523, 0.32081410, 0.39483935, 0.55642301],
[0.38131708, 0.45118359, 0.44891062, 0.46053308],
[0.35746980, 0.60766530, 0.38638926, 0.70425135]],
[[0.57391477, 0.67918587, 0.60516071, 0.44357699],
[0.61868292, 0.54881644, 0.55108935, 0.53946698],
[0.64253020, 0.39233473, 0.61361068, 0.29574865]]]])
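>>> # Softmax2D normalizes over the channel axis, so at every spatial location
>>> # the channel values above sum to one (an illustrative sanity check):
>>> bool(paddle.allclose(out.sum(axis=1), paddle.ones([1, 3, 4])))
True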
"""
......
......@@ -50,18 +50,22 @@ class Identity(Layer):
Examples:
.. code-block:: python
import paddle
input_tensor = paddle.randn(shape=[3, 2])
layer = paddle.nn.Identity()
out = layer(input_tensor)
# input_tensor: [[-0.32342386 -1.200079 ]
# [ 0.7979031 -0.90978354]
# [ 0.40597573 1.8095392 ]]
# out: [[-0.32342386 -1.200079 ]
# [ 0.7979031 -0.90978354]
# [ 0.40597573 1.8095392 ]]
>>> import paddle
>>> paddle.seed(100)
>>> input_tensor = paddle.randn(shape=[3, 2])
>>> layer = paddle.nn.Identity()
>>> out = layer(input_tensor)
>>> print(input_tensor)
Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1.41661501, 0.25904641],
[ 0.00979547, -0.30324230],
[-1.34256756, -0.76540256]])
>>> print(out)
Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1.41661501, 0.25904641],
[ 0.00979547, -0.30324230],
[-1.34256756, -0.76540256]])
"""
......@@ -120,28 +124,35 @@ class Linear(Layer):
Examples:
.. code-block:: python
import paddle
# Define the linear layer.
weight_attr = paddle.ParamAttr(
name="weight",
initializer=paddle.nn.initializer.Constant(value=0.5))
bias_attr = paddle.ParamAttr(
name="bias",
initializer=paddle.nn.initializer.Constant(value=1.0))
linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
# linear.weight: [[0.5 0.5 0.5 0.5]
# [0.5 0.5 0.5 0.5]]
# linear.bias: [1. 1. 1. 1.]
x = paddle.randn((3, 2), dtype="float32")
# x: [[-0.32342386 -1.200079 ]
# [ 0.7979031 -0.90978354]
# [ 0.40597573 1.8095392 ]]
y = linear(x)
# y: [[0.23824859 0.23824859 0.23824859 0.23824859]
# [0.9440598 0.9440598 0.9440598 0.9440598 ]
# [2.1077576 2.1077576 2.1077576 2.1077576 ]]
>>> import paddle
>>> paddle.seed(100)
>>> # Define the linear layer.
>>> weight_attr = paddle.ParamAttr(
... name="weight",
... initializer=paddle.nn.initializer.Constant(value=0.5))
>>> bias_attr = paddle.ParamAttr(
... name="bias",
... initializer=paddle.nn.initializer.Constant(value=1.0))
>>> linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr)
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.50000000, 0.50000000, 0.50000000, 0.50000000],
[0.50000000, 0.50000000, 0.50000000, 0.50000000]])
>>> print(linear.bias)
Parameter containing:
Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False,
[1., 1., 1., 1.])
>>> x = paddle.randn((3, 2), dtype="float32")
>>> y = linear(x)
>>> print(y)
Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.42121571, 0.42121571, 0.42121571, 0.42121571],
[ 0.85327661, 0.85327661, 0.85327661, 0.85327661],
[-0.05398512, -0.05398512, -0.05398512, -0.05398512]])
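>>> # Linear computes y = x @ weight + bias, with weight of shape
>>> # [in_features, out_features]; a quick illustrative check of that identity:
>>> bool(paddle.allclose(y, paddle.matmul(x, linear.weight) + linear.bias))
True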
"""
def __init__(
......@@ -237,19 +248,22 @@ class LinearCompress(Layer):
Examples:
.. code-block:: python
import paddle
# Define the linear layer.
paddle.set_default_dtype('float16')
weight_attr = paddle.ParamAttr(
name="weight",
initializer=paddle.nn.initializer.Constant(value=0.5))
bias_attr = paddle.ParamAttr(
name="bias",
initializer=paddle.nn.initializer.Constant(value=1.0))
linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only')
x = paddle.randn((3, 128), dtype="float16")
y = linear(x)
>>> import paddle
>>> paddle.seed(100)
>>> # Define the linear layer.
>>> paddle.set_default_dtype('float16')
>>> weight_attr = paddle.ParamAttr(
... name="weight",
... initializer=paddle.nn.initializer.Constant(value=0.5))
>>> bias_attr = paddle.ParamAttr(
... name="bias",
... initializer=paddle.nn.initializer.Constant(value=1.0))
>>> linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only')
>>> x = paddle.randn((3, 128), dtype="float16")
>>> y = linear(x)
"""
def __init__(
......@@ -527,14 +541,14 @@ class Upsample(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
input = paddle.rand([2,3,6,10], dtype="float32")
upsample_out = paddle.nn.Upsample(size=[12,12])
>>> input = paddle.rand([2, 3, 6, 10], dtype="float32")
>>> upsample_out = paddle.nn.Upsample(size=[12, 12])
output = upsample_out(x=input)
print(output.shape)
# [2, 3, 12, 12]
>>> output = upsample_out(x=input)
>>> print(output.shape)
[2, 3, 12, 12]
"""
......@@ -627,15 +641,15 @@ class UpsamplingNearest2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
input_data = paddle.rand(shape=(2,3,6,10)).astype("float32")
upsample_out = paddle.nn.UpsamplingNearest2D(size=[12,12])
input = paddle.to_tensor(input_data)
output = upsample_out(x=input)
print(output.shape)
# [2L, 3L, 12L, 12L]
>>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32")
>>> upsample_out = paddle.nn.UpsamplingNearest2D(size=[12, 12])
>>> input = paddle.to_tensor(input_data)
>>> output = upsample_out(x=input)
>>> print(output.shape)
[2, 3, 12, 12]
"""
def __init__(
......@@ -713,15 +727,15 @@ class UpsamplingBilinear2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
input_data = paddle.rand(shape=(2,3,6,10)).astype("float32")
upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12,12])
input = paddle.to_tensor(input_data)
output = upsample_out(x=input)
print(output.shape)
# [2L, 3L, 12L, 12L]
>>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32")
>>> upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12, 12])
>>> input = paddle.to_tensor(input_data)
>>> output = upsample_out(x=input)
>>> print(output.shape)
[2, 3, 12, 12]
"""
def __init__(
......@@ -798,15 +812,19 @@ class Bilinear(Layer):
Tensor: A 2-D Tensor of shape [batch_size, out_features].
Examples:
.. code-block:: python
.. code-block:: python
>>> import paddle
import paddle
>>> layer1 = paddle.rand((5, 5)).astype('float32')
>>> layer2 = paddle.rand((5, 4)).astype('float32')
>>> bilinear = paddle.nn.Bilinear(in1_features=5,
... in2_features=4,
... out_features=1000)
layer1 = paddle.rand((5, 5)).astype('float32')
layer2 = paddle.rand((5, 4)).astype('float32')
bilinear = paddle.nn.Bilinear(
in1_features=5, in2_features=4, out_features=1000)
result = bilinear(layer1,layer2) # result shape [5, 1000]
>>> result = bilinear(layer1,layer2)
>>> print(result.shape)
[5, 1000]
"""
......@@ -897,23 +915,24 @@ class Dropout(Layer):
Examples:
.. code-block:: python
import paddle
>>> import paddle
>>> paddle.seed(2023)
x = paddle.to_tensor([[1,2,3], [4,5,6]], dtype="float32")
m = paddle.nn.Dropout(p=0.5)
>>> x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype="float32")
>>> m = paddle.nn.Dropout(p=0.5)
y_train = m(x)
print(y_train)
# Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[2., 0., 6.],
# [0., 0., 0.]])
>>> y_train = m(x)
>>> print(y_train)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[2., 4., 0.],
[8., 0., 0.]])
m.eval() # switch the model to test phase
y_test = m(x)
print(y_test)
# Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[1., 2., 3.],
# [4., 5., 6.]])
>>> m.eval() # switch the model to test phase
>>> y_test = m(x)
>>> print(y_test)
Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[1., 2., 3.],
[4., 5., 6.]])
"""
def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None):
......@@ -967,36 +986,33 @@ class Dropout2D(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.rand([2, 2, 1, 3], dtype="float32")
print(x)
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[0.10052059, 0.93890846, 0.45351565]],
# [[0.47507706, 0.45021373, 0.11331241]]],
# [[[0.53358698, 0.97375143, 0.34997326]],
# [[0.24758087, 0.52628899, 0.17970420]]]])
m = paddle.nn.Dropout2D(p=0.5)
y_train = m(x)
print(y_train)
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[0. , 0. , 0. ]],
# [[0.95015413, 0.90042746, 0.22662482]]],
# [[[1.06717396, 1.94750285, 0.69994652]],
# [[0. , 0. , 0. ]]]])
m.eval() # switch the model to test phase
y_test = m(x)
print(y_test)
# Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[0.10052059, 0.93890846, 0.45351565]],
# [[0.47507706, 0.45021373, 0.11331241]]],
# [[[0.53358698, 0.97375143, 0.34997326]],
# [[0.24758087, 0.52628899, 0.17970420]]]])
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand([2, 2, 1, 3], dtype="float32")
>>> print(x)
Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.55355281, 0.20714243, 0.01162981]],
[[0.51577556, 0.36369765, 0.26091650]]],
[[[0.18905126, 0.56219709, 0.00808361]],
[[0.78120756, 0.32112977, 0.90572405]]]])
>>> m = paddle.nn.Dropout2D(p=0.5)
>>> y_train = m(x)
>>> print(y_train)
Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[1.10710561, 0.41428486, 0.02325963]],
[[1.03155112, 0.72739530, 0.52183300]]],
[[[0. , 0. , 0. ]],
[[0. , 0. , 0. ]]]])
>>> m.eval() # switch the model to test phase
>>> y_test = m(x)
>>> print(y_test)
Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0.55355281, 0.20714243, 0.01162981]],
[[0.51577556, 0.36369765, 0.26091650]]],
[[[0.18905126, 0.56219709, 0.00808361]],
[[0.78120756, 0.32112977, 0.90572405]]]])
"""
def __init__(self, p=0.5, data_format='NCHW', name=None):
......@@ -1048,48 +1064,35 @@ class Dropout3D(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3))
print(x)
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[[0. , 1. , 2. ],
# [3. , 4. , 5. ]],
# [[6. , 7. , 8. ],
# [9. , 10., 11.]]],
# [[[12., 13., 14.],
# [15., 16., 17.]],
# [[18., 19., 20.],
# [21., 22., 23.]]]]])
m = paddle.nn.Dropout3D(p=0.5)
y_train = m(x)
print(y_train)
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[[0. , 2. , 4. ],
# [6. , 8. , 10.]],
# [[12., 14., 16.],
# [18., 20., 22.]]],
# [[[0. , 0. , 0. ],
# [0. , 0. , 0. ]],
# [[0. , 0. , 0. ],
# [0. , 0. , 0. ]]]]])
m.eval() # switch the model to test phase
y_test = m(x)
print(y_test)
# Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[[[[0. , 1. , 2. ],
# [3. , 4. , 5. ]],
# [[6. , 7. , 8. ],
# [9. , 10., 11.]]],
# [[[12., 13., 14.],
# [15., 16., 17.]],
# [[18., 19., 20.],
# [21., 22., 23.]]]]])
>>> import paddle
>>> x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3))
>>> print(x)
Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[[0. , 1. , 2. ],
[3. , 4. , 5. ]],
[[6. , 7. , 8. ],
[9. , 10., 11.]]],
[[[12., 13., 14.],
[15., 16., 17.]],
[[18., 19., 20.],
[21., 22., 23.]]]]])
>>> m = paddle.nn.Dropout3D(p=0.5)
>>> y_train = m(x)
>>> m.eval() # switch the model to test phase
>>> y_test = m(x)
>>> print(y_test)
Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[[0. , 1. , 2. ],
[3. , 4. , 5. ]],
[[6. , 7. , 8. ],
[9. , 10., 11.]]],
[[[12., 13., 14.],
[15., 16., 17.]],
[[18., 19., 20.],
[21., 22., 23.]]]]])
"""
def __init__(self, p=0.5, data_format='NCDHW', name=None):
......@@ -1139,22 +1142,23 @@ class AlphaDropout(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32")
m = paddle.nn.AlphaDropout(p=0.5)
y_train = m(x)
print(y_train)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[-0.77919382, 1.66559887],
# [-0.77919382, -0.77919382]])
m.eval() # switch the model to test phase
y_test = m(x)
print(y_test)
# Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [[-1., 1.],
# [-1., 1.]])
>>> import paddle
>>> paddle.seed(2023)
>>> x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32")
>>> m = paddle.nn.AlphaDropout(p=0.5)
>>> y_train = m(x)
>>> print(y_train)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.10721093, 1.66559887],
[-0.77919382, 1.66559887]])
>>> m.eval() # switch the model to test phase
>>> y_test = m(x)
>>> print(y_test)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-1., 1.],
[-1., 1.]])
"""
def __init__(self, p=0.5, name=None):
......@@ -1201,18 +1205,19 @@ class Pad1D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
input_shape = (1, 2, 3)
pad = [1, 2]
mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad1D(padding=pad, mode=mode)
result = my_pad(data)
print(result)
# [[[0. 1. 2. 3. 0. 0.]
# [0. 4. 5. 6. 0. 0.]]]
>>> import paddle
>>> import paddle.nn as nn
>>> input_shape = (1, 2, 3)
>>> pad = [1, 2]
>>> mode = "constant"
>>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
>>> my_pad = nn.Pad1D(padding=pad, mode=mode)
>>> result = my_pad(data)
>>> print(result)
Tensor(shape=[1, 2, 6], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[0., 1., 2., 3., 0., 0.],
[0., 4., 5., 6., 0., 0.]]])
"""
def __init__(
......@@ -1271,21 +1276,22 @@ class Pad2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
input_shape = (1, 1, 2, 3)
pad = [1, 0, 1, 2]
mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad2D(padding=pad, mode=mode)
result = my_pad(data)
print(result)
# [[[[0. 0. 0. 0.]
# [0. 1. 2. 3.]
# [0. 4. 5. 6.]
# [0. 0. 0. 0.]
# [0. 0. 0. 0.]]]]
>>> import paddle
>>> import paddle.nn as nn
>>> input_shape = (1, 1, 2, 3)
>>> pad = [1, 0, 1, 2]
>>> mode = "constant"
>>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
>>> my_pad = nn.Pad2D(padding=pad, mode=mode)
>>> result = my_pad(data)
>>> print(result)
Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0., 0., 0., 0.],
[0., 1., 2., 3.],
[0., 4., 5., 6.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]]])
"""
def __init__(
......@@ -1336,26 +1342,24 @@ class ZeroPad2D(Layer):
The data type is the same as that of input x.
Examples:
Examples are as follows.
.. code-block:: python
import paddle
import paddle.nn as nn
input_shape = paddle.to_tensor([1, 1, 2, 3])
pad = [1, 0, 1, 2]
data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1
my_pad = nn.ZeroPad2D(padding=pad)
result = my_pad(data)
print(result)
# [[[[0. 0. 0. 0.]
# [0. 1. 2. 3.]
# [0. 4. 5. 6.]
# [0. 0. 0. 0.]
# [0. 0. 0. 0.]]]]
>>> import paddle
>>> import paddle.nn as nn
>>> input_shape = paddle.to_tensor([1, 1, 2, 3])
>>> pad = [1, 0, 1, 2]
>>> data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1
>>> my_pad = nn.ZeroPad2D(padding=pad)
>>> result = my_pad(data)
>>> print(result)
Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0., 0., 0., 0.],
[0., 1., 2., 3.],
[0., 4., 5., 6.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]]])
"""
def __init__(self, padding, data_format="NCHW", name=None):
......@@ -1412,21 +1416,22 @@ class Pad3D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
input_shape = (1, 1, 1, 2, 3)
pad = [1, 0, 1, 2, 0, 0]
mode = "constant"
data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
my_pad = nn.Pad3D(padding=pad, mode=mode)
result = my_pad(data)
print(result)
# [[[[[0. 0. 0. 0.]
# [0. 1. 2. 3.]
# [0. 4. 5. 6.]
# [0. 0. 0. 0.]
# [0. 0. 0. 0.]]]]]
>>> import paddle
>>> import paddle.nn as nn
>>> input_shape = (1, 1, 1, 2, 3)
>>> pad = [1, 0, 1, 2, 0, 0]
>>> mode = "constant"
>>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1
>>> my_pad = nn.Pad3D(padding=pad, mode=mode)
>>> result = my_pad(data)
>>> print(result)
Tensor(shape=[1, 1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[[0., 0., 0., 0.],
[0., 1., 2., 3.],
[0., 4., 5., 6.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]]]]])
"""
def __init__(
......@@ -1476,13 +1481,13 @@ class CosineSimilarity(Layer):
Case 0:
x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ]
[0.48949873 0.5797396 0.65444374 0.66510963]
[0.1031398 0.9614342 0.08365563 0.6796464 ]
[0.10760343 0.7461209 0.7726148 0.5801006 ]]
[0.48949873 0.5797396 0.65444374 0.66510963]
[0.1031398 0.9614342 0.08365563 0.6796464 ]
[0.10760343 0.7461209 0.7726148 0.5801006 ]]
x2 = [[0.62913156 0.1536727 0.9847992 0.04591406]
[0.9098952 0.15715368 0.8671125 0.3156102 ]
[0.4427798 0.54136837 0.5276275 0.32394758]
[0.3769419 0.8535014 0.48041078 0.9256797 ]]
axis = 1
eps = 1e-8
Out: [0.5275037 0.8368967 0.75037485 0.9245899]
......@@ -1490,19 +1495,19 @@ class CosineSimilarity(Layer):
Code Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn

>>> x1 = paddle.to_tensor([[1., 2., 3.],
... [2., 3., 4.]], dtype="float32")
>>> x2 = paddle.to_tensor([[8., 3., 3.],
... [2., 3., 4.]], dtype="float32")

>>> cos_sim_func = nn.CosineSimilarity(axis=0)
>>> result = cos_sim_func(x1, x2)
>>> print(result)
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.65079135, 0.98058069, 1. ])
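>>> # e.g. the first entry: (1*8 + 2*2) / (sqrt(1**2 + 2**2) * sqrt(8**2 + 2**2)) = 12 / 18.439 ≈ 0.6508.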
"""
def __init__(self, axis=1, eps=1e-8):
......@@ -1544,19 +1549,16 @@ class Embedding(Layer):
output is a Tensor:
out.shape = [3, 2, 16]
out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
[0.345421456, 0.524563927, ..., 0.144534654]],
[[0.345249859, 0.124939536, ..., 0.194353745],
[0.945345345, 0.435394634, ..., 0.435345365]],
[[0.945345345, 0.435394634, ..., 0.435345365],
[0.0, 0.0, ..., 0.0 ]]] # padding data
Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
It will pad all-zero data when ids is 127.
Parameters:
num_embeddings (int): Just one element which indicates the size of the dictionary of embeddings.
embedding_dim (int): Just one element which indicates the size of each embedding vector.
padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
......@@ -1574,9 +1576,8 @@ class Embedding(Layer):
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example for details.
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Attribute:
**weight** (Parameter): the learnable weights of this layer.
......@@ -1588,36 +1589,36 @@ class Embedding(Layer):
.. code-block:: python
import paddle
x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False)
embedding = paddle.nn.Embedding(4, 3, sparse=True)
w0 = paddle.to_tensor([[0., 0., 0.],
[1., 1., 1.],
[2., 2., 2.],
[3., 3., 3.]], dtype="float32")
embedding.weight.set_value(w0)
print(embedding.weight)
# Tensor(shape=[4, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[0., 0., 0.],
# [1., 1., 1.],
# [2., 2., 2.],
# [3., 3., 3.]])
adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
adam.clear_grad()
out = embedding(x)
print(out)
# Tensor(shape=[3, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[[0., 0., 0.]],
# [[1., 1., 1.]],
# [[3., 3., 3.]]])
out.backward()
adam.step()
>>> import paddle
>>> x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False)
>>> embedding = paddle.nn.Embedding(4, 3, sparse=True)
>>> w0 = paddle.to_tensor([[0., 0., 0.],
... [1., 1., 1.],
... [2., 2., 2.],
... [3., 3., 3.]], dtype="float32")
>>> embedding.weight.set_value(w0)
>>> print(embedding.weight)
Parameter containing:
Tensor(shape=[4, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0., 0., 0.],
[1., 1., 1.],
[2., 2., 2.],
[3., 3., 3.]])
>>> adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
>>> adam.clear_grad()
>>> out = embedding(x)
>>> print(out)
Tensor(shape=[3, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[0., 0., 0.]],
[[1., 1., 1.]],
[[3., 3., 3.]]])
>>> out.backward()
>>> adam.step()
"""
......@@ -1708,36 +1709,35 @@ class Unfold(Layer):
Parameters:
kernel_sizes(int|list): The size of convolution kernel, should be [k_h, k_w]
or an integer k treated as [k, k].
strides(int|list, optional): The strides, should be [stride_h, stride_w]
or an integer stride treated as [stride, stride]. For default, strides will be [1, 1].
paddings(int|list, optional): The paddings of each dimension, should be
[padding_top, padding_left, padding_bottom, padding_right] or [padding_h, padding_w]
or an integer padding. If [padding_h, padding_w] was given, it will be expanded to
[padding_h, padding_w, padding_h, padding_w]. If an integer padding was given,
[padding, padding, padding, padding] will be used. For default,
paddings will be [0, 0, 0, 0].
dilations(int|list, optional): The dilations of convolution kernel, should be
[dilation_h, dilation_w], or an integer dilation treated as [dilation, dilation].
For default, it will be [1, 1].
name(str, optional): The default value is None. Normally there is no need for the user to
set this property. For more information, please refer to :ref:`api_guide_Name`.
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn

>>> x = paddle.randn((100, 3, 224, 224))
>>> unfold = nn.Unfold(kernel_sizes=[3, 3])
>>> result = unfold(x)
>>> print(result.shape)
[100, 27, 49284]
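>>> # Shape check: 27 = C * k_h * k_w = 3 * 3 * 3, and 49284 = 222 * 222, since each
>>> # spatial dim gives 224 - 3 + 1 = 222 sliding positions with the default
>>> # stride=1, padding=0 and dilation=1.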
"""
def __init__(
......@@ -1790,21 +1790,21 @@ class Fold(Layer):
C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]} \\
Parameters:
output_sizes(list): The size of output size, should be [output_size_h, output_size_w]
or an integer o treated as [o, o].
kernel_sizes(int|list|tuple): The size of convolution kernel, should be [k_h, k_w]
or an integer k treated as [k, k].
strides(int|list|tuple, optional): The strides, should be [stride_h, stride_w]
or an integer stride treated as [stride, stride].
For default, strides will be [1, 1].
paddings(int|list|tuple, optional): The paddings of each dimension, should be
[padding_top, padding_left, padding_bottom, padding_right]
or [padding_h, padding_w] or an integer padding.
If [padding_h, padding_w] was given, it will be expanded to
[padding_h, padding_w, padding_h, padding_w]. If an integer
padding was given, [padding, padding, padding, padding] will
be used. For default, paddings will be [0, 0, 0, 0].
dilations(int|list|tuple, optional): The dilations of convolution kernel, should be
[dilation_h, dilation_w], or an integer dilation treated as
[dilation, dilation]. For default, it will be [1, 1].
name(str, optional): The default value is None.
......@@ -1820,13 +1820,14 @@ class Fold(Layer):
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn

>>> x = paddle.randn([2, 3*2*2, 12])
>>> fold = nn.Fold(output_sizes=[4, 5], kernel_sizes=2)
>>> y = fold(x)
>>> print(y.shape)
[2, 3, 4, 5]
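>>> # Shape check: C_out = 12 / (2 * 2) = 3, and the 12 input columns match the
>>> # (4 - 2 + 1) * (5 - 2 + 1) = 12 positions of a 2x2 kernel sliding over the
>>> # 4x5 output with the default stride=1 and no padding.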
"""
def __init__(
......@@ -1886,12 +1887,13 @@ class Flatten(Layer):
.. code-block:: python
>>> import paddle

>>> inp = paddle.ones([5, 2, 3, 4]).astype('float32')
>>> flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
>>> y = flatten(inp)
>>> print(y.shape)
[5, 6, 4]
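>>> # Axes 1 through 2 are merged into a single axis of size 2 * 3 = 6.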
"""
......@@ -1928,15 +1930,15 @@ class Unflatten(Layer):
.. code-block:: python
>>> import paddle

>>> x = paddle.randn(shape=[4, 6, 8])
>>> shape = [2, 3]
>>> axis = 1
>>> unflatten = paddle.nn.Unflatten(axis, shape)
>>> res = unflatten(x)
>>> print(res.shape)
[4, 2, 3, 8]
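>>> # Axis 1 (size 6) is split into the shape [2, 3].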
"""
......
......@@ -357,22 +357,38 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
temp = self._linear(input)
temp = self._dropout(temp)
return temp
x = paddle.randn([10, 1], 'float32')
mylayer = MyLayer()
mylayer.eval() # set mylayer._dropout to eval mode
out = mylayer(x)
mylayer.train() # set mylayer._dropout to train mode
out = mylayer(x)
>>> import paddle
>>> paddle.seed(100)
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... self._dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... temp = self._linear(input)
... temp = self._dropout(temp)
... return temp
...
>>> x = paddle.randn([10, 1], 'float32')
>>> mylayer = MyLayer()
>>> mylayer.eval() # set mylayer._dropout to eval mode
>>> out = mylayer(x)
>>> mylayer.train() # set mylayer._dropout to train mode
>>> out = mylayer(x)
>>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-3.44879317],
[ 0. ],
[ 0. ],
[-0.73825276],
[ 0. ],
[ 0. ],
[ 0.64444798],
[-3.22185946],
[ 0. ],
[-0.68077987]])
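>>> # In train mode, Dropout(p=0.5) zeroes roughly half of the entries at random and
>>> # scales the surviving ones by 1 / (1 - p) = 2 (the default upscale_in_train mode).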
"""
def __init__(self, name_scope=None, dtype="float32"):
......@@ -419,25 +435,38 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
temp = self._linear(input)
temp = self._dropout(temp)
return temp
x = paddle.randn([10, 1], 'float32')
mylayer = MyLayer()
mylayer.eval() # set mylayer._dropout to eval mode
out = mylayer(x)
mylayer.train() # set mylayer._dropout to train mode
out = mylayer(x)
>>> import paddle
>>> paddle.seed(100)
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... self._dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... temp = self._linear(input)
... temp = self._dropout(temp)
... return temp
...
>>> x = paddle.randn([10, 1], 'float32')
>>> mylayer = MyLayer()
>>> mylayer.eval() # set mylayer._dropout to eval mode
>>> out = mylayer(x)
>>> mylayer.train() # set mylayer._dropout to train mode
>>> out = mylayer(x)
>>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-3.44879317],
[ 0. ],
[ 0. ],
[-0.73825276],
[ 0. ],
[ 0. ],
[ 0.64444798],
[-3.22185946],
[ 0. ],
[-0.68077987]])
"""
# global setting in dygraph
......@@ -461,24 +490,35 @@ class Layer:
Example::
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
temp = self._linear(input)
temp = self._dropout(temp)
return temp
x = paddle.randn([10, 1], 'float32')
mylayer = MyLayer()
mylayer.eval() # set mylayer._dropout to eval mode
out = mylayer(x)
print(out)
>>> import paddle
>>> paddle.seed(100)
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... self._dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... temp = self._linear(input)
... temp = self._dropout(temp)
... return temp
...
>>> x = paddle.randn([10, 1], 'float32')
>>> mylayer = MyLayer()
>>> mylayer.eval() # set mylayer._dropout to eval mode
>>> out = mylayer(x)
>>> print(out)
Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-1.72439659],
[ 0.31532824],
[ 0.01192369],
[-0.36912638],
[-1.63426113],
[-0.93169814],
[ 0.32222399],
[-1.61092973],
[ 0.77209264],
[-0.34038994]])
"""
# global setting in dygraph
......@@ -506,22 +546,41 @@ class Layer:
Example::
.. code-block:: python
import paddle
import paddle.nn as nn
net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
def init_weights(layer):
if type(layer) == nn.Linear:
print('before init weight:', layer.weight.numpy())
new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
layer.weight.set_value(new_weight)
print('after init weight:', layer.weight.numpy())
net.apply(init_weights)
print(net.state_dict())
>>> import paddle
>>> import paddle.nn as nn
>>> paddle.seed(2023)
>>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
>>> def init_weights(layer):
... if type(layer) == nn.Linear:
... print('before init weight:', layer.weight.numpy())
... new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
... layer.weight.set_value(new_weight)
... print('after init weight:', layer.weight.numpy())
...
>>> net.apply(init_weights)
>>> print(net.state_dict())
before init weight: [[ 0.89611185 0.04935038]
[-0.5888344 0.99266374]]
after init weight: [[0.9 0.9]
[0.9 0.9]]
before init weight: [[-0.18615901 -0.22924072]
[ 1.1517721 0.59859073]]
after init weight: [[0.9 0.9]
[0.9 0.9]]
OrderedDict([('0.weight', Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.89999998, 0.89999998],
[0.89999998, 0.89999998]])), ('0.bias', Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0.])), ('1.weight', Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.89999998, 0.89999998],
[0.89999998, 0.89999998]])), ('1.bias', Parameter containing:
Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0.]))])
"""
for layer in self.children():
layer.apply(fn)
......@@ -541,18 +600,19 @@ class Layer:
Example::
.. code-block:: python
>>> import paddle

>>> class LinearNet(paddle.nn.Layer):
... def __init__(self):
... super().__init__(name_scope = "demo_linear_net")
... self._linear = paddle.nn.Linear(1, 1)
...
... def forward(self, x):
... return self._linear(x)
...
>>> linear_net = LinearNet()
>>> print(linear_net.full_name())
demo_linear_net_0
"""
return self._full_name
......@@ -576,33 +636,33 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle
>>> import numpy as np

>>> # the forward_post_hook changes the output of the layer: output = output * 2
>>> def forward_post_hook(layer, input, output):
... # user can use layer, input and output for information statistics tasks
...
... # change the output
... return output * 2
...
>>> linear = paddle.nn.Linear(13, 5)

>>> # register the hook
>>> forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

>>> value1 = np.arange(26).reshape(2, 13).astype("float32")
>>> in1 = paddle.to_tensor(value1)

>>> out0 = linear(in1)

>>> # remove the hook
>>> forward_post_hook_handle.remove()

>>> out1 = linear(in1)

>>> # the hook changes the linear's output to output * 2, so out0 is equal to out1 * 2.
>>> assert (out0.numpy() == (out1.numpy()) * 2).any()
"""
hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
......@@ -630,35 +690,35 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle
>>> import numpy as np

>>> # the forward_pre_hook changes the input of the layer: input = input * 2
>>> def forward_pre_hook(layer, input):
... # user can use layer and input for information statistics tasks
...
... # change the input
... input_return = (input[0] * 2)
... return input_return
...
>>> linear = paddle.nn.Linear(13, 5)

>>> # register the hook
>>> forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

>>> value0 = np.arange(26).reshape(2, 13).astype("float32")
>>> in0 = paddle.to_tensor(value0)
>>> out0 = linear(in0)

>>> # remove the hook
>>> forward_pre_hook_handle.remove()

>>> value1 = value0 * 2
>>> in1 = paddle.to_tensor(value1)
>>> out1 = linear(in1)

>>> # the hook changes the linear's input to input * 2, so out0 is equal to out1.
>>> assert (out0.numpy() == out1.numpy()).any()
"""
hook_remove_helper = HookRemoveHelper(self._forward_pre_hooks)
self._forward_pre_hooks[hook_remove_helper._hook_id] = hook
......@@ -691,22 +751,31 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
w_tmp = self.create_parameter([1,1])
self.add_parameter("w_tmp", w_tmp)
def forward(self, input):
return self._linear(input)
mylayer = MyLayer()
for name, param in mylayer.named_parameters():
print(name, param) # will print w_tmp,_linear.weight,_linear.bias
>>> import paddle
>>> paddle.seed(2023)
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... w_tmp = self.create_parameter([1,1])
... self.add_parameter("w_tmp", w_tmp)
...
... def forward(self, input):
... return self._linear(input)
...
>>> mylayer = MyLayer()
>>> for name, param in mylayer.named_parameters():
... print(name, param) # will print w_tmp,_linear.weight,_linear.bias
w_tmp Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.06979191]])
_linear.weight Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[1.26729357]])
_linear.bias Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])
"""
temp_attr = copy.deepcopy(attr)
if isinstance(temp_attr, str) and temp_attr == "":
......@@ -738,22 +807,22 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLinear(paddle.nn.Layer):
def __init__(self,
in_features,
out_features):
super().__init__()
self.linear = paddle.nn.Linear( 10, 10)
self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
def forward(self, input):
out = self.linear(input)
paddle.assign( out, self.back_var)
return out
>>> import paddle
>>> class MyLinear(paddle.nn.Layer):
... def __init__(self,
... in_features,
... out_features):
... super().__init__()
... self.linear = paddle.nn.Linear( 10, 10)
...
... self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
...
... def forward(self, input):
... out = self.linear(input)
... paddle.assign( out, self.back_var)
...
... return out
"""
if name is not None:
......@@ -790,22 +859,22 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLinear(paddle.nn.Layer):
def __init__(self,
in_features,
out_features):
super().__init__()
self.linear = paddle.nn.Linear( 10, 10)
self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
def forward(self, input):
out = self.linear(input)
paddle.assign( out, self.back_var)
return out
>>> import paddle
>>> class MyLinear(paddle.nn.Layer):
... def __init__(self,
... in_features,
... out_features):
... super().__init__()
... self.linear = paddle.nn.Linear(10, 10)
...
... self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
...
... def forward(self, input):
... out = self.linear(input)
... paddle.assign(out, self.back_var)
...
... return out
"""
if name is not None:
......@@ -833,10 +902,16 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)

>>> linear = paddle.nn.Linear(1, 1)
>>> print(linear.parameters())
[Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.18551230]]), Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])]
"""
ret = [
......@@ -858,15 +933,16 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle

>>> linear1 = paddle.nn.Linear(10, 3)
>>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(linear1, linear2)

>>> layer_list = list(model.children())

>>> print(layer_list)
[Linear(in_features=10, out_features=3, dtype=float32), Linear(in_features=3, out_features=10, dtype=float32)]
"""
for _, layer in self.named_children():
......@@ -882,16 +958,15 @@ class Layer:
Examples:
.. code-block:: python
import paddle
linear1 = paddle.nn.Linear(10, 3)
linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(linear1, linear2)
for prefix, layer in model.named_children():
print(prefix, layer)
# ('0', <paddle.nn.layer.common.Linear object at 0x7fb61ed85830>)
# ('1', <paddle.nn.layer.common.Linear object at 0x7fb61ed85950>)
>>> import paddle
>>> linear1 = paddle.nn.Linear(10, 3)
>>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(linear1, linear2)
>>> for prefix, layer in model.named_children():
... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
"""
memo = set()
for name, layer in self._sub_layers.items():
......@@ -913,21 +988,22 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
self._dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
temp = self._linear(input)
temp = self._dropout(temp)
return temp
mylayer = MyLayer()
print(mylayer.sublayers()) # [<paddle.nn.layer.common.Linear object at 0x7f44b58977d0>, <paddle.nn.layer.common.Dropout object at 0x7f44b58978f0>]
>>> import paddle
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... self._dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... temp = self._linear(input)
... temp = self._dropout(temp)
... return temp
...
>>> mylayer = MyLayer()
>>> print(mylayer.sublayers())
[Linear(in_features=1, out_features=1, dtype=float32), Dropout(p=0.5, axis=None, mode=upscale_in_train)]
"""
ret = [
......@@ -951,14 +1027,37 @@ class Layer:
Examples:
.. code-block:: python
import paddle
fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(fc1, fc2)
for name, param in model.named_parameters():
print(name, param)
>>> import paddle
>>> paddle.seed(100)
>>> fc1 = paddle.nn.Linear(10, 3)
>>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(fc1, fc2)
>>> for name, param in model.named_parameters():
... print(name, param)
0.weight Parameter containing:
Tensor(shape=[10, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.07276392, -0.39791510, -0.66356444],
[ 0.02143478, -0.18519843, -0.32485050],
[-0.42249614, 0.08450919, -0.66838276],
[ 0.38208580, -0.24303678, 0.55127048],
[ 0.47745085, 0.62117910, -0.08336520],
[-0.28653207, 0.47237599, -0.05868882],
[-0.14385653, 0.29945642, 0.12832761],
[-0.21237159, 0.38539791, -0.62760031],
[ 0.02637231, 0.20621127, 0.43255770],
[-0.19984481, -0.26259184, -0.29696006]])
0.bias Parameter containing:
Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=False,
[0., 0., 0.])
1.weight Parameter containing:
Tensor(shape=[3, 10], dtype=float32, place=Place(cpu), stop_gradient=False,
[[ 0.01985580, -0.40268910, 0.41172385, -0.47249708, -0.09002256,
-0.00533628, -0.52048630, 0.62360322, 0.20848787, -0.02033746],
[ 0.58281910, 0.12841827, 0.12907702, 0.02325618, -0.07746267,
0.31950659, -0.37924835, -0.59209681, -0.11732036, -0.58378261],
[-0.62100595, 0.22293305, 0.28229684, -0.03687060, -0.59323978,
0.08411229, 0.53275704, 0.40431368, 0.03171402, -0.17922515]])
"""
params_set = set()
named_sublayers = (
......@@ -991,14 +1090,15 @@ class Layer:
Examples:
.. code-block:: python
import paddle
fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = paddle.nn.Sequential(fc1, fc2)
for prefix, layer in model.named_sublayers():
print(prefix, layer)
>>> import paddle
>>> fc1 = paddle.nn.Linear(10, 3)
>>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = paddle.nn.Sequential(fc1, fc2)
>>> for prefix, layer in model.named_sublayers():
... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
"""
if layers_set is None:
layers_set = set()
......@@ -1039,16 +1139,18 @@ class Layer:
Examples:
.. code-block:: python
>>> import numpy as np
>>> import paddle

>>> linear = paddle.nn.Linear(10, 3)
>>> value = np.array([0]).astype("float32")
>>> buffer = paddle.to_tensor(value)
>>> linear.register_buffer("buf_name", buffer, persistable=True)

>>> # get the buffer by attribute.
>>> print(linear.buf_name)
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])
"""
......@@ -1097,15 +1199,17 @@ class Layer:
Examples:
.. code-block:: python
>>> import numpy as np
>>> import paddle

>>> linear = paddle.nn.Linear(10, 3)
>>> value = np.array([0]).astype("float32")
>>> buffer = paddle.to_tensor(value)
>>> linear.register_buffer("buf_name", buffer, persistable=True)

>>> print(linear.buffers())  # == print([linear.buf_name])
[Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])]
"""
ret = [
......@@ -1131,26 +1235,29 @@ class Layer:
Examples:
.. code-block:: python
import numpy as np
import paddle
fc1 = paddle.nn.Linear(10, 3)
buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
# register a tensor as buffer by specific `persistable`
fc1.register_buffer("buf_name_1", buffer1, persistable=True)
fc2 = paddle.nn.Linear(3, 10)
buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
# register a buffer by assigning an attribute with Tensor.
# The `persistable` can only be False by this way.
fc2.buf_name_2 = buffer2
model = paddle.nn.Sequential(fc1, fc2)
# get all named buffers
for name, buffer in model.named_buffers():
print(name, buffer)
>>> import numpy as np
>>> import paddle
>>> fc1 = paddle.nn.Linear(10, 3)
>>> buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
>>> # register a tensor as buffer by specific `persistable`
>>> fc1.register_buffer("buf_name_1", buffer1, persistable=True)
>>> fc2 = paddle.nn.Linear(3, 10)
>>> buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
>>> # register a buffer by assigning an attribute with Tensor.
>>> # The `persistable` can only be False by this way.
>>> fc2.buf_name_2 = buffer2
>>> model = paddle.nn.Sequential(fc1, fc2)
>>> # get all named buffers
>>> for name, buffer in model.named_buffers():
... print(name, buffer)
0.buf_name_1 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[0.])
1.buf_name_2 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
[1.])
"""
buffers_set = set()
named_sublayers = (
......@@ -1177,18 +1284,18 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle
>>> import numpy as np
>>> value = np.arange(26).reshape(2, 13).astype("float32")
>>> a = paddle.to_tensor(value)
>>> linear = paddle.nn.Linear(13, 5)
>>> adam = paddle.optimizer.Adam(learning_rate=0.01,
... parameters=linear.parameters())
>>> out = linear(a)
>>> out.backward()
>>> adam.step()
>>> linear.clear_gradients()
"""
for p in self.parameters():
......@@ -1271,29 +1378,30 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MySequential(paddle.nn.Layer):
def __init__(self, *layers):
super().__init__()
if len(layers) > 0 and isinstance(layers[0], tuple):
for name, layer in layers:
self.add_sublayer(name, layer)
else:
for idx, layer in enumerate(layers):
self.add_sublayer(str(idx), layer)
def forward(self, input):
for layer in self._sub_layers.values():
input = layer(input)
return input
fc1 = paddle.nn.Linear(10, 3)
fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
model = MySequential(fc1, fc2)
for prefix, layer in model.named_sublayers():
print(prefix, layer)
>>> import paddle
>>> class MySequential(paddle.nn.Layer):
... def __init__(self, *layers):
... super().__init__()
... if len(layers) > 0 and isinstance(layers[0], tuple):
... for name, layer in layers:
... self.add_sublayer(name, layer)
... else:
... for idx, layer in enumerate(layers):
... self.add_sublayer(str(idx), layer)
...
... def forward(self, input):
... for layer in self._sub_layers.values():
... input = layer(input)
... return input
...
>>> fc1 = paddle.nn.Linear(10, 3)
>>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
>>> model = MySequential(fc1, fc2)
>>> for prefix, layer in model.named_sublayers():
... print(prefix, layer)
0 Linear(in_features=10, out_features=3, dtype=float32)
1 Linear(in_features=3, out_features=10, dtype=float32)
"""
assert isinstance(sublayer, Layer) or sublayer is None
......@@ -1313,22 +1421,31 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class MyLayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self._linear = paddle.nn.Linear(1, 1)
w_tmp = self.create_parameter([1,1])
self.add_parameter("w_tmp", w_tmp)
def forward(self, input):
return self._linear(input)
mylayer = MyLayer()
for name, param in mylayer.named_parameters():
print(name, param) # will print w_tmp,_linear.weight,_linear.bias
>>> import paddle
>>> paddle.seed(100)
>>> class MyLayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self._linear = paddle.nn.Linear(1, 1)
... w_tmp = self.create_parameter([1,1])
... self.add_parameter("w_tmp", w_tmp)
...
... def forward(self, input):
... return self._linear(input)
...
>>> mylayer = MyLayer()
>>> for name, param in mylayer.named_parameters():
... print(name, param)
w_tmp Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[-1.01448846]])
_linear.weight Parameter containing:
Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
[[0.18551230]])
_linear.bias Parameter containing:
Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False,
[0.])
"""
if '_parameters' not in self.__dict__:
raise RuntimeError("super().__init__() should be called firstly.")
......@@ -1580,23 +1697,21 @@ class Layer:
Examples:
.. code-block:: python
import paddle
import numpy as np
class Mylayer(paddle.nn.Layer):
def __init__(self):
super().__init__()
self.linear1 = paddle.nn.Linear(10, 10)
self.linear2 = paddle.nn.Linear(5, 5)
self.conv2d = paddle.nn.Conv2D(3, 2, 3)
self.embedding = paddle.nn.Embedding(128, 16)
self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))
mylayer = Mylayer()
print(dir(mylayer))
# only parts are shown, because of list have too much content
# ['__call__', '__class__', ... , 'conv2d', 'embedding', 'h_0', 'linear1', 'linear2', ... , 'sublayers', 'train']
>>> import paddle
>>> import numpy as np
>>> class Mylayer(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self.linear1 = paddle.nn.Linear(10, 10)
... self.linear2 = paddle.nn.Linear(5, 5)
... self.conv2d = paddle.nn.Conv2D(3, 2, 3)
... self.embedding = paddle.nn.Embedding(128, 16)
... self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))
...
>>> mylayer = Mylayer()
>>> print(dir(mylayer))
['__call__', '__class__', '__delattr__', '__dict__', ..., 'training']
"""
method = dir(self.__class__)
attrs = list(self.__dict__.keys())
......@@ -1756,12 +1871,12 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle

>>> emb = paddle.nn.Embedding(10, 10)

>>> state_dict = emb.to_static_state_dict()
>>> paddle.save(state_dict, "paddle_dy.pdparams")
'''
return self._state_dict_impl(
......@@ -1793,12 +1908,12 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle

>>> emb = paddle.nn.Embedding(10, 10)

>>> state_dict = emb.state_dict()
>>> paddle.save(state_dict, "paddle_dy.pdparams")
'''
return self._state_dict_impl(
......@@ -1825,14 +1940,14 @@ class Layer:
Examples:
.. code-block:: python
>>> import paddle

>>> emb = paddle.nn.Embedding(10, 10)
>>> state_dict = emb.state_dict()
>>> paddle.save(state_dict, "paddle_dy.pdparams")
>>> para_state_dict = paddle.load("paddle_dy.pdparams")
>>> emb.set_state_dict(para_state_dict)
'''
missing_keys = []
......@@ -1950,32 +2065,40 @@ class Layer:
Examples:
.. code-block:: python
# required: skip
import paddle
linear=paddle.nn.Linear(2, 2)
linear.weight
#Parameter containing:
#Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False,
# [[-0.32770029, 0.38653070],
# [ 0.46030545, 0.08158520]])
linear.to(dtype='float64')
linear.weight
#Tensor(shape=[2, 2], dtype=float64, place=CUDAPlace(0), stop_gradient=False,
# [[-0.32770029, 0.38653070],
# [ 0.46030545, 0.08158520]])
linear.to(device='cpu')
linear.weight
#Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False,
# [[-0.32770029, 0.38653070],
# [ 0.46030545, 0.08158520]])
linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
linear.weight
#Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False,
# [[-0.04989364, -0.56889004],
# [ 0.33960250, 0.96878713]])
>>> import paddle
>>> paddle.seed(2023)
>>> linear=paddle.nn.Linear(2, 2)
>>> linear.weight
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
[[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
>>> linear.to(dtype='float64')
>>> linear.weight
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float64, place=Place(gpu:0), stop_gradient=False,
[[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
>>> linear.to(device='cpu')
>>> linear.weight
>>> print(linear.weight)
Parameter containing:
Tensor(shape=[2, 2], dtype=float64, place=Place(cpu), stop_gradient=False,
[[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
>>> # doctest: +REQUIRES(env:GPU)
>>> linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
>>> linear.weight
>>> print(linear.weight)
Tensor(shape=[2, 2], dtype=float64, place=Place(gpu_pinned), stop_gradient=False,
[[ 0.89611185, 0.04935038],
[-0.58883440, 0.99266374]])
'''
return self._to_impl(
......@@ -2161,21 +2284,25 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class Model(paddle.nn.Layer):
def __init__(self):
super().__init__()
self.linear = paddle.nn.Linear(1, 1)
self.dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
out = self.linear(input)
out = self.dropout(out)
return out
model = Model()
model.float()
>>> import paddle
>>> class Model(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self.linear = paddle.nn.Linear(1, 1)
... self.dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... out = self.linear(input)
... out = self.dropout(out)
... return out
...
>>> model = Model()
>>> model.float()
Model(
(linear): Linear(in_features=1, out_features=1, dtype=paddle.float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
'''
excluded_layers = [] if excluded_layers is None else excluded_layers
......@@ -2213,21 +2340,26 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class Model(paddle.nn.Layer):
def __init__(self):
super().__init__()
self.linear = paddle.nn.Linear(1, 1)
self.dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
out = self.linear(input)
out = self.dropout(out)
return out
model = Model()
model.float16()
>>> # doctest: +SKIP('Paddle compiled by the user does not support float16, so keep original data type.')
>>> import paddle
>>> class Model(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self.linear = paddle.nn.Linear(1, 1)
... self.dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... out = self.linear(input)
... out = self.dropout(out)
... return out
...
>>> model = Model()
>>> model.float16()
Model(
(linear): Linear(in_features=1, out_features=1, dtype=float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
'''
if paddle.amp.is_float16_supported() is False:
......@@ -2273,21 +2405,27 @@ class Layer:
Examples:
.. code-block:: python
import paddle
class Model(paddle.nn.Layer):
def __init__(self):
super().__init__()
self.linear = paddle.nn.Linear(1, 1)
self.dropout = paddle.nn.Dropout(p=0.5)
def forward(self, input):
out = self.linear(input)
out = self.dropout(out)
return out
model = Model()
model.bfloat16()
>>> # doctest: +SKIP('bfloat need V100 compile')
>>> import paddle
>>> class Model(paddle.nn.Layer):
... def __init__(self):
... super().__init__()
... self.linear = paddle.nn.Linear(1, 1)
... self.dropout = paddle.nn.Dropout(p=0.5)
...
... def forward(self, input):
... out = self.linear(input)
... out = self.dropout(out)
... return out
...
>>> model = Model()
>>> model.bfloat16()
>>> #UserWarning: Paddle compiled by the user does not support bfloat16, so keep original data type.
Model(
(linear): Linear(in_features=1, out_features=1, dtype=float32)
(dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train)
)
'''
if paddle.amp.is_bfloat16_supported() is False:
......