Unverified commit 418cc35d, authored by cyberslack_lee, committed by GitHub

[xdoctest] reformat example code with google style in No.86-90 (#55812)

* norm, test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview
Parent 4ff6999a
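This PR converts the docstring examples below from plain code blocks to the Google-style doctest format used by xdoctest: each statement is prefixed with `>>> ` (continuations with `... `), and the expected output follows unprefixed, so the examples can be executed and checked automatically. A minimal illustration of the convention (not taken from the diff):

>>> import paddle
>>> print(paddle.to_tensor([1.0, 2.0]).shape)
[2]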
......@@ -165,14 +165,18 @@ class InstanceNorm1D(_InstanceNormBase):
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 3))
instance_norm = paddle.nn.InstanceNorm1D(2)
instance_norm_out = instance_norm(x)
print(instance_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm1D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[ 1.32132232, -0.22444785, -1.09687424],
[ 1.29506636, -0.15688568, -1.13818073]],
[[-0.27764025, 1.33961368, -1.06197333],
[ 0.44484580, -1.38489723, 0.94005162]]])
"""
def __init__(
......@@ -255,13 +259,22 @@ class InstanceNorm2D(_InstanceNormBase):
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 2, 3))
instance_norm = paddle.nn.InstanceNorm2D(2)
instance_norm_out = instance_norm(x)
print(instance_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm2D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 1.26652932, -0.60229748, -1.65705574],
[ 1.06272733, 0.24229208, -0.31219524]],
[[-0.85414171, 0.31684181, -1.42204332],
[ 1.00412714, -0.43966094, 1.39487720]]],
[[[ 0.83324969, 1.25046813, -0.79470295],
[-1.38446140, 0.81851846, -0.72307163]],
[[-0.33560610, 0.95346332, 0.45585334],
[-0.53483474, 1.20336461, -1.74224067]]]])
"""
def __init__(
......@@ -342,13 +355,30 @@ class InstanceNorm3D(_InstanceNormBase):
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 2, 2, 3))
instance_norm = paddle.nn.InstanceNorm3D(2)
instance_norm_out = instance_norm(x)
print(instance_norm_out.numpy)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm3D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[[ 0.60520107, -0.67670596, -1.40020907],
[ 0.46540472, -0.09736639, -0.47771260]],
[[-0.74365318, 0.63718963, -1.41333199],
[ 1.44764769, -0.25489071, 1.90842640]]],
[[[ 1.09773374, 1.49568439, -0.45503727],
[-1.01755965, 1.08368278, -0.38671401]],
[[-0.62252384, 0.60490805, 0.13109155],
[-0.81222630, 0.84286022, -1.96189928]]]],
[[[[ 0.28014541, 0.91674680, 1.71797717],
[-0.52062720, -0.74274176, -0.86439967]],
[[ 0.25707796, -1.23866379, 1.64422870],
[-1.48577297, -0.13187379, 0.16790220]]],
[[[-1.49266160, 1.57909954, 0.46455818],
[-0.14981404, 1.46959865, 0.24957968]],
[[ 0.25134835, -0.03276967, -0.30318922],
[ 0.76263177, -1.11345232, -1.68492818]]]]])
"""
def __init__(
......@@ -410,13 +440,38 @@ class GroupNorm(Layer):
Examples:
.. code-block:: python
import paddle
x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
group_norm_out = group_norm(x)
print(group_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
>>> group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
>>> group_norm_out = group_norm(x)
>>> print(group_norm_out)
Tensor(shape=[2, 6, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]]],
[[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]]]])
"""
def __init__(
......@@ -575,13 +630,22 @@ class LayerNorm(Layer):
.. code-block:: python
import paddle
x = paddle.rand((2, 2, 2, 3))
layer_norm = paddle.nn.LayerNorm(x.shape[1:])
layer_norm_out = layer_norm(x)
print(layer_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 3))
>>> layer_norm = paddle.nn.LayerNorm(x.shape[1:])
>>> layer_norm_out = layer_norm(x)
>>> print(layer_norm_out)
Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 0.60520101, -0.67670590, -1.40020895],
[ 0.46540466, -0.09736638, -0.47771254]],
[[-0.74365306, 0.63718957, -1.41333175],
[ 1.44764745, -0.25489068, 1.90842617]]],
[[[ 1.09773350, 1.49568415, -0.45503747],
[-1.01755989, 1.08368254, -0.38671425]],
[[-0.62252408, 0.60490781, 0.13109133],
[-0.81222653, 0.84285998, -1.96189952]]]])
"""
def __init__(
......@@ -891,17 +955,17 @@ class BatchNorm(Layer):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.nn as nn
from paddle.fluid.dygraph.base import to_variable
import numpy as np
>>> import paddle.fluid as fluid
>>> import paddle.nn as nn
>>> from paddle.fluid.dygraph.base import to_variable
>>> import numpy as np
x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
with fluid.dygraph.guard():
x = to_variable(x)
batch_norm = nn.layer.norm.BatchNorm(10)
hidden1 = batch_norm(x)
>>> x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
>>> with fluid.dygraph.guard():
... x = to_variable(x)
... batch_norm = nn.layer.norm.BatchNorm(10)
... hidden1 = batch_norm(x)
"""
def __init__(
......@@ -1165,13 +1229,16 @@ class BatchNorm1D(_BatchNormBase):
Examples:
.. code-block:: python
import paddle
x = paddle.rand((2, 1, 3))
batch_norm = paddle.nn.BatchNorm1D(1)
batch_norm_out = batch_norm(x)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 3))
>>> batch_norm = paddle.nn.BatchNorm1D(1)
>>> batch_norm_out = batch_norm(x)
print(batch_norm_out)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[ 1.26652932, -0.60229754, -1.65705597]],
[[ 1.06272745, 0.24229205, -0.31219530]]])
"""
def __init__(
......@@ -1277,13 +1344,18 @@ class BatchNorm2D(_BatchNormBase):
Examples:
.. code-block:: python
import paddle
x = paddle.rand((2, 1, 2, 3))
batch_norm = paddle.nn.BatchNorm2D(1)
batch_norm_out = batch_norm(x)
print(batch_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 2, 3))
>>> batch_norm = paddle.nn.BatchNorm2D(1)
>>> batch_norm_out = batch_norm(x)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 0.60520101, -0.67670590, -1.40020895],
[ 0.46540475, -0.09736633, -0.47771257]]],
[[[-0.74365312, 0.63718963, -1.41333187],
[ 1.44764757, -0.25489068, 1.90842628]]]])
"""
def _check_data_format(self, input):
......@@ -1363,13 +1435,22 @@ class BatchNorm3D(_BatchNormBase):
Examples:
.. code-block:: python
import paddle
x = paddle.rand((2, 1, 2, 2, 3))
batch_norm = paddle.nn.BatchNorm3D(1)
batch_norm_out = batch_norm(x)
print(batch_norm_out)
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 2, 2, 3))
>>> batch_norm = paddle.nn.BatchNorm3D(1)
>>> batch_norm_out = batch_norm(x)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[[ 0.28011751, -0.95211101, -1.64757574],
[ 0.14573872, -0.39522290, -0.76082933]],
[[-1.01646376, 0.31086648, -1.66019011],
[ 1.08991623, -0.54664266, 1.53283834]]]],
[[[[ 1.33958006, 1.71585774, -0.12862551],
[-0.66051245, 1.32629418, -0.06402326]],
[[-0.28699064, 0.87359405, 0.42558217],
[-0.46636176, 1.09858704, -1.55342245]]]]])
"""
def __init__(
......@@ -1485,23 +1566,22 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python
# required: gpu
import paddle
import paddle.nn as nn
x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32')
>>> # doctest: +REQUIRES(env:GPU)
if paddle.is_compiled_with_cuda():
sync_batch_norm = nn.SyncBatchNorm(2)
hidden1 = sync_batch_norm(x)
print(hidden1)
# Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
# [[[[ 0.26824948, 1.09363246],
# [ 0.26824948, -1.63013160]],
>>> import paddle
>>> import paddle.nn as nn
>>> paddle.device.set_device('gpu')
>>> x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32')
# [[ 0.80956620, -0.66528702],
# [-1.27446556, 1.13018656]]]])
>>> if paddle.is_compiled_with_cuda():
... sync_batch_norm = nn.SyncBatchNorm(2)
... hidden1 = sync_batch_norm(x)
... print(hidden1)
Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
[[[[ 0.26824948, 1.09363246],
[ 0.26824948, -1.63013160]],
[[ 0.80956620, -0.66528702],
[-1.27446556, 1.13018656]]]])
"""
......@@ -1625,11 +1705,16 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5))
sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
>>> model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5))
>>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
>>> print(sync_model)
Sequential(
(0): Conv2D(3, 5, kernel_size=[3, 3], data_format=NCHW)
(1): SyncBatchNorm(num_features=5, momentum=0.9, epsilon=1e-05)
)
"""
layer_output = layer
......@@ -1704,14 +1789,15 @@ class LocalResponseNorm(Layer):
Examples:
.. code-block:: python
.. code-block:: python
import paddle
>>> import paddle
x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
m = paddle.nn.LocalResponseNorm(size=5)
y = m(x)
print(y.shape) # [3, 3, 112, 112]
>>> x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
>>> m = paddle.nn.LocalResponseNorm(size=5)
>>> y = m(x)
>>> print(y.shape)
[3, 3, 112, 112]
"""
def __init__(
......@@ -1801,15 +1887,14 @@ class SpectralNorm(Layer):
None
Examples:
.. code-block:: python
import paddle
x = paddle.rand((2,8,32,32))
spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2)
spectral_norm_out = spectral_norm(x)
.. code-block:: python
print(spectral_norm_out.shape) # [2, 8, 32, 32]
>>> import paddle
>>> x = paddle.rand((2,8,32,32))
>>> spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2)
>>> spectral_norm_out = spectral_norm(x)
>>> print(spectral_norm_out.shape)
[2, 8, 32, 32]
"""
......
......@@ -65,13 +65,14 @@ class AvgPool1D(Layer):
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
pool_out = AvgPool1D(data)
# pool_out shape: [1, 3, 16]
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
>>> pool_out = AvgPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
"""
......@@ -169,15 +170,15 @@ class AvgPool2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
# max pool2d
input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
AvgPool2D = nn.AvgPool2D(kernel_size=2,
stride=2, padding=0)
output = AvgPool2D(input)
# output.shape [1, 3, 16, 16]
>>> # avg pool2d
>>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> AvgPool2D = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
>>> output = AvgPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
"""
......@@ -268,15 +269,15 @@ class AvgPool3D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
# avg pool3d
input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
AvgPool3D = nn.AvgPool3D(kernel_size=2,
stride=2, padding=0)
output = AvgPool3D(input)
# output.shape [1, 2, 3, 16, 16]
>>> # avg pool3d
>>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> AvgPool3D = nn.AvgPool3D(kernel_size=2, stride=2, padding=0)
>>> output = AvgPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
"""
......@@ -366,17 +367,21 @@ class MaxPool1D(Layer):
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
pool_out = MaxPool1D(data)
# pool_out shape: [1, 3, 16]
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
>>> pool_out = MaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
pool_out, indices = MaxPool1D(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
>>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> pool_out, indices = MaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
"""
......@@ -471,20 +476,23 @@ class MaxPool2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
# max pool2d
input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
MaxPool2D = nn.MaxPool2D(kernel_size=2,
stride=2, padding=0)
output = MaxPool2D(input)
# output.shape [1, 3, 16, 16]
# for return_mask=True
MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
output, max_indices = MaxPool2D(input)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
>>> import paddle
>>> import paddle.nn as nn
>>> # max pool2d
>>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
>>> output = MaxPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
>>> # for return_mask=True
>>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> output, max_indices = MaxPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16]
"""
def __init__(
......@@ -568,20 +576,23 @@ class MaxPool3D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
# max pool3d
input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
MaxPool3D = nn.MaxPool3D(kernel_size=2,
stride=2, padding=0)
output = MaxPool3D(input)
# output.shape [1, 2, 3, 16, 16]
# for return_mask=True
MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
output, max_indices = MaxPool3D(input)
# output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
>>> import paddle
>>> import paddle.nn as nn
>>> # max pool3d
>>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0)
>>> output = MaxPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
>>> # for return_mask=True
>>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> output, max_indices = MaxPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
>>> print(max_indices.shape)
[1, 2, 1, 16, 16]
"""
def __init__(
......@@ -650,25 +661,26 @@ class AdaptiveAvgPool1D(Layer):
Examples:
.. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
#
import paddle
import paddle.nn as nn
data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
pool_out = AdaptiveAvgPool1D(data)
# pool_out shape: [1, 3, 16]
>>> # average adaptive pool1d
>>> # suppose the input is in shape [N, C, L] and `output_size` is m or [m];
>>> # the output is in shape [N, C, m]: adaptive pooling evenly divides the
>>> # L dimension of the input into m grids and pools within each grid.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = sum(input[:, :, lstart: lend]) / (lend - lstart)
>>> #
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
>>> pool_out = AdaptiveAvgPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
"""
def __init__(self, output_size, name=None):
......@@ -726,28 +738,29 @@ class AdaptiveAvgPool2D(Layer):
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle
x = paddle.rand([2, 3, 32, 32])
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3]
>>> # adaptive avg pool2d
>>> # suppose the input is in shape [N, C, H, W] and `output_size` is [m, n];
>>> # the output is in shape [N, C, m, n]: adaptive pooling evenly divides the
>>> # H and W dimensions of the input into m * n grids and pools within each grid.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> x = paddle.rand([2, 3, 32, 32])
>>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
>>> pool_out = adaptive_avg_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCHW", name=None):
......@@ -815,31 +828,32 @@ class AdaptiveAvgPool3D(Layer):
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
x = paddle.rand([2, 3, 8, 32, 32])
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out = [2, 3, 3, 3, 3]
>>> # adaptive avg pool3d
>>> # suppose the input is in shape [N, C, D, H, W] and `output_size` is [l, m, n];
>>> # the output is in shape [N, C, l, m, n]: adaptive pooling evenly divides the
>>> # D, H and W dimensions of the input into l * m * n grids and pools within each grid.
>>> # adaptive avg pool performs calculations as follows:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] =
>>> #                 avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
>>> import paddle
>>> x = paddle.rand([2, 3, 8, 32, 32])
>>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
>>> pool_out = adaptive_avg_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCDHW", name=None):
......@@ -898,30 +912,34 @@ class AdaptiveMaxPool1D(Layer):
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn as nn
data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
pool_out = AdaptiveMaxPool1D(data)
# pool_out shape: [1, 3, 16]
# for return_mask = true
AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
pool_out, indices = AdaptiveMaxPool1D(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
>>> # max adaptive pool1d
>>> # suppose the input is in shape [N, C, L] and `output_size` is m or [m];
>>> # the output is in shape [N, C, m]: adaptive pooling evenly divides the
>>> # L dimension of the input into m grids and pools within each grid.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = max(input[:, :, lstart: lend])
>>> #
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
>>> pool_out = AdaptiveMaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> # for return_mask=True
>>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
>>> pool_out, indices = AdaptiveMaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
"""
......@@ -981,27 +999,31 @@ class AdaptiveMaxPool2D(Layer):
Examples:
.. code-block:: python
# adaptive max pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
x = paddle.rand([2, 3, 32, 32])
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
pool_out, indices = adaptive_max_pool(x = x)
>>> # adaptive max pool2d
>>> # suppose the input is in shape [N, C, H, W] and `output_size` is [m, n];
>>> # the output is in shape [N, C, m, n]: adaptive pooling evenly divides the
>>> # H and W dimensions of the input into m * n grids and pools within each grid.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> x = paddle.rand([2, 3, 32, 32])
>>> adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
>>> pool_out, indices = adaptive_max_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3]
>>> print(indices.shape)
[2, 3, 3, 3]
"""
def __init__(self, output_size, return_mask=False, name=None):
......@@ -1067,33 +1089,37 @@ class AdaptiveMaxPool3D(Layer):
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
x = paddle.rand([2, 3, 8, 32, 32])
pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
out = pool(x)
# out shape: [2, 3, 4, 4, 4]
pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
out, indices = pool(x)
# out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4]
>>> # adaptive max pool3d
>>> # suppose the input is in shape [N, C, D, H, W] and `output_size` is [l, m, n];
>>> # the output is in shape [N, C, l, m, n]: adaptive pooling evenly divides the
>>> # D, H and W dimensions of the input into l * m * n grids and pools within each grid.
>>> # adaptive max pool performs calculations as follows:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] =
>>> #                 max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
>>> import paddle
>>> x = paddle.rand([2, 3, 8, 32, 32])
>>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
>>> out = pool(x)
>>> print(out.shape)
[2, 3, 4, 4, 4]
>>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
>>> out, indices = pool(x)
>>> print(out.shape)
[2, 3, 3, 3, 3]
>>> print(indices.shape)
[2, 3, 3, 3, 3]
"""
......@@ -1156,15 +1182,19 @@ class MaxUnPool1D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
data = paddle.rand(shape=[1, 3, 16])
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 3, 8], indices shape: [1, 3, 8]
Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0)
unpool_out = Unpool1D(pool_out, indices)
# unpool_out shape: [1, 3, 16]
>>> data = paddle.rand(shape=[1, 3, 16])
>>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 8]
>>> print(indices.shape)
[1, 3, 8]
>>> Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0)
>>> unpool_out = Unpool1D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 3, 16]
"""
......@@ -1244,15 +1274,19 @@ class MaxUnPool2D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
>>> import paddle
>>> import paddle.nn.functional as F
data = paddle.rand(shape=[1,1,6,6])
pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 3, 3], indices shape: [1, 1, 3, 3]
Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0)
unpool_out = Unpool2D(pool_out, indices)
# unpool_out shape: [1, 1, 6, 6]
>>> data = paddle.rand(shape=[1, 1, 6, 6])
>>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 3, 3]
>>> print(indices.shape)
[1, 1, 3, 3]
>>> Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0)
>>> unpool_out = Unpool2D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 1, 6, 6]
"""
......@@ -1335,15 +1369,19 @@ class MaxUnPool3D(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
data = paddle.rand(shape=[1, 1, 4, 4, 6])
pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
# pool_out shape: [1, 1, 2, 2, 3], indices shape: [1, 1, 2, 2, 3]
Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0)
unpool_out = Unpool3D(pool_out, indices)
# unpool_out shape: [1, 1, 4, 4, 6]
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.rand(shape=[1, 1, 4, 4, 6])
>>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 2, 2, 3]
>>> print(indices.shape)
[1, 1, 2, 2, 3]
>>> Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0)
>>> unpool_out = Unpool3D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 1, 4, 4, 6]
"""
......
......@@ -89,14 +89,18 @@ def rnn(
.. code-block:: python
import paddle
paddle.disable_static()
>>> import paddle
cell = paddle.nn.SimpleRNNCell(16, 32)
>>> inputs = paddle.rand((4, 23, 16))
>>> prev_h = paddle.randn((4, 32))
inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h)
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> rnn = paddle.nn.RNN(cell)
>>> outputs, final_states = rnn(inputs, prev_h)
>>> print(outputs.shape)
[4, 23, 32]
>>> print(final_states.shape)
[4, 32]
"""
......@@ -397,18 +401,17 @@ def birnn(
.. code-block:: python
import paddle
paddle.disable_static()
>>> import paddle
cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)
inputs = paddle.rand((4, 23, 16))
hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
initial_states = ((hf, cf), (hb, cb))
outputs, final_states = paddle.nn.layer.birnn(
cell_fw, cell_bw, inputs, initial_states)
>>> cell_fw = paddle.nn.LSTMCell(16, 32)
>>> cell_bw = paddle.nn.LSTMCell(16, 32)
>>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
>>> inputs = paddle.rand((2, 23, 16))
>>> outputs, final_states = rnn(inputs)
>>> print(outputs.shape)
[2, 23, 64]
>>> print(final_states[0][0].shape)
[2, 32]
"""
......@@ -743,16 +746,15 @@ class SimpleRNNCell(RNNCellBase):
.. code-block:: python
import paddle
x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))
>>> import paddle
cell = paddle.nn.SimpleRNNCell(16, 32)
y, h = cell(x, prev_h)
print(y.shape)
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
#[4,32]
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> y, h = cell(x, prev_h)
>>> print(y.shape)
[4, 32]
"""
......@@ -897,22 +899,21 @@ class LSTMCell(RNNCellBase):
.. code-block:: python
import paddle
x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))
prev_c = paddle.randn((4, 32))
>>> import paddle
cell = paddle.nn.LSTMCell(16, 32)
y, (h, c) = cell(x, (prev_h, prev_c))
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
>>> prev_c = paddle.randn((4, 32))
print(y.shape)
print(h.shape)
print(c.shape)
>>> cell = paddle.nn.LSTMCell(16, 32)
>>> y, (h, c) = cell(x, (prev_h, prev_c))
#[4,32]
#[4,32]
#[4,32]
>>> print(y.shape)
[4, 32]
>>> print(h.shape)
[4, 32]
>>> print(c.shape)
[4, 32]
"""
......@@ -1059,19 +1060,19 @@ class GRUCell(RNNCellBase):
.. code-block:: python
import paddle
>>> import paddle
x = paddle.randn((4, 16))
prev_h = paddle.randn((4, 32))
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
cell = paddle.nn.GRUCell(16, 32)
y, h = cell(x, prev_h)
>>> cell = paddle.nn.GRUCell(16, 32)
>>> y, h = cell(x, prev_h)
print(y.shape)
print(h.shape)
>>> print(y.shape)
[4, 32]
>>> print(h.shape)
[4, 32]
#[4,32]
#[4,32]
"""
......@@ -1189,20 +1190,19 @@ class RNN(Layer):
.. code-block:: python
import paddle
>>> import paddle
inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
>>> inputs = paddle.rand((4, 23, 16))
>>> prev_h = paddle.randn((4, 32))
cell = paddle.nn.SimpleRNNCell(16, 32)
rnn = paddle.nn.RNN(cell)
outputs, final_states = rnn(inputs, prev_h)
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> rnn = paddle.nn.RNN(cell)
>>> outputs, final_states = rnn(inputs, prev_h)
print(outputs.shape)
print(final_states.shape)
#[4,23,32]
#[4,32]
>>> print(outputs.shape)
[4, 23, 32]
>>> print(final_states.shape)
[4, 32]
"""
......@@ -1263,20 +1263,19 @@ class BiRNN(Layer):
.. code-block:: python
import paddle
cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)
rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
>>> import paddle
inputs = paddle.rand((2, 23, 16))
outputs, final_states = rnn(inputs)
>>> cell_fw = paddle.nn.LSTMCell(16, 32)
>>> cell_bw = paddle.nn.LSTMCell(16, 32)
>>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
print(outputs.shape)
print(final_states[0][0].shape,len(final_states),len(final_states[0]))
>>> inputs = paddle.rand((2, 23, 16))
>>> outputs, final_states = rnn(inputs)
#[4,23,64]
#[2,32] 2 2
>>> print(outputs.shape)
[2, 23, 64]
>>> print(final_states[0][0].shape, len(final_states), len(final_states[0]))
[2, 32] 2 2
"""
......@@ -1702,19 +1701,19 @@ class SimpleRNN(RNNBase):
.. code-block:: python
import paddle
>>> import paddle
rnn = paddle.nn.SimpleRNN(16, 32, 2)
>>> rnn = paddle.nn.SimpleRNN(16, 32, 2)
x = paddle.randn((4, 23, 16))
prev_h = paddle.randn((2, 4, 32))
y, h = rnn(x, prev_h)
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> y, h = rnn(x, prev_h)
print(y.shape)
print(h.shape)
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
#[4,23,32]
#[2,4,32]
"""
......@@ -1833,22 +1832,22 @@ class LSTM(RNNBase):
.. code-block:: python
import paddle
>>> import paddle
rnn = paddle.nn.LSTM(16, 32, 2)
>>> rnn = paddle.nn.LSTM(16, 32, 2)
x = paddle.randn((4, 23, 16))
prev_h = paddle.randn((2, 4, 32))
prev_c = paddle.randn((2, 4, 32))
y, (h, c) = rnn(x, (prev_h, prev_c))
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> prev_c = paddle.randn((2, 4, 32))
>>> y, (h, c) = rnn(x, (prev_h, prev_c))
print(y.shape)
print(h.shape)
print(c.shape)
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
>>> print(c.shape)
[2, 4, 32]
#[4,23,32]
#[2,4,32]
#[2,4,32]
"""
......@@ -1955,19 +1954,19 @@ class GRU(RNNBase):
.. code-block:: python
import paddle
>>> import paddle
rnn = paddle.nn.GRU(16, 32, 2)
>>> rnn = paddle.nn.GRU(16, 32, 2)
x = paddle.randn((4, 23, 16))
prev_h = paddle.randn((2, 4, 32))
y, h = rnn(x, prev_h)
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> y, h = rnn(x, prev_h)
print(y.shape)
print(h.shape)
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
#[4,23,32]
#[2,4,32]
"""
......
......@@ -141,14 +141,16 @@ class MultiHeadAttention(Layer):
.. code-block:: python
import paddle
# encoder input: [batch_size, sequence_length, d_model]
query = paddle.rand((2, 4, 128))
# self attention mask: [batch_size, num_heads, query_len, query_len]
attn_mask = paddle.rand((2, 2, 4, 4))
multi_head_attn = paddle.nn.MultiHeadAttention(128, 2)
output = multi_head_attn(query, None, None, attn_mask=attn_mask) # [2, 4, 128]
>>> import paddle
>>> # encoder input: [batch_size, sequence_length, d_model]
>>> query = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, num_heads, query_len, query_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> multi_head_attn = paddle.nn.MultiHeadAttention(128, 2)
>>> output = multi_head_attn(query, None, None, attn_mask=attn_mask)
>>> print(output.shape)
[2, 4, 128]
"""
Cache = collections.namedtuple("Cache", ["k", "v"])
......@@ -490,15 +492,17 @@ class TransformerEncoderLayer(Layer):
.. code-block:: python
import paddle
from paddle.nn import TransformerEncoderLayer
# encoder input: [batch_size, src_len, d_model]
enc_input = paddle.rand((2, 4, 128))
# self attention mask: [batch_size, n_head, src_len, src_len]
attn_mask = paddle.rand((2, 2, 4, 4))
encoder_layer = TransformerEncoderLayer(128, 2, 512)
enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128]
>>> import paddle
>>> from paddle.nn import TransformerEncoderLayer
>>> # encoder input: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, n_head, src_len, src_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
>>> enc_output = encoder_layer(enc_input, attn_mask)
>>> print(enc_output.shape)
[2, 4, 128]
"""
def __init__(
......@@ -659,16 +663,18 @@ class TransformerEncoder(Layer):
.. code-block:: python
import paddle
from paddle.nn import TransformerEncoderLayer, TransformerEncoder
# encoder input: [batch_size, src_len, d_model]
enc_input = paddle.rand((2, 4, 128))
# self attention mask: [batch_size, n_head, src_len, src_len]
attn_mask = paddle.rand((2, 2, 4, 4))
encoder_layer = TransformerEncoderLayer(128, 2, 512)
encoder = TransformerEncoder(encoder_layer, 2)
enc_output = encoder(enc_input, attn_mask) # [2, 4, 128]
>>> import paddle
>>> from paddle.nn import TransformerEncoderLayer, TransformerEncoder
>>> # encoder input: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, n_head, src_len, src_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
>>> encoder = TransformerEncoder(encoder_layer, 2)
>>> enc_output = encoder(enc_input, attn_mask)
>>> print(enc_output.shape)
[2, 4, 128]
"""
def __init__(self, encoder_layer, num_layers, norm=None):
......@@ -809,22 +815,24 @@ class TransformerDecoderLayer(Layer):
.. code-block:: python
import paddle
from paddle.nn import TransformerDecoderLayer
# decoder input: [batch_size, tgt_len, d_model]
dec_input = paddle.rand((2, 4, 128))
# encoder output: [batch_size, src_len, d_model]
enc_output = paddle.rand((2, 6, 128))
# self attention mask: [batch_size, n_head, tgt_len, tgt_len]
self_attn_mask = paddle.rand((2, 2, 4, 4))
# cross attention mask: [batch_size, n_head, tgt_len, src_len]
cross_attn_mask = paddle.rand((2, 2, 4, 6))
decoder_layer = TransformerDecoderLayer(128, 2, 512)
output = decoder_layer(dec_input,
enc_output,
self_attn_mask,
cross_attn_mask) # [2, 4, 128]
>>> import paddle
>>> from paddle.nn import TransformerDecoderLayer
>>> # decoder input: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 4, 128))
>>> # encoder output: [batch_size, src_len, d_model]
>>> enc_output = paddle.rand((2, 6, 128))
>>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
>>> self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
>>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
>>> output = decoder_layer(dec_input,
... enc_output,
... self_attn_mask,
... cross_attn_mask)
>>> print(output.shape)
[2, 4, 128]
"""
def __init__(
......@@ -1031,23 +1039,25 @@ class TransformerDecoder(Layer):
.. code-block:: python
import paddle
from paddle.nn import TransformerDecoderLayer, TransformerDecoder
# decoder input: [batch_size, tgt_len, d_model]
dec_input = paddle.rand((2, 4, 128))
# encoder output: [batch_size, src_len, d_model]
enc_output = paddle.rand((2, 6, 128))
# self attention mask: [batch_size, n_head, tgt_len, tgt_len]
self_attn_mask = paddle.rand((2, 2, 4, 4))
# cross attention mask: [batch_size, n_head, tgt_len, src_len]
cross_attn_mask = paddle.rand((2, 2, 4, 6))
decoder_layer = TransformerDecoderLayer(128, 2, 512)
decoder = TransformerDecoder(decoder_layer, 2)
output = decoder(dec_input,
enc_output,
self_attn_mask,
cross_attn_mask) # [2, 4, 128]
>>> import paddle
>>> from paddle.nn import TransformerDecoderLayer, TransformerDecoder
>>> # decoder input: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 4, 128))
>>> # encoder output: [batch_size, src_len, d_model]
>>> enc_output = paddle.rand((2, 6, 128))
>>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
>>> self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
>>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
>>> decoder = TransformerDecoder(decoder_layer, 2)
>>> output = decoder(dec_input,
... enc_output,
... self_attn_mask,
... cross_attn_mask)
>>> print(output.shape)
[2, 4, 128]
"""
def __init__(self, decoder_layer, num_layers, norm=None):
......@@ -1242,25 +1252,27 @@ class Transformer(Layer):
.. code-block:: python
import paddle
from paddle.nn import Transformer
# src: [batch_size, tgt_len, d_model]
enc_input = paddle.rand((2, 4, 128))
# tgt: [batch_size, src_len, d_model]
dec_input = paddle.rand((2, 6, 128))
# src_mask: [batch_size, n_head, src_len, src_len]
enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
# tgt_mask: [batch_size, n_head, tgt_len, tgt_len]
dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
# memory_mask: [batch_size, n_head, tgt_len, src_len]
cross_attn_mask = paddle.rand((2, 2, 6, 4))
transformer = Transformer(128, 2, 4, 4, 512)
output = transformer(enc_input,
dec_input,
enc_self_attn_mask,
dec_self_attn_mask,
cross_attn_mask) # [2, 6, 128]
>>> import paddle
>>> from paddle.nn import Transformer
>>> # src: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # tgt: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 6, 128))
>>> # src_mask: [batch_size, n_head, src_len, src_len]
>>> enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # tgt_mask: [batch_size, n_head, tgt_len, tgt_len]
>>> dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
>>> # memory_mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 6, 4))
>>> transformer = Transformer(128, 2, 4, 4, 512)
>>> output = transformer(enc_input,
... dec_input,
... enc_self_attn_mask,
... dec_self_attn_mask,
... cross_attn_mask)
>>> print(output.shape)
[2, 6, 128]
"""
def __init__(
......@@ -1454,20 +1466,20 @@ class Transformer(Layer):
Examples:
.. code-block:: python
import paddle
from paddle.nn.layer.transformer import Transformer
length = 5
d_model, n_head, dim_feedforward = 8, 4, 64
transformer_paddle = Transformer(
d_model, n_head, dim_feedforward=dim_feedforward)
mask = transformer_paddle.generate_square_subsequent_mask(length)
print(mask)
# [[ 0. -inf -inf -inf -inf]
# [ 0. 0. -inf -inf -inf]
# [ 0. 0. 0. -inf -inf]
# [ 0. 0. 0. 0. -inf]
# [ 0. 0. 0. 0. 0.]]
>>> import paddle
>>> from paddle.nn.layer.transformer import Transformer
>>> length = 5
>>> d_model, n_head, dim_feedforward = 8, 4, 64
>>> transformer_paddle = Transformer(
... d_model, n_head, dim_feedforward=dim_feedforward)
>>> mask = transformer_paddle.generate_square_subsequent_mask(length)
>>> print(mask)
Tensor(shape=[5, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0. , -inf., -inf., -inf., -inf.],
[ 0. , 0. , -inf., -inf., -inf.],
[ 0. , 0. , 0. , -inf., -inf.],
[ 0. , 0. , 0. , 0. , -inf.],
[ 0. , 0. , 0. , 0. , 0. ]])
"""
return paddle.tensor.triu(
......
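The mask above is causal: filling the strict upper triangle with -inf drives those attention weights to zero after softmax, so position i can only attend to positions <= i. A minimal equivalent construction (an assumption about the implementation, consistent with the truncated paddle.tensor.triu call above):

>>> import paddle
>>> length = 5
>>> mask = paddle.triu(paddle.full([length, length], float("-inf")), diagonal=1)
>>> # row i keeps 0.0 for positions <= i and -inf for future positions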
......@@ -46,14 +46,14 @@ class PixelShuffle(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
x = paddle.randn(shape=[2,9,4,4])
pixel_shuffle = nn.PixelShuffle(3)
out = pixel_shuffle(x)
print(out.shape)
# [2, 1, 12, 12]
>>> x = paddle.randn(shape=[2, 9, 4, 4])
>>> pixel_shuffle = nn.PixelShuffle(3)
>>> out = pixel_shuffle(x)
>>> print(out.shape)
[2, 1, 12, 12]
"""
......@@ -109,14 +109,14 @@ class PixelUnshuffle(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
>>> import paddle
>>> import paddle.nn as nn
x = paddle.randn([2, 1, 12, 12])
pixel_unshuffle = nn.PixelUnshuffle(3)
out = pixel_unshuffle(x)
print(out.shape)
# [2, 9, 4, 4]
>>> x = paddle.randn([2, 1, 12, 12])
>>> pixel_unshuffle = nn.PixelUnshuffle(3)
>>> out = pixel_unshuffle(x)
>>> print(out.shape)
[2, 9, 4, 4]
"""
......@@ -175,24 +175,28 @@ class ChannelShuffle(Layer):
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
x = paddle.arange(0, 0.6, 0.1, 'float32')
x = paddle.reshape(x, [1, 6, 1, 1])
# [[[[0. ]],
# [[0.10000000]],
# [[0.20000000]],
# [[0.30000001]],
# [[0.40000001]],
# [[0.50000000]]]]
channel_shuffle = nn.ChannelShuffle(3)
y = channel_shuffle(x)
# [[[[0. ]],
# [[0.20000000]],
# [[0.40000001]],
# [[0.10000000]],
# [[0.30000001]],
# [[0.50000000]]]]
>>> import paddle
>>> import paddle.nn as nn
>>> x = paddle.arange(0, 0.6, 0.1, 'float32')
>>> x = paddle.reshape(x, [1, 6, 1, 1])
>>> print(x)
Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0. ]],
[[0.10000000]],
[[0.20000000]],
[[0.30000001]],
[[0.40000001]],
[[0.50000000]]]])
>>> channel_shuffle = nn.ChannelShuffle(3)
>>> y = channel_shuffle(x)
>>> print(y)
Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0. ]],
[[0.20000000]],
[[0.40000001]],
[[0.10000000]],
[[0.30000001]],
[[0.50000000]]]])
"""
def __init__(self, groups, data_format="NCHW", name=None):
......