Unverified commit 418cc35d, authored by cyberslack_lee, committed by GitHub

[xdoctest] reformat example code with google style in No.86-90 (#55812)

* norm, test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview
Parent 4ff6999a
@@ -165,14 +165,18 @@ class InstanceNorm1D(_InstanceNormBase):
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm1D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[ 1.32132232, -0.22444785, -1.09687424],
[ 1.29506636, -0.15688568, -1.13818073]],
[[-0.27764025, 1.33961368, -1.06197333],
[ 0.44484580, -1.38489723, 0.94005162]]])
""" """
def __init__( def __init__(
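As a quick sanity check on what `InstanceNorm1D` computes: each (sample, channel) slice is normalized over the length axis. A minimal sketch, assuming the default epsilon of 1e-5 and identity affine parameters:

.. code-block:: python
>>> import paddle
>>> x = paddle.rand((2, 2, 3))
>>> # normalize every (sample, channel) slice over the length axis
>>> mean = x.mean(axis=-1, keepdim=True)
>>> var = x.var(axis=-1, unbiased=False, keepdim=True)
>>> manual = (x - mean) / paddle.sqrt(var + 1e-5)
>>> out = paddle.nn.InstanceNorm1D(2)(x)
>>> assert paddle.allclose(out, manual, atol=1e-5)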
@@ -255,13 +259,22 @@ class InstanceNorm2D(_InstanceNormBase):
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm2D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 1.26652932, -0.60229748, -1.65705574],
[ 1.06272733, 0.24229208, -0.31219524]],
[[-0.85414171, 0.31684181, -1.42204332],
[ 1.00412714, -0.43966094, 1.39487720]]],
[[[ 0.83324969, 1.25046813, -0.79470295],
[-1.38446140, 0.81851846, -0.72307163]],
[[-0.33560610, 0.95346332, 0.45585334],
[-0.53483474, 1.20336461, -1.74224067]]]])
""" """
def __init__( def __init__(
@@ -342,13 +355,30 @@ class InstanceNorm3D(_InstanceNormBase):
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 2, 3))
>>> instance_norm = paddle.nn.InstanceNorm3D(2)
>>> instance_norm_out = instance_norm(x)
>>> print(instance_norm_out)
Tensor(shape=[2, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[[ 0.60520107, -0.67670596, -1.40020907],
[ 0.46540472, -0.09736639, -0.47771260]],
[[-0.74365318, 0.63718963, -1.41333199],
[ 1.44764769, -0.25489071, 1.90842640]]],
[[[ 1.09773374, 1.49568439, -0.45503727],
[-1.01755965, 1.08368278, -0.38671401]],
[[-0.62252384, 0.60490805, 0.13109155],
[-0.81222630, 0.84286022, -1.96189928]]]],
[[[[ 0.28014541, 0.91674680, 1.71797717],
[-0.52062720, -0.74274176, -0.86439967]],
[[ 0.25707796, -1.23866379, 1.64422870],
[-1.48577297, -0.13187379, 0.16790220]]],
[[[-1.49266160, 1.57909954, 0.46455818],
[-0.14981404, 1.46959865, 0.24957968]],
[[ 0.25134835, -0.03276967, -0.30318922],
[ 0.76263177, -1.11345232, -1.68492818]]]]])
""" """
def __init__( def __init__(
@@ -410,13 +440,38 @@ class GroupNorm(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
>>> group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
>>> group_norm_out = group_norm(x)
>>> print(group_norm_out)
Tensor(shape=[2, 6, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]]],
[[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]],
[[-1.34163547, -0.44721183],
[ 0.44721183, 1.34163547]]]])
""" """
def __init__( def __init__(
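A note on the numbers above: with `num_groups` equal to `num_channels`, every channel is normalized on its own, which is why each 2x2 slice shows the same normalized ramp. Under that setting group normalization should coincide with instance normalization; a small sketch of the equivalence, assuming both layers keep their default epsilon and identity affine parameters:

.. code-block:: python
>>> import paddle
>>> x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
>>> group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
>>> instance_norm = paddle.nn.InstanceNorm2D(6)
>>> # one group per channel makes group norm behave like instance norm
>>> assert paddle.allclose(group_norm(x), instance_norm(x), atol=1e-4)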
@@ -575,13 +630,22 @@ class LayerNorm(Layer):
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 2, 2, 3))
>>> layer_norm = paddle.nn.LayerNorm(x.shape[1:])
>>> layer_norm_out = layer_norm(x)
>>> print(layer_norm_out)
Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 0.60520101, -0.67670590, -1.40020895],
[ 0.46540466, -0.09736638, -0.47771254]],
[[-0.74365306, 0.63718957, -1.41333175],
[ 1.44764745, -0.25489068, 1.90842617]]],
[[[ 1.09773350, 1.49568415, -0.45503747],
[-1.01755989, 1.08368254, -0.38671425]],
[[-0.62252408, 0.60490781, 0.13109133],
[-0.81222653, 0.84285998, -1.96189952]]]])
""" """
def __init__( def __init__(
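For reference, `LayerNorm(x.shape[1:])` above normalizes each sample jointly over C, H and W. A minimal sketch reproducing it by hand, assuming the default epsilon of 1e-5 and identity affine parameters:

.. code-block:: python
>>> import paddle
>>> x = paddle.rand((2, 2, 2, 3))
>>> layer_norm = paddle.nn.LayerNorm(x.shape[1:])
>>> # statistics are taken jointly over every axis in normalized_shape
>>> mean = x.mean(axis=[1, 2, 3], keepdim=True)
>>> var = x.var(axis=[1, 2, 3], unbiased=False, keepdim=True)
>>> manual = (x - mean) / paddle.sqrt(var + 1e-5)
>>> assert paddle.allclose(layer_norm(x), manual, atol=1e-5)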
@@ -891,17 +955,17 @@ class BatchNorm(Layer):
Examples:
.. code-block:: python
>>> import paddle.fluid as fluid
>>> import paddle.nn as nn
>>> from paddle.fluid.dygraph.base import to_variable
>>> import numpy as np
>>> x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
>>> with fluid.dygraph.guard():
...     x = to_variable(x)
...     batch_norm = nn.layer.norm.BatchNorm(10)
...     hidden1 = batch_norm(x)
"""
def __init__(
@@ -1165,13 +1229,16 @@ class BatchNorm1D(_BatchNormBase):
Examples:
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 3))
>>> batch_norm = paddle.nn.BatchNorm1D(1)
>>> batch_norm_out = batch_norm(x)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[ 1.26652932, -0.60229754, -1.65705597]],
[[ 1.06272745, 0.24229205, -0.31219530]]])
"""
def __init__(
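For reference, in training mode `BatchNorm1D` takes its statistics over the batch and length axes for each channel. A minimal sketch, assuming the default epsilon of 1e-5 and identity affine parameters:

.. code-block:: python
>>> import paddle
>>> x = paddle.rand((2, 1, 3))
>>> batch_norm = paddle.nn.BatchNorm1D(1)
>>> # per channel, the mean and (biased) variance are taken over N and L
>>> mean = x.mean(axis=[0, 2], keepdim=True)
>>> var = x.var(axis=[0, 2], unbiased=False, keepdim=True)
>>> manual = (x - mean) / paddle.sqrt(var + 1e-5)
>>> assert paddle.allclose(batch_norm(x), manual, atol=1e-5)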
@@ -1277,13 +1344,18 @@ class BatchNorm2D(_BatchNormBase):
Examples:
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 2, 3))
>>> batch_norm = paddle.nn.BatchNorm2D(1)
>>> batch_norm_out = batch_norm(x)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[ 0.60520101, -0.67670590, -1.40020895],
[ 0.46540475, -0.09736633, -0.47771257]]],
[[[-0.74365312, 0.63718963, -1.41333187],
[ 1.44764757, -0.25489068, 1.90842628]]]])
""" """
def _check_data_format(self, input): def _check_data_format(self, input):
@@ -1363,13 +1435,22 @@ class BatchNorm3D(_BatchNormBase):
Examples:
.. code-block:: python
>>> import paddle
>>> paddle.seed(100)
>>> x = paddle.rand((2, 1, 2, 2, 3))
>>> batch_norm = paddle.nn.BatchNorm3D(1)
>>> batch_norm_out = batch_norm(x)
>>> print(batch_norm_out)
Tensor(shape=[2, 1, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
[[[[[ 0.28011751, -0.95211101, -1.64757574],
[ 0.14573872, -0.39522290, -0.76082933]],
[[-1.01646376, 0.31086648, -1.66019011],
[ 1.08991623, -0.54664266, 1.53283834]]]],
[[[[ 1.33958006, 1.71585774, -0.12862551],
[-0.66051245, 1.32629418, -0.06402326]],
[[-0.28699064, 0.87359405, 0.42558217],
[-0.46636176, 1.09858704, -1.55342245]]]]])
""" """
def __init__( def __init__(
@@ -1485,23 +1566,22 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python
>>> # doctest: +REQUIRES(env:GPU)
>>> import paddle
>>> import paddle.nn as nn
>>> paddle.device.set_device('gpu')
>>> x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32')
>>> if paddle.is_compiled_with_cuda():
...     sync_batch_norm = nn.SyncBatchNorm(2)
...     hidden1 = sync_batch_norm(x)
...     print(hidden1)
Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
[[[[ 0.26824948, 1.09363246],
[ 0.26824948, -1.63013160]],
[[ 0.80956620, -0.66528702],
[-1.27446556, 1.13018656]]]])
"""
@@ -1625,11 +1705,16 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5))
>>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
>>> print(sync_model)
Sequential(
(0): Conv2D(3, 5, kernel_size=[3, 3], data_format=NCHW)
(1): SyncBatchNorm(num_features=5, momentum=0.9, epsilon=1e-05)
)
""" """
layer_output = layer layer_output = layer
@@ -1706,12 +1791,13 @@ class LocalResponseNorm(Layer):
.. code-block:: python
>>> import paddle
>>> x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
>>> m = paddle.nn.LocalResponseNorm(size=5)
>>> y = m(x)
>>> print(y.shape)
[3, 3, 112, 112]
""" """
def __init__( def __init__(
@@ -1803,13 +1889,12 @@ class SpectralNorm(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> x = paddle.rand((2, 8, 32, 32))
>>> spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2)
>>> spectral_norm_out = spectral_norm(x)
>>> print(spectral_norm_out.shape)
[2, 8, 32, 32]
"""
......
@@ -65,13 +65,14 @@ class AvgPool1D(Layer):
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
>>> pool_out = AvgPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
""" """
@@ -169,15 +170,15 @@ class AvgPool2D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> # avg pool2d
>>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> AvgPool2D = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
>>> output = AvgPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
"""
@@ -268,15 +269,15 @@ class AvgPool3D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> # avg pool3d
>>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> AvgPool3D = nn.AvgPool3D(kernel_size=2, stride=2, padding=0)
>>> output = AvgPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
"""
@@ -366,17 +367,21 @@ class MaxPool1D(Layer):
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
>>> pool_out = MaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> pool_out, indices = MaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
""" """
@@ -471,20 +476,23 @@ class MaxPool2D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> # max pool2d
>>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
>>> output = MaxPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
>>> # for return_mask=True
>>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> output, max_indices = MaxPool2D(input)
>>> print(output.shape)
[1, 3, 16, 16]
>>> print(max_indices.shape)
[1, 3, 16, 16]
""" """
def __init__( def __init__(
@@ -568,20 +576,23 @@ class MaxPool3D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> # max pool3d
>>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1)
>>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0)
>>> output = MaxPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
>>> # for return_mask=True
>>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True)
>>> output, max_indices = MaxPool3D(input)
>>> print(output.shape)
[1, 2, 1, 16, 16]
>>> print(max_indices.shape)
[1, 2, 1, 16, 16]
""" """
def __init__( def __init__(
@@ -650,25 +661,26 @@ class AdaptiveAvgPool1D(Layer):
Examples:
.. code-block:: python
>>> # average adaptive pool1d
>>> # suppose the input data is in the shape of [N, C, L] and `output_size` is m or [m];
>>> # the output is then of shape [N, C, m]: adaptive pooling divides the L dimension
>>> # of the input data evenly into m grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs the following calculation:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = sum(input[:, :, lstart: lend]) / (lend - lstart)
>>> #
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16)
>>> pool_out = AdaptiveAvgPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
""" """
def __init__(self, output_size, name=None): def __init__(self, output_size, name=None):
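The commented pseudo-code above translates directly into Python; a minimal sketch that mirrors it and compares against the layer:

.. code-block:: python
>>> import math
>>> import paddle
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> pool_out = paddle.nn.AdaptiveAvgPool1D(output_size=16)(data)
>>> L, m = data.shape[-1], 16
>>> cols = []
>>> for i in range(m):
...     lstart, lend = math.floor(i * L / m), math.ceil((i + 1) * L / m)
...     cols.append(data[:, :, lstart:lend].mean(axis=-1, keepdim=True))
>>> manual = paddle.concat(cols, axis=-1)
>>> assert paddle.allclose(pool_out, manual, atol=1e-6)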
@@ -726,28 +738,29 @@ class AdaptiveAvgPool2D(Layer):
Examples:
.. code-block:: python
>>> # adaptive avg pool2d
>>> # suppose the input data is in the shape of [N, C, H, W] and `output_size` is [m, n];
>>> # the output is then of shape [N, C, m, n]: adaptive pooling divides the H and W dimensions
>>> # of the input data evenly into m * n grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs the following calculation:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> x = paddle.rand([2, 3, 32, 32])
>>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3)
>>> pool_out = adaptive_avg_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3]
""" """
def __init__(self, output_size, data_format="NCHW", name=None): def __init__(self, output_size, data_format="NCHW", name=None):
@@ -815,31 +828,32 @@ class AdaptiveAvgPool3D(Layer):
Examples:
.. code-block:: python
>>> # adaptive avg pool3d
>>> # suppose the input data is in the shape of [N, C, D, H, W] and `output_size` is [l, m, n];
>>> # the output is then of shape [N, C, l, m, n]: adaptive pooling divides the D, H and W dimensions
>>> # of the input data evenly into l * m * n grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive avg pool performs the following calculation:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] =
>>> #                 avg(input[:, :, dstart: dend, hstart: hend, wstart: wend])
>>> import paddle
>>> x = paddle.rand([2, 3, 8, 32, 32])
>>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3)
>>> pool_out = adaptive_avg_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3, 3]
""" """
def __init__(self, output_size, data_format="NCDHW", name=None): def __init__(self, output_size, data_format="NCDHW", name=None):
@@ -898,30 +912,34 @@ class AdaptiveMaxPool1D(Layer):
Examples:
.. code-block:: python
>>> # max adaptive pool1d
>>> # suppose the input data is in the shape of [N, C, L] and `output_size` is m or [m];
>>> # the output is then of shape [N, C, m]: adaptive pooling divides the L dimension
>>> # of the input data evenly into m grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs the following calculation:
>>> #
>>> # for i in range(m):
>>> #     lstart = floor(i * L / m)
>>> #     lend = ceil((i + 1) * L / m)
>>> #     output[:, :, i] = max(input[:, :, lstart: lend])
>>> #
>>> import paddle
>>> import paddle.nn as nn
>>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1)
>>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16)
>>> pool_out = AdaptiveMaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> # for return_mask = true
>>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
>>> pool_out, indices = AdaptiveMaxPool1D(data)
>>> print(pool_out.shape)
[1, 3, 16]
>>> print(indices.shape)
[1, 3, 16]
""" """
@@ -981,27 +999,31 @@ class AdaptiveMaxPool2D(Layer):
Examples:
.. code-block:: python
>>> # adaptive max pool2d
>>> # suppose the input data is in the shape of [N, C, H, W] and `output_size` is [m, n];
>>> # the output is then of shape [N, C, m, n]: adaptive pooling divides the H and W dimensions
>>> # of the input data evenly into m * n grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs the following calculation:
>>> #
>>> # for i in range(m):
>>> #     for j in range(n):
>>> #         hstart = floor(i * H / m)
>>> #         hend = ceil((i + 1) * H / m)
>>> #         wstart = floor(j * W / n)
>>> #         wend = ceil((j + 1) * W / n)
>>> #         output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
>>> #
>>> import paddle
>>> x = paddle.rand([2, 3, 32, 32])
>>> adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
>>> pool_out, indices = adaptive_max_pool(x=x)
>>> print(pool_out.shape)
[2, 3, 3, 3]
>>> print(indices.shape)
[2, 3, 3, 3]
""" """
def __init__(self, output_size, return_mask=False, name=None): def __init__(self, output_size, return_mask=False, name=None):
@@ -1067,33 +1089,37 @@ class AdaptiveMaxPool3D(Layer):
Examples:
.. code-block:: python
>>> # adaptive max pool3d
>>> # suppose the input data is in the shape of [N, C, D, H, W] and `output_size` is [l, m, n];
>>> # the output is then of shape [N, C, l, m, n]: adaptive pooling divides the D, H and W dimensions
>>> # of the input data evenly into l * m * n grids and performs a pooling in each
>>> # grid to get the output.
>>> # adaptive max pool performs the following calculation:
>>> #
>>> # for i in range(l):
>>> #     for j in range(m):
>>> #         for k in range(n):
>>> #             dstart = floor(i * D / l)
>>> #             dend = ceil((i + 1) * D / l)
>>> #             hstart = floor(j * H / m)
>>> #             hend = ceil((j + 1) * H / m)
>>> #             wstart = floor(k * W / n)
>>> #             wend = ceil((k + 1) * W / n)
>>> #             output[:, :, i, j, k] =
>>> #                 max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
>>> import paddle
>>> x = paddle.rand([2, 3, 8, 32, 32])
>>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=4)
>>> out = pool(x)
>>> print(out.shape)
[2, 3, 4, 4, 4]
>>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
>>> out, indices = pool(x)
>>> print(out.shape)
[2, 3, 3, 3, 3]
>>> print(indices.shape)
[2, 3, 3, 3, 3]
""" """
@@ -1156,15 +1182,19 @@ class MaxUnPool1D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.rand(shape=[1, 3, 16])
>>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 3, 8]
>>> print(indices.shape)
[1, 3, 8]
>>> Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0)
>>> unpool_out = Unpool1D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 3, 16]
""" """
@@ -1244,15 +1274,19 @@ class MaxUnPool2D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.rand(shape=[1, 1, 6, 6])
>>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 3, 3]
>>> print(indices.shape)
[1, 1, 3, 3]
>>> Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0)
>>> unpool_out = Unpool2D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 1, 6, 6]
""" """
@@ -1335,15 +1369,19 @@ class MaxUnPool3D(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn.functional as F
>>> data = paddle.rand(shape=[1, 1, 4, 4, 6])
>>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True)
>>> print(pool_out.shape)
[1, 1, 2, 2, 3]
>>> print(indices.shape)
[1, 1, 2, 2, 3]
>>> Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0)
>>> unpool_out = Unpool3D(pool_out, indices)
>>> print(unpool_out.shape)
[1, 1, 4, 4, 6]
""" """
......
@@ -89,14 +89,18 @@ def rnn(
.. code-block:: python
>>> import paddle
>>> inputs = paddle.rand((4, 23, 16))
>>> prev_h = paddle.randn((4, 32))
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> rnn = paddle.nn.RNN(cell)
>>> outputs, final_states = rnn(inputs, prev_h)
>>> print(outputs.shape)
[4, 23, 32]
>>> print(final_states.shape)
[4, 32]
""" """
@@ -397,18 +401,17 @@ def birnn(
.. code-block:: python
>>> import paddle
>>> cell_fw = paddle.nn.LSTMCell(16, 32)
>>> cell_bw = paddle.nn.LSTMCell(16, 32)
>>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
>>> inputs = paddle.rand((2, 23, 16))
>>> outputs, final_states = rnn(inputs)
>>> print(outputs.shape)
[2, 23, 64]
>>> print(final_states[0][0].shape)
[2, 32]
"""
@@ -743,16 +746,15 @@ class SimpleRNNCell(RNNCellBase):
.. code-block:: python
>>> import paddle
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> y, h = cell(x, prev_h)
>>> print(y.shape)
[4, 32]
""" """
@@ -897,22 +899,21 @@ class LSTMCell(RNNCellBase):
.. code-block:: python
>>> import paddle
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
>>> prev_c = paddle.randn((4, 32))
>>> cell = paddle.nn.LSTMCell(16, 32)
>>> y, (h, c) = cell(x, (prev_h, prev_c))
>>> print(y.shape)
[4, 32]
>>> print(h.shape)
[4, 32]
>>> print(c.shape)
[4, 32]
""" """
@@ -1059,19 +1060,19 @@ class GRUCell(RNNCellBase):
.. code-block:: python
>>> import paddle
>>> x = paddle.randn((4, 16))
>>> prev_h = paddle.randn((4, 32))
>>> cell = paddle.nn.GRUCell(16, 32)
>>> y, h = cell(x, prev_h)
>>> print(y.shape)
[4, 32]
>>> print(h.shape)
[4, 32]
"""
@@ -1189,20 +1190,19 @@ class RNN(Layer):
.. code-block:: python
>>> import paddle
>>> inputs = paddle.rand((4, 23, 16))
>>> prev_h = paddle.randn((4, 32))
>>> cell = paddle.nn.SimpleRNNCell(16, 32)
>>> rnn = paddle.nn.RNN(cell)
>>> outputs, final_states = rnn(inputs, prev_h)
>>> print(outputs.shape)
[4, 23, 32]
>>> print(final_states.shape)
[4, 32]
"""
@@ -1263,20 +1263,19 @@ class BiRNN(Layer):
.. code-block:: python
>>> import paddle
>>> cell_fw = paddle.nn.LSTMCell(16, 32)
>>> cell_bw = paddle.nn.LSTMCell(16, 32)
>>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
>>> inputs = paddle.rand((2, 23, 16))
>>> outputs, final_states = rnn(inputs)
>>> print(outputs.shape)
[2, 23, 64]
>>> print(final_states[0][0].shape, len(final_states), len(final_states[0]))
[2, 32] 2 2
""" """
@@ -1702,19 +1701,19 @@ class SimpleRNN(RNNBase):
.. code-block:: python
>>> import paddle
>>> rnn = paddle.nn.SimpleRNN(16, 32, 2)
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> y, h = rnn(x, prev_h)
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
"""
@@ -1833,22 +1832,22 @@ class LSTM(RNNBase):
.. code-block:: python
>>> import paddle
>>> rnn = paddle.nn.LSTM(16, 32, 2)
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> prev_c = paddle.randn((2, 4, 32))
>>> y, (h, c) = rnn(x, (prev_h, prev_c))
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
>>> print(c.shape)
[2, 4, 32]
"""
@@ -1955,19 +1954,19 @@ class GRU(RNNBase):
.. code-block:: python
>>> import paddle
>>> rnn = paddle.nn.GRU(16, 32, 2)
>>> x = paddle.randn((4, 23, 16))
>>> prev_h = paddle.randn((2, 4, 32))
>>> y, h = rnn(x, prev_h)
>>> print(y.shape)
[4, 23, 32]
>>> print(h.shape)
[2, 4, 32]
"""
......
@@ -141,14 +141,16 @@ class MultiHeadAttention(Layer):
.. code-block:: python
>>> import paddle
>>> # encoder input: [batch_size, sequence_length, d_model]
>>> query = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, num_heads, query_len, query_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> multi_head_attn = paddle.nn.MultiHeadAttention(128, 2)
>>> output = multi_head_attn(query, None, None, attn_mask=attn_mask)
>>> print(output.shape)
[2, 4, 128]
""" """
Cache = collections.namedtuple("Cache", ["k", "v"]) Cache = collections.namedtuple("Cache", ["k", "v"])
@@ -490,15 +492,17 @@ class TransformerEncoderLayer(Layer):
.. code-block:: python
>>> import paddle
>>> from paddle.nn import TransformerEncoderLayer
>>> # encoder input: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, n_head, src_len, src_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
>>> enc_output = encoder_layer(enc_input, attn_mask)
>>> print(enc_output.shape)
[2, 4, 128]
""" """
def __init__( def __init__(
@@ -659,16 +663,18 @@ class TransformerEncoder(Layer):
.. code-block:: python
>>> import paddle
>>> from paddle.nn import TransformerEncoderLayer, TransformerEncoder
>>> # encoder input: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # self attention mask: [batch_size, n_head, src_len, src_len]
>>> attn_mask = paddle.rand((2, 2, 4, 4))
>>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
>>> encoder = TransformerEncoder(encoder_layer, 2)
>>> enc_output = encoder(enc_input, attn_mask)
>>> print(enc_output.shape)
[2, 4, 128]
""" """
def __init__(self, encoder_layer, num_layers, norm=None): def __init__(self, encoder_layer, num_layers, norm=None):
@@ -809,22 +815,24 @@ class TransformerDecoderLayer(Layer):
.. code-block:: python
>>> import paddle
>>> from paddle.nn import TransformerDecoderLayer
>>> # decoder input: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 4, 128))
>>> # encoder output: [batch_size, src_len, d_model]
>>> enc_output = paddle.rand((2, 6, 128))
>>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
>>> self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
>>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
>>> output = decoder_layer(dec_input,
...                        enc_output,
...                        self_attn_mask,
...                        cross_attn_mask)
>>> print(output.shape)
[2, 4, 128]
""" """
def __init__( def __init__(
@@ -1031,23 +1039,25 @@ class TransformerDecoder(Layer):
.. code-block:: python
>>> import paddle
>>> from paddle.nn import TransformerDecoderLayer, TransformerDecoder
>>> # decoder input: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 4, 128))
>>> # encoder output: [batch_size, src_len, d_model]
>>> enc_output = paddle.rand((2, 6, 128))
>>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
>>> self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
>>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
>>> decoder = TransformerDecoder(decoder_layer, 2)
>>> output = decoder(dec_input,
...                  enc_output,
...                  self_attn_mask,
...                  cross_attn_mask)
>>> print(output.shape)
[2, 4, 128]
""" """
def __init__(self, decoder_layer, num_layers, norm=None): def __init__(self, decoder_layer, num_layers, norm=None):
@@ -1242,25 +1252,27 @@ class Transformer(Layer):
.. code-block:: python
>>> import paddle
>>> from paddle.nn import Transformer
>>> # src: [batch_size, src_len, d_model]
>>> enc_input = paddle.rand((2, 4, 128))
>>> # tgt: [batch_size, tgt_len, d_model]
>>> dec_input = paddle.rand((2, 6, 128))
>>> # src_mask: [batch_size, n_head, src_len, src_len]
>>> enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
>>> # tgt_mask: [batch_size, n_head, tgt_len, tgt_len]
>>> dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
>>> # memory_mask: [batch_size, n_head, tgt_len, src_len]
>>> cross_attn_mask = paddle.rand((2, 2, 6, 4))
>>> transformer = Transformer(128, 2, 4, 4, 512)
>>> output = transformer(enc_input,
...                      dec_input,
...                      enc_self_attn_mask,
...                      dec_self_attn_mask,
...                      cross_attn_mask)
>>> print(output.shape)
[2, 6, 128]
""" """
def __init__( def __init__(
@@ -1454,20 +1466,20 @@ class Transformer(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> from paddle.nn.layer.transformer import Transformer
>>> length = 5
>>> d_model, n_head, dim_feedforward = 8, 4, 64
>>> transformer_paddle = Transformer(
...     d_model, n_head, dim_feedforward=dim_feedforward)
>>> mask = transformer_paddle.generate_square_subsequent_mask(length)
>>> print(mask)
Tensor(shape=[5, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
[[ 0. , -inf., -inf., -inf., -inf.],
[ 0. , 0. , -inf., -inf., -inf.],
[ 0. , 0. , 0. , -inf., -inf.],
[ 0. , 0. , 0. , 0. , -inf.],
[ 0. , 0. , 0. , 0. , 0. ]])
"""
return paddle.tensor.triu(
......
@@ -46,14 +46,14 @@ class PixelShuffle(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> x = paddle.randn(shape=[2, 9, 4, 4])
>>> pixel_shuffle = nn.PixelShuffle(3)
>>> out = pixel_shuffle(x)
>>> print(out.shape)
[2, 1, 12, 12]
""" """
@@ -109,14 +109,14 @@ class PixelUnshuffle(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> x = paddle.randn([2, 1, 12, 12])
>>> pixel_unshuffle = nn.PixelUnshuffle(3)
>>> out = pixel_unshuffle(x)
>>> print(out.shape)
[2, 9, 4, 4]
""" """
@@ -175,24 +175,28 @@ class ChannelShuffle(Layer):
Examples:
.. code-block:: python
>>> import paddle
>>> import paddle.nn as nn
>>> x = paddle.arange(0, 0.6, 0.1, 'float32')
>>> x = paddle.reshape(x, [1, 6, 1, 1])
>>> print(x)
Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0. ]],
[[0.10000000]],
[[0.20000000]],
[[0.30000001]],
[[0.40000001]],
[[0.50000000]]]])
>>> channel_shuffle = nn.ChannelShuffle(3)
>>> y = channel_shuffle(x)
>>> print(y)
Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
[[[[0. ]],
[[0.20000000]],
[[0.40000001]],
[[0.10000000]],
[[0.30000001]],
[[0.50000000]]]])
""" """
def __init__(self, groups, data_format="NCHW", name=None):
......