From 418cc35da83605a68ab424b46145ed672c844097 Mon Sep 17 00:00:00 2001 From: cyberslack_lee Date: Thu, 3 Aug 2023 10:54:19 +0800 Subject: [PATCH] [xdoctest] reformat example code with google style in No.86-90 (#55812) * norm, test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview --- python/paddle/nn/layer/norm.py | 281 +++++++++------ python/paddle/nn/layer/pooling.py | 482 ++++++++++++++------------ python/paddle/nn/layer/rnn.py | 197 ++++++----- python/paddle/nn/layer/transformer.py | 198 ++++++----- python/paddle/nn/layer/vision.py | 68 ++-- 5 files changed, 682 insertions(+), 544 deletions(-) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index c85fa4f60ce..e01e426a75f 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -165,14 +165,18 @@ class InstanceNorm1D(_InstanceNormBase): .. code-block:: python - import paddle - - x = paddle.rand((2, 2, 3)) - instance_norm = paddle.nn.InstanceNorm1D(2) - instance_norm_out = instance_norm(x) - - print(instance_norm_out) - + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 2, 3)) + >>> instance_norm = paddle.nn.InstanceNorm1D(2) + >>> instance_norm_out = instance_norm(x) + + >>> print(instance_norm_out) + Tensor(shape=[2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[ 1.32132232, -0.22444785, -1.09687424], + [ 1.29506636, -0.15688568, -1.13818073]], + [[-0.27764025, 1.33961368, -1.06197333], + [ 0.44484580, -1.38489723, 0.94005162]]]) """ def __init__( @@ -255,13 +259,22 @@ class InstanceNorm2D(_InstanceNormBase): .. code-block:: python - import paddle - - x = paddle.rand((2, 2, 2, 3)) - instance_norm = paddle.nn.InstanceNorm2D(2) - instance_norm_out = instance_norm(x) - - print(instance_norm_out) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 2, 2, 3)) + >>> instance_norm = paddle.nn.InstanceNorm2D(2) + >>> instance_norm_out = instance_norm(x) + + >>> print(instance_norm_out) + Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[ 1.26652932, -0.60229748, -1.65705574], + [ 1.06272733, 0.24229208, -0.31219524]], + [[-0.85414171, 0.31684181, -1.42204332], + [ 1.00412714, -0.43966094, 1.39487720]]], + [[[ 0.83324969, 1.25046813, -0.79470295], + [-1.38446140, 0.81851846, -0.72307163]], + [[-0.33560610, 0.95346332, 0.45585334], + [-0.53483474, 1.20336461, -1.74224067]]]]) """ def __init__( @@ -342,13 +355,30 @@ class InstanceNorm3D(_InstanceNormBase): .. 
code-block:: python - import paddle - - x = paddle.rand((2, 2, 2, 2, 3)) - instance_norm = paddle.nn.InstanceNorm3D(2) - instance_norm_out = instance_norm(x) - - print(instance_norm_out.numpy) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 2, 2, 2, 3)) + >>> instance_norm = paddle.nn.InstanceNorm3D(2) + >>> instance_norm_out = instance_norm(x) + + >>> print(instance_norm_out) + Tensor(shape=[2, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[[ 0.60520107, -0.67670596, -1.40020907], + [ 0.46540472, -0.09736639, -0.47771260]], + [[-0.74365318, 0.63718963, -1.41333199], + [ 1.44764769, -0.25489071, 1.90842640]]], + [[[ 1.09773374, 1.49568439, -0.45503727], + [-1.01755965, 1.08368278, -0.38671401]], + [[-0.62252384, 0.60490805, 0.13109155], + [-0.81222630, 0.84286022, -1.96189928]]]], + [[[[ 0.28014541, 0.91674680, 1.71797717], + [-0.52062720, -0.74274176, -0.86439967]], + [[ 0.25707796, -1.23866379, 1.64422870], + [-1.48577297, -0.13187379, 0.16790220]]], + [[[-1.49266160, 1.57909954, 0.46455818], + [-0.14981404, 1.46959865, 0.24957968]], + [[ 0.25134835, -0.03276967, -0.30318922], + [ 0.76263177, -1.11345232, -1.68492818]]]]]) """ def __init__( @@ -410,13 +440,38 @@ class GroupNorm(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2)) - group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) - group_norm_out = group_norm(x) - - print(group_norm_out) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2)) + >>> group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) + >>> group_norm_out = group_norm(x) + + >>> print(group_norm_out) + Tensor(shape=[2, 6, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]]], + [[[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]], + [[-1.34163547, -0.44721183], + [ 0.44721183, 1.34163547]]]]) """ def __init__( @@ -575,13 +630,22 @@ class LayerNorm(Layer): .. code-block:: python - import paddle - - x = paddle.rand((2, 2, 2, 3)) - layer_norm = paddle.nn.LayerNorm(x.shape[1:]) - layer_norm_out = layer_norm(x) - - print(layer_norm_out) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 2, 2, 3)) + >>> layer_norm = paddle.nn.LayerNorm(x.shape[1:]) + >>> layer_norm_out = layer_norm(x) + + >>> print(layer_norm_out) + Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[ 0.60520101, -0.67670590, -1.40020895], + [ 0.46540466, -0.09736638, -0.47771254]], + [[-0.74365306, 0.63718957, -1.41333175], + [ 1.44764745, -0.25489068, 1.90842617]]], + [[[ 1.09773350, 1.49568415, -0.45503747], + [-1.01755989, 1.08368254, -0.38671425]], + [[-0.62252408, 0.60490781, 0.13109133], + [-0.81222653, 0.84285998, -1.96189952]]]]) """ def __init__( @@ -891,17 +955,17 @@ class BatchNorm(Layer): Examples: .. 
code-block:: python - import paddle.fluid as fluid - import paddle.nn as nn - from paddle.fluid.dygraph.base import to_variable - import numpy as np + >>> import paddle.fluid as fluid + >>> import paddle.nn as nn + >>> from paddle.fluid.dygraph.base import to_variable + >>> import numpy as np - x = np.random.random(size=(3, 10, 3, 7)).astype('float32') - with fluid.dygraph.guard(): - x = to_variable(x) - batch_norm = nn.layer.norm.BatchNorm(10) - hidden1 = batch_norm(x) + >>> x = np.random.random(size=(3, 10, 3, 7)).astype('float32') + >>> with fluid.dygraph.guard(): + ... x = to_variable(x) + ... batch_norm = nn.layer.norm.BatchNorm(10) + ... hidden1 = batch_norm(x) """ def __init__( @@ -1165,13 +1229,16 @@ class BatchNorm1D(_BatchNormBase): Examples: .. code-block:: python - import paddle - - x = paddle.rand((2, 1, 3)) - batch_norm = paddle.nn.BatchNorm1D(1) - batch_norm_out = batch_norm(x) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 1, 3)) + >>> batch_norm = paddle.nn.BatchNorm1D(1) + >>> batch_norm_out = batch_norm(x) - print(batch_norm_out) + >>> print(batch_norm_out) + Tensor(shape=[2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[ 1.26652932, -0.60229754, -1.65705597]], + [[ 1.06272745, 0.24229205, -0.31219530]]]) """ def __init__( @@ -1277,13 +1344,18 @@ class BatchNorm2D(_BatchNormBase): Examples: .. code-block:: python - import paddle - - x = paddle.rand((2, 1, 2, 3)) - batch_norm = paddle.nn.BatchNorm2D(1) - batch_norm_out = batch_norm(x) - - print(batch_norm_out) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 1, 2, 3)) + >>> batch_norm = paddle.nn.BatchNorm2D(1) + >>> batch_norm_out = batch_norm(x) + + >>> print(batch_norm_out) + Tensor(shape=[2, 1, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[ 0.60520101, -0.67670590, -1.40020895], + [ 0.46540475, -0.09736633, -0.47771257]]], + [[[-0.74365312, 0.63718963, -1.41333187], + [ 1.44764757, -0.25489068, 1.90842628]]]]) """ def _check_data_format(self, input): @@ -1363,13 +1435,22 @@ class BatchNorm3D(_BatchNormBase): Examples: .. code-block:: python - import paddle - - x = paddle.rand((2, 1, 2, 2, 3)) - batch_norm = paddle.nn.BatchNorm3D(1) - batch_norm_out = batch_norm(x) - - print(batch_norm_out) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand((2, 1, 2, 2, 3)) + >>> batch_norm = paddle.nn.BatchNorm3D(1) + >>> batch_norm_out = batch_norm(x) + + >>> print(batch_norm_out) + Tensor(shape=[2, 1, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[[ 0.28011751, -0.95211101, -1.64757574], + [ 0.14573872, -0.39522290, -0.76082933]], + [[-1.01646376, 0.31086648, -1.66019011], + [ 1.08991623, -0.54664266, 1.53283834]]]], + [[[[ 1.33958006, 1.71585774, -0.12862551], + [-0.66051245, 1.32629418, -0.06402326]], + [[-0.28699064, 0.87359405, 0.42558217], + [-0.46636176, 1.09858704, -1.55342245]]]]]) """ def __init__( @@ -1485,23 +1566,22 @@ class SyncBatchNorm(_BatchNormBase): Examples: .. 
code-block:: python - # required: gpu - - import paddle - import paddle.nn as nn - - x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32') + >>> # doctest: +REQUIRES(env:GPU) - if paddle.is_compiled_with_cuda(): - sync_batch_norm = nn.SyncBatchNorm(2) - hidden1 = sync_batch_norm(x) - print(hidden1) - # Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, - # [[[[ 0.26824948, 1.09363246], - # [ 0.26824948, -1.63013160]], + >>> import paddle + >>> import paddle.nn as nn + >>> paddle.device.set_device('gpu') + >>> x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32') - # [[ 0.80956620, -0.66528702], - # [-1.27446556, 1.13018656]]]]) + >>> if paddle.is_compiled_with_cuda(): + ... sync_batch_norm = nn.SyncBatchNorm(2) + ... hidden1 = sync_batch_norm(x) + ... print(hidden1) + Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, + [[[[ 0.26824948, 1.09363246], + [ 0.26824948, -1.63013160]], + [[ 0.80956620, -0.66528702], + [-1.27446556, 1.13018656]]]]) """ @@ -1625,11 +1705,16 @@ class SyncBatchNorm(_BatchNormBase): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5)) - sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + >>> model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5)) + >>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + >>> print(sync_model) + Sequential( + (0): Conv2D(3, 5, kernel_size=[3, 3], data_format=NCHW) + (1): SyncBatchNorm(num_features=5, momentum=0.9, epsilon=1e-05) + ) """ layer_output = layer @@ -1704,14 +1789,15 @@ class LocalResponseNorm(Layer): Examples: - .. code-block:: python + .. code-block:: python - import paddle + >>> import paddle - x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32") - m = paddle.nn.LocalResponseNorm(size=5) - y = m(x) - print(y.shape) # [3, 3, 112, 112] + >>> x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32") + >>> m = paddle.nn.LocalResponseNorm(size=5) + >>> y = m(x) + >>> print(y.shape) + [3, 3, 112, 112] """ def __init__( @@ -1801,15 +1887,14 @@ class SpectralNorm(Layer): None Examples: - .. code-block:: python - - import paddle - x = paddle.rand((2,8,32,32)) - - spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2) - spectral_norm_out = spectral_norm(x) + .. code-block:: python - print(spectral_norm_out.shape) # [2, 8, 32, 32] + >>> import paddle + >>> x = paddle.rand((2,8,32,32)) + >>> spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2) + >>> spectral_norm_out = spectral_norm(x) + >>> print(spectral_norm_out.shape) + [2, 8, 32, 32] """ diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 1a3e53095b5..5a872577100 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -65,13 +65,14 @@ class AvgPool1D(Layer): .. 
code-block:: python - import paddle - import paddle.nn as nn - data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) - AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) - pool_out = AvgPool1D(data) - # pool_out shape: [1, 3, 16] + >>> import paddle + >>> import paddle.nn as nn + >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) + >>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) + >>> pool_out = AvgPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] """ @@ -169,15 +170,15 @@ class AvgPool2D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - # max pool2d - input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1) - AvgPool2D = nn.AvgPool2D(kernel_size=2, - stride=2, padding=0) - output = AvgPool2D(input) - # output.shape [1, 3, 16, 16] + >>> # avg pool2d + >>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1) + >>> AvgPool2D = nn.AvgPool2D(kernel_size=2, stride=2, padding=0) + >>> output = AvgPool2D(input) + >>> print(output.shape) + [1, 3, 16, 16] """ @@ -268,15 +269,15 @@ class AvgPool3D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - # avg pool3d - input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1) - AvgPool3D = nn.AvgPool3D(kernel_size=2, - stride=2, padding=0) - output = AvgPool3D(input) - # output.shape [1, 2, 3, 16, 16] + >>> # avg pool3d + >>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1) + >>> AvgPool3D = nn.AvgPool3D(kernel_size=2, stride=2, padding=0) + >>> output = AvgPool3D(input) + >>> print(output.shape) + [1, 2, 1, 16, 16] """ @@ -366,17 +367,21 @@ class MaxPool1D(Layer): .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) - MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0) - pool_out = MaxPool1D(data) - # pool_out shape: [1, 3, 16] + >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) + >>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0) + >>> pool_out = MaxPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] - MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True) - pool_out, indices = MaxPool1D(data) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + >>> MaxPool1D = nn.MaxPool1D(kernel_size=2, stride=2, padding=0, return_mask=True) + >>> pool_out, indices = MaxPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] + >>> print(indices.shape) + [1, 3, 16] """ @@ -471,20 +476,23 @@ class MaxPool2D(Layer): Examples: ..
code-block:: python - import paddle - import paddle.nn as nn - - # max pool2d - input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1) - MaxPool2D = nn.MaxPool2D(kernel_size=2, - stride=2, padding=0) - output = MaxPool2D(input) - # output.shape [1, 3, 16, 16] - - # for return_mask=True - MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True) - output, max_indices = MaxPool2D(input) - # output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + >>> import paddle + >>> import paddle.nn as nn + + >>> # max pool2d + >>> input = paddle.uniform([1, 3, 32, 32], dtype="float32", min=-1, max=1) + >>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0) + >>> output = MaxPool2D(input) + >>> print(output.shape) + [1, 3, 16, 16] + + >>> # for return_mask=True + >>> MaxPool2D = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, return_mask=True) + >>> output, max_indices = MaxPool2D(input) + >>> print(output.shape) + [1, 3, 16, 16] + >>> print(max_indices.shape) + [1, 3, 16, 16] """ def __init__( @@ -568,20 +576,23 @@ class MaxPool3D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - # max pool3d - input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1) - MaxPool3D = nn.MaxPool3D(kernel_size=2, - stride=2, padding=0) - output = MaxPool3D(input) - # output.shape [1, 2, 3, 16, 16] - - # for return_mask=True - MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True) - output, max_indices = MaxPool3D(input) - # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16], + >>> import paddle + >>> import paddle.nn as nn + + >>> # max pool3d + >>> input = paddle.uniform([1, 2, 3, 32, 32], dtype="float32", min=-1, max=1) + >>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0) + >>> output = MaxPool3D(input) + >>> print(output.shape) + [1, 2, 1, 16, 16] + + >>> # for return_mask=True + >>> MaxPool3D = nn.MaxPool3D(kernel_size=2, stride=2, padding=0, return_mask=True) + >>> output, max_indices = MaxPool3D(input) + >>> print(output.shape) + [1, 2, 1, 16, 16] + >>> print(max_indices.shape) + [1, 2, 1, 16, 16] """ def __init__( @@ -650,25 +661,26 @@ class AdaptiveAvgPool1D(Layer): Examples: .. code-block:: python - # average adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart) - # - import paddle - import paddle.nn as nn - - data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) - AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16) - pool_out = AdaptiveAvgPool1D(data) - # pool_out shape: [1, 3, 16] + >>> # average adaptive pool1d + >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m], + >>> # output shape is [N, C, m], adaptive pool divide L dimension + >>> # of input data into m grids averagely and performs poolings in each + >>> # grid to get output. 
+ >>> # adaptive average pool performs calculations as follows: + >>> # + >>> # for i in range(m): + >>> # lstart = floor(i * L / m) + >>> # lend = ceil((i + 1) * L / m) + >>> # output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart) + >>> # + >>> import paddle + >>> import paddle.nn as nn + + >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) + >>> AdaptiveAvgPool1D = nn.AdaptiveAvgPool1D(output_size=16) + >>> pool_out = AdaptiveAvgPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] """ def __init__(self, output_size, name=None): @@ -726,28 +738,29 @@ class AdaptiveAvgPool2D(Layer): Examples: .. code-block:: python - # adaptive avg pool2d - # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m * n grids averagely and performs poolings in each - # grid to get output. - # adaptive avg pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - - x = paddle.rand([2, 3, 32, 32]) - - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3) - pool_out = adaptive_avg_pool(x = x) - # pool_out.shape is [2, 3, 3, 3] + >>> # adaptive avg pool2d + >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + >>> # of input data into m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive avg pool performs calculations as follows: + >>> # + >>> # for i in range(m): + >>> # for j in range(n): + >>> # hstart = floor(i * H / m) + >>> # hend = ceil((i + 1) * H / m) + >>> # wstart = floor(j * W / n) + >>> # wend = ceil((j + 1) * W / n) + >>> # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) + >>> # + >>> import paddle + + >>> x = paddle.rand([2, 3, 32, 32]) + + >>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2D(output_size=3) + >>> pool_out = adaptive_avg_pool(x = x) + >>> print(pool_out.shape) + [2, 3, 3, 3] """ def __init__(self, output_size, data_format="NCHW", name=None): @@ -815,31 +828,32 @@ class AdaptiveAvgPool3D(Layer): Examples: .. code-block:: python - # adaptive avg pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into l * m * n grids averagely and performs poolings in each - # grid to get output.
- # adaptive avg pool performs calculations as follow: - # - # for i in range(l): - # for j in range(m): - # for k in range(n): - # dstart = floor(i * D / l) - # dend = ceil((i + 1) * D / l) - # hstart = floor(j * H / m) - # hend = ceil((j + 1) * H / m) - # wstart = floor(k * W / n) - # wend = ceil((k + 1) * W / n) - # output[:, :, i, j, k] = - # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) - import paddle - - x = paddle.rand([2, 3, 8, 32, 32]) - - adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3) - pool_out = adaptive_avg_pool(x = x) - # pool_out = [2, 3, 3, 3, 3] + >>> # adaptive avg pool3d + >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + >>> # of input data into l * m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive avg pool performs calculations as follows: + >>> # + >>> # for i in range(l): + >>> # for j in range(m): + >>> # for k in range(n): + >>> # dstart = floor(i * D / l) + >>> # dend = ceil((i + 1) * D / l) + >>> # hstart = floor(j * H / m) + >>> # hend = ceil((j + 1) * H / m) + >>> # wstart = floor(k * W / n) + >>> # wend = ceil((k + 1) * W / n) + >>> # output[:, :, i, j, k] = + >>> # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + >>> import paddle + + >>> x = paddle.rand([2, 3, 8, 32, 32]) + + >>> adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3D(output_size=3) + >>> pool_out = adaptive_avg_pool(x = x) + >>> print(pool_out.shape) + [2, 3, 3, 3, 3] """ def __init__(self, output_size, data_format="NCDHW", name=None): @@ -898,30 +912,34 @@ class AdaptiveMaxPool1D(Layer): Examples: .. code-block:: python - # max adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = max(input[:, :, lstart: lend]) - # - import paddle - import paddle.nn as nn - - data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) - AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16) - pool_out = AdaptiveMaxPool1D(data) - # pool_out shape: [1, 3, 16] - - # for return_mask = true - AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True) - pool_out, indices = AdaptiveMaxPool1D(data) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + >>> # max adaptive pool1d + >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m], + >>> # output shape is [N, C, m], adaptive pool divide L dimension + >>> # of input data into m grids averagely and performs poolings in each + >>> # grid to get output.
+ >>> # adaptive max pool performs calculations as follows: + >>> # + >>> # for i in range(m): + >>> # lstart = floor(i * L / m) + >>> # lend = ceil((i + 1) * L / m) + >>> # output[:, :, i] = max(input[:, :, lstart: lend]) + >>> # + >>> import paddle + >>> import paddle.nn as nn + + >>> data = paddle.uniform([1, 3, 32], dtype="float32", min=-1, max=1) + >>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16) + >>> pool_out = AdaptiveMaxPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] + + >>> # for return_mask = true + >>> AdaptiveMaxPool1D = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True) + >>> pool_out, indices = AdaptiveMaxPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] + >>> print(indices.shape) + [1, 3, 16] """ @@ -981,27 +999,31 @@ class AdaptiveMaxPool2D(Layer): Examples: .. code-block:: python - # adaptive max pool2d - # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m * n grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - - x = paddle.rand([2, 3, 32, 32]) - - adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True) - pool_out, indices = adaptive_max_pool(x = x) + >>> # adaptive max pool2d + >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + >>> # of input data into m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive max pool performs calculations as follows: + >>> # + >>> # for i in range(m): + >>> # for j in range(n): + >>> # hstart = floor(i * H / m) + >>> # hend = ceil((i + 1) * H / m) + >>> # wstart = floor(j * W / n) + >>> # wend = ceil((j + 1) * W / n) + >>> # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) + >>> # + >>> import paddle + + >>> x = paddle.rand([2, 3, 32, 32]) + + >>> adaptive_max_pool = paddle.nn.AdaptiveMaxPool2D(output_size=3, return_mask=True) + >>> pool_out, indices = adaptive_max_pool(x = x) + >>> print(pool_out.shape) + [2, 3, 3, 3] + >>> print(indices.shape) + [2, 3, 3, 3] """ def __init__(self, output_size, return_mask=False, name=None): @@ -1067,33 +1089,37 @@ class AdaptiveMaxPool3D(Layer): Examples: .. code-block:: python - # adaptive max pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into l * m * n grids averagely and performs poolings in each - # grid to get output.
- # adaptive max pool performs calculations as follow: - # - # for i in range(l): - # for j in range(m): - # for k in range(n): - # dstart = floor(i * D / l) - # dend = ceil((i + 1) * D / l) - # hstart = floor(j * H / m) - # hend = ceil((j + 1) * H / m) - # wstart = floor(k * W / n) - # wend = ceil((k + 1) * W / n) - # output[:, :, i, j, k] = - # max(input[:, :, dstart:dend, hstart: hend, wstart: wend]) - import paddle - - x = paddle.rand([2, 3, 8, 32, 32]) - pool = paddle.nn.AdaptiveMaxPool3D(output_size=4) - out = pool(x) - # out shape: [2, 3, 4, 4, 4] - pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True) - out, indices = pool(x) - # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4] + >>> # adaptive max pool3d + >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + >>> # of input data into l * m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive max pool performs calculations as follows: + >>> # + >>> # for i in range(l): + >>> # for j in range(m): + >>> # for k in range(n): + >>> # dstart = floor(i * D / l) + >>> # dend = ceil((i + 1) * D / l) + >>> # hstart = floor(j * H / m) + >>> # hend = ceil((j + 1) * H / m) + >>> # wstart = floor(k * W / n) + >>> # wend = ceil((k + 1) * W / n) + >>> # output[:, :, i, j, k] = + >>> # max(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + >>> import paddle + + >>> x = paddle.rand([2, 3, 8, 32, 32]) + >>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=4) + >>> out = pool(x) + >>> print(out.shape) + [2, 3, 4, 4, 4] + >>> pool = paddle.nn.AdaptiveMaxPool3D(output_size=3, return_mask=True) + >>> out, indices = pool(x) + >>> print(out.shape) + [2, 3, 3, 3, 3] + >>> print(indices.shape) + [2, 3, 3, 3, 3] """ @@ -1156,15 +1182,19 @@ class MaxUnPool1D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - data = paddle.rand(shape=[1, 3, 16]) - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 3, 8], indices shape: [1, 3, 8] - Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0) - unpool_out = Unpool1D(pool_out, indices) - # unpool_out shape: [1, 3, 16] + >>> data = paddle.rand(shape=[1, 3, 16]) + >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 3, 8] + >>> print(indices.shape) + [1, 3, 8] + >>> Unpool1D = paddle.nn.MaxUnPool1D(kernel_size=2, padding=0) + >>> unpool_out = Unpool1D(pool_out, indices) + >>> print(unpool_out.shape) + [1, 3, 16] """ @@ -1244,15 +1274,19 @@ class MaxUnPool2D(Layer): Examples: ..
code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - data = paddle.rand(shape=[1,1,6,6]) - pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 1, 3, 3], indices shape: [1, 1, 3, 3] - Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0) - unpool_out = Unpool2D(pool_out, indices) - # unpool_out shape: [1, 1, 6, 6] + >>> data = paddle.rand(shape=[1, 1, 6, 6]) + >>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 1, 3, 3] + >>> print(indices.shape) + [1, 1, 3, 3] + >>> Unpool2D = paddle.nn.MaxUnPool2D(kernel_size=2, padding=0) + >>> unpool_out = Unpool2D(pool_out, indices) + >>> print(unpool_out.shape) + [1, 1, 6, 6] """ @@ -1335,15 +1369,19 @@ class MaxUnPool3D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F - - data = paddle.rand(shape=[1, 1, 4, 4, 6]) - pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 1, 2, 2, 3], indices shape: [1, 1, 2, 2, 3] - Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0) - unpool_out = Unpool3D(pool_out, indices) - # unpool_out shape: [1, 1, 4, 4, 6] + >>> import paddle + >>> import paddle.nn.functional as F + + >>> data = paddle.rand(shape=[1, 1, 4, 4, 6]) + >>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 1, 2, 2, 3] + >>> print(indices.shape) + [1, 1, 2, 2, 3] + >>> Unpool3D = paddle.nn.MaxUnPool3D(kernel_size=2, padding=0) + >>> unpool_out = Unpool3D(pool_out, indices) + >>> print(unpool_out.shape) + [1, 1, 4, 4, 6] """ diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 991df623d96..a2122c2dab3 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -89,14 +89,18 @@ def rnn( .. code-block:: python - import paddle - paddle.disable_static() + >>> import paddle - cell = paddle.nn.SimpleRNNCell(16, 32) + >>> inputs = paddle.rand((4, 23, 16)) + >>> prev_h = paddle.randn((4, 32)) - inputs = paddle.rand((4, 23, 16)) - prev_h = paddle.randn((4, 32)) - outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h) + >>> cell = paddle.nn.SimpleRNNCell(16, 32) + >>> rnn = paddle.nn.RNN(cell) + >>> outputs, final_states = rnn(inputs, prev_h) + >>> print(outputs.shape) + [4, 23, 32] + >>> print(final_states.shape) + [4, 32] """ @@ -397,18 +401,17 @@ def birnn( .. code-block:: python - import paddle - paddle.disable_static() + >>> import paddle - cell_fw = paddle.nn.LSTMCell(16, 32) - cell_bw = paddle.nn.LSTMCell(16, 32) - - inputs = paddle.rand((4, 23, 16)) - hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32)) - hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32)) - initial_states = ((hf, cf), (hb, cb)) - outputs, final_states = paddle.nn.layer.birnn( - cell_fw, cell_bw, inputs, initial_states) + >>> cell_fw = paddle.nn.LSTMCell(16, 32) + >>> cell_bw = paddle.nn.LSTMCell(16, 32) + >>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw) + >>> inputs = paddle.rand((2, 23, 16)) + >>> outputs, final_states = rnn(inputs) + >>> print(outputs.shape) + [2, 23, 64] + >>> print(final_states[0][0].shape) + [2, 32] """ @@ -743,16 +746,15 @@ class SimpleRNNCell(RNNCellBase): .. 
code-block:: python - import paddle - - x = paddle.randn((4, 16)) - prev_h = paddle.randn((4, 32)) + >>> import paddle - cell = paddle.nn.SimpleRNNCell(16, 32) - y, h = cell(x, prev_h) - print(y.shape) + >>> x = paddle.randn((4, 16)) + >>> prev_h = paddle.randn((4, 32)) - #[4,32] + >>> cell = paddle.nn.SimpleRNNCell(16, 32) + >>> y, h = cell(x, prev_h) + >>> print(y.shape) + [4, 32] """ @@ -897,22 +899,21 @@ class LSTMCell(RNNCellBase): .. code-block:: python - import paddle - - x = paddle.randn((4, 16)) - prev_h = paddle.randn((4, 32)) - prev_c = paddle.randn((4, 32)) + >>> import paddle - cell = paddle.nn.LSTMCell(16, 32) - y, (h, c) = cell(x, (prev_h, prev_c)) + >>> x = paddle.randn((4, 16)) + >>> prev_h = paddle.randn((4, 32)) + >>> prev_c = paddle.randn((4, 32)) - print(y.shape) - print(h.shape) - print(c.shape) + >>> cell = paddle.nn.LSTMCell(16, 32) + >>> y, (h, c) = cell(x, (prev_h, prev_c)) - #[4,32] - #[4,32] - #[4,32] + >>> print(y.shape) + [4, 32] + >>> print(h.shape) + [4, 32] + >>> print(c.shape) + [4, 32] """ @@ -1059,19 +1060,19 @@ class GRUCell(RNNCellBase): .. code-block:: python - import paddle + >>> import paddle - x = paddle.randn((4, 16)) - prev_h = paddle.randn((4, 32)) + >>> x = paddle.randn((4, 16)) + >>> prev_h = paddle.randn((4, 32)) - cell = paddle.nn.GRUCell(16, 32) - y, h = cell(x, prev_h) + >>> cell = paddle.nn.GRUCell(16, 32) + >>> y, h = cell(x, prev_h) - print(y.shape) - print(h.shape) + >>> print(y.shape) + [4, 32] + >>> print(h.shape) + [4, 32] - #[4,32] - #[4,32] """ @@ -1189,20 +1190,19 @@ class RNN(Layer): .. code-block:: python - import paddle + >>> import paddle - inputs = paddle.rand((4, 23, 16)) - prev_h = paddle.randn((4, 32)) + >>> inputs = paddle.rand((4, 23, 16)) + >>> prev_h = paddle.randn((4, 32)) - cell = paddle.nn.SimpleRNNCell(16, 32) - rnn = paddle.nn.RNN(cell) - outputs, final_states = rnn(inputs, prev_h) + >>> cell = paddle.nn.SimpleRNNCell(16, 32) + >>> rnn = paddle.nn.RNN(cell) + >>> outputs, final_states = rnn(inputs, prev_h) - print(outputs.shape) - print(final_states.shape) - - #[4,23,32] - #[4,32] + >>> print(outputs.shape) + [4, 23, 32] + >>> print(final_states.shape) + [4, 32] """ @@ -1263,20 +1263,19 @@ class BiRNN(Layer): .. code-block:: python - import paddle - - cell_fw = paddle.nn.LSTMCell(16, 32) - cell_bw = paddle.nn.LSTMCell(16, 32) - rnn = paddle.nn.BiRNN(cell_fw, cell_bw) + >>> import paddle - inputs = paddle.rand((2, 23, 16)) - outputs, final_states = rnn(inputs) + >>> cell_fw = paddle.nn.LSTMCell(16, 32) + >>> cell_bw = paddle.nn.LSTMCell(16, 32) + >>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw) - print(outputs.shape) - print(final_states[0][0].shape,len(final_states),len(final_states[0])) + >>> inputs = paddle.rand((2, 23, 16)) + >>> outputs, final_states = rnn(inputs) - #[4,23,64] - #[2,32] 2 2 + >>> print(outputs.shape) + [2, 23, 64] + >>> print(final_states[0][0].shape,len(final_states),len(final_states[0])) + [2, 32] 2 2 """ @@ -1702,19 +1701,19 @@ class SimpleRNN(RNNBase): .. code-block:: python - import paddle + >>> import paddle - rnn = paddle.nn.SimpleRNN(16, 32, 2) + >>> rnn = paddle.nn.SimpleRNN(16, 32, 2) - x = paddle.randn((4, 23, 16)) - prev_h = paddle.randn((2, 4, 32)) - y, h = rnn(x, prev_h) + >>> x = paddle.randn((4, 23, 16)) + >>> prev_h = paddle.randn((2, 4, 32)) + >>> y, h = rnn(x, prev_h) - print(y.shape) - print(h.shape) + >>> print(y.shape) + [4, 23, 32] + >>> print(h.shape) + [2, 4, 32] - #[4,23,32] - #[2,4,32] """ @@ -1833,22 +1832,22 @@ class LSTM(RNNBase): .. 
code-block:: python - import paddle + >>> import paddle - rnn = paddle.nn.LSTM(16, 32, 2) + >>> rnn = paddle.nn.LSTM(16, 32, 2) - x = paddle.randn((4, 23, 16)) - prev_h = paddle.randn((2, 4, 32)) - prev_c = paddle.randn((2, 4, 32)) - y, (h, c) = rnn(x, (prev_h, prev_c)) + >>> x = paddle.randn((4, 23, 16)) + >>> prev_h = paddle.randn((2, 4, 32)) + >>> prev_c = paddle.randn((2, 4, 32)) + >>> y, (h, c) = rnn(x, (prev_h, prev_c)) - print(y.shape) - print(h.shape) - print(c.shape) + >>> print(y.shape) + [4, 23, 32] + >>> print(h.shape) + [2, 4, 32] + >>> print(c.shape) + [2, 4, 32] - #[4,23,32] - #[2,4,32] - #[2,4,32] """ @@ -1955,19 +1954,19 @@ class GRU(RNNBase): .. code-block:: python - import paddle + >>> import paddle - rnn = paddle.nn.GRU(16, 32, 2) + >>> rnn = paddle.nn.GRU(16, 32, 2) - x = paddle.randn((4, 23, 16)) - prev_h = paddle.randn((2, 4, 32)) - y, h = rnn(x, prev_h) + >>> x = paddle.randn((4, 23, 16)) + >>> prev_h = paddle.randn((2, 4, 32)) + >>> y, h = rnn(x, prev_h) - print(y.shape) - print(h.shape) + >>> print(y.shape) + [4, 23, 32] + >>> print(h.shape) + [2, 4, 32] - #[4,23,32] - #[2,4,32] """ diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 335b47d2599..e2e3f052240 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -141,14 +141,16 @@ class MultiHeadAttention(Layer): .. code-block:: python - import paddle - - # encoder input: [batch_size, sequence_length, d_model] - query = paddle.rand((2, 4, 128)) - # self attention mask: [batch_size, num_heads, query_len, query_len] - attn_mask = paddle.rand((2, 2, 4, 4)) - multi_head_attn = paddle.nn.MultiHeadAttention(128, 2) - output = multi_head_attn(query, None, None, attn_mask=attn_mask) # [2, 4, 128] + >>> import paddle + + >>> # encoder input: [batch_size, sequence_length, d_model] + >>> query = paddle.rand((2, 4, 128)) + >>> # self attention mask: [batch_size, num_heads, query_len, query_len] + >>> attn_mask = paddle.rand((2, 2, 4, 4)) + >>> multi_head_attn = paddle.nn.MultiHeadAttention(128, 2) + >>> output = multi_head_attn(query, None, None, attn_mask=attn_mask) + >>> print(output.shape) + [2, 4, 128] """ Cache = collections.namedtuple("Cache", ["k", "v"]) @@ -490,15 +492,17 @@ class TransformerEncoderLayer(Layer): .. code-block:: python - import paddle - from paddle.nn import TransformerEncoderLayer - - # encoder input: [batch_size, src_len, d_model] - enc_input = paddle.rand((2, 4, 128)) - # self attention mask: [batch_size, n_head, src_len, src_len] - attn_mask = paddle.rand((2, 2, 4, 4)) - encoder_layer = TransformerEncoderLayer(128, 2, 512) - enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128] + >>> import paddle + >>> from paddle.nn import TransformerEncoderLayer + + >>> # encoder input: [batch_size, src_len, d_model] + >>> enc_input = paddle.rand((2, 4, 128)) + >>> # self attention mask: [batch_size, n_head, src_len, src_len] + >>> attn_mask = paddle.rand((2, 2, 4, 4)) + >>> encoder_layer = TransformerEncoderLayer(128, 2, 512) + >>> enc_output = encoder_layer(enc_input, attn_mask) + >>> print(enc_output.shape) + [2, 4, 128] """ def __init__( @@ -659,16 +663,18 @@ class TransformerEncoder(Layer): .. 
code-block:: python - import paddle - from paddle.nn import TransformerEncoderLayer, TransformerEncoder - - # encoder input: [batch_size, src_len, d_model] - enc_input = paddle.rand((2, 4, 128)) - # self attention mask: [batch_size, n_head, src_len, src_len] - attn_mask = paddle.rand((2, 2, 4, 4)) - encoder_layer = TransformerEncoderLayer(128, 2, 512) - encoder = TransformerEncoder(encoder_layer, 2) - enc_output = encoder(enc_input, attn_mask) # [2, 4, 128] + >>> import paddle + >>> from paddle.nn import TransformerEncoderLayer, TransformerEncoder + + >>> # encoder input: [batch_size, src_len, d_model] + >>> enc_input = paddle.rand((2, 4, 128)) + >>> # self attention mask: [batch_size, n_head, src_len, src_len] + >>> attn_mask = paddle.rand((2, 2, 4, 4)) + >>> encoder_layer = TransformerEncoderLayer(128, 2, 512) + >>> encoder = TransformerEncoder(encoder_layer, 2) + >>> enc_output = encoder(enc_input, attn_mask) + >>> print(enc_output.shape) + [2, 4, 128] """ def __init__(self, encoder_layer, num_layers, norm=None): @@ -809,22 +815,24 @@ class TransformerDecoderLayer(Layer): .. code-block:: python - import paddle - from paddle.nn import TransformerDecoderLayer - - # decoder input: [batch_size, tgt_len, d_model] - dec_input = paddle.rand((2, 4, 128)) - # encoder output: [batch_size, src_len, d_model] - enc_output = paddle.rand((2, 6, 128)) - # self attention mask: [batch_size, n_head, tgt_len, tgt_len] - self_attn_mask = paddle.rand((2, 2, 4, 4)) - # cross attention mask: [batch_size, n_head, tgt_len, src_len] - cross_attn_mask = paddle.rand((2, 2, 4, 6)) - decoder_layer = TransformerDecoderLayer(128, 2, 512) - output = decoder_layer(dec_input, - enc_output, - self_attn_mask, - cross_attn_mask) # [2, 4, 128] + >>> import paddle + >>> from paddle.nn import TransformerDecoderLayer + + >>> # decoder input: [batch_size, tgt_len, d_model] + >>> dec_input = paddle.rand((2, 4, 128)) + >>> # encoder output: [batch_size, src_len, d_model] + >>> enc_output = paddle.rand((2, 6, 128)) + >>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len] + >>> self_attn_mask = paddle.rand((2, 2, 4, 4)) + >>> # cross attention mask: [batch_size, n_head, tgt_len, src_len] + >>> cross_attn_mask = paddle.rand((2, 2, 4, 6)) + >>> decoder_layer = TransformerDecoderLayer(128, 2, 512) + >>> output = decoder_layer(dec_input, + ... enc_output, + ... self_attn_mask, + ... cross_attn_mask) + >>> print(output.shape) + [2, 4, 128] """ def __init__( @@ -1031,23 +1039,25 @@ class TransformerDecoder(Layer): .. 
code-block:: python - import paddle - from paddle.nn import TransformerDecoderLayer, TransformerDecoder - - # decoder input: [batch_size, tgt_len, d_model] - dec_input = paddle.rand((2, 4, 128)) - # encoder output: [batch_size, src_len, d_model] - enc_output = paddle.rand((2, 6, 128)) - # self attention mask: [batch_size, n_head, tgt_len, tgt_len] - self_attn_mask = paddle.rand((2, 2, 4, 4)) - # cross attention mask: [batch_size, n_head, tgt_len, src_len] - cross_attn_mask = paddle.rand((2, 2, 4, 6)) - decoder_layer = TransformerDecoderLayer(128, 2, 512) - decoder = TransformerDecoder(decoder_layer, 2) - output = decoder(dec_input, - enc_output, - self_attn_mask, - cross_attn_mask) # [2, 4, 128] + >>> import paddle + >>> from paddle.nn import TransformerDecoderLayer, TransformerDecoder + + >>> # decoder input: [batch_size, tgt_len, d_model] + >>> dec_input = paddle.rand((2, 4, 128)) + >>> # encoder output: [batch_size, src_len, d_model] + >>> enc_output = paddle.rand((2, 6, 128)) + >>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len] + >>> self_attn_mask = paddle.rand((2, 2, 4, 4)) + >>> # cross attention mask: [batch_size, n_head, tgt_len, src_len] + >>> cross_attn_mask = paddle.rand((2, 2, 4, 6)) + >>> decoder_layer = TransformerDecoderLayer(128, 2, 512) + >>> decoder = TransformerDecoder(decoder_layer, 2) + >>> output = decoder(dec_input, - ... enc_output, + ... self_attn_mask, + ... cross_attn_mask) + >>> print(output.shape) + [2, 4, 128] """ def __init__(self, decoder_layer, num_layers, norm=None): @@ -1242,25 +1252,27 @@ class Transformer(Layer): .. code-block:: python - import paddle - from paddle.nn import Transformer - - # src: [batch_size, tgt_len, d_model] - enc_input = paddle.rand((2, 4, 128)) - # tgt: [batch_size, src_len, d_model] - dec_input = paddle.rand((2, 6, 128)) - # src_mask: [batch_size, n_head, src_len, src_len] - enc_self_attn_mask = paddle.rand((2, 2, 4, 4)) - # tgt_mask: [batch_size, n_head, tgt_len, tgt_len] - dec_self_attn_mask = paddle.rand((2, 2, 6, 6)) - # memory_mask: [batch_size, n_head, tgt_len, src_len] - cross_attn_mask = paddle.rand((2, 2, 6, 4)) - transformer = Transformer(128, 2, 4, 4, 512) - output = transformer(enc_input, - dec_input, - enc_self_attn_mask, - dec_self_attn_mask, - cross_attn_mask) # [2, 6, 128] + >>> import paddle + >>> from paddle.nn import Transformer + + >>> # src: [batch_size, src_len, d_model] + >>> enc_input = paddle.rand((2, 4, 128)) + >>> # tgt: [batch_size, tgt_len, d_model] + >>> dec_input = paddle.rand((2, 6, 128)) + >>> # src_mask: [batch_size, n_head, src_len, src_len] + >>> enc_self_attn_mask = paddle.rand((2, 2, 4, 4)) + >>> # tgt_mask: [batch_size, n_head, tgt_len, tgt_len] + >>> dec_self_attn_mask = paddle.rand((2, 2, 6, 6)) + >>> # memory_mask: [batch_size, n_head, tgt_len, src_len] + >>> cross_attn_mask = paddle.rand((2, 2, 6, 4)) + >>> transformer = Transformer(128, 2, 4, 4, 512) + >>> output = transformer(enc_input, + ... dec_input, + ... enc_self_attn_mask, + ... dec_self_attn_mask, + ... cross_attn_mask) + >>> print(output.shape) + [2, 6, 128] """ def __init__( @@ -1454,20 +1466,20 @@ class Transformer(Layer): Examples: .. code-block:: python - import paddle - from paddle.nn.layer.transformer import Transformer - length = 5 - d_model, n_head, dim_feedforward = 8, 4, 64 - transformer_paddle = Transformer( - d_model, n_head, dim_feedforward=dim_feedforward) - mask = transformer_paddle.generate_square_subsequent_mask(length) - print(mask) - - # [[ 0. -inf -inf -inf -inf] - # [ 0. 0.
-inf -inf -inf] - # [ 0. 0. 0. -inf -inf] - # [ 0. 0. 0. 0. -inf] - # [ 0. 0. 0. 0. 0.]] + >>> import paddle + >>> from paddle.nn.layer.transformer import Transformer + >>> length = 5 + >>> d_model, n_head, dim_feedforward = 8, 4, 64 + >>> transformer_paddle = Transformer( + ... d_model, n_head, dim_feedforward=dim_feedforward) + >>> mask = transformer_paddle.generate_square_subsequent_mask(length) + >>> print(mask) + Tensor(shape=[5, 5], dtype=float32, place=Place(cpu), stop_gradient=True, + [[ 0. , -inf., -inf., -inf., -inf.], + [ 0. , 0. , -inf., -inf., -inf.], + [ 0. , 0. , 0. , -inf., -inf.], + [ 0. , 0. , 0. , 0. , -inf.], + [ 0. , 0. , 0. , 0. , 0. ]]) """ return paddle.tensor.triu( diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index a48be90e74c..996699e513c 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -46,14 +46,14 @@ class PixelShuffle(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - x = paddle.randn(shape=[2,9,4,4]) - pixel_shuffle = nn.PixelShuffle(3) - out = pixel_shuffle(x) - print(out.shape) - # [2, 1, 12, 12] + >>> x = paddle.randn(shape=[2, 9, 4, 4]) + >>> pixel_shuffle = nn.PixelShuffle(3) + >>> out = pixel_shuffle(x) + >>> print(out.shape) + [2, 1, 12, 12] """ @@ -109,14 +109,14 @@ class PixelUnshuffle(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - x = paddle.randn([2, 1, 12, 12]) - pixel_unshuffle = nn.PixelUnshuffle(3) - out = pixel_unshuffle(x) - print(out.shape) - # [2, 9, 4, 4] + >>> x = paddle.randn([2, 1, 12, 12]) + >>> pixel_unshuffle = nn.PixelUnshuffle(3) + >>> out = pixel_unshuffle(x) + >>> print(out.shape) + [2, 9, 4, 4] """ @@ -175,24 +175,28 @@ class ChannelShuffle(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - x = paddle.arange(0, 0.6, 0.1, 'float32') - x = paddle.reshape(x, [1, 6, 1, 1]) - # [[[[0. ]], - # [[0.10000000]], - # [[0.20000000]], - # [[0.30000001]], - # [[0.40000001]], - # [[0.50000000]]]] - channel_shuffle = nn.ChannelShuffle(3) - y = channel_shuffle(x) - # [[[[0. ]], - # [[0.20000000]], - # [[0.40000001]], - # [[0.10000000]], - # [[0.30000001]], - # [[0.50000000]]]] + >>> import paddle + >>> import paddle.nn as nn + >>> x = paddle.arange(0, 0.6, 0.1, 'float32') + >>> x = paddle.reshape(x, [1, 6, 1, 1]) + >>> print(x) + Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0. ]], + [[0.10000000]], + [[0.20000000]], + [[0.30000001]], + [[0.40000001]], + [[0.50000000]]]]) + >>> channel_shuffle = nn.ChannelShuffle(3) + >>> y = channel_shuffle(x) + >>> print(y) + Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0. ]], + [[0.20000000]], + [[0.40000001]], + [[0.10000000]], + [[0.30000001]], + [[0.50000000]]]]) """ def __init__(self, groups, data_format="NCHW", name=None): -- GitLab
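The reformatted `>>> ` blocks above follow the Google/xdoctest sample-code style that this PR series targets. As a minimal local sanity check (a sketch, assuming the `xdoctest` package and `paddle` are both installed; `paddle.nn.layer.norm` is just one of the five modules touched by this patch):

import xdoctest

# Run every doctest-style example in one converted module. Blocks guarded by
# `# doctest: +REQUIRES(env:GPU)` are skipped unless the GPU environment
# variable is set, so this is safe on a CPU-only machine.
xdoctest.doctest_module("paddle.nn.layer.norm", command="all")

The same check can be run from a shell with `python -m xdoctest paddle.nn.layer.norm all`.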