From 4ff6999aa5e63b54c00c82eddb10d22a70c8ca59 Mon Sep 17 00:00:00 2001 From: cyberslack_lee Date: Thu, 3 Aug 2023 10:53:45 +0800 Subject: [PATCH] [xdoctest] reformat example code with google style No.80-85 (#55806) * [Doctest]fix No.80-85, test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview * fix * Apply suggestions from code review * Apply suggestions from code review * Apply suggestions from code review * test=docs_preview * test=docs_preview * test=docs_preview * test=docs_preview --------- Co-authored-by: Nyakku Shigure --- python/paddle/nn/functional/pooling.py | 468 +++++---- .../paddle/nn/functional/sparse_attention.py | 89 +- python/paddle/nn/layer/activation.py | 493 ++++----- python/paddle/nn/layer/common.py | 684 ++++++------- python/paddle/nn/layer/layers.py | 950 ++++++++++-------- 5 files changed, 1451 insertions(+), 1233 deletions(-) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 143f37ddc4d..955d63469d3 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -219,13 +219,14 @@ def avg_pool1d( Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - data = paddle.uniform([1, 3, 32], paddle.float32) - AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) - pool_out = AvgPool1D(data) - # pool_out shape: [1, 3, 16] + >>> import paddle + >>> import paddle.nn as nn + + >>> data = paddle.uniform([1, 3, 32], paddle.float32) + >>> AvgPool1D = nn.AvgPool1D(kernel_size=2, stride=2, padding=0) + >>> pool_out = AvgPool1D(data) + >>> print(pool_out.shape) + [1, 3, 16] """ """NCL to NCHW""" data_format = "NCHW" @@ -350,15 +351,16 @@ def avg_pool2d( Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - # avg pool2d - x = paddle.uniform([1, 3, 32, 32], paddle.float32) - out = F.avg_pool2d(x, - kernel_size=2, - stride=2, padding=0) - # out.shape [1, 3, 16, 16] + >>> # avg pool2d + >>> x = paddle.uniform([1, 3, 32, 32], paddle.float32) + >>> out = F.avg_pool2d(x, + ... kernel_size=2, + ... stride=2, padding=0) + >>> print(out.shape) + [1, 3, 16, 16] """ kernel_size = convert_to_list(kernel_size, 2, 'pool_size') if stride is None: @@ -480,16 +482,16 @@ def avg_pool3d( Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32) - # avg pool3d - out = paddle.nn.functional.avg_pool3d( - x, - kernel_size = 2, - stride = 2, - padding=0) - # out.shape: [1, 3, 16, 16, 16] + >>> x = paddle.uniform([1, 3, 32, 32, 32], paddle.float32) + >>> # avg pool3d + >>> out = paddle.nn.functional.avg_pool3d(x, + ... kernel_size = 2, + ... stride = 2, + ... padding=0) + >>> print(out.shape) + [1, 3, 16, 16, 16] """ kernel_size = convert_to_list(kernel_size, 3, 'pool_size') if stride is None: @@ -599,14 +601,18 @@ def max_pool1d( Examples: .. 
code-block:: python - import paddle - import paddle.nn.functional as F - - data = paddle.uniform([1, 3, 32], paddle.float32) - pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) - # pool_out shape: [1, 3, 16] - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] + >>> import paddle + >>> import paddle.nn.functional as F + + >>> data = paddle.uniform([1, 3, 32], paddle.float32) + >>> pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) + >>> print(pool_out.shape) + [1, 3, 16] + >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 3, 16] + >>> print(indices.shape) + [1, 3, 16] """ """NCL to NCHW""" data_format = "NCHW" @@ -789,14 +795,18 @@ def max_unpool1d( Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - data = paddle.rand(shape=[1, 3, 16]) - pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 3, 8], indices shape: [1, 3, 8] - unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0) - # unpool_out shape: [1, 3, 16] + >>> data = paddle.rand(shape=[1, 3, 16]) + >>> pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 3, 8] + >>> print(indices.shape) + [1, 3, 8] + >>> unpool_out = F.max_unpool1d(pool_out, indices, kernel_size=2, padding=0) + >>> print(unpool_out.shape) + [1, 3, 16] """ """NCL to NCHW""" @@ -926,18 +936,23 @@ def max_unpool2d( Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - data = paddle.rand(shape=[1,1,6,6]) - pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 1, 3, 3], indices shape: [1, 1, 3, 3] - unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0) - # unpool_out shape: [1, 1, 6, 6] + >>> data = paddle.rand(shape=[1, 1, 6, 6]) + >>> pool_out, indices = F.max_pool2d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 1, 3, 3] + >>> print(indices.shape) + [1, 1, 3, 3] + >>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0) + >>> print(unpool_out.shape) + [1, 1, 6, 6] - # specify a different output size than input size - unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7,7]) - # unpool_out shape: [1, 1, 7, 7] + >>> # specify a different output size than input size + >>> unpool_out = F.max_unpool2d(pool_out, indices, kernel_size=2, padding=0, output_size=[7, 7]) + >>> print(unpool_out.shape) + [1, 1, 7, 7] """ if x.ndim != 4: @@ -1073,14 +1088,18 @@ def max_unpool3d( Examples: .. 
code-block:: python - import paddle - import paddle.nn.functional as F + >>> import paddle + >>> import paddle.nn.functional as F - data = paddle.rand(shape=[1, 1, 4, 4, 6]) - pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True) - # pool_out shape: [1, 1, 2, 2, 3], indices shape: [1, 1, 2, 2, 3] - unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0) - # unpool_out shape: [1, 1, 4, 4, 6] + >>> data = paddle.rand(shape=[1, 1, 4, 4, 6]) + >>> pool_out, indices = F.max_pool3d(data, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(pool_out.shape) + [1, 1, 2, 2, 3] + >>> print(indices.shape) + [1, 1, 2, 2, 3] + >>> unpool_out = F.max_unpool3d(pool_out, indices, kernel_size=2, padding=0) + >>> print(unpool_out.shape) + [1, 1, 4, 4, 6] """ if x.ndim != 5: @@ -1200,16 +1219,20 @@ def max_pool2d( Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F - - # max pool2d - x = paddle.uniform([1, 3, 32, 32], paddle.float32) - out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0) - # output.shape [1, 3, 16, 16] - # for return_mask=True - out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True) - # out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], + >>> import paddle + >>> import paddle.nn.functional as F + + >>> # max pool2d + >>> x = paddle.uniform([1, 3, 32, 32], paddle.float32) + >>> out = F.max_pool2d(x, kernel_size=2, stride=2, padding=0) + >>> print(out.shape) + [1, 3, 16, 16] + >>> # for return_mask=True + >>> out, max_indices = F.max_pool2d(x, kernel_size=2, stride=2, padding=0, return_mask=True) + >>> print(out.shape) + [1, 3, 16, 16] + >>> print(max_indices.shape) + [1, 3, 16, 16] """ kernel_size = convert_to_list(kernel_size, 2, 'pool_size') @@ -1359,24 +1382,30 @@ def max_pool3d( Examples: .. code-block:: python - import paddle - import paddle.nn.functional as F - - # max pool3d - x = paddle.uniform([1, 3, 32, 32, 32]) - output = F.max_pool3d(x, - kernel_size=2, - stride=2, padding=0) - # output.shape [1, 3, 16, 16, 16] - # for return_mask=True - x = paddle.uniform([1, 3, 32, 32, 32]) - output, max_indices = paddle.nn.functional.max_pool3d(x, - kernel_size=2, - stride=2, - padding=0, - return_mask=True) - - # output.shape [1, 3, 16, 16, 16], max_indices.shape [1, 3, 16, 16, 16] + >>> import paddle + >>> import paddle.nn.functional as F + + >>> # max pool3d + >>> x = paddle.uniform([1, 3, 32, 32, 32]) + >>> output = F.max_pool3d(x, + ... kernel_size=2, + ... stride=2, + ... padding=0) + >>> print(output.shape) + [1, 3, 16, 16, 16] + + >>> # for return_mask=True + >>> x = paddle.uniform([1, 3, 32, 32, 32]) + >>> output, max_indices = paddle.nn.functional.max_pool3d(x, + ... kernel_size=2, + ... stride=2, + ... padding=0, + ... return_mask=True) + ... + >>> print(output.shape) + [1, 3, 16, 16, 16] + >>> print(max_indices.shape) + [1, 3, 16, 16, 16] """ kernel_size = convert_to_list(kernel_size, 3, 'pool_size') @@ -1468,24 +1497,25 @@ def adaptive_avg_pool1d(x, output_size, name=None): Examples: .. code-block:: python - # average adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. 
- # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) - # - import paddle - import paddle.nn.functional as F - - data = paddle.uniform([1, 3, 32]) - pool_out = F.adaptive_avg_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) + >>> # average adaptive pool1d + >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m], + >>> # output shape is [N, C, m], adaptive pool divide L dimension + >>> # of input data into m grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive max pool performs calculations as follow: + >>> # + >>> # for i in range(m): + >>> # lstart = floor(i * L / m) + >>> # lend = ceil((i + 1) * L / m) + >>> # output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend) + >>> # + >>> import paddle + >>> import paddle.nn.functional as F + + >>> data = paddle.uniform([1, 3, 32]) + >>> pool_out = F.adaptive_avg_pool1d(data, output_size=16) + >>> print(pool_out.shape) + [1, 3, 16] """ pool_type = 'avg' _check_input(x, 3) @@ -1567,29 +1597,29 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): Examples: .. code-block:: python - # adaptive avg pool2d - # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m * n grids averagely and performs poolings in each - # grid to get output. - # adaptive avg pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - - x = paddle.rand([2, 3, 32, 32]) - # x.shape is [2, 3, 32, 32] - out = paddle.nn.functional.adaptive_avg_pool2d( - x = x, - output_size=[3, 3]) - # out.shape is [2, 3, 3, 3] + >>> # adaptive avg pool2d + >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], + >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + >>> # of input data into m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive avg pool performs calculations as follow: + >>> # + >>> # for i in range(m): + >>> # for j in range(n): + >>> # hstart = floor(i * H / m) + >>> # hend = ceil((i + 1) * H / m) + >>> # wstart = floor(i * W / n) + >>> # wend = ceil((i + 1) * W / n) + >>> # output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend]) + >>> # + >>> import paddle + + >>> x = paddle.rand([2, 3, 32, 32]) + >>> # x.shape is [2, 3, 32, 32] + >>> out = paddle.nn.functional.adaptive_avg_pool2d(x = x, + ... output_size=[3, 3]) + >>> print(out.shape) + [2, 3, 3, 3] """ if data_format not in ["NCHW", "NHWC"]: @@ -1700,31 +1730,31 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): Examples: .. code-block:: python - # adaptive avg pool3d - # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into l * m * n grids averagely and performs poolings in each - # grid to get output. 
- # adaptive avg pool performs calculations as follow: - # - # for i in range(l): - # for j in range(m): - # for k in range(n): - # dstart = floor(i * D / l) - # dend = ceil((i + 1) * D / l) - # hstart = floor(j * H / m) - # hend = ceil((j + 1) * H / m) - # wstart = floor(k * W / n) - # wend = ceil((k + 1) * W / n) - # output[:, :, i, j, k] = - # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) - import paddle - - input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) - out = paddle.nn.functional.adaptive_avg_pool3d( - x = input_data, - output_size=[3, 3, 3]) - # out.shape is [2, 3, 3, 3, 3] + >>> # adaptive avg pool3d + >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], + >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + >>> # of input data into l * m * n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive avg pool performs calculations as follow: + >>> # + >>> # for i in range(l): + >>> # for j in range(m): + >>> # for k in range(n): + >>> # dstart = floor(i * D / l) + >>> # dend = ceil((i + 1) * D / l) + >>> # hstart = floor(j * H / m) + >>> # hend = ceil((j + 1) * H / m) + >>> # wstart = floor(k * W / n) + >>> # wend = ceil((k + 1) * W / n) + >>> # output[:, :, i, j, k] = + >>> # avg(input[:, :, dstart:dend, hstart: hend, wstart: wend]) + >>> import paddle + + >>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) + >>> out = paddle.nn.functional.adaptive_avg_pool3d(x = input_data, + ... output_size=[3, 3, 3]) + >>> print(out.shape) + [2, 3, 3, 3, 3] """ if data_format not in ["NCDHW", "NDHWC"]: @@ -1815,26 +1845,30 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None): Examples: .. code-block:: python - # max adaptive pool1d - # suppose input data in shape of [N, C, L], `output_size` is m or [m], - # output shape is [N, C, m], adaptive pool divide L dimension - # of input data into m grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # lstart = floor(i * L / m) - # lend = ceil((i + 1) * L / m) - # output[:, :, i] = max(input[:, :, lstart: lend]) - # - import paddle - import paddle.nn.functional as F - - data = paddle.uniform([1, 3, 32], paddle.float32) - pool_out = F.adaptive_max_pool1d(data, output_size=16) - # pool_out shape: [1, 3, 16]) - pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True) - # pool_out shape: [1, 3, 16] indices shape: [1, 3, 16] + >>> # max adaptive pool1d + >>> # suppose input data in shape of [N, C, L], `output_size` is m or [m], + >>> # output shape is [N, C, m], adaptive pool divide L dimension + >>> # of input data into m grids averagely and performs poolings in each + >>> # grid to get output. 
+ >>> # adaptive max pool performs calculations as follow: + >>> # + >>> # for i in range(m): + >>> # lstart = floor(i * L / m) + >>> # lend = ceil((i + 1) * L / m) + >>> # output[:, :, i] = max(input[:, :, lstart: lend]) + >>> # + >>> import paddle + >>> import paddle.nn.functional as F + + >>> data = paddle.uniform([1, 3, 32], paddle.float32) + >>> pool_out = F.adaptive_max_pool1d(data, output_size=16) + >>> print(pool_out.shape) + [1, 3, 16] + >>> pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_mask=True) + >>> print(pool_out.shape) + [1, 3, 16] + >>> print(indices.shape) + [1, 3, 16] """ _check_input(x, 3) @@ -1901,28 +1935,28 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None): Examples: .. code-block:: python - # max adaptive pool2d - # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] - # output shape is [N, C, m, n], adaptive pool divide H and W dimensions - # of input data into m*n grids averagely and performs poolings in each - # grid to get output. - # adaptive max pool performs calculations as follow: - # - # for i in range(m): - # for j in range(n): - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) - # - import paddle - - input_data = paddle.randn(shape=(2, 3, 32, 32)) - out = paddle.nn.functional.adaptive_max_pool2d( - x = input_data, - output_size=[3, 3]) - # out.shape is [2, 3, 3, 3] + >>> # max adaptive pool2d + >>> # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] + >>> # output shape is [N, C, m, n], adaptive pool divide H and W dimensions + >>> # of input data into m*n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive max pool performs calculations as follow: + >>> # + >>> # for i in range(m): + >>> # for j in range(n): + >>> # hstart = floor(i * H / m) + >>> # hend = ceil((i + 1) * H / m) + >>> # wstart = floor(i * W / n) + >>> # wend = ceil((i + 1) * W / n) + >>> # output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend]) + >>> # + >>> import paddle + + >>> input_data = paddle.randn(shape=(2, 3, 32, 32)) + >>> out = paddle.nn.functional.adaptive_max_pool2d(x = input_data, + ... output_size=[3, 3]) + >>> print(out.shape) + [2, 3, 3, 3] """ _check_input(x, 4) @@ -1987,31 +2021,31 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None): Examples: .. code-block:: python - # adaptive max pool3d - # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] - # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions - # of input data into m*n grids averagely and performs poolings in each - # grid to get output. 
- # adaptive max pool performs calculations as follow: - # - # for i in range(l): - # for j in range(m): - # for k in range(n): - # dstart = floor(i * D / l) - # dend = ceil((i + 1) * D / l) - # hstart = floor(i * H / m) - # hend = ceil((i + 1) * H / m) - # wstart = floor(i * W / n) - # wend = ceil((i + 1) * W / n) - # output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend]) - # - import paddle - - input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) - out = paddle.nn.functional.adaptive_max_pool3d( - x = input_data, - output_size=[3, 3, 3]) - # out.shape is [2, 3, 3, 3, 3] + >>> # adaptive max pool3d + >>> # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] + >>> # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions + >>> # of input data into m*n grids averagely and performs poolings in each + >>> # grid to get output. + >>> # adaptive max pool performs calculations as follow: + >>> # + >>> # for i in range(l): + >>> # for j in range(m): + >>> # for k in range(n): + >>> # dstart = floor(i * D / l) + >>> # dend = ceil((i + 1) * D / l) + >>> # hstart = floor(i * H / m) + >>> # hend = ceil((i + 1) * H / m) + >>> # wstart = floor(i * W / n) + >>> # wend = ceil((i + 1) * W / n) + >>> # output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend]) + >>> # + >>> import paddle + + >>> input_data = paddle.randn(shape=(2, 3, 8, 32, 32)) + >>> out = paddle.nn.functional.adaptive_max_pool3d(x = input_data, + ... output_size=[3, 3, 3]) + >>> print(out.shape) + [2, 3, 3, 3, 3] """ _check_input(x, 5) diff --git a/python/paddle/nn/functional/sparse_attention.py b/python/paddle/nn/functional/sparse_attention.py index df95efb1705..bef511a3fa8 100644 --- a/python/paddle/nn/functional/sparse_attention.py +++ b/python/paddle/nn/functional/sparse_attention.py @@ -88,50 +88,51 @@ def sparse_attention( Examples: .. code-block:: python - # required: skiptest - import paddle - - paddle.disable_static() - - # `query`, `key` and `value` all have shape [1, 1, 4, 2] - query = paddle.to_tensor([[[[0, 1, ], [2, 3], - [0, 1], [2, 3]]]], dtype="float32") - key = paddle.to_tensor([[[[0, 1], [2, 3], - [0, 1], [2, 3]]]], dtype="float32") - value = paddle.to_tensor([[[[0, 1], [2, 3], - [0, 1], [2, 3]]]], dtype="float32") - - - offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32") - columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32") - - print(offset.shape) # (1, 1, 5) - print(columns.shape) # (1, 1, 8) - - key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32") - attention_mask = paddle.to_tensor([[1, 0, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1], - [1, 1, 1, 1]], dtype="float32") - output_mask = paddle.nn.functional.sparse_attention(query, key, - value, offset, columns, - key_padding_mask=key_padding_mask, - attn_mask=attention_mask) - print(output_mask) - # Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, - # [[[[0. , 1. ], - # [1.99830270, 2.99830270], - # [0. , 1. ], - # [0. , 1. 
]]]]) - - output = paddle.nn.functional.sparse_attention(query, key, - value, offset, columns) - print(output) - # Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, - # [[[[1.60885942, 2.60885954], - # [1.99830270, 2.99830270], - # [1.60885942, 2.60885954], - # [1.99830270, 2.99830270]]]]) + >>> # doctest: +SKIP('This API is only used in CUDA11.3 and above.') + >>> import paddle + + >>> paddle.disable_static() + + >>> # `query`, `key` and `value` all have shape [1, 1, 4, 2] + >>> query = paddle.to_tensor([[[[0, 1, ], [2, 3], + ... [0, 1], [2, 3]]]], dtype="float32") + >>> key = paddle.to_tensor([[[[0, 1], [2, 3], + ... [0, 1], [2, 3]]]], dtype="float32") + >>> value = paddle.to_tensor([[[[0, 1], [2, 3], + ... [0, 1], [2, 3]]]], dtype="float32") + ... + >>> offset = paddle.to_tensor([[[0, 2, 4, 6, 8]]], dtype="int32") + >>> columns = paddle.to_tensor([[[0, 1, 0, 1, 2, 3, 2, 3]]], dtype="int32") + ... + >>> print(offset.shape) + [1, 1, 5] + >>> print(columns.shape) + [1, 1, 8] + ... + >>> key_padding_mask = paddle.to_tensor([[1, 1, 1, 0]], dtype="float32") + >>> attention_mask = paddle.to_tensor([[1, 0, 1, 1], + ... [1, 1, 1, 1], + ... [1, 1, 1, 1], + ... [1, 1, 1, 1]], dtype="float32") + >>> output_mask = paddle.nn.functional.sparse_attention(query, key, + ... value, offset, columns, + ... key_padding_mask=key_padding_mask, + ... attn_mask=attention_mask) + >>> print(output_mask) + Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[0. , 1. ], + [1.99830270, 2.99830270], + [0. , 1. ], + [0. , 1. ]]]]) + + >>> output = paddle.nn.functional.sparse_attention(query, key, + ... value, offset, columns) + >>> print(output) + Tensor(shape=[1, 1, 4, 2], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[1.60885942, 2.60885954], + [1.99830270, 2.99830270], + [1.60885942, 2.60885954], + [1.99830270, 2.99830270]]]]) """ if in_dynamic_mode(): ( diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 3a28e63c203..4bcb19ea95c 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -43,13 +43,15 @@ class CELU(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) - m = paddle.nn.CELU(0.2) - out = m(x) - # [[-0.19865242, 6. ], - # [ 1. , 15.60000038]] + >>> import paddle + + >>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) + >>> m = paddle.nn.CELU(0.2) + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-0.19865242, 6. ], + [ 1. , 15.60000038]]) """ def __init__(self, alpha=1.0, name=None): @@ -91,13 +93,15 @@ class ELU(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) - m = paddle.nn.ELU(0.2) - out = m(x) - # [[-0.12642411 6. ] - # [ 1. 15.6 ]] + >>> x = paddle.to_tensor([[-1. ,6.], [1., 15.6]]) + >>> m = paddle.nn.ELU(0.2) + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-0.12642412, 6. ], + [ 1. , 15.60000038]]) """ def __init__(self, alpha=1.0, name=None): @@ -141,15 +145,20 @@ class GELU(Layer): Examples: .. 
code-block:: python - import paddle - - x = paddle.to_tensor([[-1, 0.5],[1, 1.5]]) - - m = paddle.nn.GELU() - out = m(x) # [-0.158655 0.345731 0.841345 1.39979] - - m = paddle.nn.GELU(True) - out = m(x) # [-0.158808 0.345714 0.841192 1.39957] + >>> import paddle + >>> x = paddle.to_tensor([[-1, 0.5],[1, 1.5]]) + >>> m = paddle.nn.GELU() + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-0.15865529, 0.34573123], + [ 0.84134471, 1.39978933]]) + >>> m = paddle.nn.GELU(True) + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-0.15880796, 0.34571400], + [ 0.84119201, 1.39957154]]) """ def __init__(self, approximate=False, name=None): @@ -193,11 +202,14 @@ class Hardshrink(Layer): .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-1, 0.3, 2.5]) - m = paddle.nn.Hardshrink() - out = m(x) # [-1., 0., 2.5] + >>> x = paddle.to_tensor([-1, 0.3, 2.5]) + >>> m = paddle.nn.Hardshrink() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-1. , 0. , 2.50000000]) """ def __init__(self, threshold=0.5, name=None): @@ -244,11 +256,14 @@ class Hardswish(Layer): .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-4., 5., 1.]) - m = paddle.nn.Hardswish() - out = m(x) # [0., 5., 0.666667] + >>> x = paddle.to_tensor([-4., 5., 1.]) + >>> m = paddle.nn.Hardswish() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0. , 5. , 0.66666669]) """ def __init__(self, name=None): @@ -282,14 +297,14 @@ class Tanh(Layer): .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - m = paddle.nn.Tanh() - out = m(x) - print(out) - # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.37994894, -0.19737533, 0.09966800, 0.29131261]) + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> m = paddle.nn.Tanh() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.37994900, -0.19737528, 0.09966799, 0.29131261]) """ def __init__(self, name=None): @@ -333,11 +348,14 @@ class Hardtanh(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-1.5, 0.3, 2.5]) - m = paddle.nn.Hardtanh() - out = m(x) # [-1., 0.3, 1.] + >>> x = paddle.to_tensor([-1.5, 0.3, 2.5]) + >>> m = paddle.nn.Hardtanh() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-1. , 0.30000001, 1. ]) """ def __init__(self, min=-1.0, max=1.0, name=None): @@ -386,25 +404,25 @@ class PReLU(Layer): Examples: .. code-block:: python - import paddle - paddle.set_default_dtype("float64") - - data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0], - [ 3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[ 1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [ 6.0, 7.0, 8.0, 9.0]]]]) - - m = paddle.nn.PReLU(1, 0.25) - out = m(data) - print(out) - # [[[[-0.5 , 3. , -1. , 5. ], - # [ 3. , -1. , 5. , -1.5 ], - # [-1.75, -2. , 8. , 9. ]], - # [[ 1. , -0.5 , -0.75, 4. ], - # [-1.25, 6. , 7. , -2. ], - # [ 6. , 7. , 8. , 9. ]]]] + >>> import paddle + + >>> data = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0], + ... [ 3.0, -4.0, 5.0, -6.0], + ... [-7.0, -8.0, 8.0, 9.0]], + ... [[ 1.0, -2.0, -3.0, 4.0], + ... [-5.0, 6.0, 7.0, -8.0], + ... 
[ 6.0, 7.0, 8.0, 9.0]]]]) + ... + >>> m = paddle.nn.PReLU(1, 0.25) + >>> out = m(data) + >>> print(out) + Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=False, + [[[[-0.50000000, 3. , -1. , 5. ], + [ 3. , -1. , 5. , -1.50000000], + [-1.75000000, -2. , 8. , 9. ]], + [[ 1. , -0.50000000, -0.75000000, 4. ], + [-1.25000000, 6. , 7. , -2. ], + [ 6. , 7. , 8. , 9. ]]]]) """ def __init__( @@ -495,24 +513,26 @@ class RReLU(Layer): Examples: .. code-block:: python - import paddle - - input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0], - [ 3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[ 1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32') - - rrelu_layer = paddle.nn.RReLU(0.1, 0.3) - out = rrelu_layer(input_tensor) - print(out) - #[[[[-0.20000899 3. -0.88108218 5. ] - # [ 3. -0.55175185 5. -1.07761011] - # [-1.06806871 -1.98962009 8. 9. ]] - # [[ 1. -0.52382672 -0.65515128 4. ] - # [-1.37663394 6. 7. -2.34657836] - # [ 6. 7. 8. 9. ]]]] + >>> import paddle + >>> paddle.seed(2023) + + >>> input_tensor = paddle.to_tensor([[[[-2.0, 3.0, -4.0, 5.0], + ... [ 3.0, -4.0, 5.0, -6.0], + ... [-7.0, -8.0, 8.0, 9.0]], + ... [[ 1.0, -2.0, -3.0, 4.0], + ... [-5.0, 6.0, 7.0, -8.0], + ... [ 6.0, 7.0, 8.0, 9.0]]]], dtype='float32') + ... + >>> rrelu_layer = paddle.nn.RReLU(0.1, 0.3) + >>> out = rrelu_layer(input_tensor) + >>> print(out) + Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[-0.54633451, 3. , -0.81611776, 5. ], + [ 3. , -0.60768753, 5. , -1.68630385], + [-1.29360127, -1.45026064, 8. , 9. ]], + [[ 1. , -0.58808362, -0.74662417, 4. ], + [-1.01785135, 6. , 7. , -1.97268605], + [ 6. , 7. , 8. , 9. ]]]]) """ def __init__(self, lower=1.0 / 8.0, upper=1.0 / 3.0, name=None): @@ -554,13 +574,14 @@ class ReLU(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-2., 0., 1.]) - m = paddle.nn.ReLU() - out = m(x) - print(out) - # [0., 0., 1.] + >>> x = paddle.to_tensor([-2., 0., 1.]) + >>> m = paddle.nn.ReLU() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [0., 0., 1.]) """ def __init__(self, name=None): @@ -596,13 +617,14 @@ class ReLU6(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-1., 0.3, 6.5]) - m = paddle.nn.ReLU6() - out = m(x) - print(out) - # [0, 0.3, 6] + >>> x = paddle.to_tensor([-1., 0.3, 6.5]) + >>> m = paddle.nn.ReLU6() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [0. , 0.30000000, 6. ]) """ def __init__(self, name=None): @@ -644,13 +666,15 @@ class SELU(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]]) - m = paddle.nn.SELU() - out = m(x) - print(out) - # [[0, 1.050701],[2.101402, 3.152103]] + >>> x = paddle.to_tensor([[0.0, 1.0],[2.0, 3.0]]) + >>> m = paddle.nn.SELU() + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[0. , 1.05070102], + [2.10140204, 3.15210295]]) """ def __init__( @@ -703,11 +727,14 @@ class LeakyReLU(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - m = paddle.nn.LeakyReLU() - x = paddle.to_tensor([-2.0, 0, 1]) - out = m(x) # [-0.02, 0., 1.] 
+ >>> m = paddle.nn.LeakyReLU() + >>> x = paddle.to_tensor([-2.0, 0, 1]) + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.02000000, 0. , 1. ]) """ def __init__(self, negative_slope=0.01, name=None): @@ -744,11 +771,14 @@ class Sigmoid(Layer): .. code-block:: python - import paddle + >>> import paddle - m = paddle.nn.Sigmoid() - x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) - out = m(x) # [0.7310586, 0.880797, 0.95257413, 0.98201376] + >>> m = paddle.nn.Sigmoid() + >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.73105860, 0.88079703, 0.95257413, 0.98201376]) """ def __init__(self, name=None): @@ -795,11 +825,14 @@ class Hardsigmoid(Layer): .. code-block:: python - import paddle + >>> import paddle - m = paddle.nn.Hardsigmoid() - x = paddle.to_tensor([-4., 5., 1.]) - out = m(x) # [0., 1, 0.666667] + >>> m = paddle.nn.Hardsigmoid() + >>> x = paddle.to_tensor([-4., 5., 1.]) + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [0. , 1. , 0.66666669]) """ def __init__(self, name=None): @@ -836,11 +869,14 @@ class Softplus(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32') - m = paddle.nn.Softplus() - out = m(x) # [0.513015, 0.598139, 0.744397, 0.854355] + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3], dtype='float32') + >>> m = paddle.nn.Softplus() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.51301527, 0.59813893, 0.74439669, 0.85435522]) """ def __init__(self, beta=1, threshold=20, name=None): @@ -887,14 +923,14 @@ class Softshrink(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8]) - m = paddle.nn.Softshrink() - out = m(x) - print(out) - # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.39999998, 0. , 0. , 0.30000001]) + >>> x = paddle.to_tensor([-0.9, -0.2, 0.1, 0.8]) + >>> m = paddle.nn.Softshrink() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.39999998, 0. , 0. , 0.30000001]) """ def __init__(self, threshold=0.5, name=None): @@ -929,14 +965,14 @@ class Softsign(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - m = paddle.nn.Softsign() - out = m(x) - print(out) - # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.28571430, -0.16666666, 0.09090909, 0.23076925]) + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> m = paddle.nn.Softsign() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.28571430, -0.16666666, 0.09090909, 0.23076925]) """ def __init__(self, name=None): @@ -970,14 +1006,14 @@ class Swish(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-2., 0., 1.]) - m = paddle.nn.Swish() - out = m(x) - print(out) - # Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.23840584, 0. , 0.73105854]) + >>> x = paddle.to_tensor([-2., 0., 1.]) + >>> m = paddle.nn.Swish() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.23840584, 0. 
, 0.73105860]) """ def __init__(self, name=None): @@ -1017,11 +1053,14 @@ class Mish(Layer): .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-5., 0., 5.]) - m = paddle.nn.Mish() - out = m(x) # [-0.03357624, 0., 4.99955208] + >>> x = paddle.to_tensor([-5., 0., 5.]) + >>> m = paddle.nn.Mish() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.03357624, 0. , 4.99955177]) """ @@ -1056,14 +1095,14 @@ class Tanhshrink(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) - m = paddle.nn.Tanhshrink() - out = m(x) - print(out) - # Tensor(shape=[4], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [-0.02005106, -0.00262468, 0.00033200, 0.00868741]) + >>> x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) + >>> m = paddle.nn.Tanhshrink() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.02005100, -0.00262472, 0.00033201, 0.00868741]) """ def __init__(self, name=None): @@ -1105,14 +1144,14 @@ class ThresholdedReLU(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([2., 0., 1.]) - m = paddle.nn.ThresholdedReLU() - out = m(x) - print(out) - # Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [2., 0., 0.]) + >>> x = paddle.to_tensor([2., 0., 1.]) + >>> m = paddle.nn.ThresholdedReLU() + >>> out = m(x) + >>> print(out) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [2., 0., 0.]) """ def __init__(self, threshold=1.0, name=None): @@ -1148,11 +1187,14 @@ class Silu(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) - m = paddle.nn.Silu() - out = m(x) # [ 0.731059, 1.761594, 2.857722, 3.928055 ] + >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) + >>> m = paddle.nn.Silu() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.73105860, 1.76159406, 2.85772228, 3.92805505]) """ def __init__(self, name=None): @@ -1187,11 +1229,14 @@ class LogSigmoid(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) - m = paddle.nn.LogSigmoid() - out = m(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499] + >>> x = paddle.to_tensor([1.0, 2.0, 3.0, 4.0]) + >>> m = paddle.nn.LogSigmoid() + >>> out = m(x) + >>> print(out) + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=True, + [-0.31326166, -0.12692805, -0.04858733, -0.01814996]) """ def __init__(self, name=None): @@ -1299,22 +1344,25 @@ class Softmax(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]], dtype='float32') - m = paddle.nn.Softmax() - out = m(x) - # [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.07232949, 0.19661193, 0.19661193, 0.53444665]], - # [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - # [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] + >>> import paddle + + >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], + ... [3.0, 4.0, 5.0, 6.0], + ... [7.0, 8.0, 8.0, 9.0]], + ... [[1.0, 2.0, 3.0, 4.0], + ... 
[5.0, 6.0, 7.0, 8.0], + ... [6.0, 7.0, 8.0, 9.0]]], dtype='float32') + >>> m = paddle.nn.Softmax() + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[0.03205860, 0.08714432, 0.23688284, 0.64391428], + [0.03205860, 0.08714432, 0.23688284, 0.64391428], + [0.07232949, 0.19661194, 0.19661194, 0.53444666]], + [[0.03205860, 0.08714432, 0.23688284, 0.64391428], + [0.03205860, 0.08714432, 0.23688284, 0.64391428], + [0.03205860, 0.08714432, 0.23688284, 0.64391428]]]) + """ def __init__(self, axis=-1, name=None): @@ -1357,23 +1405,26 @@ class LogSoftmax(Layer): Examples: .. code-block:: python - import paddle - - x = [[[-2.0, 3.0, -4.0, 5.0], - [3.0, -4.0, 5.0, -6.0], - [-7.0, -8.0, 8.0, 9.0]], - [[1.0, -2.0, -3.0, 4.0], - [-5.0, 6.0, 7.0, -8.0], - [6.0, 7.0, 8.0, 9.0]]] - m = paddle.nn.LogSoftmax() - x = paddle.to_tensor(x) - out = m(x) - # [[[ -7.1278396 -2.1278396 -9.127839 -0.12783948] - # [ -2.1270514 -9.127051 -0.12705144 -11.127051 ] - # [-16.313261 -17.313261 -1.3132617 -0.31326184]] - # [[ -3.0518122 -6.051812 -7.051812 -0.051812 ] - # [-12.313267 -1.3132664 -0.3132665 -15.313267 ] - # [ -3.4401896 -2.4401896 -1.4401896 -0.44018966]]] + >>> import paddle + + >>> x = [[[-2.0, 3.0, -4.0, 5.0], + ... [ 3.0, -4.0, 5.0, -6.0], + ... [-7.0, -8.0, 8.0, 9.0]], + ... [[ 1.0, -2.0, -3.0, 4.0], + ... [-5.0, 6.0, 7.0, -8.0], + ... [ 6.0, 7.0, 8.0, 9.0]]] + >>> m = paddle.nn.LogSoftmax() + >>> x = paddle.to_tensor(x) + >>> out = m(x) + >>> print(out) + Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[-7.12783957 , -2.12783957 , -9.12783909 , -0.12783945 ], + [-2.12705135 , -9.12705135 , -0.12705141 , -11.12705135], + [-16.31326103, -17.31326103, -1.31326187 , -0.31326184 ]], + [[-3.05181193 , -6.05181217 , -7.05181217 , -0.05181199 ], + [-12.31326675, -1.31326652 , -0.31326646 , -15.31326675], + [-3.44018984 , -2.44018984 , -1.44018972 , -0.44018975 ]]]) + """ def __init__(self, axis=-1, name=None): @@ -1426,20 +1477,17 @@ class Maxout(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.rand([1, 2, 3, 4]) - # [[[[0.5002636 0.22272532 0.17402348 0.2874594 ] - # [0.95313174 0.6228939 0.7129065 0.7087491 ] - # [0.02879342 0.88725346 0.61093384 0.38833922]] - # [[0.5231306 0.03807496 0.91661984 0.15602879] - # [0.666127 0.616567 0.30741522 0.24044901] - # [0.7142536 0.7351477 0.31588817 0.23782359]]]] - m = paddle.nn.Maxout(groups=2) - out = m(x) - # [[[[0.5231306 0.22272532 0.91661984 0.2874594 ] - # [0.95313174 0.6228939 0.7129065 0.7087491 ] - # [0.7142536 0.88725346 0.61093384 0.38833922]]]] + >>> import paddle + >>> paddle.seed(100) + + >>> x = paddle.rand([1, 2, 3, 4]) + >>> m = paddle.nn.Maxout(groups=2) + >>> out = m(x) + >>> print(out) + Tensor(shape=[1, 1, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0.85139430, 0.95717543, 0.43864486, 0.51577556], + [0.84765935, 0.45680618, 0.39412445, 0.72039396], + [0.59444654, 0.78120756, 0.78364515, 0.90572405]]]]) """ def __init__(self, groups, axis=1, name=None): @@ -1473,25 +1521,20 @@ class Softmax2D(Layer): Examples: .. 
code-block:: python - import paddle - - x = paddle.rand([1, 2, 3, 4]) - # [[[[0.42496058 0.1172187 0.14664008 0.8151267 ] - # [0.24430142 0.42052492 0.60372984 0.79307914] - # [0.4539401 0.90458065 0.10235776 0.62009853]] - - # [[0.11731581 0.16053623 0.05667042 0.91876775] - # [0.9413854 0.30770817 0.6788164 0.9543593 ] - # [0.4145064 0.75909156 0.11598814 0.73599935]]]] - m = paddle.nn.Softmax2D() - out = m(x) - # [[[[0.5763103 0.48917228 0.5224772 0.4741129 ] - # [0.3324591 0.5281743 0.48123717 0.45976716] - # [0.5098571 0.5363083 0.49659243 0.4710572 ]] - - # [[0.42368975 0.51082766 0.47752273 0.5258871 ] - # [0.66754097 0.47182566 0.5187628 0.5402329 ] - # [0.49014282 0.46369177 0.50340754 0.5289428 ]]]] + >>> import paddle + >>> paddle.seed(100) + + >>> x = paddle.rand([1, 2, 3, 4]) + >>> m = paddle.nn.Softmax2D() + >>> out = m(x) + >>> print(out) + Tensor(shape=[1, 2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0.42608523, 0.32081410, 0.39483935, 0.55642301], + [0.38131708, 0.45118359, 0.44891062, 0.46053308], + [0.35746980, 0.60766530, 0.38638926, 0.70425135]], + [[0.57391477, 0.67918587, 0.60516071, 0.44357699], + [0.61868292, 0.54881644, 0.55108935, 0.53946698], + [0.64253020, 0.39233473, 0.61361068, 0.29574865]]]]) """ diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 64caff4c169..539a030ad21 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -50,18 +50,22 @@ class Identity(Layer): Examples: .. code-block:: python - import paddle - - input_tensor = paddle.randn(shape=[3, 2]) - layer = paddle.nn.Identity() - out = layer(input_tensor) - # input_tensor: [[-0.32342386 -1.200079 ] - # [ 0.7979031 -0.90978354] - # [ 0.40597573 1.8095392 ]] - # out: [[-0.32342386 -1.200079 ] - # [ 0.7979031 -0.90978354] - # [ 0.40597573 1.8095392 ]] - + >>> import paddle + >>> paddle.seed(100) + + >>> input_tensor = paddle.randn(shape=[3, 2]) + >>> layer = paddle.nn.Identity() + >>> out = layer(input_tensor) + >>> print(input_tensor) + Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-1.41661501, 0.25904641], + [ 0.00979547, -0.30324230], + [-1.34256756, -0.76540256]]) + >>> print(out) + Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-1.41661501, 0.25904641], + [ 0.00979547, -0.30324230], + [-1.34256756, -0.76540256]]) """ @@ -120,28 +124,35 @@ class Linear(Layer): Examples: .. code-block:: python - import paddle - - # Define the linear layer. - weight_attr = paddle.ParamAttr( - name="weight", - initializer=paddle.nn.initializer.Constant(value=0.5)) - bias_attr = paddle.ParamAttr( - name="bias", - initializer=paddle.nn.initializer.Constant(value=1.0)) - linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr) - # linear.weight: [[0.5 0.5 0.5 0.5] - # [0.5 0.5 0.5 0.5]] - # linear.bias: [1. 1. 1. 1.] - - x = paddle.randn((3, 2), dtype="float32") - # x: [[-0.32342386 -1.200079 ] - # [ 0.7979031 -0.90978354] - # [ 0.40597573 1.8095392 ]] - y = linear(x) - # y: [[0.23824859 0.23824859 0.23824859 0.23824859] - # [0.9440598 0.9440598 0.9440598 0.9440598 ] - # [2.1077576 2.1077576 2.1077576 2.1077576 ]] + >>> import paddle + >>> paddle.seed(100) + + >>> # Define the linear layer. + >>> weight_attr = paddle.ParamAttr( + ... name="weight", + ... initializer=paddle.nn.initializer.Constant(value=0.5)) + >>> bias_attr = paddle.ParamAttr( + ... name="bias", + ... 
initializer=paddle.nn.initializer.Constant(value=1.0)) + >>> linear = paddle.nn.Linear(2, 4, weight_attr=weight_attr, bias_attr=bias_attr) + >>> print(linear.weight) + Parameter containing: + Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False, + [[0.50000000, 0.50000000, 0.50000000, 0.50000000], + [0.50000000, 0.50000000, 0.50000000, 0.50000000]]) + + >>> print(linear.bias) + Parameter containing: + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False, + [1., 1., 1., 1.]) + + >>> x = paddle.randn((3, 2), dtype="float32") + >>> y = linear(x) + >>> print(y) + Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=False, + [[ 0.42121571, 0.42121571, 0.42121571, 0.42121571], + [ 0.85327661, 0.85327661, 0.85327661, 0.85327661], + [-0.05398512, -0.05398512, -0.05398512, -0.05398512]]) """ def __init__( @@ -237,19 +248,22 @@ class LinearCompress(Layer): Examples: .. code-block:: python - import paddle - - # Define the linear layer. - paddle.set_default_dtype('float16') - weight_attr = paddle.ParamAttr( - name="weight", - initializer=paddle.nn.initializer.Constant(value=0.5)) - bias_attr = paddle.ParamAttr( - name="bias", - initializer=paddle.nn.initializer.Constant(value=1.0)) - linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only') - x = paddle.randn((3, 128), dtype="float16") - y = linear(x) + >>> import paddle + >>> paddle.seed(100) + + >>> # Define the linear layer. + >>> paddle.set_default_dtype('float16') + >>> weight_attr = paddle.ParamAttr( + ... name="weight", + ... initializer=paddle.nn.initializer.Constant(value=0.5)) + + >>> bias_attr = paddle.ParamAttr( + ... name="bias", + ... initializer=paddle.nn.initializer.Constant(value=1.0)) + + >>> linear = paddle.nn.LinearCompress(128, 64, weight_attr=weight_attr, bias_attr=bias_attr, bits=8, algo='weight_only') + >>> x = paddle.randn((3, 128), dtype="float16") + >>> y = linear(x) """ def __init__( @@ -527,14 +541,14 @@ class Upsample(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle - input = paddle.rand([2,3,6,10], dtype="float32") - upsample_out = paddle.nn.Upsample(size=[12,12]) + >>> input = paddle.rand([2, 3, 6, 10], dtype="float32") + >>> upsample_out = paddle.nn.Upsample(size=[12, 12]) - output = upsample_out(x=input) - print(output.shape) - # [2, 3, 12, 12] + >>> output = upsample_out(x=input) + >>> print(output.shape) + [2, 3, 12, 12] """ @@ -627,15 +641,15 @@ class UpsamplingNearest2D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - input_data = paddle.rand(shape=(2,3,6,10)).astype("float32") - upsample_out = paddle.nn.UpsamplingNearest2D(size=[12,12]) - input = paddle.to_tensor(input_data) - output = upsample_out(x=input) - print(output.shape) - # [2L, 3L, 12L, 12L] + >>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32") + >>> upsample_out = paddle.nn.UpsamplingNearest2D(size=[12, 12]) + >>> input = paddle.to_tensor(input_data) + >>> output = upsample_out(x=input) + >>> print(output.shape) + [2, 3, 12, 12] """ def __init__( @@ -713,15 +727,15 @@ class UpsamplingBilinear2D(Layer): Examples: .. 
code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - input_data = paddle.rand(shape=(2,3,6,10)).astype("float32") - upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12,12]) - input = paddle.to_tensor(input_data) - output = upsample_out(x=input) - print(output.shape) - # [2L, 3L, 12L, 12L] + >>> input_data = paddle.rand(shape=(2, 3, 6, 10)).astype("float32") + >>> upsample_out = paddle.nn.UpsamplingBilinear2D(size=[12, 12]) + >>> input = paddle.to_tensor(input_data) + >>> output = upsample_out(x=input) + >>> print(output.shape) + [2, 3, 12, 12] """ def __init__( @@ -798,15 +812,19 @@ class Bilinear(Layer): Tensor: A 2-D Tensor of shape [batch_size, out_features]. Examples: - .. code-block:: python + .. code-block:: python + + >>> import paddle - import paddle + >>> layer1 = paddle.rand((5, 5)).astype('float32') + >>> layer2 = paddle.rand((5, 4)).astype('float32') + >>> bilinear = paddle.nn.Bilinear(in1_features=5, + ... in2_features=4, + ... out_features=1000) - layer1 = paddle.rand((5, 5)).astype('float32') - layer2 = paddle.rand((5, 4)).astype('float32') - bilinear = paddle.nn.Bilinear( - in1_features=5, in2_features=4, out_features=1000) - result = bilinear(layer1,layer2) # result shape [5, 1000] + >>> result = bilinear(layer1,layer2) + >>> print(result.shape) + [5, 1000] """ @@ -897,23 +915,24 @@ class Dropout(Layer): Examples: .. code-block:: python - import paddle + >>> import paddle + >>> paddle.seed(2023) - x = paddle.to_tensor([[1,2,3], [4,5,6]], dtype="float32") - m = paddle.nn.Dropout(p=0.5) + >>> x = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype="float32") + >>> m = paddle.nn.Dropout(p=0.5) - y_train = m(x) - print(y_train) - # Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[2., 0., 6.], - # [0., 0., 0.]]) + >>> y_train = m(x) + >>> print(y_train) + Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[2., 4., 0.], + [8., 0., 0.]]) - m.eval() # switch the model to test phase - y_test = m(x) - print(y_test) - # Tensor(shape=[2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[1., 2., 3.], - # [4., 5., 6.]]) + >>> m.eval() # switch the model to test phase + >>> y_test = m(x) + >>> print(y_test) + Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[1., 2., 3.], + [4., 5., 6.]]) """ def __init__(self, p=0.5, axis=None, mode="upscale_in_train", name=None): @@ -967,36 +986,33 @@ class Dropout2D(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.rand([2, 2, 1, 3], dtype="float32") - print(x) - # Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[0.10052059, 0.93890846, 0.45351565]], - # [[0.47507706, 0.45021373, 0.11331241]]], - - # [[[0.53358698, 0.97375143, 0.34997326]], - # [[0.24758087, 0.52628899, 0.17970420]]]]) - - m = paddle.nn.Dropout2D(p=0.5) - y_train = m(x) - print(y_train) - # Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[0. , 0. , 0. ]], - # [[0.95015413, 0.90042746, 0.22662482]]], - - # [[[1.06717396, 1.94750285, 0.69994652]], - # [[0. , 0. , 0. 
]]]]) - - m.eval() # switch the model to test phase - y_test = m(x) - print(y_test) - # Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[0.10052059, 0.93890846, 0.45351565]], - # [[0.47507706, 0.45021373, 0.11331241]]], - - # [[[0.53358698, 0.97375143, 0.34997326]], - # [[0.24758087, 0.52628899, 0.17970420]]]]) + >>> import paddle + >>> paddle.seed(100) + >>> x = paddle.rand([2, 2, 1, 3], dtype="float32") + >>> print(x) + Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0.55355281, 0.20714243, 0.01162981]], + [[0.51577556, 0.36369765, 0.26091650]]], + [[[0.18905126, 0.56219709, 0.00808361]], + [[0.78120756, 0.32112977, 0.90572405]]]]) + + >>> m = paddle.nn.Dropout2D(p=0.5) + >>> y_train = m(x) + >>> print(y_train) + Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[1.10710561, 0.41428486, 0.02325963]], + [[1.03155112, 0.72739530, 0.52183300]]], + [[[0. , 0. , 0. ]], + [[0. , 0. , 0. ]]]]) + + >>> m.eval() # switch the model to test phase + >>> y_test = m(x) + >>> print(y_test) + Tensor(shape=[2, 2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0.55355281, 0.20714243, 0.01162981]], + [[0.51577556, 0.36369765, 0.26091650]]], + [[[0.18905126, 0.56219709, 0.00808361]], + [[0.78120756, 0.32112977, 0.90572405]]]]) """ def __init__(self, p=0.5, data_format='NCHW', name=None): @@ -1048,48 +1064,35 @@ class Dropout3D(Layer): Examples: .. code-block:: python - import paddle - - x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3)) - print(x) - # Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[[0. , 1. , 2. ], - # [3. , 4. , 5. ]], - # [[6. , 7. , 8. ], - # [9. , 10., 11.]]], - - # [[[12., 13., 14.], - # [15., 16., 17.]], - # [[18., 19., 20.], - # [21., 22., 23.]]]]]) - - m = paddle.nn.Dropout3D(p=0.5) - y_train = m(x) - print(y_train) - # Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[[0. , 2. , 4. ], - # [6. , 8. , 10.]], - # [[12., 14., 16.], - # [18., 20., 22.]]], - - # [[[0. , 0. , 0. ], - # [0. , 0. , 0. ]], - # [[0. , 0. , 0. ], - # [0. , 0. , 0. ]]]]]) - - m.eval() # switch the model to test phase - y_test = m(x) - print(y_test) - # Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[[[[0. , 1. , 2. ], - # [3. , 4. , 5. ]], - # [[6. , 7. , 8. ], - # [9. , 10., 11.]]], - - # [[[12., 13., 14.], - # [15., 16., 17.]], - # [[18., 19., 20.], - # [21., 22., 23.]]]]]) + >>> import paddle + + >>> x = paddle.arange(24, dtype="float32").reshape((1, 2, 2, 2, 3)) + >>> print(x) + Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[[0. , 1. , 2. ], + [3. , 4. , 5. ]], + [[6. , 7. , 8. ], + [9. , 10., 11.]]], + [[[12., 13., 14.], + [15., 16., 17.]], + [[18., 19., 20.], + [21., 22., 23.]]]]]) + + >>> m = paddle.nn.Dropout3D(p=0.5) + >>> y_train = m(x) + + >>> m.eval() # switch the model to test phase + >>> y_test = m(x) + >>> print(y_test) + Tensor(shape=[1, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[[0. , 1. , 2. ], + [3. , 4. , 5. ]], + [[6. , 7. , 8. ], + [9. , 10., 11.]]], + [[[12., 13., 14.], + [15., 16., 17.]], + [[18., 19., 20.], + [21., 22., 23.]]]]]) """ def __init__(self, p=0.5, data_format='NCDHW', name=None): @@ -1139,22 +1142,23 @@ class AlphaDropout(Layer): Examples: .. 
code-block:: python - import paddle - - x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32") - m = paddle.nn.AlphaDropout(p=0.5) - y_train = m(x) - print(y_train) - # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[-0.77919382, 1.66559887], - # [-0.77919382, -0.77919382]]) - - m.eval() # switch the model to test phase - y_test = m(x) - print(y_test) - # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [[-1., 1.], - # [-1., 1.]]) + >>> import paddle + >>> paddle.seed(2023) + + >>> x = paddle.to_tensor([[-1, 1], [-1, 1]], dtype="float32") + >>> m = paddle.nn.AlphaDropout(p=0.5) + >>> y_train = m(x) + >>> print(y_train) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-0.10721093, 1.66559887], + [-0.77919382, 1.66559887]]) + + >>> m.eval() # switch the model to test phase + >>> y_test = m(x) + >>> print(y_test) + Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True, + [[-1., 1.], + [-1., 1.]]) """ def __init__(self, p=0.5, name=None): @@ -1201,18 +1205,19 @@ class Pad1D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - input_shape = (1, 2, 3) - pad = [1, 2] - mode = "constant" - data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 - my_pad = nn.Pad1D(padding=pad, mode=mode) - result = my_pad(data) - print(result) - # [[[0. 1. 2. 3. 0. 0.] - # [0. 4. 5. 6. 0. 0.]]] + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = (1, 2, 3) + >>> pad = [1, 2] + >>> mode = "constant" + >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.Pad1D(padding=pad, mode=mode) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 2, 6], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[0., 1., 2., 3., 0., 0.], + [0., 4., 5., 6., 0., 0.]]]) """ def __init__( @@ -1271,21 +1276,22 @@ class Pad2D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - input_shape = (1, 1, 2, 3) - pad = [1, 0, 1, 2] - mode = "constant" - data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 - my_pad = nn.Pad2D(padding=pad, mode=mode) - result = my_pad(data) - print(result) - # [[[[0. 0. 0. 0.] - # [0. 1. 2. 3.] - # [0. 4. 5. 6.] - # [0. 0. 0. 0.] - # [0. 0. 0. 0.]]]] + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = (1, 1, 2, 3) + >>> pad = [1, 0, 1, 2] + >>> mode = "constant" + >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.Pad2D(padding=pad, mode=mode) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0., 0., 0., 0.], + [0., 1., 2., 3.], + [0., 4., 5., 6.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]]]]) """ def __init__( @@ -1336,26 +1342,24 @@ class ZeroPad2D(Layer): The data type is same as input x. Examples: - Examples are as follows. .. code-block:: python - import paddle - import paddle.nn as nn - - input_shape = paddle.to_tensor([1, 1, 2, 3]) - pad = [1, 0, 1, 2] - data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1 - - my_pad = nn.ZeroPad2D(padding=pad) - result = my_pad(data) - - print(result) - # [[[[0. 0. 0. 0.] - # [0. 1. 2. 3.] - # [0. 4. 5. 6.] - # [0. 0. 0. 0.] - # [0. 0. 0. 
0.]]]] + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = paddle.to_tensor([1, 1, 2, 3]) + >>> pad = [1, 0, 1, 2] + >>> data = paddle.arange(paddle.prod(input_shape), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.ZeroPad2D(padding=pad) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[0., 0., 0., 0.], + [0., 1., 2., 3.], + [0., 4., 5., 6.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]]]]) """ def __init__(self, padding, data_format="NCHW", name=None): @@ -1412,21 +1416,22 @@ class Pad3D(Layer): Examples: .. code-block:: python - import paddle - import paddle.nn as nn - - input_shape = (1, 1, 1, 2, 3) - pad = [1, 0, 1, 2, 0, 0] - mode = "constant" - data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 - my_pad = nn.Pad3D(padding=pad, mode=mode) - result = my_pad(data) - print(result) - # [[[[[0. 0. 0. 0.] - # [0. 1. 2. 3.] - # [0. 4. 5. 6.] - # [0. 0. 0. 0.] - # [0. 0. 0. 0.]]]]] + >>> import paddle + >>> import paddle.nn as nn + + >>> input_shape = (1, 1, 1, 2, 3) + >>> pad = [1, 0, 1, 2, 0, 0] + >>> mode = "constant" + >>> data = paddle.arange(paddle.prod(paddle.to_tensor(input_shape)), dtype="float32").reshape(input_shape) + 1 + >>> my_pad = nn.Pad3D(padding=pad, mode=mode) + >>> result = my_pad(data) + >>> print(result) + Tensor(shape=[1, 1, 1, 5, 4], dtype=float32, place=Place(cpu), stop_gradient=True, + [[[[[0., 0., 0., 0.], + [0., 1., 2., 3.], + [0., 4., 5., 6.], + [0., 0., 0., 0.], + [0., 0., 0., 0.]]]]]) """ def __init__( @@ -1476,13 +1481,13 @@ class CosineSimilarity(Layer): Case 0: x1 = [[0.8024077 0.9927354 0.27238318 0.8344984 ] - [0.48949873 0.5797396 0.65444374 0.66510963] - [0.1031398 0.9614342 0.08365563 0.6796464 ] - [0.10760343 0.7461209 0.7726148 0.5801006 ]] + [0.48949873 0.5797396 0.65444374 0.66510963] + [0.1031398 0.9614342 0.08365563 0.6796464 ] + [0.10760343 0.7461209 0.7726148 0.5801006 ]] x2 = [[0.62913156 0.1536727 0.9847992 0.04591406] - [0.9098952 0.15715368 0.8671125 0.3156102 ] - [0.4427798 0.54136837 0.5276275 0.32394758] - [0.3769419 0.8535014 0.48041078 0.9256797 ]] + [0.9098952 0.15715368 0.8671125 0.3156102 ] + [0.4427798 0.54136837 0.5276275 0.32394758] + [0.3769419 0.8535014 0.48041078 0.9256797 ]] axis = 1 eps = 1e-8 Out: [0.5275037 0.8368967 0.75037485 0.9245899] @@ -1490,19 +1495,19 @@ class CosineSimilarity(Layer): Code Examples: .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - x1 = paddle.to_tensor([[1., 2., 3.], - [2., 3., 4.]], dtype="float32") - x2 = paddle.to_tensor([[8., 3., 3.], - [2., 3., 4.]], dtype="float32") + >>> x1 = paddle.to_tensor([[1., 2., 3.], + ... [2., 3., 4.]], dtype="float32") + >>> x2 = paddle.to_tensor([[8., 3., 3.], + ... [2., 3., 4.]], dtype="float32") - cos_sim_func = nn.CosineSimilarity(axis=0) - result = cos_sim_func(x1, x2) - print(result) - # Tensor(shape=[3], dtype=float32, place=Place(gpu:0), stop_gradient=True, - # [0.65079135, 0.98058069, 1. ]) + >>> cos_sim_func = nn.CosineSimilarity(axis=0) + >>> result = cos_sim_func(x1, x2) + >>> print(result) + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.65079135, 0.98058069, 1. 
])
    """

    def __init__(self, axis=1, eps=1e-8):
@@ -1544,19 +1549,16 @@ class Embedding(Layer):
            output is a Tensor:
                out.shape = [3, 2, 16]
                out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
-                            [0.345421456, 0.524563927, ..., 0.144534654]],
-
+                            [0.345421456, 0.524563927, ..., 0.144534654]],
                            [[0.345249859, 0.124939536, ..., 0.194353745],
-                            [0.945345345, 0.435394634, ..., 0.435345365]],
-
+                            [0.945345345, 0.435394634, ..., 0.435345365]],
                            [[0.945345345, 0.435394634, ..., 0.435345365],
-                            [0.0, 0.0, ..., 0.0 ]]] # padding data
+                            [0.0, 0.0, ..., 0.0 ]]] # padding data
        The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
        It will pad all-zero data when ids is 127.

    Parameters:
-        num_embeddings (int): Just one element which indicate the size
-            of the dictionary of embeddings.
+        num_embeddings (int): Just one element which indicates the size of the dictionary of embeddings.
        embedding_dim (int): Just one element which indicate the size of each embedding vector respectively.
        padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
            If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
@@ -1574,9 +1576,8 @@ class Embedding(Layer):
            The local word vector needs to be transformed into numpy format, and the shape of local word
            vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
            is used to load custom or pre-trained word vectors. See code example for details.
-        name(str|None, optional): For detailed information, please refer
-            to :ref:`api_guide_Name`. Usually name is no need to set and
-            None by default.
+        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name does not need to be set and is
+            None by default.

    Attribute:
        **weight** (Parameter): the learnable weights of this layer.
@@ -1588,36 +1589,36 @@ class Embedding(Layer):

        .. code-block:: python

-            import paddle
-
-            x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False)
-            embedding = paddle.nn.Embedding(4, 3, sparse=True)
-
-            w0 = paddle.to_tensor([[0., 0., 0.],
-                                   [1., 1., 1.],
-                                   [2., 2., 2.],
-                                   [3., 3., 3.]], dtype="float32")
-            embedding.weight.set_value(w0)
-            print(embedding.weight)
-            # Tensor(shape=[4, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            #        [[0., 0., 0.],
-            #         [1., 1., 1.],
-            #         [2., 2., 2.],
-            #         [3., 3., 3.]])
-
-            adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
-            adam.clear_grad()
-
-
-            out = embedding(x)
-            print(out)
-            # Tensor(shape=[3, 1, 3], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            #        [[[0., 0., 0.]],
-            #         [[1., 1., 1.]],
-            #         [[3., 3., 3.]]])
-
-            out.backward()
-            adam.step()
+            >>> import paddle
+
+            >>> x = paddle.to_tensor([[0], [1], [3]], dtype="int64", stop_gradient=False)
+            >>> embedding = paddle.nn.Embedding(4, 3, sparse=True)
+
+            >>> w0 = paddle.to_tensor([[0., 0., 0.],
+            ...                        [1., 1., 1.],
+            ...                        [2., 2., 2.],
+            ...                        [3., 3., 3.]], dtype="float32")
+            >>> embedding.weight.set_value(w0)
+            >>> print(embedding.weight)
+            Parameter containing:
+            Tensor(shape=[4, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[0., 0., 0.],
+             [1., 1., 1.],
+             [2., 2., 2.],
+             [3., 3., 3.]])
+
+            >>> adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
+            >>> adam.clear_grad()
+
+            >>> out = embedding(x)
+            >>> print(out)
+            Tensor(shape=[3, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[[0., 0., 0.]],
+             [[1., 1., 1.]],
+             [[3., 3., 3.]]])
+
+            >>> out.backward()
+            >>> adam.step()

    """

@@ -1708,36 +1709,35 @@ class Unfold(Layer):

    Parameters:
-        kernel_sizes(int|list):   The size of convolution kernel, should be [k_h, k_w]
-                                  or an integer k treated as [k, k].
-        strides(int|list):        The strides, should be [stride_h, stride_w]
-                                  or an integer stride treated as [sride, stride].
-                                  For default, strides will be [1, 1].
-        paddings(int|list):       The paddings of each dimension, should be
-                                  [padding_top, padding_left, padding_bottom, padding_right]
-                                  or [padding_h, padding_w] or an integer padding.
-                                  If [padding_h, padding_w] was given, it will expanded to
-                                  [padding_h, padding_w, padding_h, padding_w]. If an integer
-                                  padding was given, [padding, padding, padding, padding] will
-                                  be used. For default, paddings will be [0, 0, 0, 0]
-        dilations(int|list):      the dilations of convolution kernel, should be
-                                  [dilation_h, dilation_w], or an integer dilation treated as
-                                  [dilation, dilation]. For default, it will be [1, 1].
-        name(str, optional): The default value is None.
-                             Normally there is no need for user to set this property.
-                             For more information, please refer to :ref:`api_guide_Name`
+        kernel_sizes(int|list): The size of convolution kernel, should be [k_h, k_w]
+            or an integer k treated as [k, k].
+        strides(int|list, optional): The strides, should be [stride_h, stride_w]
+            or an integer stride treated as [stride, stride]. For default, strides will be [1, 1].
+        paddings(int|list, optional): The paddings of each dimension, should be
+            [padding_top, padding_left, padding_bottom, padding_right] or [padding_h, padding_w]
+            or an integer padding. If [padding_h, padding_w] was given, it will be expanded to
+            [padding_h, padding_w, padding_h, padding_w]. If an integer padding was given,
+            [padding, padding, padding, padding] will be used. For default,
+            paddings will be [0, 0, 0, 0].
+        dilations(int|list, optional): The dilations of convolution kernel, should be
+            [dilation_h, dilation_w], or an integer dilation treated as [dilation, dilation].
+            For default, it will be [1, 1].
+        name(str, optional): The default value is None. Normally there is no need for the user to
+            set this property. For more information, please refer to :ref:`api_guide_Name`

    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> x = paddle.randn((100, 3, 224, 224))
+            >>> unfold = nn.Unfold(kernel_sizes=[3, 3])
+            >>> result = unfold(x)
+            >>> print(result.shape)
+            [100, 27, 49284]

-            x = paddle.randn((100,3,224,224))
-            unfold = nn.Unfold(kernel_sizes=[3, 3])
-            result = unfold(x)
-            print(result)
    """

    def __init__(
@@ -1790,21 +1790,21 @@ class Fold(Layer):

        C_{out} &= \frac{C_{in}}{kernel\_sizes[0]\times kernel\_sizes[1]} \\

    Parameters:
-        output_sizes(list):       The size of output size, should be [output_size_h, output_size_w]
+        output_sizes(list): The size of output size, should be [output_size_h, output_size_w]
                                  or an interger o treated as [o, o].
kernel_sizes(int|list|tuple): The size of convolution kernel, should be [k_h, k_w] or an integer k treated as [k, k]. - strides(int|list|tuple, optional): The strides, should be [stride_h, stride_w] + strides(int|list|tuple, optional): The strides, should be [stride_h, stride_w] or an integer stride treated as [sride, stride]. For default, strides will be [1, 1]. - paddings(int|list|tuple, optional): The paddings of each dimension, should be + paddings(int|list|tuple, optional): The paddings of each dimension, should be [padding_top, padding_left, padding_bottom, padding_right] or [padding_h, padding_w] or an integer padding. If [padding_h, padding_w] was given, it will expanded to [padding_h, padding_w, padding_h, padding_w]. If an integer padding was given, [padding, padding, padding, padding] will be used. For default, paddings will be [0, 0, 0, 0] - dilations(int|list|tuple, optional): the dilations of convolution kernel, should be + dilations(int|list|tuple, optional): The dilations of convolution kernel, should be [dilation_h, dilation_w], or an integer dilation treated as [dilation, dilation]. For default, it will be [1, 1]. name(str, optional): The default value is None. @@ -1820,13 +1820,14 @@ class Fold(Layer): .. code-block:: python - import paddle - import paddle.nn as nn + >>> import paddle + >>> import paddle.nn as nn - x = paddle.randn([2,3*2*2,12]) - fold = nn.Fold(output_sizes=[4, 5], kernel_sizes=2) - y = fold(x) - # y.shape = [2,3,4,5] + >>> x = paddle.randn([2, 3*2*2, 12]) + >>> fold = nn.Fold(output_sizes=[4, 5], kernel_sizes=2) + >>> y = fold(x) + >>> print(y.shape) + [2, 3, 4, 5] """ def __init__( @@ -1886,12 +1887,13 @@ class Flatten(Layer): .. code-block:: python - import paddle + >>> import paddle - inp = paddle.ones([5, 2, 3, 4]).astype('float32') - flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2) - y = flatten(inp) - # y.shape = [5, 6, 4] + >>> inp = paddle.ones([5, 2, 3, 4]).astype('float32') + >>> flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2) + >>> y = flatten(inp) + >>> print(y.shape) + [5, 6, 4] """ @@ -1928,15 +1930,15 @@ class Unflatten(Layer): .. code-block:: python - import paddle + >>> import paddle - x = paddle.randn(shape=[4, 6, 8]) - shape = [2, 3] - axis = 1 - unflatten = paddle.nn.Unflatten(axis, shape) - res = unflatten(x) - print(res.shape) - # [4, 2, 3, 8] + >>> x = paddle.randn(shape=[4, 6, 8]) + >>> shape = [2, 3] + >>> axis = 1 + >>> unflatten = paddle.nn.Unflatten(axis, shape) + >>> res = unflatten(x) + >>> print(res.shape) + [4, 2, 3, 8] """ diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py index abd15b00424..2e3bd180129 100644 --- a/python/paddle/nn/layer/layers.py +++ b/python/paddle/nn/layer/layers.py @@ -357,22 +357,38 @@ class Layer: Examples: .. code-block:: python - import paddle - class MyLayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self._linear = paddle.nn.Linear(1, 1) - self._dropout = paddle.nn.Dropout(p=0.5) - def forward(self, input): - temp = self._linear(input) - temp = self._dropout(temp) - return temp - x = paddle.randn([10, 1], 'float32') - mylayer = MyLayer() - mylayer.eval() # set mylayer._dropout to eval mode - out = mylayer(x) - mylayer.train() # set mylayer._dropout to train mode - out = mylayer(x) + >>> import paddle + >>> paddle.seed(100) + + >>> class MyLayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = paddle.nn.Linear(1, 1) + ... self._dropout = paddle.nn.Dropout(p=0.5) + ... + ... 
def forward(self, input): + ... temp = self._linear(input) + ... temp = self._dropout(temp) + ... return temp + ... + >>> x = paddle.randn([10, 1], 'float32') + >>> mylayer = MyLayer() + >>> mylayer.eval() # set mylayer._dropout to eval mode + >>> out = mylayer(x) + >>> mylayer.train() # set mylayer._dropout to train mode + >>> out = mylayer(x) + >>> print(out) + Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[-3.44879317], + [ 0. ], + [ 0. ], + [-0.73825276], + [ 0. ], + [ 0. ], + [ 0.64444798], + [-3.22185946], + [ 0. ], + [-0.68077987]]) """ def __init__(self, name_scope=None, dtype="float32"): @@ -419,25 +435,38 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MyLayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self._linear = paddle.nn.Linear(1, 1) - self._dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - temp = self._linear(input) - temp = self._dropout(temp) - return temp - - x = paddle.randn([10, 1], 'float32') - mylayer = MyLayer() - mylayer.eval() # set mylayer._dropout to eval mode - out = mylayer(x) - mylayer.train() # set mylayer._dropout to train mode - out = mylayer(x) + >>> import paddle + >>> paddle.seed(100) + + >>> class MyLayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = paddle.nn.Linear(1, 1) + ... self._dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... temp = self._linear(input) + ... temp = self._dropout(temp) + ... return temp + ... + >>> x = paddle.randn([10, 1], 'float32') + >>> mylayer = MyLayer() + >>> mylayer.eval() # set mylayer._dropout to eval mode + >>> out = mylayer(x) + >>> mylayer.train() # set mylayer._dropout to train mode + >>> out = mylayer(x) + >>> print(out) + Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[-3.44879317], + [ 0. ], + [ 0. ], + [-0.73825276], + [ 0. ], + [ 0. ], + [ 0.64444798], + [-3.22185946], + [ 0. ], + [-0.68077987]]) """ # global setting in dygraph @@ -461,24 +490,35 @@ class Layer: Example:: .. code-block:: python - import paddle - - class MyLayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self._linear = paddle.nn.Linear(1, 1) - self._dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - temp = self._linear(input) - temp = self._dropout(temp) - return temp - - x = paddle.randn([10, 1], 'float32') - mylayer = MyLayer() - mylayer.eval() # set mylayer._dropout to eval mode - out = mylayer(x) - print(out) + >>> import paddle + >>> paddle.seed(100) + >>> class MyLayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = paddle.nn.Linear(1, 1) + ... self._dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... temp = self._linear(input) + ... temp = self._dropout(temp) + ... return temp + ... + >>> x = paddle.randn([10, 1], 'float32') + >>> mylayer = MyLayer() + >>> mylayer.eval() # set mylayer._dropout to eval mode + >>> out = mylayer(x) + >>> print(out) + Tensor(shape=[10, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[-1.72439659], + [ 0.31532824], + [ 0.01192369], + [-0.36912638], + [-1.63426113], + [-0.93169814], + [ 0.32222399], + [-1.61092973], + [ 0.77209264], + [-0.34038994]]) """ # global setting in dygraph @@ -506,22 +546,41 @@ class Layer: Example:: .. 
code-block:: python

-                import paddle
-                import paddle.nn as nn
-
-                net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
-
-                def init_weights(layer):
-                    if type(layer) == nn.Linear:
-                        print('before init weight:', layer.weight.numpy())
-                        new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
-                        layer.weight.set_value(new_weight)
-                        print('after init weight:', layer.weight.numpy())
-
-                net.apply(init_weights)
-
-                print(net.state_dict())
-
+                >>> import paddle
+                >>> import paddle.nn as nn
+                >>> paddle.seed(2023)
+
+                >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
+
+                >>> def init_weights(layer):
+                ...     if type(layer) == nn.Linear:
+                ...         print('before init weight:', layer.weight.numpy())
+                ...         new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
+                ...         layer.weight.set_value(new_weight)
+                ...         print('after init weight:', layer.weight.numpy())
+                ...
+                >>> net.apply(init_weights)
+
+                >>> print(net.state_dict())
+                before init weight: [[ 0.89611185  0.04935038]
+                [-0.5888344   0.99266374]]
+                after init weight: [[0.9 0.9]
+                [0.9 0.9]]
+                before init weight: [[-0.18615901 -0.22924072]
+                [ 1.1517721   0.59859073]]
+                after init weight: [[0.9 0.9]
+                [0.9 0.9]]
+                OrderedDict([('0.weight', Parameter containing:
+                Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                [[0.89999998, 0.89999998],
+                [0.89999998, 0.89999998]])), ('0.bias', Parameter containing:
+                Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                [0., 0.])), ('1.weight', Parameter containing:
+                Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                [[0.89999998, 0.89999998],
+                [0.89999998, 0.89999998]])), ('1.bias', Parameter containing:
+                Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
+                [0., 0.]))])
        """
        for layer in self.children():
            layer.apply(fn)
@@ -541,18 +600,19 @@ class Layer:
        Example::
            .. code-block:: python

-                import paddle
-
-                class LinearNet(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__(name_scope = "demo_linear_net")
-                        self._linear = paddle.nn.Linear(1, 1)
+                >>> import paddle

-                    def forward(self, x):
-                        return self._linear(x)
-
-                linear_net = LinearNet()
-                print(linear_net.full_name())  # demo_linear_net_0
+                >>> class LinearNet(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__(name_scope = "demo_linear_net")
+                ...         self._linear = paddle.nn.Linear(1, 1)
+                ...
+                ...     def forward(self, x):
+                ...         return self._linear(x)
+                ...
+                >>> linear_net = LinearNet()
+                >>> print(linear_net.full_name())
+                demo_linear_net_0
        """
        return self._full_name
@@ -576,33 +636,33 @@ class Layer:
        Examples:
            .. code-block:: python

-                import paddle
-                import numpy as np
-
-                # the forward_post_hook change the output of the layer: output = output * 2
-                def forward_post_hook(layer, input, output):
-                    # user can use layer, input and output for information statistis tasks
+                >>> import paddle
+                >>> import numpy as np

-                    # change the output
-                    return output * 2
+                >>> # the forward_post_hook changes the output of the layer: output = output * 2
+                >>> def forward_post_hook(layer, input, output):
+                ...     # user can use layer, input and output for information statistics tasks
+                ...
+                ...     # change the output
+                ...     return output * 2
+                ...
+                >>> linear = paddle.nn.Linear(13, 5)

-                linear = paddle.nn.Linear(13, 5)
+                >>> # register the hook
+                >>> forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

-                # register the hook
-                forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)
+                >>> value1 = np.arange(26).reshape(2, 13).astype("float32")
+                >>> in1 = paddle.to_tensor(value1)

-                value1 = np.arange(26).reshape(2, 13).astype("float32")
-                in1 = paddle.to_tensor(value1)
+                >>> out0 = linear(in1)

-                out0 = linear(in1)
+                >>> # remove the hook
+                >>> forward_post_hook_handle.remove()

-                # remove the hook
-                forward_post_hook_handle.remove()
+                >>> out1 = linear(in1)

-                out1 = linear(in1)
-
-                # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
-                assert (out0.numpy() == (out1.numpy()) * 2).any()
+                >>> # hook changes the linear's output to output * 2, so out0 is equal to out1 * 2.
+                >>> assert (out0.numpy() == (out1.numpy()) * 2).any()
        """
        hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
@@ -630,35 +690,35 @@ class Layer:
        Examples:
            .. code-block:: python

-                import paddle
-                import numpy as np
-
-                # the forward_pre_hook change the input of the layer: input = input * 2
-                def forward_pre_hook(layer, input):
-                    # user can use layer and input for information statistis tasks
+                >>> import paddle
+                >>> import numpy as np

-                    # change the input
-                    input_return = (input[0] * 2)
-                    return input_return
+                >>> # the forward_pre_hook changes the input of the layer: input = input * 2
+                >>> def forward_pre_hook(layer, input):
+                ...     # user can use layer and input for information statistics tasks
+                ...
+                ...     # change the input
+                ...     input_return = (input[0] * 2)
+                ...     return input_return
+                ...
+                >>> linear = paddle.nn.Linear(13, 5)

-                linear = paddle.nn.Linear(13, 5)
+                >>> # register the hook
+                >>> forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

-                # register the hook
-                forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)
+                >>> value0 = np.arange(26).reshape(2, 13).astype("float32")
+                >>> in0 = paddle.to_tensor(value0)
+                >>> out0 = linear(in0)

-                value0 = np.arange(26).reshape(2, 13).astype("float32")
-                in0 = paddle.to_tensor(value0)
-                out0 = linear(in0)
+                >>> # remove the hook
+                >>> forward_pre_hook_handle.remove()

-                # remove the hook
-                forward_pre_hook_handle.remove()
+                >>> value1 = value0 * 2
+                >>> in1 = paddle.to_tensor(value1)
+                >>> out1 = linear(in1)

-                value1 = value0 * 2
-                in1 = paddle.to_tensor(value1)
-                out1 = linear(in1)
-
-                # hook change the linear's input to input * 2, so out0 is equal to out1.
-                assert (out0.numpy() == out1.numpy()).any()
+                >>> # hook changes the linear's input to input * 2, so out0 is equal to out1.
+                >>> assert (out0.numpy() == out1.numpy()).any()
        """
        hook_remove_helper = HookRemoveHelper(self._forward_pre_hooks)
        self._forward_pre_hooks[hook_remove_helper._hook_id] = hook
@@ -691,22 +751,31 @@ class Layer:
        Examples:
            .. code-block:: python

-                import paddle
-
-                class MyLayer(paddle.nn.Layer):
-                    def __init__(self):
-                        super().__init__()
-                        self._linear = paddle.nn.Linear(1, 1)
-                        w_tmp = self.create_parameter([1,1])
-                        self.add_parameter("w_tmp", w_tmp)
-
-                    def forward(self, input):
-                        return self._linear(input)
-
-                mylayer = MyLayer()
-                for name, param in mylayer.named_parameters():
-                    print(name, param) # will print w_tmp,_linear.weight,_linear.bias
-
+                >>> import paddle
+                >>> paddle.seed(2023)
+
+                >>> class MyLayer(paddle.nn.Layer):
+                ...     def __init__(self):
+                ...         super().__init__()
+                ...
self._linear = paddle.nn.Linear(1, 1) + ... w_tmp = self.create_parameter([1,1]) + ... self.add_parameter("w_tmp", w_tmp) + ... + ... def forward(self, input): + ... return self._linear(input) + ... + >>> mylayer = MyLayer() + >>> for name, param in mylayer.named_parameters(): + ... print(name, param) # will print w_tmp,_linear.weight,_linear.bias + w_tmp Parameter containing: + Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[0.06979191]]) + _linear.weight Parameter containing: + Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[1.26729357]]) + _linear.bias Parameter containing: + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False, + [0.]) """ temp_attr = copy.deepcopy(attr) if isinstance(temp_attr, str) and temp_attr == "": @@ -738,22 +807,22 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MyLinear(paddle.nn.Layer): - def __init__(self, - in_features, - out_features): - super().__init__() - self.linear = paddle.nn.Linear( 10, 10) - - self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype) - - def forward(self, input): - out = self.linear(input) - paddle.assign( out, self.back_var) - - return out + >>> import paddle + + >>> class MyLinear(paddle.nn.Layer): + ... def __init__(self, + ... in_features, + ... out_features): + ... super().__init__() + ... self.linear = paddle.nn.Linear( 10, 10) + ... + ... self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype) + ... + ... def forward(self, input): + ... out = self.linear(input) + ... paddle.assign( out, self.back_var) + ... + ... return out """ if name is not None: @@ -790,22 +859,22 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MyLinear(paddle.nn.Layer): - def __init__(self, - in_features, - out_features): - super().__init__() - self.linear = paddle.nn.Linear( 10, 10) - - self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype) - - def forward(self, input): - out = self.linear(input) - paddle.assign( out, self.back_var) - - return out + >>> import paddle + + >>> class MyLinear(paddle.nn.Layer): + ... def __init__(self, + ... in_features, + ... out_features): + ... super().__init__() + ... self.linear = paddle.nn.Linear(10, 10) + ... + ... self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype) + ... + ... def forward(self, input): + ... out = self.linear(input) + ... paddle.assign(out, self.back_var) + ... + ... return out """ if name is not None: @@ -833,10 +902,16 @@ class Layer: Examples: .. code-block:: python - import paddle + >>> import paddle + >>> paddle.seed(100) - linear = paddle.nn.Linear(1,1) - print(linear.parameters()) # print linear_0.w_0 and linear_0.b_0 + >>> linear = paddle.nn.Linear(1, 1) + >>> print(linear.parameters()) + [Parameter containing: + Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[0.18551230]]), Parameter containing: + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False, + [0.])] """ ret = [ @@ -858,15 +933,16 @@ class Layer: Examples: .. 
code-block:: python - import paddle + >>> import paddle - linear1 = paddle.nn.Linear(10, 3) - linear2 = paddle.nn.Linear(3, 10, bias_attr=False) - model = paddle.nn.Sequential(linear1, linear2) + >>> linear1 = paddle.nn.Linear(10, 3) + >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False) + >>> model = paddle.nn.Sequential(linear1, linear2) - layer_list = list(model.children()) + >>> layer_list = list(model.children()) - print(layer_list) # [, ] + >>> print(layer_list) + [Linear(in_features=10, out_features=3, dtype=float32), Linear(in_features=3, out_features=10, dtype=float32)] """ for _, layer in self.named_children(): @@ -882,16 +958,15 @@ class Layer: Examples: .. code-block:: python - import paddle - - linear1 = paddle.nn.Linear(10, 3) - linear2 = paddle.nn.Linear(3, 10, bias_attr=False) - model = paddle.nn.Sequential(linear1, linear2) - for prefix, layer in model.named_children(): - print(prefix, layer) - # ('0', ) - # ('1', ) + >>> import paddle + >>> linear1 = paddle.nn.Linear(10, 3) + >>> linear2 = paddle.nn.Linear(3, 10, bias_attr=False) + >>> model = paddle.nn.Sequential(linear1, linear2) + >>> for prefix, layer in model.named_children(): + ... print(prefix, layer) + 0 Linear(in_features=10, out_features=3, dtype=float32) + 1 Linear(in_features=3, out_features=10, dtype=float32) """ memo = set() for name, layer in self._sub_layers.items(): @@ -913,21 +988,22 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MyLayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self._linear = paddle.nn.Linear(1, 1) - self._dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - temp = self._linear(input) - temp = self._dropout(temp) - return temp - - mylayer = MyLayer() - print(mylayer.sublayers()) # [, ] + >>> import paddle + + >>> class MyLayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = paddle.nn.Linear(1, 1) + ... self._dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... temp = self._linear(input) + ... temp = self._dropout(temp) + ... return temp + ... + >>> mylayer = MyLayer() + >>> print(mylayer.sublayers()) + [Linear(in_features=1, out_features=1, dtype=float32), Dropout(p=0.5, axis=None, mode=upscale_in_train)] """ ret = [ @@ -951,14 +1027,37 @@ class Layer: Examples: .. code-block:: python - import paddle - - fc1 = paddle.nn.Linear(10, 3) - fc2 = paddle.nn.Linear(3, 10, bias_attr=False) - model = paddle.nn.Sequential(fc1, fc2) - for name, param in model.named_parameters(): - print(name, param) - + >>> import paddle + >>> paddle.seed(100) + + >>> fc1 = paddle.nn.Linear(10, 3) + >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False) + >>> model = paddle.nn.Sequential(fc1, fc2) + >>> for name, param in model.named_parameters(): + ... 
print(name, param) + 0.weight Parameter containing: + Tensor(shape=[10, 3], dtype=float32, place=Place(cpu), stop_gradient=False, + [[ 0.07276392, -0.39791510, -0.66356444], + [ 0.02143478, -0.18519843, -0.32485050], + [-0.42249614, 0.08450919, -0.66838276], + [ 0.38208580, -0.24303678, 0.55127048], + [ 0.47745085, 0.62117910, -0.08336520], + [-0.28653207, 0.47237599, -0.05868882], + [-0.14385653, 0.29945642, 0.12832761], + [-0.21237159, 0.38539791, -0.62760031], + [ 0.02637231, 0.20621127, 0.43255770], + [-0.19984481, -0.26259184, -0.29696006]]) + 0.bias Parameter containing: + Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=False, + [0., 0., 0.]) + 1.weight Parameter containing: + Tensor(shape=[3, 10], dtype=float32, place=Place(cpu), stop_gradient=False, + [[ 0.01985580, -0.40268910, 0.41172385, -0.47249708, -0.09002256, + -0.00533628, -0.52048630, 0.62360322, 0.20848787, -0.02033746], + [ 0.58281910, 0.12841827, 0.12907702, 0.02325618, -0.07746267, + 0.31950659, -0.37924835, -0.59209681, -0.11732036, -0.58378261], + [-0.62100595, 0.22293305, 0.28229684, -0.03687060, -0.59323978, + 0.08411229, 0.53275704, 0.40431368, 0.03171402, -0.17922515]]) """ params_set = set() named_sublayers = ( @@ -991,14 +1090,15 @@ class Layer: Examples: .. code-block:: python - import paddle - - fc1 = paddle.nn.Linear(10, 3) - fc2 = paddle.nn.Linear(3, 10, bias_attr=False) - model = paddle.nn.Sequential(fc1, fc2) - for prefix, layer in model.named_sublayers(): - print(prefix, layer) + >>> import paddle + >>> fc1 = paddle.nn.Linear(10, 3) + >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False) + >>> model = paddle.nn.Sequential(fc1, fc2) + >>> for prefix, layer in model.named_sublayers(): + ... print(prefix, layer) + 0 Linear(in_features=10, out_features=3, dtype=float32) + 1 Linear(in_features=3, out_features=10, dtype=float32) """ if layers_set is None: layers_set = set() @@ -1039,16 +1139,18 @@ class Layer: Examples: .. code-block:: python - import numpy as np - import paddle + >>> import numpy as np + >>> import paddle - linear = paddle.nn.Linear(10, 3) - value = np.array([0]).astype("float32") - buffer = paddle.to_tensor(value) - linear.register_buffer("buf_name", buffer, persistable=True) + >>> linear = paddle.nn.Linear(10, 3) + >>> value = np.array([0]).astype("float32") + >>> buffer = paddle.to_tensor(value) + >>> linear.register_buffer("buf_name", buffer, persistable=True) - # get the buffer by attribute. - print(linear.buf_name) + >>> # get the buffer by attribute. + >>> print(linear.buf_name) + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.]) """ @@ -1097,15 +1199,17 @@ class Layer: Examples: .. code-block:: python - import numpy as np - import paddle + >>> import numpy as np + >>> import paddle - linear = paddle.nn.Linear(10, 3) - value = np.array([0]).astype("float32") - buffer = paddle.to_tensor(value) - linear.register_buffer("buf_name", buffer, persistable=True) + >>> linear = paddle.nn.Linear(10, 3) + >>> value = np.array([0]).astype("float32") + >>> buffer = paddle.to_tensor(value) + >>> linear.register_buffer("buf_name", buffer, persistable=True) - print(linear.buffers()) # == print([linear.buf_name]) + >>> print(linear.buffers()) + [Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.])] """ ret = [ @@ -1131,26 +1235,29 @@ class Layer: Examples: .. 
code-block:: python - import numpy as np - import paddle - - fc1 = paddle.nn.Linear(10, 3) - buffer1 = paddle.to_tensor(np.array([0]).astype("float32")) - # register a tensor as buffer by specific `persistable` - fc1.register_buffer("buf_name_1", buffer1, persistable=True) - - fc2 = paddle.nn.Linear(3, 10) - buffer2 = paddle.to_tensor(np.array([1]).astype("float32")) - # register a buffer by assigning an attribute with Tensor. - # The `persistable` can only be False by this way. - fc2.buf_name_2 = buffer2 - - model = paddle.nn.Sequential(fc1, fc2) - - # get all named buffers - for name, buffer in model.named_buffers(): - print(name, buffer) - + >>> import numpy as np + >>> import paddle + + >>> fc1 = paddle.nn.Linear(10, 3) + >>> buffer1 = paddle.to_tensor(np.array([0]).astype("float32")) + >>> # register a tensor as buffer by specific `persistable` + >>> fc1.register_buffer("buf_name_1", buffer1, persistable=True) + + >>> fc2 = paddle.nn.Linear(3, 10) + >>> buffer2 = paddle.to_tensor(np.array([1]).astype("float32")) + >>> # register a buffer by assigning an attribute with Tensor. + >>> # The `persistable` can only be False by this way. + >>> fc2.buf_name_2 = buffer2 + + >>> model = paddle.nn.Sequential(fc1, fc2) + + >>> # get all named buffers + >>> for name, buffer in model.named_buffers(): + ... print(name, buffer) + 0.buf_name_1 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [0.]) + 1.buf_name_2 Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True, + [1.]) """ buffers_set = set() named_sublayers = ( @@ -1177,18 +1284,18 @@ class Layer: Examples: .. code-block:: python - import paddle - import numpy as np + >>> import paddle + >>> import numpy as np - value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5) - adam = paddle.optimizer.Adam(learning_rate=0.01, - parameters=linear.parameters()) - out = linear(a) - out.backward() - adam.step() - linear.clear_gradients() + >>> value = np.arange(26).reshape(2, 13).astype("float32") + >>> a = paddle.to_tensor(value) + >>> linear = paddle.nn.Linear(13, 5) + >>> adam = paddle.optimizer.Adam(learning_rate=0.01, + ... parameters=linear.parameters()) + >>> out = linear(a) + >>> out.backward() + >>> adam.step() + >>> linear.clear_gradients() """ for p in self.parameters(): @@ -1271,29 +1378,30 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MySequential(paddle.nn.Layer): - def __init__(self, *layers): - super().__init__() - if len(layers) > 0 and isinstance(layers[0], tuple): - for name, layer in layers: - self.add_sublayer(name, layer) - else: - for idx, layer in enumerate(layers): - self.add_sublayer(str(idx), layer) - - def forward(self, input): - for layer in self._sub_layers.values(): - input = layer(input) - return input - - fc1 = paddle.nn.Linear(10, 3) - fc2 = paddle.nn.Linear(3, 10, bias_attr=False) - model = MySequential(fc1, fc2) - for prefix, layer in model.named_sublayers(): - print(prefix, layer) - + >>> import paddle + + >>> class MySequential(paddle.nn.Layer): + ... def __init__(self, *layers): + ... super().__init__() + ... if len(layers) > 0 and isinstance(layers[0], tuple): + ... for name, layer in layers: + ... self.add_sublayer(name, layer) + ... else: + ... for idx, layer in enumerate(layers): + ... self.add_sublayer(str(idx), layer) + ... + ... def forward(self, input): + ... for layer in self._sub_layers.values(): + ... input = layer(input) + ... return input + ... 
+ >>> fc1 = paddle.nn.Linear(10, 3) + >>> fc2 = paddle.nn.Linear(3, 10, bias_attr=False) + >>> model = MySequential(fc1, fc2) + >>> for prefix, layer in model.named_sublayers(): + ... print(prefix, layer) + 0 Linear(in_features=10, out_features=3, dtype=float32) + 1 Linear(in_features=3, out_features=10, dtype=float32) """ assert isinstance(sublayer, Layer) or sublayer is None @@ -1313,22 +1421,31 @@ class Layer: Examples: .. code-block:: python - import paddle - - class MyLayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self._linear = paddle.nn.Linear(1, 1) - w_tmp = self.create_parameter([1,1]) - self.add_parameter("w_tmp", w_tmp) - - def forward(self, input): - return self._linear(input) - - mylayer = MyLayer() - for name, param in mylayer.named_parameters(): - print(name, param) # will print w_tmp,_linear.weight,_linear.bias - + >>> import paddle + >>> paddle.seed(100) + + >>> class MyLayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self._linear = paddle.nn.Linear(1, 1) + ... w_tmp = self.create_parameter([1,1]) + ... self.add_parameter("w_tmp", w_tmp) + ... + ... def forward(self, input): + ... return self._linear(input) + ... + >>> mylayer = MyLayer() + >>> for name, param in mylayer.named_parameters(): + ... print(name, param) + w_tmp Parameter containing: + Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[-1.01448846]]) + _linear.weight Parameter containing: + Tensor(shape=[1, 1], dtype=float32, place=Place(cpu), stop_gradient=False, + [[0.18551230]]) + _linear.bias Parameter containing: + Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=False, + [0.]) """ if '_parameters' not in self.__dict__: raise RuntimeError("super().__init__() should be called firstly.") @@ -1580,23 +1697,21 @@ class Layer: Examples: .. code-block:: python - import paddle - import numpy as np - - class Mylayer(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.linear1 = paddle.nn.Linear(10, 10) - self.linear2 = paddle.nn.Linear(5, 5) - self.conv2d = paddle.nn.Conv2D(3, 2, 3) - self.embedding = paddle.nn.Embedding(128, 16) - self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32')) - - mylayer = Mylayer() - print(dir(mylayer)) - # only parts are shown, because of list have too much content - # ['__call__', '__class__', ... , 'conv2d', 'embedding', 'h_0', 'linear1', 'linear2', ... , 'sublayers', 'train'] - + >>> import paddle + >>> import numpy as np + + >>> class Mylayer(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.linear1 = paddle.nn.Linear(10, 10) + ... self.linear2 = paddle.nn.Linear(5, 5) + ... self.conv2d = paddle.nn.Conv2D(3, 2, 3) + ... self.embedding = paddle.nn.Embedding(128, 16) + ... self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32')) + ... + >>> mylayer = Mylayer() + >>> print(dir(mylayer)) + ['__call__', '__class__', '__delattr__', '__dict__', ..., 'training'] """ method = dir(self.__class__) attrs = list(self.__dict__.keys()) @@ -1756,12 +1871,12 @@ class Layer: Examples: .. code-block:: python - import paddle + >>> import paddle - emb = paddle.nn.Embedding(10, 10) + >>> emb = paddle.nn.Embedding(10, 10) - state_dict = emb.to_static_state_dict() - paddle.save( state_dict, "paddle_dy.pdparams") + >>> state_dict = emb.to_static_state_dict() + >>> paddle.save( state_dict, "paddle_dy.pdparams") ''' return self._state_dict_impl( @@ -1793,12 +1908,12 @@ class Layer: Examples: .. 
code-block:: python - import paddle + >>> import paddle - emb = paddle.nn.Embedding(10, 10) + >>> emb = paddle.nn.Embedding(10, 10) - state_dict = emb.state_dict() - paddle.save( state_dict, "paddle_dy.pdparams") + >>> state_dict = emb.state_dict() + >>> paddle.save( state_dict, "paddle_dy.pdparams") ''' return self._state_dict_impl( @@ -1825,14 +1940,14 @@ class Layer: Examples: .. code-block:: python - import paddle + >>> import paddle - emb = paddle.nn.Embedding(10, 10) + >>> emb = paddle.nn.Embedding(10, 10) - state_dict = emb.state_dict() - paddle.save(state_dict, "paddle_dy.pdparams") - para_state_dict = paddle.load("paddle_dy.pdparams") - emb.set_state_dict(para_state_dict) + >>> state_dict = emb.state_dict() + >>> paddle.save(state_dict, "paddle_dy.pdparams") + >>> para_state_dict = paddle.load("paddle_dy.pdparams") + >>> emb.set_state_dict(para_state_dict) ''' missing_keys = [] @@ -1950,32 +2065,40 @@ class Layer: Examples: .. code-block:: python - # required: skip - import paddle - - linear=paddle.nn.Linear(2, 2) - linear.weight - #Parameter containing: - #Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False, - # [[-0.32770029, 0.38653070], - # [ 0.46030545, 0.08158520]]) - - linear.to(dtype='float64') - linear.weight - #Tenor(shape=[2, 2], dtype=float64, place=CUDAPlace(0), stop_gradient=False, - # [[-0.32770029, 0.38653070], - # [ 0.46030545, 0.08158520]]) - - linear.to(device='cpu') - linear.weight - #Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False, - # [[-0.32770029, 0.38653070], - # [ 0.46030545, 0.08158520]]) - linear.to(device=paddle.CUDAPinnedPlace(), blocking=False) - linear.weight - #Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False, - # [[-0.04989364, -0.56889004], - # [ 0.33960250, 0.96878713]]) + >>> import paddle + >>> paddle.seed(2023) + + >>> linear=paddle.nn.Linear(2, 2) + >>> linear.weight + >>> print(linear.weight) + Parameter containing: + Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False, + [[ 0.89611185, 0.04935038], + [-0.58883440, 0.99266374]]) + + >>> linear.to(dtype='float64') + >>> linear.weight + >>> print(linear.weight) + Parameter containing: + Tensor(shape=[2, 2], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[ 0.89611185, 0.04935038], + [-0.58883440, 0.99266374]]) + + >>> linear.to(device='cpu') + >>> linear.weight + >>> print(linear.weight) + Parameter containing: + Tensor(shape=[2, 2], dtype=float64, place=Place(cpu), stop_gradient=False, + [[ 0.89611185, 0.04935038], + [-0.58883440, 0.99266374]]) + + >>> # doctest: +REQUIRES(env:GPU) + >>> linear.to(device=paddle.CUDAPinnedPlace(), blocking=False) + >>> linear.weight + >>> print(linear.weight) + Tensor(shape=[2, 2], dtype=float64, place=Place(gpu_pinned), stop_gradient=False, + [[ 0.89611185, 0.04935038], + [-0.58883440, 0.99266374]]) ''' return self._to_impl( @@ -2161,21 +2284,25 @@ class Layer: Examples: .. code-block:: python - import paddle - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.linear = paddle.nn.Linear(1, 1) - self.dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - out = self.linear(input) - out = self.dropout(out) - return out - - model = Model() - model.float() + >>> import paddle + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.linear = paddle.nn.Linear(1, 1) + ... self.dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... 
out = self.linear(input) + ... out = self.dropout(out) + ... return out + ... + >>> model = Model() + >>> model.float() + Model( + (linear): Linear(in_features=1, out_features=1, dtype=paddle.float32) + (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train) + ) ''' excluded_layers = [] if excluded_layers is None else excluded_layers @@ -2213,21 +2340,26 @@ class Layer: Examples: .. code-block:: python - import paddle - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.linear = paddle.nn.Linear(1, 1) - self.dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - out = self.linear(input) - out = self.dropout(out) - return out - - model = Model() - model.float16() + >>> # doctest: +SKIP('Paddle compiled by the user does not support float16, so keep original data type.') + >>> import paddle + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.linear = paddle.nn.Linear(1, 1) + ... self.dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... out = self.linear(input) + ... out = self.dropout(out) + ... return out + ... + >>> model = Model() + >>> model.float16() + Model( + (linear): Linear(in_features=1, out_features=1, dtype=float32) + (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train) + ) ''' if paddle.amp.is_float16_supported() is False: @@ -2273,21 +2405,27 @@ class Layer: Examples: .. code-block:: python - import paddle - - class Model(paddle.nn.Layer): - def __init__(self): - super().__init__() - self.linear = paddle.nn.Linear(1, 1) - self.dropout = paddle.nn.Dropout(p=0.5) - - def forward(self, input): - out = self.linear(input) - out = self.dropout(out) - return out - - model = Model() - model.bfloat16() + >>> # doctest: +SKIP('bfloat need V100 compile') + >>> import paddle + + >>> class Model(paddle.nn.Layer): + ... def __init__(self): + ... super().__init__() + ... self.linear = paddle.nn.Linear(1, 1) + ... self.dropout = paddle.nn.Dropout(p=0.5) + ... + ... def forward(self, input): + ... out = self.linear(input) + ... out = self.dropout(out) + ... return out + ... + >>> model = Model() + >>> model.bfloat16() + >>> #UserWarning: Paddle compiled by the user does not support bfloat16, so keep original data type. + Model( + (linear): Linear(in_features=1, out_features=1, dtype=float32) + (dropout): Dropout(p=0.5, axis=None, mode=upscale_in_train) + ) ''' if paddle.amp.is_bfloat16_supported() is False: -- GitLab