Unverified Commit 418cc35d authored by cyberslack_lee, committed by GitHub

[xdoctest] reformat example code with google style in No.86-90 (#55812)

* norm, test=docs_preview

* test=docs_preview

* test=docs_preview

* test=docs_preview
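
For context, the Google-style convention that xdoctest checks: each statement in a docstring example carries a `>>> ` prompt (with `... ` for continuation lines), and the expected output follows without a prompt, so the runner can execute the snippet and verify the printed result instead of trusting stale output comments. A minimal sketch of the convention (illustrative only, not a line from this diff):

    >>> import paddle
    >>> paddle.seed(100)  # fixed seed so the printed values are reproducible
    >>> x = paddle.rand((2, 3))
    >>> print(x.shape)
    [2, 3]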
Parent 4ff6999a
@@ -165,14 +165,18 @@ class InstanceNorm1D(_InstanceNormBase):
.. code-block:: python

- import paddle
- x = paddle.rand((2, 2, 3))
- instance_norm = paddle.nn.InstanceNorm1D(2)
- instance_norm_out = instance_norm(x)
- print(instance_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 2, 3))
+ >>> instance_norm = paddle.nn.InstanceNorm1D(2)
+ >>> instance_norm_out = instance_norm(x)
+ >>> print(instance_norm_out)
+ Tensor(shape=[2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[ 1.32132232, -0.22444785, -1.09687424],
+ [ 1.29506636, -0.15688568, -1.13818073]],
+ [[-0.27764025, 1.33961368, -1.06197333],
+ [ 0.44484580, -1.38489723, 0.94005162]]])
"""
def __init__(
@@ -255,13 +259,22 @@ class InstanceNorm2D(_InstanceNormBase):
.. code-block:: python

- import paddle
- x = paddle.rand((2, 2, 2, 3))
- instance_norm = paddle.nn.InstanceNorm2D(2)
- instance_norm_out = instance_norm(x)
- print(instance_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 2, 2, 3))
+ >>> instance_norm = paddle.nn.InstanceNorm2D(2)
+ >>> instance_norm_out = instance_norm(x)
+ >>> print(instance_norm_out)
+ Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[ 1.26652932, -0.60229748, -1.65705574],
+ [ 1.06272733, 0.24229208, -0.31219524]],
+ [[-0.85414171, 0.31684181, -1.42204332],
+ [ 1.00412714, -0.43966094, 1.39487720]]],
+ [[[ 0.83324969, 1.25046813, -0.79470295],
+ [-1.38446140, 0.81851846, -0.72307163]],
+ [[-0.33560610, 0.95346332, 0.45585334],
+ [-0.53483474, 1.20336461, -1.74224067]]]])
"""
def __init__(
@@ -342,13 +355,30 @@ class InstanceNorm3D(_InstanceNormBase):
.. code-block:: python

- import paddle
- x = paddle.rand((2, 2, 2, 2, 3))
- instance_norm = paddle.nn.InstanceNorm3D(2)
- instance_norm_out = instance_norm(x)
- print(instance_norm_out.numpy)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 2, 2, 2, 3))
+ >>> instance_norm = paddle.nn.InstanceNorm3D(2)
+ >>> instance_norm_out = instance_norm(x)
+ >>> print(instance_norm_out)
+ Tensor(shape=[2, 2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[[ 0.60520107, -0.67670596, -1.40020907],
+ [ 0.46540472, -0.09736639, -0.47771260]],
+ [[-0.74365318, 0.63718963, -1.41333199],
+ [ 1.44764769, -0.25489071, 1.90842640]]],
+ [[[ 1.09773374, 1.49568439, -0.45503727],
+ [-1.01755965, 1.08368278, -0.38671401]],
+ [[-0.62252384, 0.60490805, 0.13109155],
+ [-0.81222630, 0.84286022, -1.96189928]]]],
+ [[[[ 0.28014541, 0.91674680, 1.71797717],
+ [-0.52062720, -0.74274176, -0.86439967]],
+ [[ 0.25707796, -1.23866379, 1.64422870],
+ [-1.48577297, -0.13187379, 0.16790220]]],
+ [[[-1.49266160, 1.57909954, 0.46455818],
+ [-0.14981404, 1.46959865, 0.24957968]],
+ [[ 0.25134835, -0.03276967, -0.30318922],
+ [ 0.76263177, -1.11345232, -1.68492818]]]]])
"""
def __init__(
@@ -410,13 +440,38 @@ class GroupNorm(Layer):
Examples:
.. code-block:: python

- import paddle
- x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
- group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
- group_norm_out = group_norm(x)
- print(group_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.arange(48, dtype="float32").reshape((2, 6, 2, 2))
+ >>> group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6)
+ >>> group_norm_out = group_norm(x)
+ >>> print(group_norm_out)
+ Tensor(shape=[2, 6, 2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]]],
+ [[[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]],
+ [[-1.34163547, -0.44721183],
+ [ 0.44721183, 1.34163547]]]])
"""
def __init__(
@@ -575,13 +630,22 @@ class LayerNorm(Layer):
.. code-block:: python

- import paddle
- x = paddle.rand((2, 2, 2, 3))
- layer_norm = paddle.nn.LayerNorm(x.shape[1:])
- layer_norm_out = layer_norm(x)
- print(layer_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 2, 2, 3))
+ >>> layer_norm = paddle.nn.LayerNorm(x.shape[1:])
+ >>> layer_norm_out = layer_norm(x)
+ >>> print(layer_norm_out)
+ Tensor(shape=[2, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[ 0.60520101, -0.67670590, -1.40020895],
+ [ 0.46540466, -0.09736638, -0.47771254]],
+ [[-0.74365306, 0.63718957, -1.41333175],
+ [ 1.44764745, -0.25489068, 1.90842617]]],
+ [[[ 1.09773350, 1.49568415, -0.45503747],
+ [-1.01755989, 1.08368254, -0.38671425]],
+ [[-0.62252408, 0.60490781, 0.13109133],
+ [-0.81222653, 0.84285998, -1.96189952]]]])
"""
def __init__(
@@ -891,17 +955,17 @@ class BatchNorm(Layer):
Examples:
.. code-block:: python

- import paddle.fluid as fluid
- import paddle.nn as nn
- from paddle.fluid.dygraph.base import to_variable
- import numpy as np
- x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
- with fluid.dygraph.guard():
-     x = to_variable(x)
-     batch_norm = nn.layer.norm.BatchNorm(10)
-     hidden1 = batch_norm(x)
+ >>> import paddle.fluid as fluid
+ >>> import paddle.nn as nn
+ >>> from paddle.fluid.dygraph.base import to_variable
+ >>> import numpy as np
+ >>> x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
+ >>> with fluid.dygraph.guard():
+ ...     x = to_variable(x)
+ ...     batch_norm = nn.layer.norm.BatchNorm(10)
+ ...     hidden1 = batch_norm(x)
"""
def __init__(
@@ -1165,13 +1229,16 @@ class BatchNorm1D(_BatchNormBase):
Examples:
.. code-block:: python

- import paddle
- x = paddle.rand((2, 1, 3))
- batch_norm = paddle.nn.BatchNorm1D(1)
- batch_norm_out = batch_norm(x)
- print(batch_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 1, 3))
+ >>> batch_norm = paddle.nn.BatchNorm1D(1)
+ >>> batch_norm_out = batch_norm(x)
+ >>> print(batch_norm_out)
+ Tensor(shape=[2, 1, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[ 1.26652932, -0.60229754, -1.65705597]],
+ [[ 1.06272745, 0.24229205, -0.31219530]]])
"""
def __init__(
@@ -1277,13 +1344,18 @@ class BatchNorm2D(_BatchNormBase):
Examples:
.. code-block:: python

- import paddle
- x = paddle.rand((2, 1, 2, 3))
- batch_norm = paddle.nn.BatchNorm2D(1)
- batch_norm_out = batch_norm(x)
- print(batch_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 1, 2, 3))
+ >>> batch_norm = paddle.nn.BatchNorm2D(1)
+ >>> batch_norm_out = batch_norm(x)
+ >>> print(batch_norm_out)
+ Tensor(shape=[2, 1, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[ 0.60520101, -0.67670590, -1.40020895],
+ [ 0.46540475, -0.09736633, -0.47771257]]],
+ [[[-0.74365312, 0.63718963, -1.41333187],
+ [ 1.44764757, -0.25489068, 1.90842628]]]])
"""
def _check_data_format(self, input):
@@ -1363,13 +1435,22 @@ class BatchNorm3D(_BatchNormBase):
Examples:
.. code-block:: python

- import paddle
- x = paddle.rand((2, 1, 2, 2, 3))
- batch_norm = paddle.nn.BatchNorm3D(1)
- batch_norm_out = batch_norm(x)
- print(batch_norm_out)
+ >>> import paddle
+ >>> paddle.seed(100)
+ >>> x = paddle.rand((2, 1, 2, 2, 3))
+ >>> batch_norm = paddle.nn.BatchNorm3D(1)
+ >>> batch_norm_out = batch_norm(x)
+ >>> print(batch_norm_out)
+ Tensor(shape=[2, 1, 2, 2, 3], dtype=float32, place=Place(cpu), stop_gradient=False,
+ [[[[[ 0.28011751, -0.95211101, -1.64757574],
+ [ 0.14573872, -0.39522290, -0.76082933]],
+ [[-1.01646376, 0.31086648, -1.66019011],
+ [ 1.08991623, -0.54664266, 1.53283834]]]],
+ [[[[ 1.33958006, 1.71585774, -0.12862551],
+ [-0.66051245, 1.32629418, -0.06402326]],
+ [[-0.28699064, 0.87359405, 0.42558217],
+ [-0.46636176, 1.09858704, -1.55342245]]]]])
"""
def __init__(
@@ -1485,23 +1566,22 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python

- # required: gpu
- import paddle
- import paddle.nn as nn
- x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32')
- if paddle.is_compiled_with_cuda():
-     sync_batch_norm = nn.SyncBatchNorm(2)
-     hidden1 = sync_batch_norm(x)
-     print(hidden1)
-     # Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-     # [[[[ 0.26824948, 1.09363246],
-     # [ 0.26824948, -1.63013160]],
-     # [[ 0.80956620, -0.66528702],
-     # [-1.27446556, 1.13018656]]]])
+ >>> # doctest: +REQUIRES(env:GPU)
+ >>> import paddle
+ >>> import paddle.nn as nn
+ >>> paddle.device.set_device('gpu')
+ >>> x = paddle.to_tensor([[[[0.3, 0.4], [0.3, 0.07]], [[0.83, 0.37], [0.18, 0.93]]]]).astype('float32')
+ >>> if paddle.is_compiled_with_cuda():
+ ...     sync_batch_norm = nn.SyncBatchNorm(2)
+ ...     hidden1 = sync_batch_norm(x)
+ ...     print(hidden1)
+ Tensor(shape=[1, 2, 2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+ [[[[ 0.26824948, 1.09363246],
+ [ 0.26824948, -1.63013160]],
+ [[ 0.80956620, -0.66528702],
+ [-1.27446556, 1.13018656]]]])
"""
@@ -1625,11 +1705,16 @@ class SyncBatchNorm(_BatchNormBase):
Examples:
.. code-block:: python

- import paddle
- import paddle.nn as nn
- model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5))
- sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
+ >>> import paddle
+ >>> import paddle.nn as nn
+ >>> model = nn.Sequential(nn.Conv2D(3, 5, 3), nn.BatchNorm2D(5))
+ >>> sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
+ >>> print(sync_model)
+ Sequential(
+ (0): Conv2D(3, 5, kernel_size=[3, 3], data_format=NCHW)
+ (1): SyncBatchNorm(num_features=5, momentum=0.9, epsilon=1e-05)
+ )
"""
layer_output = layer
@@ -1704,14 +1789,15 @@ class LocalResponseNorm(Layer):
Examples:
.. code-block:: python

- import paddle
- x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
- m = paddle.nn.LocalResponseNorm(size=5)
- y = m(x)
- print(y.shape) # [3, 3, 112, 112]
+ >>> import paddle
+ >>> x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
+ >>> m = paddle.nn.LocalResponseNorm(size=5)
+ >>> y = m(x)
+ >>> print(y.shape)
+ [3, 3, 112, 112]
"""
def __init__(
@@ -1801,15 +1887,14 @@ class SpectralNorm(Layer):
None

Examples:
.. code-block:: python

- import paddle
- x = paddle.rand((2,8,32,32))
- spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2)
- spectral_norm_out = spectral_norm(x)
- print(spectral_norm_out.shape) # [2, 8, 32, 32]
+ >>> import paddle
+ >>> x = paddle.rand((2,8,32,32))
+ >>> spectral_norm = paddle.nn.SpectralNorm(x.shape, dim=1, power_iters=2)
+ >>> spectral_norm_out = spectral_norm(x)
+ >>> print(spectral_norm_out.shape)
+ [2, 8, 32, 32]
"""
This diff is collapsed.
@@ -89,14 +89,16 @@ def rnn(
.. code-block:: python

- import paddle
- paddle.disable_static()
- cell = paddle.nn.SimpleRNNCell(16, 32)
- inputs = paddle.rand((4, 23, 16))
- prev_h = paddle.randn((4, 32))
- outputs, final_states = paddle.nn.layer.rnn(cell, inputs, prev_h)
+ >>> import paddle
+ >>> inputs = paddle.rand((4, 23, 16))
+ >>> prev_h = paddle.randn((4, 32))
+ >>> cell = paddle.nn.SimpleRNNCell(16, 32)
+ >>> rnn = paddle.nn.RNN(cell)
+ >>> outputs, final_states = rnn(inputs, prev_h)
+ >>> print(outputs.shape)
+ [4, 23, 32]
+ >>> print(final_states.shape)
+ [4, 32]
"""
@@ -397,18 +401,17 @@ def birnn(
.. code-block:: python

- import paddle
- paddle.disable_static()
- cell_fw = paddle.nn.LSTMCell(16, 32)
- cell_bw = paddle.nn.LSTMCell(16, 32)
- inputs = paddle.rand((4, 23, 16))
- hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
- hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
- initial_states = ((hf, cf), (hb, cb))
- outputs, final_states = paddle.nn.layer.birnn(
-     cell_fw, cell_bw, inputs, initial_states)
+ >>> import paddle
+ >>> cell_fw = paddle.nn.LSTMCell(16, 32)
+ >>> cell_bw = paddle.nn.LSTMCell(16, 32)
+ >>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
+ >>> inputs = paddle.rand((2, 23, 16))
+ >>> outputs, final_states = rnn(inputs)
+ >>> print(outputs.shape)
+ [2, 23, 64]
+ >>> print(final_states[0][0].shape)
+ [2, 32]
"""
@@ -743,16 +746,15 @@ class SimpleRNNCell(RNNCellBase):
.. code-block:: python

- import paddle
- x = paddle.randn((4, 16))
- prev_h = paddle.randn((4, 32))
- cell = paddle.nn.SimpleRNNCell(16, 32)
- y, h = cell(x, prev_h)
- print(y.shape)
- #[4,32]
+ >>> import paddle
+ >>> x = paddle.randn((4, 16))
+ >>> prev_h = paddle.randn((4, 32))
+ >>> cell = paddle.nn.SimpleRNNCell(16, 32)
+ >>> y, h = cell(x, prev_h)
+ >>> print(y.shape)
+ [4, 32]
"""
@@ -897,22 +899,21 @@ class LSTMCell(RNNCellBase):
.. code-block:: python

- import paddle
- x = paddle.randn((4, 16))
- prev_h = paddle.randn((4, 32))
- prev_c = paddle.randn((4, 32))
- cell = paddle.nn.LSTMCell(16, 32)
- y, (h, c) = cell(x, (prev_h, prev_c))
- print(y.shape)
- print(h.shape)
- print(c.shape)
- #[4,32]
- #[4,32]
- #[4,32]
+ >>> import paddle
+ >>> x = paddle.randn((4, 16))
+ >>> prev_h = paddle.randn((4, 32))
+ >>> prev_c = paddle.randn((4, 32))
+ >>> cell = paddle.nn.LSTMCell(16, 32)
+ >>> y, (h, c) = cell(x, (prev_h, prev_c))
+ >>> print(y.shape)
+ [4, 32]
+ >>> print(h.shape)
+ [4, 32]
+ >>> print(c.shape)
+ [4, 32]
"""
@@ -1059,19 +1060,19 @@ class GRUCell(RNNCellBase):
.. code-block:: python

- import paddle
- x = paddle.randn((4, 16))
- prev_h = paddle.randn((4, 32))
- cell = paddle.nn.GRUCell(16, 32)
- y, h = cell(x, prev_h)
- print(y.shape)
- print(h.shape)
- #[4,32]
- #[4,32]
+ >>> import paddle
+ >>> x = paddle.randn((4, 16))
+ >>> prev_h = paddle.randn((4, 32))
+ >>> cell = paddle.nn.GRUCell(16, 32)
+ >>> y, h = cell(x, prev_h)
+ >>> print(y.shape)
+ [4, 32]
+ >>> print(h.shape)
+ [4, 32]
"""
@@ -1189,20 +1190,19 @@ class RNN(Layer):
.. code-block:: python

- import paddle
- inputs = paddle.rand((4, 23, 16))
- prev_h = paddle.randn((4, 32))
- cell = paddle.nn.SimpleRNNCell(16, 32)
- rnn = paddle.nn.RNN(cell)
- outputs, final_states = rnn(inputs, prev_h)
- print(outputs.shape)
- print(final_states.shape)
- #[4,23,32]
- #[4,32]
+ >>> import paddle
+ >>> inputs = paddle.rand((4, 23, 16))
+ >>> prev_h = paddle.randn((4, 32))
+ >>> cell = paddle.nn.SimpleRNNCell(16, 32)
+ >>> rnn = paddle.nn.RNN(cell)
+ >>> outputs, final_states = rnn(inputs, prev_h)
+ >>> print(outputs.shape)
+ [4, 23, 32]
+ >>> print(final_states.shape)
+ [4, 32]
"""
@@ -1263,20 +1263,19 @@ class BiRNN(Layer):
.. code-block:: python

- import paddle
- cell_fw = paddle.nn.LSTMCell(16, 32)
- cell_bw = paddle.nn.LSTMCell(16, 32)
- rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
- inputs = paddle.rand((2, 23, 16))
- outputs, final_states = rnn(inputs)
- print(outputs.shape)
- print(final_states[0][0].shape,len(final_states),len(final_states[0]))
- #[4,23,64]
- #[2,32] 2 2
+ >>> import paddle
+ >>> cell_fw = paddle.nn.LSTMCell(16, 32)
+ >>> cell_bw = paddle.nn.LSTMCell(16, 32)
+ >>> rnn = paddle.nn.BiRNN(cell_fw, cell_bw)
+ >>> inputs = paddle.rand((2, 23, 16))
+ >>> outputs, final_states = rnn(inputs)
+ >>> print(outputs.shape)
+ [2, 23, 64]
+ >>> print(final_states[0][0].shape,len(final_states),len(final_states[0]))
+ [2, 32] 2 2
"""
@@ -1702,19 +1701,19 @@ class SimpleRNN(RNNBase):
.. code-block:: python

- import paddle
- rnn = paddle.nn.SimpleRNN(16, 32, 2)
- x = paddle.randn((4, 23, 16))
- prev_h = paddle.randn((2, 4, 32))
- y, h = rnn(x, prev_h)
- print(y.shape)
- print(h.shape)
- #[4,23,32]
- #[2,4,32]
+ >>> import paddle
+ >>> rnn = paddle.nn.SimpleRNN(16, 32, 2)
+ >>> x = paddle.randn((4, 23, 16))
+ >>> prev_h = paddle.randn((2, 4, 32))
+ >>> y, h = rnn(x, prev_h)
+ >>> print(y.shape)
+ [4, 23, 32]
+ >>> print(h.shape)
+ [2, 4, 32]
"""
@@ -1833,22 +1832,22 @@ class LSTM(RNNBase):
.. code-block:: python

- import paddle
- rnn = paddle.nn.LSTM(16, 32, 2)
- x = paddle.randn((4, 23, 16))
- prev_h = paddle.randn((2, 4, 32))
- prev_c = paddle.randn((2, 4, 32))
- y, (h, c) = rnn(x, (prev_h, prev_c))
- print(y.shape)
- print(h.shape)
- print(c.shape)
- #[4,23,32]
- #[2,4,32]
- #[2,4,32]
+ >>> import paddle
+ >>> rnn = paddle.nn.LSTM(16, 32, 2)
+ >>> x = paddle.randn((4, 23, 16))
+ >>> prev_h = paddle.randn((2, 4, 32))
+ >>> prev_c = paddle.randn((2, 4, 32))
+ >>> y, (h, c) = rnn(x, (prev_h, prev_c))
+ >>> print(y.shape)
+ [4, 23, 32]
+ >>> print(h.shape)
+ [2, 4, 32]
+ >>> print(c.shape)
+ [2, 4, 32]
"""
@@ -1955,19 +1954,19 @@ class GRU(RNNBase):
.. code-block:: python

- import paddle
- rnn = paddle.nn.GRU(16, 32, 2)
- x = paddle.randn((4, 23, 16))
- prev_h = paddle.randn((2, 4, 32))
- y, h = rnn(x, prev_h)
- print(y.shape)
- print(h.shape)
- #[4,23,32]
- #[2,4,32]
+ >>> import paddle
+ >>> rnn = paddle.nn.GRU(16, 32, 2)
+ >>> x = paddle.randn((4, 23, 16))
+ >>> prev_h = paddle.randn((2, 4, 32))
+ >>> y, h = rnn(x, prev_h)
+ >>> print(y.shape)
+ [4, 23, 32]
+ >>> print(h.shape)
+ [2, 4, 32]
"""
@@ -141,14 +141,16 @@ class MultiHeadAttention(Layer):
.. code-block:: python

- import paddle
- # encoder input: [batch_size, sequence_length, d_model]
- query = paddle.rand((2, 4, 128))
- # self attention mask: [batch_size, num_heads, query_len, query_len]
- attn_mask = paddle.rand((2, 2, 4, 4))
- multi_head_attn = paddle.nn.MultiHeadAttention(128, 2)
- output = multi_head_attn(query, None, None, attn_mask=attn_mask) # [2, 4, 128]
+ >>> import paddle
+ >>> # encoder input: [batch_size, sequence_length, d_model]
+ >>> query = paddle.rand((2, 4, 128))
+ >>> # self attention mask: [batch_size, num_heads, query_len, query_len]
+ >>> attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> multi_head_attn = paddle.nn.MultiHeadAttention(128, 2)
+ >>> output = multi_head_attn(query, None, None, attn_mask=attn_mask)
+ >>> print(output.shape)
+ [2, 4, 128]
"""
Cache = collections.namedtuple("Cache", ["k", "v"])
@@ -490,15 +492,17 @@ class TransformerEncoderLayer(Layer):
.. code-block:: python

- import paddle
- from paddle.nn import TransformerEncoderLayer
- # encoder input: [batch_size, src_len, d_model]
- enc_input = paddle.rand((2, 4, 128))
- # self attention mask: [batch_size, n_head, src_len, src_len]
- attn_mask = paddle.rand((2, 2, 4, 4))
- encoder_layer = TransformerEncoderLayer(128, 2, 512)
- enc_output = encoder_layer(enc_input, attn_mask) # [2, 4, 128]
+ >>> import paddle
+ >>> from paddle.nn import TransformerEncoderLayer
+ >>> # encoder input: [batch_size, src_len, d_model]
+ >>> enc_input = paddle.rand((2, 4, 128))
+ >>> # self attention mask: [batch_size, n_head, src_len, src_len]
+ >>> attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
+ >>> enc_output = encoder_layer(enc_input, attn_mask)
+ >>> print(enc_output.shape)
+ [2, 4, 128]
"""
def __init__(
@@ -659,16 +663,18 @@ class TransformerEncoder(Layer):
.. code-block:: python

- import paddle
- from paddle.nn import TransformerEncoderLayer, TransformerEncoder
- # encoder input: [batch_size, src_len, d_model]
- enc_input = paddle.rand((2, 4, 128))
- # self attention mask: [batch_size, n_head, src_len, src_len]
- attn_mask = paddle.rand((2, 2, 4, 4))
- encoder_layer = TransformerEncoderLayer(128, 2, 512)
- encoder = TransformerEncoder(encoder_layer, 2)
- enc_output = encoder(enc_input, attn_mask) # [2, 4, 128]
+ >>> import paddle
+ >>> from paddle.nn import TransformerEncoderLayer, TransformerEncoder
+ >>> # encoder input: [batch_size, src_len, d_model]
+ >>> enc_input = paddle.rand((2, 4, 128))
+ >>> # self attention mask: [batch_size, n_head, src_len, src_len]
+ >>> attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> encoder_layer = TransformerEncoderLayer(128, 2, 512)
+ >>> encoder = TransformerEncoder(encoder_layer, 2)
+ >>> enc_output = encoder(enc_input, attn_mask)
+ >>> print(enc_output.shape)
+ [2, 4, 128]
"""
def __init__(self, encoder_layer, num_layers, norm=None):
@@ -809,22 +815,24 @@ class TransformerDecoderLayer(Layer):
.. code-block:: python

- import paddle
- from paddle.nn import TransformerDecoderLayer
- # decoder input: [batch_size, tgt_len, d_model]
- dec_input = paddle.rand((2, 4, 128))
- # encoder output: [batch_size, src_len, d_model]
- enc_output = paddle.rand((2, 6, 128))
- # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
- self_attn_mask = paddle.rand((2, 2, 4, 4))
- # cross attention mask: [batch_size, n_head, tgt_len, src_len]
- cross_attn_mask = paddle.rand((2, 2, 4, 6))
- decoder_layer = TransformerDecoderLayer(128, 2, 512)
- output = decoder_layer(dec_input,
-     enc_output,
-     self_attn_mask,
-     cross_attn_mask) # [2, 4, 128]
+ >>> import paddle
+ >>> from paddle.nn import TransformerDecoderLayer
+ >>> # decoder input: [batch_size, tgt_len, d_model]
+ >>> dec_input = paddle.rand((2, 4, 128))
+ >>> # encoder output: [batch_size, src_len, d_model]
+ >>> enc_output = paddle.rand((2, 6, 128))
+ >>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
+ >>> self_attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
+ >>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
+ >>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
+ >>> output = decoder_layer(dec_input,
+ ...     enc_output,
+ ...     self_attn_mask,
+ ...     cross_attn_mask)
+ >>> print(output.shape)
+ [2, 4, 128]
"""
def __init__(
@@ -1031,23 +1039,25 @@ class TransformerDecoder(Layer):
.. code-block:: python

- import paddle
- from paddle.nn import TransformerDecoderLayer, TransformerDecoder
- # decoder input: [batch_size, tgt_len, d_model]
- dec_input = paddle.rand((2, 4, 128))
- # encoder output: [batch_size, src_len, d_model]
- enc_output = paddle.rand((2, 6, 128))
- # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
- self_attn_mask = paddle.rand((2, 2, 4, 4))
- # cross attention mask: [batch_size, n_head, tgt_len, src_len]
- cross_attn_mask = paddle.rand((2, 2, 4, 6))
- decoder_layer = TransformerDecoderLayer(128, 2, 512)
- decoder = TransformerDecoder(decoder_layer, 2)
- output = decoder(dec_input,
-     enc_output,
-     self_attn_mask,
-     cross_attn_mask) # [2, 4, 128]
+ >>> import paddle
+ >>> from paddle.nn import TransformerDecoderLayer, TransformerDecoder
+ >>> # decoder input: [batch_size, tgt_len, d_model]
+ >>> dec_input = paddle.rand((2, 4, 128))
+ >>> # encoder output: [batch_size, src_len, d_model]
+ >>> enc_output = paddle.rand((2, 6, 128))
+ >>> # self attention mask: [batch_size, n_head, tgt_len, tgt_len]
+ >>> self_attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> # cross attention mask: [batch_size, n_head, tgt_len, src_len]
+ >>> cross_attn_mask = paddle.rand((2, 2, 4, 6))
+ >>> decoder_layer = TransformerDecoderLayer(128, 2, 512)
+ >>> decoder = TransformerDecoder(decoder_layer, 2)
+ >>> output = decoder(dec_input,
+ ...     enc_output,
+ ...     self_attn_mask,
+ ...     cross_attn_mask)
+ >>> print(output.shape)
+ [2, 4, 128]
"""
def __init__(self, decoder_layer, num_layers, norm=None):
@@ -1242,25 +1252,27 @@ class Transformer(Layer):
.. code-block:: python

- import paddle
- from paddle.nn import Transformer
- # src: [batch_size, tgt_len, d_model]
- enc_input = paddle.rand((2, 4, 128))
- # tgt: [batch_size, src_len, d_model]
- dec_input = paddle.rand((2, 6, 128))
- # src_mask: [batch_size, n_head, src_len, src_len]
- enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
- # tgt_mask: [batch_size, n_head, tgt_len, tgt_len]
- dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
- # memory_mask: [batch_size, n_head, tgt_len, src_len]
- cross_attn_mask = paddle.rand((2, 2, 6, 4))
- transformer = Transformer(128, 2, 4, 4, 512)
- output = transformer(enc_input,
-     dec_input,
-     enc_self_attn_mask,
-     dec_self_attn_mask,
-     cross_attn_mask) # [2, 6, 128]
+ >>> import paddle
+ >>> from paddle.nn import Transformer
+ >>> # src: [batch_size, tgt_len, d_model]
+ >>> enc_input = paddle.rand((2, 4, 128))
+ >>> # tgt: [batch_size, src_len, d_model]
+ >>> dec_input = paddle.rand((2, 6, 128))
+ >>> # src_mask: [batch_size, n_head, src_len, src_len]
+ >>> enc_self_attn_mask = paddle.rand((2, 2, 4, 4))
+ >>> # tgt_mask: [batch_size, n_head, tgt_len, tgt_len]
+ >>> dec_self_attn_mask = paddle.rand((2, 2, 6, 6))
+ >>> # memory_mask: [batch_size, n_head, tgt_len, src_len]
+ >>> cross_attn_mask = paddle.rand((2, 2, 6, 4))
+ >>> transformer = Transformer(128, 2, 4, 4, 512)
+ >>> output = transformer(enc_input,
+ ...     dec_input,
+ ...     enc_self_attn_mask,
+ ...     dec_self_attn_mask,
+ ...     cross_attn_mask)
+ >>> print(output.shape)
+ [2, 6, 128]
"""
def __init__(
@@ -1454,20 +1466,20 @@ class Transformer(Layer):
Examples:
.. code-block:: python

- import paddle
- from paddle.nn.layer.transformer import Transformer
- length = 5
- d_model, n_head, dim_feedforward = 8, 4, 64
- transformer_paddle = Transformer(
-     d_model, n_head, dim_feedforward=dim_feedforward)
- mask = transformer_paddle.generate_square_subsequent_mask(length)
- print(mask)
- # [[ 0. -inf -inf -inf -inf]
- # [ 0. 0. -inf -inf -inf]
- # [ 0. 0. 0. -inf -inf]
- # [ 0. 0. 0. 0. -inf]
- # [ 0. 0. 0. 0. 0.]]
+ >>> import paddle
+ >>> from paddle.nn.layer.transformer import Transformer
+ >>> length = 5
+ >>> d_model, n_head, dim_feedforward = 8, 4, 64
+ >>> transformer_paddle = Transformer(
+ ...     d_model, n_head, dim_feedforward=dim_feedforward)
+ >>> mask = transformer_paddle.generate_square_subsequent_mask(length)
+ >>> print(mask)
+ Tensor(shape=[5, 5], dtype=float32, place=Place(cpu), stop_gradient=True,
+ [[ 0. , -inf., -inf., -inf., -inf.],
+ [ 0. , 0. , -inf., -inf., -inf.],
+ [ 0. , 0. , 0. , -inf., -inf.],
+ [ 0. , 0. , 0. , 0. , -inf.],
+ [ 0. , 0. , 0. , 0. , 0. ]])
"""
return paddle.tensor.triu(
@@ -46,14 +46,14 @@ class PixelShuffle(Layer):
Examples:
.. code-block:: python

- import paddle
- import paddle.nn as nn
- x = paddle.randn(shape=[2,9,4,4])
- pixel_shuffle = nn.PixelShuffle(3)
- out = pixel_shuffle(x)
- print(out.shape)
- # [2, 1, 12, 12]
+ >>> import paddle
+ >>> import paddle.nn as nn
+ >>> x = paddle.randn(shape=[2, 9, 4, 4])
+ >>> pixel_shuffle = nn.PixelShuffle(3)
+ >>> out = pixel_shuffle(x)
+ >>> print(out.shape)
+ [2, 1, 12, 12]
"""
@@ -109,14 +109,14 @@ class PixelUnshuffle(Layer):
Examples:
.. code-block:: python

- import paddle
- import paddle.nn as nn
- x = paddle.randn([2, 1, 12, 12])
- pixel_unshuffle = nn.PixelUnshuffle(3)
- out = pixel_unshuffle(x)
- print(out.shape)
- # [2, 9, 4, 4]
+ >>> import paddle
+ >>> import paddle.nn as nn
+ >>> x = paddle.randn([2, 1, 12, 12])
+ >>> pixel_unshuffle = nn.PixelUnshuffle(3)
+ >>> out = pixel_unshuffle(x)
+ >>> print(out.shape)
+ [2, 9, 4, 4]
"""
@@ -175,24 +175,28 @@ class ChannelShuffle(Layer):
Examples:
.. code-block:: python

- import paddle
- import paddle.nn as nn
- x = paddle.arange(0, 0.6, 0.1, 'float32')
- x = paddle.reshape(x, [1, 6, 1, 1])
- # [[[[0. ]],
- #  [[0.10000000]],
- #  [[0.20000000]],
- #  [[0.30000001]],
- #  [[0.40000001]],
- #  [[0.50000000]]]]
- channel_shuffle = nn.ChannelShuffle(3)
- y = channel_shuffle(x)
- # [[[[0. ]],
- #  [[0.20000000]],
- #  [[0.40000001]],
- #  [[0.10000000]],
- #  [[0.30000001]],
- #  [[0.50000000]]]]
+ >>> import paddle
+ >>> import paddle.nn as nn
+ >>> x = paddle.arange(0, 0.6, 0.1, 'float32')
+ >>> x = paddle.reshape(x, [1, 6, 1, 1])
+ >>> print(x)
+ Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
+ [[[[0. ]],
+ [[0.10000000]],
+ [[0.20000000]],
+ [[0.30000001]],
+ [[0.40000001]],
+ [[0.50000000]]]])
+ >>> channel_shuffle = nn.ChannelShuffle(3)
+ >>> y = channel_shuffle(x)
+ >>> print(y)
+ Tensor(shape=[1, 6, 1, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
+ [[[[0. ]],
+ [[0.20000000]],
+ [[0.40000001]],
+ [[0.10000000]],
+ [[0.30000001]],
+ [[0.50000000]]]])
"""
def __init__(self, groups, data_format="NCHW", name=None):