Unverified commit b994c89d authored by xiaoguoguo626807, committed by GitHub

【fluid api clear】remove transpose (#47917)

* remove transpose

* codestyle check

* modify CI_STATIC

* modify CI_STATIC

* modify enable static()

* remove unused import

* fix conflict about stack

* fix conflict about stack

* fix conflict about stack

* endless conflict
Parent 5f36e775
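This PR is a mechanical rename: every remaining call to the removed `fluid.layers.transpose` (including the `layers.transpose` and `nn.transpose` aliases seen in the hunks below) becomes `paddle.transpose`, which takes the same `x` and `perm` arguments. A minimal before/after sketch of the migration, assuming a Paddle 2.x install (the tensor `x` is only illustrative):

```python
import paddle

x = paddle.randn([2, 3, 4])

# old fluid API (removed by this PR):
#   y = paddle.fluid.layers.transpose(x, perm=[1, 0, 2])
# unified 2.x API used everywhere in this diff:
y = paddle.transpose(x, perm=[1, 0, 2])
print(y.shape)  # [3, 2, 4]
```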
......@@ -326,7 +326,7 @@ def basic_gru(
)
if batch_first:
input = layers.transpose(input, [1, 0, 2])
input = paddle.transpose(input, [1, 0, 2])
mask = None
if sequence_length:
......@@ -334,7 +334,7 @@ def basic_gru(
mask = layers.sequence_mask(
sequence_length, maxlen=max_seq_len, dtype='float32'
)
mask = layers.transpose(mask, [1, 0])
mask = paddle.transpose(mask, [1, 0])
direc_num = 1
if bidirectional:
......@@ -425,7 +425,7 @@ def basic_gru(
)
if batch_first:
rnn_out = layers.transpose(rnn_out, [1, 0, 2])
rnn_out = paddle.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden
else:
......@@ -433,7 +433,7 @@ def basic_gru(
last_hidden = fw_last_hidden
if batch_first:
rnn_out = layers.transpose(rnn_out, [1, 0, 2])
rnn_out = paddle.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden
......@@ -610,7 +610,7 @@ def basic_lstm(
)
if batch_first:
input = layers.transpose(input, [1, 0, 2])
input = paddle.transpose(input, [1, 0, 2])
mask = None
if sequence_length:
......@@ -619,7 +619,7 @@ def basic_lstm(
sequence_length, maxlen=max_seq_len, dtype='float32'
)
mask = layers.transpose(mask, [1, 0])
mask = paddle.transpose(mask, [1, 0])
direc_num = 1
if bidirectional:
......@@ -740,7 +740,7 @@ def basic_lstm(
)
if batch_first:
rnn_out = layers.transpose(rnn_out, [1, 0, 2])
rnn_out = paddle.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden, last_cell
else:
......@@ -749,7 +749,7 @@ def basic_lstm(
last_cell = fw_last_cell
if batch_first:
rnn_out = layers.transpose(rnn_out, [1, 0, 2])
rnn_out = paddle.transpose(rnn_out, [1, 0, 2])
return rnn_out, last_hidden, last_cell
......
......@@ -625,10 +625,12 @@ class StaticRNN:
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -637,7 +639,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
......@@ -714,10 +716,12 @@ class StaticRNN:
Examples 1:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -726,7 +730,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
......@@ -742,9 +746,11 @@ class StaticRNN:
Examples 2:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -753,7 +759,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
boot_memory = fluid.layers.data(name='boot', shape=[hidden_size], dtype='float32', lod_level=1)
rnn = fluid.layers.StaticRNN()
with rnn.step():
......@@ -842,10 +848,12 @@ class StaticRNN:
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -854,7 +862,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
......@@ -893,10 +901,12 @@ class StaticRNN:
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -905,7 +915,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
......@@ -953,10 +963,12 @@ class StaticRNN:
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
vocab_size, hidden_size=10000, 200
paddle.enable_static()
x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
# create word sequence
x_emb = layers.embedding(
......@@ -965,7 +977,7 @@ class StaticRNN:
dtype='float32',
is_sparse=False)
# transform batch size to dim 1
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
......
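The StaticRNN docstring examples above now begin with `import paddle` and `paddle.enable_static()` and call `paddle.transpose`; the step body after `rnn.step()` is elided in the diff. For orientation only, a plausible completion that follows the public StaticRNN documentation pattern (a sketch, not the exact elided code; `paddle.static.nn.fc` is assumed here in place of the older `fluid.layers.fc`):

```python
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers

paddle.enable_static()
vocab_size, hidden_size = 10000, 200

x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64')
x_emb = layers.embedding(
    input=x,
    size=[vocab_size, hidden_size],
    dtype='float32',
    is_sparse=False)
# StaticRNN iterates over dim 0, so move the time axis to the front
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])

rnn = fluid.layers.StaticRNN()
with rnn.step():
    word = rnn.step_input(x_emb)                               # one time step
    prev = rnn.memory(shape=[-1, hidden_size], batch_ref=word)  # hidden state
    hidden = paddle.static.nn.fc(x=[word, prev], size=hidden_size,
                                 activation='relu')
    rnn.update_memory(prev, hidden)
    rnn.step_output(hidden)

result = rnn()  # stacked step outputs, time-major
```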
......@@ -774,7 +774,7 @@ def detection_output(
code_type='decode_center_size',
)
scores = nn.softmax(input=scores)
scores = nn.transpose(scores, perm=[0, 2, 1])
scores = paddle.transpose(scores, perm=[0, 2, 1])
scores.stop_gradient = True
nmsed_outs = helper.create_variable_for_type_inference(
dtype=decoded_box.dtype
......@@ -2443,7 +2443,7 @@ def multi_box_head(
stride=stride,
)
mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1])
mbox_loc = paddle.transpose(mbox_loc, perm=[0, 2, 3, 1])
mbox_loc_flatten = nn.flatten(mbox_loc, axis=1)
mbox_locs.append(mbox_loc_flatten)
......@@ -2456,7 +2456,7 @@ def multi_box_head(
padding=pad,
stride=stride,
)
conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1])
conf_loc = paddle.transpose(conf_loc, perm=[0, 2, 3, 1])
conf_loc_flatten = nn.flatten(conf_loc, axis=1)
mbox_confs.append(conf_loc_flatten)
......
......@@ -89,7 +89,6 @@ __all__ = [
'l2_normalize',
'matmul',
'topk',
'transpose',
'im2sequence',
'row_conv',
'multiplex',
......@@ -4875,108 +4874,6 @@ def ctc_greedy_decoder(
return ctc_out, ctc_out_len
def transpose(x, perm, name=None):
"""
Permute the data dimensions of `input` according to `perm`.
The `i`-th dimension of the returned tensor will correspond to the
perm[i]-th dimension of `input`.
Args:
x (Tensor): The input Tensor. It is a N-D Tensor of data types bool, float32, float64, int32.
perm (list|tuple): Permute the input according to the data of perm.
name (str): The name of this layer. It is optional.
Returns:
Tensor: A transposed n-D Tensor, with data type being bool, float32, float64, int32, int64.
For Example:
.. code-block:: text
x = [[[ 1 2 3 4] [ 5 6 7 8] [ 9 10 11 12]]
[[13 14 15 16] [17 18 19 20] [21 22 23 24]]]
shape(x) = [2,3,4]
# Example 1
perm0 = [1,0,2]
y_perm0 = [[[ 1 2 3 4] [13 14 15 16]]
[[ 5 6 7 8] [17 18 19 20]]
[[ 9 10 11 12] [21 22 23 24]]]
shape(y_perm0) = [3,2,4]
# Example 2
perm1 = [2,1,0]
y_perm1 = [[[ 1 13] [ 5 17] [ 9 21]]
[[ 2 14] [ 6 18] [10 22]]
[[ 3 15] [ 7 19] [11 23]]
[[ 4 16] [ 8 20] [12 24]]]
shape(y_perm1) = [4,3,2]
Examples:
.. code-block:: python
import paddle
x = paddle.randn([2, 3, 4])
x_transposed = paddle.transpose(x, perm=[1, 0, 2])
print(x_transposed.shape)
# [3L, 2L, 4L]
"""
if in_dygraph_mode():
return _C_ops.transpose(x, perm)
else:
if _in_legacy_dygraph():
out, _ = _legacy_C_ops.transpose2(x, 'axis', perm)
return out
check_variable_and_dtype(
x,
'x',
[
'bool',
'float16',
'float32',
'float64',
'int32',
'int64',
'complex64',
'complex128',
],
'transpose',
)
check_type(perm, 'perm', (list, tuple), 'transpose')
if isinstance(perm, tuple):
perm = list(perm)
if len(perm) != len(x.shape):
raise ValueError(
"Input(perm) is the permutation of dimensions of Input(x), "
"its length should be equal to dimensions of Input(x), "
"but received dimension of Input(x) is %s, "
"the length of Input(perm) is %s." % (len(x.shape), len(perm))
)
for idx, dim in enumerate(perm):
if dim >= len(x.shape):
raise ValueError(
"Each element in Input(perm) should be less than Input(x)'s dimension, "
"but %d-th element in Input(perm) is %d which exceeds Input(x)'s "
"dimension %d." % (idx, perm[idx], len(x.shape))
)
helper = LayerHelper('transpose', **locals())
out = helper.create_variable_for_type_inference(x.dtype)
x_shape = helper.create_variable_for_type_inference(x.dtype)
helper.append_op(
type='transpose2',
inputs={'X': [x]},
outputs={'Out': [out], 'XShape': [x_shape]},
attrs={'axis': perm},
)
return out
def im2sequence(
input,
filter_size=1,
......
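The deleted `fluid.layers.transpose` wrapper above documented two `perm` examples; the same results come straight from `paddle.transpose`. A quick check of those two cases (a sketch, assuming an installed Paddle 2.x):

```python
import numpy as np
import paddle

x = paddle.to_tensor(np.arange(1, 25).reshape([2, 3, 4]))

y_perm0 = paddle.transpose(x, perm=[1, 0, 2])  # shape [3, 2, 4]
y_perm1 = paddle.transpose(x, perm=[2, 1, 0])  # shape [4, 3, 2]
print(y_perm0.shape, y_perm1.shape)
```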
......@@ -16,6 +16,7 @@ import sys
from functools import partial, reduce
import warnings
import paddle
from paddle.utils import deprecated
from . import nn
......@@ -563,7 +564,7 @@ def _maybe_copy(state, new_state, step_mask):
def _transpose_batch_time(x):
perm = [1, 0] + list(range(2, len(x.shape)))
return nn.transpose(x, perm)
return paddle.transpose(x, perm)
def _rnn_dynamic_graph(
......@@ -591,7 +592,7 @@ def _rnn_dynamic_graph(
mask = sequence_lod.sequence_mask(
sequence_length, maxlen=time_steps, dtype=inputs.dtype
)
mask = nn.transpose(mask, [1, 0])
mask = paddle.transpose(mask, [1, 0])
if is_reverse:
inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs)
......@@ -678,7 +679,7 @@ def _rnn_static_graph(
maxlen=max_seq_len,
dtype=flatten(initial_states)[0].dtype,
)
mask = nn.transpose(mask, [1, 0])
mask = paddle.transpose(mask, [1, 0])
if is_reverse:
inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs)
mask = tensor.reverse(mask, axis=[0]) if sequence_length else None
......@@ -1032,14 +1033,14 @@ class BeamSearchDecoder(Decoder):
expand_times = [1] * len(x.shape)
expand_times[1] = beam_size
x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...]
x = nn.transpose(
x = paddle.transpose(
x, list(range(2, len(x.shape))) + [0, 1]
) # [..., batch_size, beam_size]
# use 0 to copy to avoid wrong shape
x = paddle.reshape(
x, shape=[0] * (len(x.shape) - 2) + [-1]
) # [..., batch_size * beam_size]
x = nn.transpose(
x = paddle.transpose(
x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1))
) # [batch_size * beam_size, ...]
return x
......@@ -1557,7 +1558,9 @@ def _dynamic_decode_imperative(
if not output_time_major:
final_outputs = map_structure(
lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))),
lambda x: paddle.transpose(
x, [1, 0] + list(range(2, len(x.shape)))
),
final_outputs,
)
......@@ -1629,7 +1632,7 @@ def _dynamic_decode_declarative(
return new_state
def _transpose_batch_time(x):
return nn.transpose(x, [1, 0] + list(range(2, len(x.shape))))
return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape))))
def _create_array_out_of_while(dtype):
current_block_idx = default_main_program().current_block_idx
......
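Several of the hunks above route `_transpose_batch_time` through `paddle.transpose`: swap dims 0 and 1 while keeping any trailing feature dimensions in order. A standalone sketch of that pattern (the 3-D shape is illustrative):

```python
import paddle

def transpose_batch_time(x):
    # swap the batch and time axes, keep the remaining dims as-is
    perm = [1, 0] + list(range(2, len(x.shape)))
    return paddle.transpose(x, perm)

batch_major = paddle.randn([8, 20, 32])          # [batch, time, hidden]
time_major = transpose_batch_time(batch_major)   # [20, 8, 32]
print(time_major.shape)
```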
......@@ -577,7 +577,7 @@ def scaled_dot_product_attention(
# permute the dimensions into:
# [batch_size, num_heads, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
......@@ -598,7 +598,7 @@ def scaled_dot_product_attention(
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
trans_x = paddle.transpose(x, perm=[0, 2, 1, 3])
return paddle.reshape(
x=trans_x,
shape=list(
......
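The `scaled_dot_product_attention` hunks above follow the usual split-heads/combine-heads pattern: reshape the hidden dimension into `[n_head, d_head]`, transpose so the head axis sits next to batch, and reverse both steps after attention. A compact sketch of that round trip with `paddle.transpose` (shapes are illustrative):

```python
import paddle

batch, seq, n_head, d_head = 2, 5, 4, 8
x = paddle.randn([batch, seq, n_head * d_head])

# split heads: [batch, seq, n_head*d_head] -> [batch, n_head, seq, d_head]
split = paddle.transpose(
    paddle.reshape(x, [batch, seq, n_head, d_head]), perm=[0, 2, 1, 3])

# combine heads: undo the transpose, then flatten the head dims back
combined = paddle.reshape(
    paddle.transpose(split, perm=[0, 2, 1, 3]), [batch, seq, n_head * d_head])

print(split.shape, combined.shape)  # [2, 4, 5, 8] [2, 5, 32]
```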
......@@ -16,6 +16,7 @@ import unittest
import copy
import paddle
from paddle.distributed.fleet import auto
from paddle.distributed.auto_parallel.cluster import Cluster
from paddle.distributed.auto_parallel.operators.common import (
......@@ -151,9 +152,7 @@ class TestDistOpCost(unittest.TestCase):
auto.ProcessMesh([0, 1], dim_names=["x"]),
["x", None],
)
out = paddle.fluid.layers.transpose(
out, [1, 0]
) # [8, 2] [-1, 0]
out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0]
# matmul
param1 = paddle.fluid.layers.create_parameter(
......@@ -188,9 +187,7 @@ class TestDistOpCost(unittest.TestCase):
tmp_out, param2
) # [8, 4] [-1, 0]
out8 = paddle.fluid.layers.transpose(
out2, [1, 0]
) # [4, 8] [0, -1]
out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1]
# reshape
out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1]
......@@ -266,9 +263,7 @@ class TestDistOpCost(unittest.TestCase):
auto.ProcessMesh([0, 1], dim_names=["x"]),
["x", None],
)
out = paddle.fluid.layers.transpose(
out, [1, 0]
) # [8, 2] [-1, 0]
out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0]
# matmul_v2
param1 = paddle.fluid.layers.create_parameter(
......@@ -300,9 +295,7 @@ class TestDistOpCost(unittest.TestCase):
tmp_out = paddle.matmul(out1, tmp_param)
out2 = paddle.matmul(tmp_out, param2) # [8, 4] [-1, 0]
out8 = paddle.fluid.layers.transpose(
out2, [1, 0]
) # [4, 8] [0, -1]
out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1]
# reshape
out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1]
......@@ -377,9 +370,7 @@ class TestDistOpCost(unittest.TestCase):
auto.ProcessMesh([0, 1], dim_names=["x"]),
["x", None],
)
out = paddle.fluid.layers.transpose(
out, [1, 0]
) # [8, 2] [-1, 0]
out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0]
# mul
param1 = paddle.fluid.layers.create_parameter(
......@@ -414,9 +405,7 @@ class TestDistOpCost(unittest.TestCase):
tmp_out, param2
) # [8, 4] [-1, 0]
out8 = paddle.fluid.layers.transpose(
out2, [1, 0]
) # [4, 8] [0, -1]
out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1]
# reshape
out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1]
......
......@@ -328,15 +328,16 @@ class MultiHeadAttentionLayer(Layer):
reshaped_q = paddle.reshape(
x=q, shape=[0, 0, self._n_head, self._d_key]
)
transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
reshaped_k = paddle.reshape(
x=k, shape=[0, 0, self._n_head, self._d_key]
)
transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3])
transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3])
reshaped_v = paddle.reshape(
x=v, shape=[0, 0, self._n_head, self._d_value]
)
transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3])
transpose_v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3])
# scale dot product attention
product = fluid.layers.matmul(
......@@ -362,7 +363,8 @@ class MultiHeadAttentionLayer(Layer):
# combine heads
if len(out.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3])
trans_x = paddle.transpose(out, perm=[0, 2, 1, 3])
final_out = paddle.reshape(
x=trans_x,
shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
......
......@@ -23,6 +23,7 @@ import glob
import random
import tarfile
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP
......@@ -1148,7 +1149,7 @@ def multi_head_attention(
# permute the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
......@@ -1160,7 +1161,7 @@ def multi_head_attention(
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
trans_x = paddle.transpose(x, perm=[0, 2, 1, 3])
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
return paddle.reshape(
......
......@@ -176,7 +176,7 @@ class BaseModel(fluid.dygraph.Layer):
)
def _transpose_batch_time(self, x):
return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape))))
return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape))))
def _merge_batch_beams(self, x):
return paddle.reshape(x, shape=(-1, x.shape[2]))
......@@ -234,7 +234,7 @@ class BaseModel(fluid.dygraph.Layer):
enc_len_mask = fluid.layers.sequence_mask(
src_sequence_length, maxlen=max_seq_len, dtype="float32"
)
enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0])
enc_len_mask = paddle.transpose(enc_len_mask, [1, 0])
# TODO: Because diff exits if call while_loop in static graph.
# In while block, a Variable created in parent block participates in the calculation of gradient,
......@@ -336,7 +336,7 @@ class BaseModel(fluid.dygraph.Layer):
enc_len_mask = fluid.layers.sequence_mask(
src_sequence_length, maxlen=max_seq_len, dtype="float32"
)
enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0])
enc_len_mask = paddle.transpose(enc_len_mask, [1, 0])
for k in range(args.max_seq_len):
enc_step_input = src_emb[k]
......@@ -643,7 +643,7 @@ class AttentionModel(fluid.dygraph.Layer):
)
def _transpose_batch_time(self, x):
return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape))))
return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape))))
def _merge_batch_beams(self, x):
return paddle.reshape(x, shape=(-1, x.shape[2]))
......@@ -653,14 +653,14 @@ class AttentionModel(fluid.dygraph.Layer):
expand_times = [1] * len(x.shape)
expand_times[1] = self.beam_size
x = fluid.layers.expand(x, expand_times) # [batch_size, beam_size, ...]
x = fluid.layers.transpose(
x = paddle.transpose(
x, list(range(2, len(x.shape))) + [0, 1]
) # [..., batch_size, beam_size]
# use 0 to copy to avoid wrong shape
x = paddle.reshape(
x, shape=[0] * (len(x.shape) - 2) + [-1]
) # [..., batch_size * beam_size]
x = fluid.layers.transpose(
x = paddle.transpose(
x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1))
) # [batch_size * beam_size, ...]
return x
......@@ -691,9 +691,9 @@ class AttentionModel(fluid.dygraph.Layer):
attn = fluid.layers.matmul(query, memory, transpose_y=True)
if mask is not None:
attn = fluid.layers.transpose(attn, [1, 0, 2])
attn = paddle.transpose(attn, [1, 0, 2])
attn = fluid.layers.elementwise_add(attn, mask * 1000000000, -1)
attn = fluid.layers.transpose(attn, [1, 0, 2])
attn = paddle.transpose(attn, [1, 0, 2])
weight = fluid.layers.softmax(attn)
weight_memory = fluid.layers.matmul(weight, memory)
......@@ -743,7 +743,7 @@ class AttentionModel(fluid.dygraph.Layer):
src_sequence_length, maxlen=max_seq_len, dtype="float32"
)
enc_padding_mask = enc_len_mask - 1.0
enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0])
enc_len_mask = paddle.transpose(enc_len_mask, [1, 0])
enc_outputs = []
# TODO: Because diff exits if call while_loop in static graph.
......
......@@ -122,12 +122,12 @@ class SimpleLSTMRNN(fluid.Layer):
last_hidden = paddle.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size]
)
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size]
)
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......
......@@ -138,12 +138,13 @@ class MultiHeadAttention(Layer):
k = self.k_fc(keys)
v = self.v_fc(values)
# split head
q = paddle.reshape(x=q, shape=[0, 0, self.n_head, self.d_key])
q = layers.transpose(x=q, perm=[0, 2, 1, 3])
q = paddle.transpose(x=q, perm=[0, 2, 1, 3])
k = paddle.reshape(x=k, shape=[0, 0, self.n_head, self.d_key])
k = layers.transpose(x=k, perm=[0, 2, 1, 3])
k = paddle.transpose(x=k, perm=[0, 2, 1, 3])
v = paddle.reshape(x=v, shape=[0, 0, self.n_head, self.d_value])
v = layers.transpose(x=v, perm=[0, 2, 1, 3])
v = paddle.transpose(x=v, perm=[0, 2, 1, 3])
if cache is not None:
cache_k, cache_v = cache["k"], cache["v"]
......@@ -160,8 +161,10 @@ class MultiHeadAttention(Layer):
if self.dropout_rate:
weights = layers.dropout(weights, dropout_prob=self.dropout_rate)
out = layers.matmul(weights, v)
out = layers.transpose(out, perm=[0, 2, 1, 3])
out = paddle.transpose(out, perm=[0, 2, 1, 3])
out = paddle.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]])
out = self.proj_fc(out)
return out
......@@ -703,7 +706,7 @@ class Transformer(Layer):
def merge_batch_beams(tensor):
var_dim_in_state = 2 # count in beam dim
tensor = layers.transpose(
tensor = paddle.transpose(
tensor,
list(range(var_dim_in_state, len(tensor.shape)))
+ list(range(0, var_dim_in_state)),
......@@ -714,7 +717,7 @@ class Transformer(Layer):
[0] * (len(tensor.shape) - var_dim_in_state)
+ [batch_size * beam_size],
)
res = layers.transpose(
res = paddle.transpose(
tensor,
list(
range(
......@@ -728,7 +731,7 @@ class Transformer(Layer):
def split_batch_beams(tensor):
var_dim_in_state = 1
tensor = layers.transpose(
tensor = paddle.transpose(
tensor,
list(range(var_dim_in_state, len(tensor.shape)))
+ list(range(0, var_dim_in_state)),
......@@ -738,7 +741,7 @@ class Transformer(Layer):
[0] * (len(tensor.shape) - var_dim_in_state)
+ [batch_size, beam_size],
)
res = layers.transpose(
res = paddle.transpose(
tensor,
list(
range(
......@@ -878,7 +881,7 @@ class Transformer(Layer):
predict_ids = paddle.stack(predict_ids, axis=0)
parent_ids = paddle.stack(parent_ids, axis=0)
finished_seq = layers.transpose(
finished_seq = paddle.transpose(
layers.gather_tree(predict_ids, parent_ids), [1, 2, 0]
)
finished_scores = topk_scores
......
......@@ -16,6 +16,7 @@ import os
import sys
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import declarative
from paddle.fluid.param_attr import ParamAttr
......@@ -345,9 +346,7 @@ class YOLOv3(fluid.dygraph.Layer):
name="yolo_box" + str(i),
)
self.boxes.append(boxes)
self.scores.append(
fluid.layers.transpose(scores, perm=[0, 2, 1])
)
self.scores.append(paddle.transpose(scores, perm=[0, 2, 1]))
self.downsample //= 2
if not self.is_train:
......
......@@ -46,7 +46,7 @@ class TestBase(IPUOpTest):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
)
out = paddle.fluid.layers.transpose(x, **self.attrs)
out = paddle.transpose(x, **self.attrs)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......
......@@ -15,6 +15,7 @@
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
from paddle.fluid.core import PassVersionChecker
import paddle
......@@ -27,8 +28,10 @@ class TestMKLDNNCpuBfloat16Pass(InferencePassTest):
x = fluid.data(
name='x', shape=[-1] + self.shape_x, dtype=self.d_type
)
out = fluid.layers.transpose(x, perm=[0, 1, 2, 3])
out = paddle.transpose(x, perm=[0, 1, 2, 3])
out = paddle.reshape(out, [0, 0, 0, 0])
out = fluid.layers.fc(out, size=1)
self.feeds = {
......
......@@ -14,6 +14,7 @@
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from inference_pass_test import InferencePassTest
......@@ -36,8 +37,9 @@ class TestMKLDNNMatmulFuseOp(InferencePassTest):
name='y', shape=[-1] + self.shape_y, dtype=self.d_type
)
out = fluid.layers.matmul(x, y)
out = fluid.layers.transpose(out, perm=[0, 2, 1, 3])
out = paddle.transpose(out, perm=[0, 2, 1, 3])
out = paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]])
out = fluid.layers.relu(out)
return out
......@@ -77,7 +79,7 @@ class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp):
name='y', shape=[-1] + self.shape_y, dtype=self.d_type
)
out = fluid.layers.matmul(x, y)
out = fluid.layers.transpose(out, perm=[0, 1, 2, 3])
out = paddle.transpose(out, perm=[0, 1, 2, 3])
out = paddle.reshape(out, [0, 0, 0, 0])
out = fluid.layers.fc(out, size=1)
return out
......@@ -100,11 +102,10 @@ class TestMKLDNNMatmulOpNotFusedBreakPattern(TestMKLDNNMatmulFuseOp):
name='y', shape=[-1] + self.shape_y, dtype=self.d_type
)
out = fluid.layers.matmul(x, y)
out = fluid.layers.transpose(out, perm=[0, 2, 1, 3])
out = fluid.layers.transpose(
out, perm=[0, 1, 2, 3]
) # breaks pattern
out = paddle.transpose(out, perm=[0, 2, 1, 3])
out = paddle.transpose(out, perm=[0, 1, 2, 3]) # breaks pattern
out = paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]])
out = fluid.layers.relu(out)
return out
......
......@@ -33,8 +33,10 @@ class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest):
weight = fluid.layers.create_parameter(
shape=self.weight_shape, dtype="float32"
)
reshape = paddle.reshape(data, shape=self.reshape_shape)
transpose = fluid.layers.transpose(reshape, self.tranpose_perm)
transpose = paddle.transpose(reshape, self.tranpose_perm)
matmul = paddle.matmul(
transpose,
weight,
......
......@@ -15,6 +15,8 @@
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
......@@ -64,7 +66,7 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest):
stride=self.stride,
)
if self.dynamic_shape_params is not None:
anchor = fluid.layers.transpose(anchor, [2, 3, 0, 1])
anchor = paddle.transpose(anchor, [2, 3, 0, 1])
out = fluid.layers.batch_norm(anchor, is_test=True)
self.fetch_list = [out, var]
......
......@@ -15,6 +15,8 @@
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
......@@ -28,8 +30,9 @@ class ShuffleChannelFuseTRTPassTest(InferencePassTest):
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
reshape1 = paddle.reshape(x=data, shape=[-1, 2, 3, 64, 64])
trans = fluid.layers.transpose(x=reshape1, perm=[0, 2, 1, 3, 4])
trans = paddle.transpose(x=reshape1, perm=[0, 2, 1, 3, 4])
reshape2 = paddle.reshape(x=trans, shape=[-1, 6, 64, 64])
out = fluid.layers.batch_norm(reshape2, is_test=True)
self.feeds = {
......
......@@ -17,6 +17,8 @@ import shutil
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
......@@ -226,7 +228,7 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest):
self.fetch_list = [out]
def append_transpose(self, data):
return fluid.layers.transpose(data, [0, 3, 1, 2])
return paddle.transpose(data, [0, 3, 1, 2])
def test_check_output(self):
if core.is_compiled_with_cuda():
......
......@@ -15,6 +15,8 @@
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
......@@ -30,8 +32,8 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
data2 = fluid.data(
name="data2", shape=[8, 32, 128], dtype="float32"
)
trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1])
trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1])
trans1 = paddle.transpose(data1, perm=[0, 2, 1])
trans2 = paddle.transpose(data2, perm=[0, 2, 1])
flatt1 = fluid.layers.flatten(trans1)
flatt2 = fluid.layers.flatten(trans2)
concat_out = fluid.layers.concat([flatt1, flatt2], axis=1)
......
......@@ -192,26 +192,26 @@ class TestTransposeOpError(unittest.TestCase):
def test_x_Variable_check():
# the Input(x)'s type must be Variable
fluid.layers.transpose("not_variable", perm=[1, 0, 2])
paddle.transpose("not_variable", perm=[1, 0, 2])
self.assertRaises(TypeError, test_x_Variable_check)
def test_perm_list_check():
# Input(perm)'s type must be list
fluid.layers.transpose(x, perm="[1, 0, 2]")
paddle.transpose(x, perm="[1, 0, 2]")
self.assertRaises(TypeError, test_perm_list_check)
def test_perm_length_and_x_dim_check():
# Input(perm) is the permutation of dimensions of Input(input)
# its length should be equal to dimensions of Input(input)
fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4])
paddle.transpose(x, perm=[1, 0, 2, 3, 4])
self.assertRaises(ValueError, test_perm_length_and_x_dim_check)
def test_each_elem_value_check():
# Each element in Input(perm) should be less than Input(x)'s dimension
fluid.layers.transpose(x, perm=[3, 5, 7])
paddle.transpose(x, perm=[3, 5, 7])
self.assertRaises(ValueError, test_each_elem_value_check)
......
......@@ -74,7 +74,7 @@ class TestCholeskyOp(OpTest):
root = layers.create_parameter(
dtype=root_data.dtype, shape=root_data.shape
)
root_t = layers.transpose(root, self.trans_dims)
root_t = paddle.transpose(root, self.trans_dims)
x = layers.matmul(x=root, y=root_t) + 1e-05
out = paddle.cholesky(x, upper=self.attrs["upper"])
grad_check(root, out, x_init=root_data, place=place)
......
......@@ -154,7 +154,7 @@ def lm_model(
hidden_array.append(pre_hidden)
cell_array.append(pre_cell)
input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
input_embedding = paddle.transpose(input_embedding, perm=[1, 0, 2])
rnn = PaddingRNN()
with rnn.step():
......@@ -230,7 +230,7 @@ def lm_model(
c, axes=[0], starts=[num_steps - 1], ends=[num_steps]
)
last_cell_array.append(last_c)
real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = layers.concat(last_hidden_array, 0)
last_cell = layers.concat(last_cell_array, 0)
......@@ -317,17 +317,17 @@ def lm_model(
last_hidden = paddle.reshape(
last_hidden, shape=[-1, num_layers, hidden_size]
)
last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = layers.concat(cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, num_layers, hidden_size]
)
last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
real_res = layers.concat(res, 0)
real_res = paddle.reshape(real_res, shape=[len, -1, hidden_size])
real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......@@ -404,7 +404,7 @@ def lm_model(
init_cell=init_cell_reshape,
)
elif rnn_model == "cudnn":
x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
x_emb = paddle.transpose(x_emb, perm=[1, 0, 2])
rnn_out, last_hidden, last_cell = layers.lstm(
x_emb,
init_hidden_reshape,
......@@ -417,7 +417,7 @@ def lm_model(
low=-init_scale, high=init_scale
),
)
rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2])
rnn_out = paddle.transpose(rnn_out, perm=[1, 0, 2])
elif rnn_model == "basic_lstm":
rnn_out, last_hidden, last_cell = basic_lstm(
x_emb,
......
......@@ -119,9 +119,10 @@ class DeepCF(fluid.Layer):
def forward(self, users, items):
# users_emb = self._user_emb(users)
# items_emb = self._item_emb(items)
users_emb = paddle.gather(self._rating_matrix, users)
items_emb = paddle.gather(
fluid.layers.transpose(self._rating_matrix, [1, 0]), items
paddle.transpose(self._rating_matrix, [1, 0]), items
)
users_emb.stop_gradient = True
items_emb.stop_gradient = True
......
......@@ -63,7 +63,7 @@ class SimpleNet(fluid.Layer):
def forward(self, input, label):
x_emb = self.embedding(input)
projection = fluid.layers.matmul(
x_emb, fluid.layers.transpose(self.embedding.weight, perm=[1, 0])
x_emb, paddle.transpose(self.embedding.weight, perm=[1, 0])
)
projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
......
......@@ -264,7 +264,7 @@ class EncoderNet(fluid.dygraph.Layer):
# stride=[1, 1],
# filter_size=[conv_features.shape[2], 1])
transpose_conv_features = fluid.layers.transpose(
transpose_conv_features = paddle.transpose(
conv_features, perm=[0, 3, 1, 2]
)
sliced_feature = paddle.reshape(
......
......@@ -131,17 +131,17 @@ class SimpleLSTMRNN(fluid.Layer):
paddle.reshape(self._input, shape=[1, -1, self._hidden_size])
)
real_res = fluid.layers.concat(res, 0)
real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = fluid.layers.concat(self.hidden_array, 1)
last_hidden = paddle.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size]
)
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(self.cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size]
)
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......
......@@ -126,17 +126,17 @@ class SimpleLSTMRNN(fluid.Layer):
paddle.reshape(self._input, shape=[1, -1, self._hidden_size])
)
real_res = fluid.layers.concat(res, 0)
real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = fluid.layers.concat(self.hidden_array, 1)
last_hidden = paddle.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size]
)
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(self.cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size]
)
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......
......@@ -14,6 +14,7 @@
import os
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
......@@ -128,17 +129,17 @@ class SimpleLSTMRNN(fluid.Layer):
paddle.reshape(self._input, shape=[1, -1, self._hidden_size])
)
real_res = fluid.layers.concat(res, 0)
real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = fluid.layers.concat(self.hidden_array, 1)
last_hidden = paddle.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size]
)
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(self.cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size]
)
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......
......@@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer):
fc = fluid.layers.matmul(x_emb, self.softmax_weight)
fc = fluid.layers.elementwise_add(fc, self.softmax_bias)
projection = fluid.layers.matmul(
fc, fluid.layers.transpose(self.embedding.weight, perm=[1, 0])
fc, paddle.transpose(self.embedding.weight, perm=[1, 0])
)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
......
......@@ -479,15 +479,16 @@ class MultiHeadAttentionLayer(Layer):
reshaped_q = paddle.reshape(
x=q, shape=[0, 0, self._n_head, self._d_key]
)
transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
reshaped_k = paddle.reshape(
x=k, shape=[0, 0, self._n_head, self._d_key]
)
transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3])
transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3])
reshaped_v = paddle.reshape(
x=v, shape=[0, 0, self._n_head, self._d_value]
)
transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3])
transpose_v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3])
# scale dot product attention
product = fluid.layers.matmul(
......@@ -513,7 +514,7 @@ class MultiHeadAttentionLayer(Layer):
# combine heads
if len(out.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3])
trans_x = paddle.transpose(out, perm=[0, 2, 1, 3])
final_out = paddle.reshape(
x=trans_x,
shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
......
......@@ -15,6 +15,7 @@
import unittest
import numpy
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
......@@ -335,7 +336,7 @@ class TestRnnError(unittest.TestCase):
name="sequence_length", shape=[None], dtype='int64'
)
inputs_dynamic_rnn = layers.transpose(
inputs_dynamic_rnn = paddle.transpose(
inputs_basic_lstm, perm=[1, 0, 2]
)
cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn")
......@@ -428,7 +429,7 @@ class TestRnn(unittest.TestCase):
name="sequence_length", shape=[None], dtype='int64'
)
inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2])
inputs_dynamic_rnn = paddle.transpose(inputs_basic_lstm, perm=[1, 0, 2])
cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn")
output, final_state = dynamic_rnn(
cell=cell,
......@@ -436,7 +437,7 @@ class TestRnn(unittest.TestCase):
sequence_length=sequence_length,
is_reverse=False,
)
output_new = layers.transpose(output, perm=[1, 0, 2])
output_new = paddle.transpose(output, perm=[1, 0, 2])
rnn_out, last_hidden, last_cell = basic_lstm(
inputs_basic_lstm,
......
......@@ -138,17 +138,17 @@ class SimpleLSTMRNN(fluid.Layer):
paddle.reshape(self._input, shape=[1, -1, self._hidden_size])
)
real_res = fluid.layers.concat(res, 0)
real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2])
real_res = paddle.transpose(x=real_res, perm=[1, 0, 2])
last_hidden = fluid.layers.concat(self.hidden_array, 1)
last_hidden = paddle.reshape(
last_hidden, shape=[-1, self._num_layers, self._hidden_size]
)
last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2])
last_cell = fluid.layers.concat(self.cell_array, 1)
last_cell = paddle.reshape(
last_cell, shape=[-1, self._num_layers, self._hidden_size]
)
last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2])
return real_res, last_hidden, last_cell
......
......@@ -291,7 +291,7 @@ class TestTransposeOpError(unittest.TestCase):
def test_x_Variable_check():
# the Input(x)'s type must be Variable
fluid.layers.transpose("not_variable", perm=[1, 0, 2])
paddle.transpose("not_variable", perm=[1, 0, 2])
self.assertRaises(TypeError, test_x_Variable_check)
......@@ -300,26 +300,26 @@ class TestTransposeOpError(unittest.TestCase):
x1 = fluid.layers.data(
name='x1', shape=[10, 5, 3], dtype='int8'
)
fluid.layers.transpose(x1, perm=[1, 0, 2])
paddle.transpose(x1, perm=[1, 0, 2])
self.assertRaises(TypeError, test_x_dtype_check)
def test_perm_list_check():
# Input(perm)'s type must be list
fluid.layers.transpose(x, perm="[1, 0, 2]")
paddle.transpose(x, perm="[1, 0, 2]")
self.assertRaises(TypeError, test_perm_list_check)
def test_perm_length_and_x_dim_check():
# Input(perm) is the permutation of dimensions of Input(input)
# its length should be equal to dimensions of Input(input)
fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4])
paddle.transpose(x, perm=[1, 0, 2, 3, 4])
self.assertRaises(ValueError, test_perm_length_and_x_dim_check)
def test_each_elem_value_check():
# Each element in Input(perm) should be less than Input(x)'s dimension
fluid.layers.transpose(x, perm=[3, 5, 7])
paddle.transpose(x, perm=[3, 5, 7])
self.assertRaises(ValueError, test_each_elem_value_check)
......
......@@ -121,7 +121,7 @@ def multi_head_attention(
# permute the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
......@@ -133,7 +133,7 @@ def multi_head_attention(
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
trans_x = paddle.transpose(x, perm=[0, 2, 1, 3])
# FIXME(guosheng): Decouple the program desc with batch_size.
return paddle.reshape(
x=trans_x,
......