diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index 78813126e26ac82a0b9e7fff1fa958ba5d98e95e..df6a38852ff8cb8148ec4d36438e971556c017b5 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -326,7 +326,7 @@ def basic_gru( ) if batch_first: - input = layers.transpose(input, [1, 0, 2]) + input = paddle.transpose(input, [1, 0, 2]) mask = None if sequence_length: @@ -334,7 +334,7 @@ def basic_gru( mask = layers.sequence_mask( sequence_length, maxlen=max_seq_len, dtype='float32' ) - mask = layers.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) direc_num = 1 if bidirectional: @@ -425,7 +425,7 @@ def basic_gru( ) if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden else: @@ -433,7 +433,7 @@ def basic_gru( last_hidden = fw_last_hidden if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden @@ -610,7 +610,7 @@ def basic_lstm( ) if batch_first: - input = layers.transpose(input, [1, 0, 2]) + input = paddle.transpose(input, [1, 0, 2]) mask = None if sequence_length: @@ -619,7 +619,7 @@ def basic_lstm( sequence_length, maxlen=max_seq_len, dtype='float32' ) - mask = layers.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) direc_num = 1 if bidirectional: @@ -740,7 +740,7 @@ def basic_lstm( ) if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden, last_cell else: @@ -749,7 +749,7 @@ def basic_lstm( last_cell = fw_last_cell if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden, last_cell diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index ee53f23684ca96d1bbaf4f957838216c53a34633..7ad02f364dfd9bc40206c82326103561090e3baf 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -625,10 +625,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -637,7 +639,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -714,10 +716,12 @@ class StaticRNN: Examples 1: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -726,7 +730,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -742,9 +746,11 @@ class StaticRNN: Examples 2: .. 
code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -753,7 +759,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) boot_memory = fluid.layers.data(name='boot', shape=[hidden_size], dtype='float32', lod_level=1) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -842,10 +848,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -854,7 +862,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -893,10 +901,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -905,7 +915,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -953,10 +963,12 @@ class StaticRNN: Examples: .. 
code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -965,7 +977,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index bfa063c105270d68601f2fd95db9b8b312174eb6..d89c6b9c1f046dc9d4c71574cba24ffbe4450580 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -774,7 +774,7 @@ def detection_output( code_type='decode_center_size', ) scores = nn.softmax(input=scores) - scores = nn.transpose(scores, perm=[0, 2, 1]) + scores = paddle.transpose(scores, perm=[0, 2, 1]) scores.stop_gradient = True nmsed_outs = helper.create_variable_for_type_inference( dtype=decoded_box.dtype @@ -2443,7 +2443,7 @@ def multi_box_head( stride=stride, ) - mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1]) + mbox_loc = paddle.transpose(mbox_loc, perm=[0, 2, 3, 1]) mbox_loc_flatten = nn.flatten(mbox_loc, axis=1) mbox_locs.append(mbox_loc_flatten) @@ -2456,7 +2456,7 @@ def multi_box_head( padding=pad, stride=stride, ) - conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1]) + conf_loc = paddle.transpose(conf_loc, perm=[0, 2, 3, 1]) conf_loc_flatten = nn.flatten(conf_loc, axis=1) mbox_confs.append(conf_loc_flatten) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c9308f98c283fdd473937561501311e500d4cbdf..d782bf973a5f1c84bec3ce7458571cd19b9a4ece 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -89,7 +89,6 @@ __all__ = [ 'l2_normalize', 'matmul', 'topk', - 'transpose', 'im2sequence', 'row_conv', 'multiplex', @@ -4875,108 +4874,6 @@ def ctc_greedy_decoder( return ctc_out, ctc_out_len -def transpose(x, perm, name=None): - """ - Permute the data dimensions of `input` according to `perm`. - - The `i`-th dimension of the returned tensor will correspond to the - perm[i]-th dimension of `input`. - - Args: - x (Tensor): The input Tensor. It is a N-D Tensor of data types bool, float32, float64, int32. - perm (list|tuple): Permute the input according to the data of perm. - name (str): The name of this layer. It is optional. - - Returns: - Tensor: A transposed n-D Tensor, with data type being bool, float32, float64, int32, int64. - - For Example: - - .. code-block:: text - - x = [[[ 1 2 3 4] [ 5 6 7 8] [ 9 10 11 12]] - [[13 14 15 16] [17 18 19 20] [21 22 23 24]]] - shape(x) = [2,3,4] - - # Example 1 - perm0 = [1,0,2] - y_perm0 = [[[ 1 2 3 4] [13 14 15 16]] - [[ 5 6 7 8] [17 18 19 20]] - [[ 9 10 11 12] [21 22 23 24]]] - shape(y_perm0) = [3,2,4] - - # Example 2 - perm1 = [2,1,0] - y_perm1 = [[[ 1 13] [ 5 17] [ 9 21]] - [[ 2 14] [ 6 18] [10 22]] - [[ 3 15] [ 7 19] [11 23]] - [[ 4 16] [ 8 20] [12 24]]] - shape(y_perm1) = [4,3,2] - - Examples: - - .. 
code-block:: python - - import paddle - - x = paddle.randn([2, 3, 4]) - x_transposed = paddle.transpose(x, perm=[1, 0, 2]) - print(x_transposed.shape) - # [3L, 2L, 4L] - - """ - if in_dygraph_mode(): - return _C_ops.transpose(x, perm) - else: - if _in_legacy_dygraph(): - out, _ = _legacy_C_ops.transpose2(x, 'axis', perm) - return out - - check_variable_and_dtype( - x, - 'x', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - 'transpose', - ) - check_type(perm, 'perm', (list, tuple), 'transpose') - if isinstance(perm, tuple): - perm = list(perm) - if len(perm) != len(x.shape): - raise ValueError( - "Input(perm) is the permutation of dimensions of Input(x), " - "its length should be equal to dimensions of Input(x), " - "but received dimension of Input(x) is %s, " - "the length of Input(perm) is %s." % (len(x.shape), len(perm)) - ) - for idx, dim in enumerate(perm): - if dim >= len(x.shape): - raise ValueError( - "Each element in Input(perm) should be less than Input(x)'s dimension, " - "but %d-th element in Input(perm) is %d which exceeds Input(x)'s " - "dimension %d." % (idx, perm[idx], len(x.shape)) - ) - - helper = LayerHelper('transpose', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], 'XShape': [x_shape]}, - attrs={'axis': perm}, - ) - return out - - def im2sequence( input, filter_size=1, diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 9b384203fa885f6826054a98a55da9005697fde7..f7ce8d1e6ca18167fa1c3c88c66439b6223ff6c8 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -16,6 +16,7 @@ import sys from functools import partial, reduce import warnings + import paddle from paddle.utils import deprecated from . import nn @@ -563,7 +564,7 @@ def _maybe_copy(state, new_state, step_mask): def _transpose_batch_time(x): perm = [1, 0] + list(range(2, len(x.shape))) - return nn.transpose(x, perm) + return paddle.transpose(x, perm) def _rnn_dynamic_graph( @@ -591,7 +592,7 @@ def _rnn_dynamic_graph( mask = sequence_lod.sequence_mask( sequence_length, maxlen=time_steps, dtype=inputs.dtype ) - mask = nn.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) if is_reverse: inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) @@ -678,7 +679,7 @@ def _rnn_static_graph( maxlen=max_seq_len, dtype=flatten(initial_states)[0].dtype, ) - mask = nn.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) if is_reverse: inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) mask = tensor.reverse(mask, axis=[0]) if sequence_length else None @@ -1032,14 +1033,14 @@ class BeamSearchDecoder(Decoder): expand_times = [1] * len(x.shape) expand_times[1] = beam_size x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...] - x = nn.transpose( + x = paddle.transpose( x, list(range(2, len(x.shape))) + [0, 1] ) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape x = paddle.reshape( x, shape=[0] * (len(x.shape) - 2) + [-1] ) # [..., batch_size * beam_size] - x = nn.transpose( + x = paddle.transpose( x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1)) ) # [batch_size * beam_size, ...] 
return x @@ -1557,7 +1558,9 @@ def _dynamic_decode_imperative( if not output_time_major: final_outputs = map_structure( - lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))), + lambda x: paddle.transpose( + x, [1, 0] + list(range(2, len(x.shape))) + ), final_outputs, ) @@ -1629,7 +1632,7 @@ def _dynamic_decode_declarative( return new_state def _transpose_batch_time(x): - return nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _create_array_out_of_while(dtype): current_block_idx = default_main_program().current_block_idx diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index eab247452fc92be5e0c47ed28b65a03f8f0a51b6..c4bd6cf81f5f425711b01590a101b6354d40bf58 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -577,7 +577,7 @@ def scaled_dot_product_attention( # permute the dimensions into: # [batch_size, num_heads, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -598,7 +598,7 @@ def scaled_dot_product_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) return paddle.reshape( x=trans_x, shape=list( diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index 517debaa58842cc33cc5f99ace8fc28cbaef31e2..92ce41aa608cb1601fd533a4c1b06a0d079e2aa0 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -16,6 +16,7 @@ import unittest import copy import paddle + from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.operators.common import ( @@ -151,9 +152,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul param1 = paddle.fluid.layers.create_parameter( @@ -188,9 +187,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out, param2 ) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] @@ -266,9 +263,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul_v2 param1 = paddle.fluid.layers.create_parameter( @@ -300,9 +295,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out = paddle.matmul(out1, tmp_param) out2 = paddle.matmul(tmp_out, param2) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] @@ -377,9 +370,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = 
paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # mul param1 = paddle.fluid.layers.create_parameter( @@ -414,9 +405,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out, param2 ) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index 21d357fcdd18045ea2892c30dba0c2abff7895c0..d3fd734d6aa76d167392f113792235ec348bb196 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -328,15 +328,16 @@ class MultiHeadAttentionLayer(Layer): reshaped_q = paddle.reshape( x=q, shape=[0, 0, self._n_head, self._d_key] ) - transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) + + transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) reshaped_k = paddle.reshape( x=k, shape=[0, 0, self._n_head, self._d_key] ) - transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) + transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) reshaped_v = paddle.reshape( x=v, shape=[0, 0, self._n_head, self._d_value] ) - transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) + transpose_v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention product = fluid.layers.matmul( @@ -362,7 +363,8 @@ class MultiHeadAttentionLayer(Layer): # combine heads if len(out.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + + trans_x = paddle.transpose(out, perm=[0, 2, 1, 3]) final_out = paddle.reshape( x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index fbe292e1f368c191f447e74ebbabf591e5a6e3c8..24de04dc6fb56d85da8011ca6b465f6adeebde07 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -23,6 +23,7 @@ import glob import random import tarfile +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP @@ -1148,7 +1149,7 @@ def multi_head_attention( # permute the dimensions into: # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -1160,7 +1161,7 @@ def multi_head_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) # The value 0 in shape attr means copying the corresponding dimension # size of the input as the output dimension size. 
return paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 539400ad927ad1d4de23f9bd3e9bd930ae36cd46..4b52df98c22d590cc8e4b8a701df87e23e10df4c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -176,7 +176,7 @@ class BaseModel(fluid.dygraph.Layer): ) def _transpose_batch_time(self, x): - return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _merge_batch_beams(self, x): return paddle.reshape(x, shape=(-1, x.shape[2])) @@ -234,7 +234,7 @@ class BaseModel(fluid.dygraph.Layer): enc_len_mask = fluid.layers.sequence_mask( src_sequence_length, maxlen=max_seq_len, dtype="float32" ) - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) # TODO: Because diff exits if call while_loop in static graph. # In while block, a Variable created in parent block participates in the calculation of gradient, @@ -336,7 +336,7 @@ class BaseModel(fluid.dygraph.Layer): enc_len_mask = fluid.layers.sequence_mask( src_sequence_length, maxlen=max_seq_len, dtype="float32" ) - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) for k in range(args.max_seq_len): enc_step_input = src_emb[k] @@ -643,7 +643,7 @@ class AttentionModel(fluid.dygraph.Layer): ) def _transpose_batch_time(self, x): - return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _merge_batch_beams(self, x): return paddle.reshape(x, shape=(-1, x.shape[2])) @@ -653,14 +653,14 @@ class AttentionModel(fluid.dygraph.Layer): expand_times = [1] * len(x.shape) expand_times[1] = self.beam_size x = fluid.layers.expand(x, expand_times) # [batch_size, beam_size, ...] - x = fluid.layers.transpose( + x = paddle.transpose( x, list(range(2, len(x.shape))) + [0, 1] ) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape x = paddle.reshape( x, shape=[0] * (len(x.shape) - 2) + [-1] ) # [..., batch_size * beam_size] - x = fluid.layers.transpose( + x = paddle.transpose( x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1)) ) # [batch_size * beam_size, ...] return x @@ -691,9 +691,9 @@ class AttentionModel(fluid.dygraph.Layer): attn = fluid.layers.matmul(query, memory, transpose_y=True) if mask is not None: - attn = fluid.layers.transpose(attn, [1, 0, 2]) + attn = paddle.transpose(attn, [1, 0, 2]) attn = fluid.layers.elementwise_add(attn, mask * 1000000000, -1) - attn = fluid.layers.transpose(attn, [1, 0, 2]) + attn = paddle.transpose(attn, [1, 0, 2]) weight = fluid.layers.softmax(attn) weight_memory = fluid.layers.matmul(weight, memory) @@ -743,7 +743,7 @@ class AttentionModel(fluid.dygraph.Layer): src_sequence_length, maxlen=max_seq_len, dtype="float32" ) enc_padding_mask = enc_len_mask - 1.0 - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) enc_outputs = [] # TODO: Because diff exits if call while_loop in static graph. 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index e87c727f7d7168a004da6e79e3ed33906a37dfbe..10e8f2cf68e854b86a458fd7a326cbfc073bc106 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -122,12 +122,12 @@ class SimpleLSTMRNN(fluid.Layer): last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index e26699bacfb52809bc607670b763ae0b1eb8de56..796329ab555d6db837200bb1777dc485807c78b9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -138,12 +138,13 @@ class MultiHeadAttention(Layer): k = self.k_fc(keys) v = self.v_fc(values) # split head + q = paddle.reshape(x=q, shape=[0, 0, self.n_head, self.d_key]) - q = layers.transpose(x=q, perm=[0, 2, 1, 3]) + q = paddle.transpose(x=q, perm=[0, 2, 1, 3]) k = paddle.reshape(x=k, shape=[0, 0, self.n_head, self.d_key]) - k = layers.transpose(x=k, perm=[0, 2, 1, 3]) + k = paddle.transpose(x=k, perm=[0, 2, 1, 3]) v = paddle.reshape(x=v, shape=[0, 0, self.n_head, self.d_value]) - v = layers.transpose(x=v, perm=[0, 2, 1, 3]) + v = paddle.transpose(x=v, perm=[0, 2, 1, 3]) if cache is not None: cache_k, cache_v = cache["k"], cache["v"] @@ -160,8 +161,10 @@ class MultiHeadAttention(Layer): if self.dropout_rate: weights = layers.dropout(weights, dropout_prob=self.dropout_rate) out = layers.matmul(weights, v) - out = layers.transpose(out, perm=[0, 2, 1, 3]) + + out = paddle.transpose(out, perm=[0, 2, 1, 3]) out = paddle.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) + out = self.proj_fc(out) return out @@ -703,7 +706,7 @@ class Transformer(Layer): def merge_batch_beams(tensor): var_dim_in_state = 2 # count in beam dim - tensor = layers.transpose( + tensor = paddle.transpose( tensor, list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state)), @@ -714,7 +717,7 @@ class Transformer(Layer): [0] * (len(tensor.shape) - var_dim_in_state) + [batch_size * beam_size], ) - res = layers.transpose( + res = paddle.transpose( tensor, list( range( @@ -728,7 +731,7 @@ class Transformer(Layer): def split_batch_beams(tensor): var_dim_in_state = 1 - tensor = layers.transpose( + tensor = paddle.transpose( tensor, list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state)), @@ -738,7 +741,7 @@ class Transformer(Layer): [0] * (len(tensor.shape) - var_dim_in_state) + [batch_size, beam_size], ) - res = layers.transpose( + res = paddle.transpose( tensor, list( range( @@ -878,7 +881,7 @@ class Transformer(Layer): predict_ids = paddle.stack(predict_ids, axis=0) parent_ids = paddle.stack(parent_ids, axis=0) - finished_seq = layers.transpose( + 
finished_seq = paddle.transpose( layers.gather_tree(predict_ids, parent_ids), [1, 2, 0] ) finished_scores = topk_scores diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 28078aba7893c427a9c2df0cc7b4b654a7bd8cce..58dae8bcfeb593030f9bf87371b623cab2b85ec2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -16,6 +16,7 @@ import os import sys import paddle + import paddle.fluid as fluid from paddle.fluid.dygraph import declarative from paddle.fluid.param_attr import ParamAttr @@ -345,9 +346,7 @@ class YOLOv3(fluid.dygraph.Layer): name="yolo_box" + str(i), ) self.boxes.append(boxes) - self.scores.append( - fluid.layers.transpose(scores, perm=[0, 2, 1]) - ) + self.scores.append(paddle.transpose(scores, perm=[0, 2, 1])) self.downsample //= 2 if not self.is_train: diff --git a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py index 34e0457c2ded0384f0102cfdd2f02d9f780d6517..e683f82521ad2e0f0fb9aa7f0bbc48305616cd81 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py @@ -46,7 +46,7 @@ class TestBase(IPUOpTest): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out = paddle.fluid.layers.transpose(x, **self.attrs) + out = paddle.transpose(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py index 5792db6af95c09299dca162fa7778d4b9bc8636c..0a74492ec6f7383f6b1dbc66a63435994fa0d360 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py @@ -15,6 +15,7 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid from paddle.fluid.core import PassVersionChecker import paddle @@ -27,8 +28,10 @@ class TestMKLDNNCpuBfloat16Pass(InferencePassTest): x = fluid.data( name='x', shape=[-1] + self.shape_x, dtype=self.d_type ) - out = fluid.layers.transpose(x, perm=[0, 1, 2, 3]) + + out = paddle.transpose(x, perm=[0, 1, 2, 3]) out = paddle.reshape(out, [0, 0, 0, 0]) + out = fluid.layers.fc(out, size=1) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py index a320dfbe4dcff5ab86f2a5b4197d65f150cf777c..79546196660f412c1d1b4cea0c4d37d20e7e1b7c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py @@ -14,6 +14,7 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid from inference_pass_test import InferencePassTest @@ -36,8 +37,9 @@ class TestMKLDNNMatmulFuseOp(InferencePassTest): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + out = paddle.transpose(out, perm=[0, 2, 1, 3]) out = 
paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]]) + out = fluid.layers.relu(out) return out @@ -77,7 +79,7 @@ class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 1, 2, 3]) + out = paddle.transpose(out, perm=[0, 1, 2, 3]) out = paddle.reshape(out, [0, 0, 0, 0]) out = fluid.layers.fc(out, size=1) return out @@ -100,11 +102,10 @@ class TestMKLDNNMatmulOpNotFusedBreakPattern(TestMKLDNNMatmulFuseOp): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) - out = fluid.layers.transpose( - out, perm=[0, 1, 2, 3] - ) # breaks pattern + out = paddle.transpose(out, perm=[0, 2, 1, 3]) + out = paddle.transpose(out, perm=[0, 1, 2, 3]) # breaks pattern out = paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]]) + out = fluid.layers.relu(out) return out diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py index 188d111c4552880506e264c9a7dc2b82da95b8c4..b5a345b2cfc56b76b09270ae2e2086b1e4e7fac1 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py @@ -33,8 +33,10 @@ class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest): weight = fluid.layers.create_parameter( shape=self.weight_shape, dtype="float32" ) + reshape = paddle.reshape(data, shape=self.reshape_shape) - transpose = fluid.layers.transpose(reshape, self.tranpose_perm) + transpose = paddle.transpose(reshape, self.tranpose_perm) + matmul = paddle.matmul( transpose, weight, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py index cd05c8528bc1b318373bb645807ba00cab85bca0..2030282f0c24c134906d53796993daae59e5eee6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import PassVersionChecker @@ -64,7 +66,7 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest): stride=self.stride, ) if self.dynamic_shape_params is not None: - anchor = fluid.layers.transpose(anchor, [2, 3, 0, 1]) + anchor = paddle.transpose(anchor, [2, 3, 0, 1]) out = fluid.layers.batch_norm(anchor, is_test=True) self.fetch_list = [out, var] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py index 754149f7b3489979d7052219ad0f29ab292c9b77..51f4af19c611cea0cd6fcbc9af8f52e53a23abb0 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle 
import paddle.fluid as fluid from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig @@ -28,8 +30,9 @@ class ShuffleChannelFuseTRTPassTest(InferencePassTest): name="data", shape=[-1, 6, 64, 64], dtype="float32" ) reshape1 = paddle.reshape(x=data, shape=[-1, 2, 3, 64, 64]) - trans = fluid.layers.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) + trans = paddle.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) reshape2 = paddle.reshape(x=trans, shape=[-1, 6, 64, 64]) + out = fluid.layers.batch_norm(reshape2, is_test=True) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index b91b068adb8285a556003e9ad815b5983a21c068..86a995c45c01ccbe20bd598550aec78eab06a62e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -17,6 +17,8 @@ import shutil import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import PassVersionChecker @@ -226,7 +228,7 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest): self.fetch_list = [out] def append_transpose(self, data): - return fluid.layers.transpose(data, [0, 3, 1, 2]) + return paddle.transpose(data, [0, 3, 1, 2]) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py index 8fc8b464dda1e03c0d553de785f43fd5a558a99e..9fc54820d3322c8490bcb1d0ca0e1c53e259d435 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -30,8 +32,8 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): data2 = fluid.data( name="data2", shape=[8, 32, 128], dtype="float32" ) - trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1]) - trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1]) + trans1 = paddle.transpose(data1, perm=[0, 2, 1]) + trans2 = paddle.transpose(data2, perm=[0, 2, 1]) flatt1 = fluid.layers.flatten(trans1) flatt2 = fluid.layers.flatten(trans2) concat_out = fluid.layers.concat([flatt1, flatt2], axis=1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py index 30e45198879567fd0331ce7f6a15cef9996e0d78..a802f9da215b45e35636cf1a50f67d7c8ec508ea 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py @@ -192,26 +192,26 @@ class TestTransposeOpError(unittest.TestCase): def test_x_Variable_check(): # the Input(x)'s type must be Variable - fluid.layers.transpose("not_variable", perm=[1, 0, 2]) + paddle.transpose("not_variable", perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_Variable_check) def test_perm_list_check(): # Input(perm)'s type must be list - fluid.layers.transpose(x, perm="[1, 
0, 2]") + paddle.transpose(x, perm="[1, 0, 2]") self.assertRaises(TypeError, test_perm_list_check) def test_perm_length_and_x_dim_check(): # Input(perm) is the permutation of dimensions of Input(input) # its length should be equal to dimensions of Input(input) - fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4]) + paddle.transpose(x, perm=[1, 0, 2, 3, 4]) self.assertRaises(ValueError, test_perm_length_and_x_dim_check) def test_each_elem_value_check(): # Each element in Input(perm) should be less than Input(x)'s dimension - fluid.layers.transpose(x, perm=[3, 5, 7]) + paddle.transpose(x, perm=[3, 5, 7]) self.assertRaises(ValueError, test_each_elem_value_check) diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index 1f5d60843139aa0943431d10cf40401ec439941f..0688a782f7287dcf97b7a77de18a998f1652105c 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -74,7 +74,7 @@ class TestCholeskyOp(OpTest): root = layers.create_parameter( dtype=root_data.dtype, shape=root_data.shape ) - root_t = layers.transpose(root, self.trans_dims) + root_t = paddle.transpose(root, self.trans_dims) x = layers.matmul(x=root, y=root_t) + 1e-05 out = paddle.cholesky(x, upper=self.attrs["upper"]) grad_check(root, out, x_init=root_data, place=place) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 220a6d13b81fa5a63c484b01967c12d109a10ab3..6539c6370fa684090310536e57a217d8d188fa0e 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -154,7 +154,7 @@ def lm_model( hidden_array.append(pre_hidden) cell_array.append(pre_cell) - input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2]) + input_embedding = paddle.transpose(input_embedding, perm=[1, 0, 2]) rnn = PaddingRNN() with rnn.step(): @@ -230,7 +230,7 @@ def lm_model( c, axes=[0], starts=[num_steps - 1], ends=[num_steps] ) last_cell_array.append(last_c) - real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) last_cell = layers.concat(last_cell_array, 0) @@ -317,17 +317,17 @@ def lm_model( last_hidden = paddle.reshape( last_hidden, shape=[-1, num_layers, hidden_size] ) - last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, num_layers, hidden_size] ) - last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) real_res = layers.concat(res, 0) real_res = paddle.reshape(real_res, shape=[len, -1, hidden_size]) - real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell @@ -404,7 +404,7 @@ def lm_model( init_cell=init_cell_reshape, ) elif rnn_model == "cudnn": - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden_reshape, @@ -417,7 +417,7 @@ def lm_model( low=-init_scale, high=init_scale ), ) - rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2]) + rnn_out = 
paddle.transpose(rnn_out, perm=[1, 0, 2]) elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 2b0291b601e7587e09df9bc71f16c251f5e60fab..8f9c24223aa17270992ff57beff60cc6fda9ef2c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -119,9 +119,10 @@ class DeepCF(fluid.Layer): def forward(self, users, items): # users_emb = self._user_emb(users) # items_emb = self._item_emb(items) + users_emb = paddle.gather(self._rating_matrix, users) items_emb = paddle.gather( - fluid.layers.transpose(self._rating_matrix, [1, 0]), items + paddle.transpose(self._rating_matrix, [1, 0]), items ) users_emb.stop_gradient = True items_emb.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 73f8973ebaf1b3587b85ecdcc697898be696ff1e..ee4060b8b0570a4de00bd1fc60cb352f27b16186 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -63,7 +63,7 @@ class SimpleNet(fluid.Layer): def forward(self, input, label): x_emb = self.embedding(input) projection = fluid.layers.matmul( - x_emb, fluid.layers.transpose(self.embedding.weight, perm=[1, 0]) + x_emb, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 46b568dec44bd3b73c2aff7ff2c5f532306a1872..0eb20a7dca158bcebb0a2563349980292fc01a3f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -264,7 +264,7 @@ class EncoderNet(fluid.dygraph.Layer): # stride=[1, 1], # filter_size=[conv_features.shape[2], 1]) - transpose_conv_features = fluid.layers.transpose( + transpose_conv_features = paddle.transpose( conv_features, perm=[0, 3, 1, 2] ) sliced_feature = paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index a3e603b5a96182b7cad7ad0fc06e3612f066e2e3..183aafd8246137814c00498e47450b98968d37e9 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -131,17 +131,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, 
self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 4968a2fe28adc237f02bc451bc2f9d70cb053af5..31ad8fc3faef2cfb2bc6576521c2a5e02b2e763f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -126,17 +126,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index a450d7e871f55dc0ba44e4ad0bbb90cae9337f47..9bf869c25e0b5b0f033e618270963e4329156859 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -14,6 +14,7 @@ import os import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.nn import Embedding @@ -128,17 +129,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index f137de9dc2cb23a364c1a4da2a0f4192f3ab38fa..fbf8f243987883ecfff04e2332a35b27a51218bc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = fluid.layers.matmul( - fc, 
fluid.layers.transpose(self.embedding.weight, perm=[1, 0]) + fc, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 5c6f224a5ee19d68c1f15241efd8d8d6d6cfc8ea..700ae9a9c878a56c1f1d4bfb8384e1e315d4a01e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -479,15 +479,16 @@ class MultiHeadAttentionLayer(Layer): reshaped_q = paddle.reshape( x=q, shape=[0, 0, self._n_head, self._d_key] ) - transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) + + transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) reshaped_k = paddle.reshape( x=k, shape=[0, 0, self._n_head, self._d_key] ) - transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) + transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) reshaped_v = paddle.reshape( x=v, shape=[0, 0, self._n_head, self._d_value] ) - transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) + transpose_v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention product = fluid.layers.matmul( @@ -513,7 +514,7 @@ class MultiHeadAttentionLayer(Layer): # combine heads if len(out.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(out, perm=[0, 2, 1, 3]) final_out = paddle.reshape( x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 1d04b4310539ff3e169d9cf060440c67d282317f..638c3bbe0025b9ba079ed1df83eaad5c7536398f 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -15,6 +15,7 @@ import unittest import numpy +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.fluid.core as core @@ -335,7 +336,7 @@ class TestRnnError(unittest.TestCase): name="sequence_length", shape=[None], dtype='int64' ) - inputs_dynamic_rnn = layers.transpose( + inputs_dynamic_rnn = paddle.transpose( inputs_basic_lstm, perm=[1, 0, 2] ) cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn") @@ -428,7 +429,7 @@ class TestRnn(unittest.TestCase): name="sequence_length", shape=[None], dtype='int64' ) - inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2]) + inputs_dynamic_rnn = paddle.transpose(inputs_basic_lstm, perm=[1, 0, 2]) cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn") output, final_state = dynamic_rnn( cell=cell, @@ -436,7 +437,7 @@ class TestRnn(unittest.TestCase): sequence_length=sequence_length, is_reverse=False, ) - output_new = layers.transpose(output, perm=[1, 0, 2]) + output_new = paddle.transpose(output, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = basic_lstm( inputs_basic_lstm, diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 8c4c8aa60de0a5f6a5df472043a54b868eb285cb..720575d4457d5bb6a6436f9f1853265cd29c98fd 100644 --- 
a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -138,17 +138,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index 67c2b8772c5ddde71a2d1b4a13d03b717b4bc0bd..5a310f6bf89437a0039f1cde3ef5794da1703743 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -291,7 +291,7 @@ class TestTransposeOpError(unittest.TestCase): def test_x_Variable_check(): # the Input(x)'s type must be Variable - fluid.layers.transpose("not_variable", perm=[1, 0, 2]) + paddle.transpose("not_variable", perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_Variable_check) @@ -300,26 +300,26 @@ class TestTransposeOpError(unittest.TestCase): x1 = fluid.layers.data( name='x1', shape=[10, 5, 3], dtype='int8' ) - fluid.layers.transpose(x1, perm=[1, 0, 2]) + paddle.transpose(x1, perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_dtype_check) def test_perm_list_check(): # Input(perm)'s type must be list - fluid.layers.transpose(x, perm="[1, 0, 2]") + paddle.transpose(x, perm="[1, 0, 2]") self.assertRaises(TypeError, test_perm_list_check) def test_perm_length_and_x_dim_check(): # Input(perm) is the permutation of dimensions of Input(input) # its length should be equal to dimensions of Input(input) - fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4]) + paddle.transpose(x, perm=[1, 0, 2, 3, 4]) self.assertRaises(ValueError, test_perm_length_and_x_dim_check) def test_each_elem_value_check(): # Each element in Input(perm) should be less than Input(x)'s dimension - fluid.layers.transpose(x, perm=[3, 5, 7]) + paddle.transpose(x, perm=[3, 5, 7]) self.assertRaises(ValueError, test_each_elem_value_check) diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index 842d9320dafc732848f0e592f10aa383039fd059..cf564e771e26f0e34633c9758c8d6700a8572986 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -121,7 +121,7 @@ def multi_head_attention( # permute the dimensions into: # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -133,7 +133,7 @@ def multi_head_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) # 
FIXME(guosheng): Decouple the program desc with batch_size. return paddle.reshape( x=trans_x,
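
For reference, a minimal sketch (not part of the patch) of the `paddle.transpose` call that replaces `fluid.layers.transpose` throughout this diff; the input tensor and the batch/time-swap snippet below are illustrative only:

    import paddle

    # paddle.transpose permutes dimensions so that output dim i takes input
    # dim perm[i], matching the removed fluid.layers.transpose behaviour.
    x = paddle.randn([2, 3, 4])
    y = paddle.transpose(x, perm=[1, 0, 2])
    print(y.shape)  # [3, 2, 4]

    # The batch/time swap used by several of the updated _transpose_batch_time
    # helpers: move axis 1 to the front and keep the remaining axes in order.
    perm = [1, 0] + list(range(2, len(x.shape)))
    print(paddle.transpose(x, perm).shape)  # [3, 2, 4]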