From b994c89deda98c098d435048585c11d98e9bbc0e Mon Sep 17 00:00:00 2001 From: xiaoguoguo626807 <100397923+xiaoguoguo626807@users.noreply.github.com> Date: Wed, 23 Nov 2022 12:44:55 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90fluid=20api=20clear=E3=80=91remove=20t?= =?UTF-8?q?ranspose=20(#47917)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove transpose * codestyle check * modify CI_STATIC * modify CI_STATIC * modify enable static() * remove unused import * fix conflict about stack * fix conflict about stack * fix conflict about stack * endless conflict --- .../paddle/fluid/contrib/layers/rnn_impl.py | 16 +-- python/paddle/fluid/layers/control_flow.py | 24 +++- python/paddle/fluid/layers/detection.py | 6 +- python/paddle/fluid/layers/nn.py | 103 ------------------ python/paddle/fluid/layers/rnn.py | 17 +-- python/paddle/fluid/nets.py | 4 +- .../auto_parallel/test_dist_op_cost.py | 25 ++--- .../fleet/parallel_dygraph_transformer.py | 10 +- .../fluid/tests/unittests/dist_transformer.py | 5 +- .../seq2seq_dygraph_model.py | 18 +-- .../dygraph_to_static/test_ptb_lm.py | 4 +- .../transformer_dygraph_model.py | 21 ++-- .../unittests/dygraph_to_static/yolov3.py | 5 +- .../unittests/ipu/test_transpose_op_ipu.py | 2 +- .../test_mkldnn_cpu_bfloat16_pass.py | 5 +- .../test_mkldnn_matmul_op_output_fuse_pass.py | 13 ++- ...n_reshape_transpose_matmul_v2_fuse_pass.py | 4 +- .../inference/test_trt_anchor_generator_op.py | 4 +- .../test_trt_shuffle_channel_detect_pass.py | 5 +- .../ir/inference/test_trt_subgraph_pass.py | 4 +- ..._trt_transpose_flatten_concat_fuse_pass.py | 6 +- .../unittests/mlu/test_transpose_op_mlu.py | 8 +- .../fluid/tests/unittests/test_cholesky_op.py | 2 +- .../test_eager_deletion_padding_rnn.py | 14 +-- .../tests/unittests/test_imperative_deepcf.py | 3 +- ..._imperative_lod_tensor_to_selected_rows.py | 2 +- .../test_imperative_ocr_attention_model.py | 2 +- .../unittests/test_imperative_ptb_rnn.py | 6 +- .../unittests/test_imperative_save_load.py | 6 +- .../unittests/test_imperative_save_load_v2.py | 7 +- ..._imperative_selected_rows_to_lod_tensor.py | 2 +- ..._imperative_transformer_sorted_gradient.py | 9 +- .../tests/unittests/test_rnn_cell_api.py | 7 +- .../tests/unittests/test_static_save_load.py | 6 +- .../tests/unittests/test_transpose_op.py | 10 +- .../tests/unittests/transformer_model.py | 4 +- 36 files changed, 157 insertions(+), 232 deletions(-) diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index 78813126e2..df6a38852f 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -326,7 +326,7 @@ def basic_gru( ) if batch_first: - input = layers.transpose(input, [1, 0, 2]) + input = paddle.transpose(input, [1, 0, 2]) mask = None if sequence_length: @@ -334,7 +334,7 @@ def basic_gru( mask = layers.sequence_mask( sequence_length, maxlen=max_seq_len, dtype='float32' ) - mask = layers.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) direc_num = 1 if bidirectional: @@ -425,7 +425,7 @@ def basic_gru( ) if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden else: @@ -433,7 +433,7 @@ def basic_gru( last_hidden = fw_last_hidden if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden @@ -610,7 +610,7 @@ def basic_lstm( ) if batch_first: 
- input = layers.transpose(input, [1, 0, 2]) + input = paddle.transpose(input, [1, 0, 2]) mask = None if sequence_length: @@ -619,7 +619,7 @@ def basic_lstm( sequence_length, maxlen=max_seq_len, dtype='float32' ) - mask = layers.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) direc_num = 1 if bidirectional: @@ -740,7 +740,7 @@ def basic_lstm( ) if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden, last_cell else: @@ -749,7 +749,7 @@ def basic_lstm( last_cell = fw_last_cell if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, [1, 0, 2]) return rnn_out, last_hidden, last_cell diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index ee53f23684..7ad02f364d 100755 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -625,10 +625,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -637,7 +639,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -714,10 +716,12 @@ class StaticRNN: Examples 1: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -726,7 +730,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -742,9 +746,11 @@ class StaticRNN: Examples 2: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -753,7 +759,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) boot_memory = fluid.layers.data(name='boot', shape=[hidden_size], dtype='float32', lod_level=1) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -842,10 +848,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -854,7 +862,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -893,10 +901,12 @@ class StaticRNN: Examples: .. 
code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -905,7 +915,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): @@ -953,10 +963,12 @@ class StaticRNN: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers vocab_size, hidden_size=10000, 200 + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1, 1], dtype='int64') # create word sequence x_emb = layers.embedding( @@ -965,7 +977,7 @@ class StaticRNN: dtype='float32', is_sparse=False) # transform batch size to dim 1 - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn = fluid.layers.StaticRNN() with rnn.step(): diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index bfa063c105..d89c6b9c1f 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -774,7 +774,7 @@ def detection_output( code_type='decode_center_size', ) scores = nn.softmax(input=scores) - scores = nn.transpose(scores, perm=[0, 2, 1]) + scores = paddle.transpose(scores, perm=[0, 2, 1]) scores.stop_gradient = True nmsed_outs = helper.create_variable_for_type_inference( dtype=decoded_box.dtype @@ -2443,7 +2443,7 @@ def multi_box_head( stride=stride, ) - mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1]) + mbox_loc = paddle.transpose(mbox_loc, perm=[0, 2, 3, 1]) mbox_loc_flatten = nn.flatten(mbox_loc, axis=1) mbox_locs.append(mbox_loc_flatten) @@ -2456,7 +2456,7 @@ def multi_box_head( padding=pad, stride=stride, ) - conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1]) + conf_loc = paddle.transpose(conf_loc, perm=[0, 2, 3, 1]) conf_loc_flatten = nn.flatten(conf_loc, axis=1) mbox_confs.append(conf_loc_flatten) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index c9308f98c2..d782bf973a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -89,7 +89,6 @@ __all__ = [ 'l2_normalize', 'matmul', 'topk', - 'transpose', 'im2sequence', 'row_conv', 'multiplex', @@ -4875,108 +4874,6 @@ def ctc_greedy_decoder( return ctc_out, ctc_out_len -def transpose(x, perm, name=None): - """ - Permute the data dimensions of `input` according to `perm`. - - The `i`-th dimension of the returned tensor will correspond to the - perm[i]-th dimension of `input`. - - Args: - x (Tensor): The input Tensor. It is a N-D Tensor of data types bool, float32, float64, int32. - perm (list|tuple): Permute the input according to the data of perm. - name (str): The name of this layer. It is optional. - - Returns: - Tensor: A transposed n-D Tensor, with data type being bool, float32, float64, int32, int64. - - For Example: - - .. 
code-block:: text - - x = [[[ 1 2 3 4] [ 5 6 7 8] [ 9 10 11 12]] - [[13 14 15 16] [17 18 19 20] [21 22 23 24]]] - shape(x) = [2,3,4] - - # Example 1 - perm0 = [1,0,2] - y_perm0 = [[[ 1 2 3 4] [13 14 15 16]] - [[ 5 6 7 8] [17 18 19 20]] - [[ 9 10 11 12] [21 22 23 24]]] - shape(y_perm0) = [3,2,4] - - # Example 2 - perm1 = [2,1,0] - y_perm1 = [[[ 1 13] [ 5 17] [ 9 21]] - [[ 2 14] [ 6 18] [10 22]] - [[ 3 15] [ 7 19] [11 23]] - [[ 4 16] [ 8 20] [12 24]]] - shape(y_perm1) = [4,3,2] - - Examples: - - .. code-block:: python - - import paddle - - x = paddle.randn([2, 3, 4]) - x_transposed = paddle.transpose(x, perm=[1, 0, 2]) - print(x_transposed.shape) - # [3L, 2L, 4L] - - """ - if in_dygraph_mode(): - return _C_ops.transpose(x, perm) - else: - if _in_legacy_dygraph(): - out, _ = _legacy_C_ops.transpose2(x, 'axis', perm) - return out - - check_variable_and_dtype( - x, - 'x', - [ - 'bool', - 'float16', - 'float32', - 'float64', - 'int32', - 'int64', - 'complex64', - 'complex128', - ], - 'transpose', - ) - check_type(perm, 'perm', (list, tuple), 'transpose') - if isinstance(perm, tuple): - perm = list(perm) - if len(perm) != len(x.shape): - raise ValueError( - "Input(perm) is the permutation of dimensions of Input(x), " - "its length should be equal to dimensions of Input(x), " - "but received dimension of Input(x) is %s, " - "the length of Input(perm) is %s." % (len(x.shape), len(perm)) - ) - for idx, dim in enumerate(perm): - if dim >= len(x.shape): - raise ValueError( - "Each element in Input(perm) should be less than Input(x)'s dimension, " - "but %d-th element in Input(perm) is %d which exceeds Input(x)'s " - "dimension %d." % (idx, perm[idx], len(x.shape)) - ) - - helper = LayerHelper('transpose', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - x_shape = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='transpose2', - inputs={'X': [x]}, - outputs={'Out': [out], 'XShape': [x_shape]}, - attrs={'axis': perm}, - ) - return out - - def im2sequence( input, filter_size=1, diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 9b384203fa..f7ce8d1e6c 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -16,6 +16,7 @@ import sys from functools import partial, reduce import warnings + import paddle from paddle.utils import deprecated from . import nn @@ -563,7 +564,7 @@ def _maybe_copy(state, new_state, step_mask): def _transpose_batch_time(x): perm = [1, 0] + list(range(2, len(x.shape))) - return nn.transpose(x, perm) + return paddle.transpose(x, perm) def _rnn_dynamic_graph( @@ -591,7 +592,7 @@ def _rnn_dynamic_graph( mask = sequence_lod.sequence_mask( sequence_length, maxlen=time_steps, dtype=inputs.dtype ) - mask = nn.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) if is_reverse: inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) @@ -678,7 +679,7 @@ def _rnn_static_graph( maxlen=max_seq_len, dtype=flatten(initial_states)[0].dtype, ) - mask = nn.transpose(mask, [1, 0]) + mask = paddle.transpose(mask, [1, 0]) if is_reverse: inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) mask = tensor.reverse(mask, axis=[0]) if sequence_length else None @@ -1032,14 +1033,14 @@ class BeamSearchDecoder(Decoder): expand_times = [1] * len(x.shape) expand_times[1] = beam_size x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...] 
- x = nn.transpose( + x = paddle.transpose( x, list(range(2, len(x.shape))) + [0, 1] ) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape x = paddle.reshape( x, shape=[0] * (len(x.shape) - 2) + [-1] ) # [..., batch_size * beam_size] - x = nn.transpose( + x = paddle.transpose( x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1)) ) # [batch_size * beam_size, ...] return x @@ -1557,7 +1558,9 @@ def _dynamic_decode_imperative( if not output_time_major: final_outputs = map_structure( - lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))), + lambda x: paddle.transpose( + x, [1, 0] + list(range(2, len(x.shape))) + ), final_outputs, ) @@ -1629,7 +1632,7 @@ def _dynamic_decode_declarative( return new_state def _transpose_batch_time(x): - return nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _create_array_out_of_while(dtype): current_block_idx = default_main_program().current_block_idx diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index eab247452f..c4bd6cf81f 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -577,7 +577,7 @@ def scaled_dot_product_attention( # permute the dimensions into: # [batch_size, num_heads, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -598,7 +598,7 @@ def scaled_dot_product_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) return paddle.reshape( x=trans_x, shape=list( diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py index 517debaa58..92ce41aa60 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/test_dist_op_cost.py @@ -16,6 +16,7 @@ import unittest import copy import paddle + from paddle.distributed.fleet import auto from paddle.distributed.auto_parallel.cluster import Cluster from paddle.distributed.auto_parallel.operators.common import ( @@ -151,9 +152,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul param1 = paddle.fluid.layers.create_parameter( @@ -188,9 +187,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out, param2 ) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] @@ -266,9 +263,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # matmul_v2 param1 = paddle.fluid.layers.create_parameter( @@ -300,9 +295,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out = paddle.matmul(out1, tmp_param) out2 = paddle.matmul(tmp_out, param2) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = 
paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] @@ -377,9 +370,7 @@ class TestDistOpCost(unittest.TestCase): auto.ProcessMesh([0, 1], dim_names=["x"]), ["x", None], ) - out = paddle.fluid.layers.transpose( - out, [1, 0] - ) # [8, 2] [-1, 0] + out = paddle.transpose(out, [1, 0]) # [8, 2] [-1, 0] # mul param1 = paddle.fluid.layers.create_parameter( @@ -414,9 +405,7 @@ class TestDistOpCost(unittest.TestCase): tmp_out, param2 ) # [8, 4] [-1, 0] - out8 = paddle.fluid.layers.transpose( - out2, [1, 0] - ) # [4, 8] [0, -1] + out8 = paddle.transpose(out2, [1, 0]) # [4, 8] [0, -1] # reshape out9 = paddle.reshape(out8, [8, 2, 4]) # [4, 2, 4] [0, -1, -1] diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index 21d357fcdd..d3fd734d6a 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -328,15 +328,16 @@ class MultiHeadAttentionLayer(Layer): reshaped_q = paddle.reshape( x=q, shape=[0, 0, self._n_head, self._d_key] ) - transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) + + transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) reshaped_k = paddle.reshape( x=k, shape=[0, 0, self._n_head, self._d_key] ) - transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) + transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) reshaped_v = paddle.reshape( x=v, shape=[0, 0, self._n_head, self._d_value] ) - transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) + transpose_v = paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention product = fluid.layers.matmul( @@ -362,7 +363,8 @@ class MultiHeadAttentionLayer(Layer): # combine heads if len(out.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + + trans_x = paddle.transpose(out, perm=[0, 2, 1, 3]) final_out = paddle.reshape( x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index fbe292e1f3..24de04dc6f 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -23,6 +23,7 @@ import glob import random import tarfile +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP @@ -1148,7 +1149,7 @@ def multi_head_attention( # permute the dimensions into: # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -1160,7 +1161,7 @@ def multi_head_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) # The value 0 in shape attr means copying the corresponding dimension # size of the input as the output dimension size. 
return paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 539400ad92..4b52df98c2 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -176,7 +176,7 @@ class BaseModel(fluid.dygraph.Layer): ) def _transpose_batch_time(self, x): - return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _merge_batch_beams(self, x): return paddle.reshape(x, shape=(-1, x.shape[2])) @@ -234,7 +234,7 @@ class BaseModel(fluid.dygraph.Layer): enc_len_mask = fluid.layers.sequence_mask( src_sequence_length, maxlen=max_seq_len, dtype="float32" ) - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) # TODO: Because diff exits if call while_loop in static graph. # In while block, a Variable created in parent block participates in the calculation of gradient, @@ -336,7 +336,7 @@ class BaseModel(fluid.dygraph.Layer): enc_len_mask = fluid.layers.sequence_mask( src_sequence_length, maxlen=max_seq_len, dtype="float32" ) - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) for k in range(args.max_seq_len): enc_step_input = src_emb[k] @@ -643,7 +643,7 @@ class AttentionModel(fluid.dygraph.Layer): ) def _transpose_batch_time(self, x): - return fluid.layers.transpose(x, [1, 0] + list(range(2, len(x.shape)))) + return paddle.transpose(x, [1, 0] + list(range(2, len(x.shape)))) def _merge_batch_beams(self, x): return paddle.reshape(x, shape=(-1, x.shape[2])) @@ -653,14 +653,14 @@ class AttentionModel(fluid.dygraph.Layer): expand_times = [1] * len(x.shape) expand_times[1] = self.beam_size x = fluid.layers.expand(x, expand_times) # [batch_size, beam_size, ...] - x = fluid.layers.transpose( + x = paddle.transpose( x, list(range(2, len(x.shape))) + [0, 1] ) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape x = paddle.reshape( x, shape=[0] * (len(x.shape) - 2) + [-1] ) # [..., batch_size * beam_size] - x = fluid.layers.transpose( + x = paddle.transpose( x, [len(x.shape) - 1] + list(range(0, len(x.shape) - 1)) ) # [batch_size * beam_size, ...] return x @@ -691,9 +691,9 @@ class AttentionModel(fluid.dygraph.Layer): attn = fluid.layers.matmul(query, memory, transpose_y=True) if mask is not None: - attn = fluid.layers.transpose(attn, [1, 0, 2]) + attn = paddle.transpose(attn, [1, 0, 2]) attn = fluid.layers.elementwise_add(attn, mask * 1000000000, -1) - attn = fluid.layers.transpose(attn, [1, 0, 2]) + attn = paddle.transpose(attn, [1, 0, 2]) weight = fluid.layers.softmax(attn) weight_memory = fluid.layers.matmul(weight, memory) @@ -743,7 +743,7 @@ class AttentionModel(fluid.dygraph.Layer): src_sequence_length, maxlen=max_seq_len, dtype="float32" ) enc_padding_mask = enc_len_mask - 1.0 - enc_len_mask = fluid.layers.transpose(enc_len_mask, [1, 0]) + enc_len_mask = paddle.transpose(enc_len_mask, [1, 0]) enc_outputs = [] # TODO: Because diff exits if call while_loop in static graph. 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index e87c727f7d..10e8f2cf68 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -122,12 +122,12 @@ class SimpleLSTMRNN(fluid.Layer): last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index e26699bacf..796329ab55 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -138,12 +138,13 @@ class MultiHeadAttention(Layer): k = self.k_fc(keys) v = self.v_fc(values) # split head + q = paddle.reshape(x=q, shape=[0, 0, self.n_head, self.d_key]) - q = layers.transpose(x=q, perm=[0, 2, 1, 3]) + q = paddle.transpose(x=q, perm=[0, 2, 1, 3]) k = paddle.reshape(x=k, shape=[0, 0, self.n_head, self.d_key]) - k = layers.transpose(x=k, perm=[0, 2, 1, 3]) + k = paddle.transpose(x=k, perm=[0, 2, 1, 3]) v = paddle.reshape(x=v, shape=[0, 0, self.n_head, self.d_value]) - v = layers.transpose(x=v, perm=[0, 2, 1, 3]) + v = paddle.transpose(x=v, perm=[0, 2, 1, 3]) if cache is not None: cache_k, cache_v = cache["k"], cache["v"] @@ -160,8 +161,10 @@ class MultiHeadAttention(Layer): if self.dropout_rate: weights = layers.dropout(weights, dropout_prob=self.dropout_rate) out = layers.matmul(weights, v) - out = layers.transpose(out, perm=[0, 2, 1, 3]) + + out = paddle.transpose(out, perm=[0, 2, 1, 3]) out = paddle.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) + out = self.proj_fc(out) return out @@ -703,7 +706,7 @@ class Transformer(Layer): def merge_batch_beams(tensor): var_dim_in_state = 2 # count in beam dim - tensor = layers.transpose( + tensor = paddle.transpose( tensor, list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state)), @@ -714,7 +717,7 @@ class Transformer(Layer): [0] * (len(tensor.shape) - var_dim_in_state) + [batch_size * beam_size], ) - res = layers.transpose( + res = paddle.transpose( tensor, list( range( @@ -728,7 +731,7 @@ class Transformer(Layer): def split_batch_beams(tensor): var_dim_in_state = 1 - tensor = layers.transpose( + tensor = paddle.transpose( tensor, list(range(var_dim_in_state, len(tensor.shape))) + list(range(0, var_dim_in_state)), @@ -738,7 +741,7 @@ class Transformer(Layer): [0] * (len(tensor.shape) - var_dim_in_state) + [batch_size, beam_size], ) - res = layers.transpose( + res = paddle.transpose( tensor, list( range( @@ -878,7 +881,7 @@ class Transformer(Layer): predict_ids = paddle.stack(predict_ids, axis=0) parent_ids = paddle.stack(parent_ids, axis=0) - finished_seq = layers.transpose( + finished_seq = paddle.transpose( layers.gather_tree(predict_ids, parent_ids), [1, 2, 0] ) finished_scores = topk_scores 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 28078aba78..58dae8bcfe 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -16,6 +16,7 @@ import os import sys import paddle + import paddle.fluid as fluid from paddle.fluid.dygraph import declarative from paddle.fluid.param_attr import ParamAttr @@ -345,9 +346,7 @@ class YOLOv3(fluid.dygraph.Layer): name="yolo_box" + str(i), ) self.boxes.append(boxes) - self.scores.append( - fluid.layers.transpose(scores, perm=[0, 2, 1]) - ) + self.scores.append(paddle.transpose(scores, perm=[0, 2, 1])) self.downsample //= 2 if not self.is_train: diff --git a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py index 34e0457c2d..e683f82521 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_transpose_op_ipu.py @@ -46,7 +46,7 @@ class TestBase(IPUOpTest): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out = paddle.fluid.layers.transpose(x, **self.attrs) + out = paddle.transpose(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py index 5792db6af9..0a74492ec6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_cpu_bfloat16_pass.py @@ -15,6 +15,7 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid from paddle.fluid.core import PassVersionChecker import paddle @@ -27,8 +28,10 @@ class TestMKLDNNCpuBfloat16Pass(InferencePassTest): x = fluid.data( name='x', shape=[-1] + self.shape_x, dtype=self.d_type ) - out = fluid.layers.transpose(x, perm=[0, 1, 2, 3]) + + out = paddle.transpose(x, perm=[0, 1, 2, 3]) out = paddle.reshape(out, [0, 0, 0, 0]) + out = fluid.layers.fc(out, size=1) self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py index a320dfbe4d..7954619666 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_op_output_fuse_pass.py @@ -14,6 +14,7 @@ import unittest import numpy as np + import paddle import paddle.fluid as fluid from inference_pass_test import InferencePassTest @@ -36,8 +37,9 @@ class TestMKLDNNMatmulFuseOp(InferencePassTest): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + out = paddle.transpose(out, perm=[0, 2, 1, 3]) out = paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]]) + out = fluid.layers.relu(out) return out @@ -77,7 +79,7 @@ class TestMKLDNNMatmulOpNotFusedWrongTransposeAxis(TestMKLDNNMatmulFuseOp): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 1, 2, 3]) + out = paddle.transpose(out, 
perm=[0, 1, 2, 3]) out = paddle.reshape(out, [0, 0, 0, 0]) out = fluid.layers.fc(out, size=1) return out @@ -100,11 +102,10 @@ class TestMKLDNNMatmulOpNotFusedBreakPattern(TestMKLDNNMatmulFuseOp): name='y', shape=[-1] + self.shape_y, dtype=self.d_type ) out = fluid.layers.matmul(x, y) - out = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) - out = fluid.layers.transpose( - out, perm=[0, 1, 2, 3] - ) # breaks pattern + out = paddle.transpose(out, perm=[0, 2, 1, 3]) + out = paddle.transpose(out, perm=[0, 1, 2, 3]) # breaks pattern out = paddle.reshape(out, [0, 0, self.shape_y[0] * self.shape_y[2]]) + out = fluid.layers.relu(out) return out diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py index 188d111c45..b5a345b2cf 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_reshape_transpose_matmul_v2_fuse_pass.py @@ -33,8 +33,10 @@ class TestReshapeTransposeMatmulV2OneDNNFusePass(InferencePassTest): weight = fluid.layers.create_parameter( shape=self.weight_shape, dtype="float32" ) + reshape = paddle.reshape(data, shape=self.reshape_shape) - transpose = fluid.layers.transpose(reshape, self.tranpose_perm) + transpose = paddle.transpose(reshape, self.tranpose_perm) + matmul = paddle.matmul( transpose, weight, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py index cd05c8528b..2030282f0c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import PassVersionChecker @@ -64,7 +66,7 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest): stride=self.stride, ) if self.dynamic_shape_params is not None: - anchor = fluid.layers.transpose(anchor, [2, 3, 0, 1]) + anchor = paddle.transpose(anchor, [2, 3, 0, 1]) out = fluid.layers.batch_norm(anchor, is_test=True) self.fetch_list = [out, var] diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py index 754149f7b3..51f4af19c6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig @@ -28,8 +30,9 @@ class ShuffleChannelFuseTRTPassTest(InferencePassTest): name="data", shape=[-1, 6, 64, 64], dtype="float32" ) reshape1 = paddle.reshape(x=data, shape=[-1, 2, 3, 64, 64]) - trans = fluid.layers.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) + trans = paddle.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) reshape2 = paddle.reshape(x=trans, shape=[-1, 6, 64, 64]) + out = fluid.layers.batch_norm(reshape2, is_test=True) 
self.feeds = { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index b91b068adb..86a995c45c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -17,6 +17,8 @@ import shutil import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import PassVersionChecker @@ -226,7 +228,7 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest): self.fetch_list = [out] def append_transpose(self, data): - return fluid.layers.transpose(data, [0, 3, 1, 2]) + return paddle.transpose(data, [0, 3, 1, 2]) def test_check_output(self): if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py index 8fc8b464dd..9fc54820d3 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_transpose_flatten_concat_fuse_pass.py @@ -15,6 +15,8 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest + +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -30,8 +32,8 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): data2 = fluid.data( name="data2", shape=[8, 32, 128], dtype="float32" ) - trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1]) - trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1]) + trans1 = paddle.transpose(data1, perm=[0, 2, 1]) + trans2 = paddle.transpose(data2, perm=[0, 2, 1]) flatt1 = fluid.layers.flatten(trans1) flatt2 = fluid.layers.flatten(trans2) concat_out = fluid.layers.concat([flatt1, flatt2], axis=1) diff --git a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py index 30e4519887..a802f9da21 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_transpose_op_mlu.py @@ -192,26 +192,26 @@ class TestTransposeOpError(unittest.TestCase): def test_x_Variable_check(): # the Input(x)'s type must be Variable - fluid.layers.transpose("not_variable", perm=[1, 0, 2]) + paddle.transpose("not_variable", perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_Variable_check) def test_perm_list_check(): # Input(perm)'s type must be list - fluid.layers.transpose(x, perm="[1, 0, 2]") + paddle.transpose(x, perm="[1, 0, 2]") self.assertRaises(TypeError, test_perm_list_check) def test_perm_length_and_x_dim_check(): # Input(perm) is the permutation of dimensions of Input(input) # its length should be equal to dimensions of Input(input) - fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4]) + paddle.transpose(x, perm=[1, 0, 2, 3, 4]) self.assertRaises(ValueError, test_perm_length_and_x_dim_check) def test_each_elem_value_check(): # Each element in Input(perm) should be less than Input(x)'s dimension - fluid.layers.transpose(x, perm=[3, 5, 7]) + paddle.transpose(x, perm=[3, 5, 7]) self.assertRaises(ValueError, test_each_elem_value_check) diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py 
b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index 1f5d608431..0688a782f7 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -74,7 +74,7 @@ class TestCholeskyOp(OpTest): root = layers.create_parameter( dtype=root_data.dtype, shape=root_data.shape ) - root_t = layers.transpose(root, self.trans_dims) + root_t = paddle.transpose(root, self.trans_dims) x = layers.matmul(x=root, y=root_t) + 1e-05 out = paddle.cholesky(x, upper=self.attrs["upper"]) grad_check(root, out, x_init=root_data, place=place) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 220a6d13b8..6539c6370f 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -154,7 +154,7 @@ def lm_model( hidden_array.append(pre_hidden) cell_array.append(pre_cell) - input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2]) + input_embedding = paddle.transpose(input_embedding, perm=[1, 0, 2]) rnn = PaddingRNN() with rnn.step(): @@ -230,7 +230,7 @@ def lm_model( c, axes=[0], starts=[num_steps - 1], ends=[num_steps] ) last_cell_array.append(last_c) - real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = layers.concat(last_hidden_array, 0) last_cell = layers.concat(last_cell_array, 0) @@ -317,17 +317,17 @@ def lm_model( last_hidden = paddle.reshape( last_hidden, shape=[-1, num_layers, hidden_size] ) - last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = layers.concat(cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, num_layers, hidden_size] ) - last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) real_res = layers.concat(res, 0) real_res = paddle.reshape(real_res, shape=[len, -1, hidden_size]) - real_res = layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) return real_res, last_hidden, last_cell @@ -404,7 +404,7 @@ def lm_model( init_cell=init_cell_reshape, ) elif rnn_model == "cudnn": - x_emb = layers.transpose(x_emb, perm=[1, 0, 2]) + x_emb = paddle.transpose(x_emb, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = layers.lstm( x_emb, init_hidden_reshape, @@ -417,7 +417,7 @@ def lm_model( low=-init_scale, high=init_scale ), ) - rnn_out = layers.transpose(rnn_out, perm=[1, 0, 2]) + rnn_out = paddle.transpose(rnn_out, perm=[1, 0, 2]) elif rnn_model == "basic_lstm": rnn_out, last_hidden, last_cell = basic_lstm( x_emb, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index 2b0291b601..8f9c24223a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -119,9 +119,10 @@ class DeepCF(fluid.Layer): def forward(self, users, items): # users_emb = self._user_emb(users) # items_emb = self._item_emb(items) + users_emb = paddle.gather(self._rating_matrix, users) items_emb = paddle.gather( - fluid.layers.transpose(self._rating_matrix, [1, 0]), items + paddle.transpose(self._rating_matrix, [1, 0]), items ) users_emb.stop_gradient = True items_emb.stop_gradient = True diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 73f8973eba..ee4060b8b0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -63,7 +63,7 @@ class SimpleNet(fluid.Layer): def forward(self, input, label): x_emb = self.embedding(input) projection = fluid.layers.matmul( - x_emb, fluid.layers.transpose(self.embedding.weight, perm=[1, 0]) + x_emb, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) projection = fluid.layers.elementwise_add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 46b568dec4..0eb20a7dca 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -264,7 +264,7 @@ class EncoderNet(fluid.dygraph.Layer): # stride=[1, 1], # filter_size=[conv_features.shape[2], 1]) - transpose_conv_features = fluid.layers.transpose( + transpose_conv_features = paddle.transpose( conv_features, perm=[0, 3, 1, 2] ) sliced_feature = paddle.reshape( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index a3e603b5a9..183aafd824 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -131,17 +131,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 4968a2fe28..31ad8fc3fa 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -126,17 +126,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = 
fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index a450d7e871..9bf869c25e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -14,6 +14,7 @@ import os import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.dygraph.nn import Embedding @@ -128,17 +129,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index f137de9dc2..fbf8f24398 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -73,7 +73,7 @@ class SimpleNet(fluid.Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = fluid.layers.matmul( - fc, fluid.layers.transpose(self.embedding.weight, perm=[1, 0]) + fc, paddle.transpose(self.embedding.weight, perm=[1, 0]) ) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) loss = fluid.layers.softmax_with_cross_entropy( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index 5c6f224a5e..700ae9a9c8 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -479,15 +479,16 @@ class MultiHeadAttentionLayer(Layer): reshaped_q = paddle.reshape( x=q, shape=[0, 0, self._n_head, self._d_key] ) - transpose_q = fluid.layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) + + transpose_q = paddle.transpose(x=reshaped_q, perm=[0, 2, 1, 3]) reshaped_k = paddle.reshape( x=k, shape=[0, 0, self._n_head, self._d_key] ) - transpose_k = fluid.layers.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) + transpose_k = paddle.transpose(x=reshaped_k, perm=[0, 2, 1, 3]) reshaped_v = paddle.reshape( x=v, shape=[0, 0, self._n_head, self._d_value] ) - transpose_v = fluid.layers.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) + transpose_v = 
paddle.transpose(x=reshaped_v, perm=[0, 2, 1, 3]) # scale dot product attention product = fluid.layers.matmul( @@ -513,7 +514,7 @@ class MultiHeadAttentionLayer(Layer): # combine heads if len(out.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = fluid.layers.transpose(out, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(out, perm=[0, 2, 1, 3]) final_out = paddle.reshape( x=trans_x, shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]], diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 1d04b43105..638c3bbe00 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -15,6 +15,7 @@ import unittest import numpy +import paddle import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle.fluid.core as core @@ -335,7 +336,7 @@ class TestRnnError(unittest.TestCase): name="sequence_length", shape=[None], dtype='int64' ) - inputs_dynamic_rnn = layers.transpose( + inputs_dynamic_rnn = paddle.transpose( inputs_basic_lstm, perm=[1, 0, 2] ) cell = LSTMCell(hidden_size, name="LSTMCell_for_rnn") @@ -428,7 +429,7 @@ class TestRnn(unittest.TestCase): name="sequence_length", shape=[None], dtype='int64' ) - inputs_dynamic_rnn = layers.transpose(inputs_basic_lstm, perm=[1, 0, 2]) + inputs_dynamic_rnn = paddle.transpose(inputs_basic_lstm, perm=[1, 0, 2]) cell = LSTMCell(self.hidden_size, name="LSTMCell_for_rnn") output, final_state = dynamic_rnn( cell=cell, @@ -436,7 +437,7 @@ class TestRnn(unittest.TestCase): sequence_length=sequence_length, is_reverse=False, ) - output_new = layers.transpose(output, perm=[1, 0, 2]) + output_new = paddle.transpose(output, perm=[1, 0, 2]) rnn_out, last_hidden, last_cell = basic_lstm( inputs_basic_lstm, diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 8c4c8aa60d..720575d445 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -138,17 +138,17 @@ class SimpleLSTMRNN(fluid.Layer): paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) ) real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) last_hidden = fluid.layers.concat(self.hidden_array, 1) last_hidden = paddle.reshape( last_hidden, shape=[-1, self._num_layers, self._hidden_size] ) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_hidden = paddle.transpose(x=last_hidden, perm=[1, 0, 2]) last_cell = fluid.layers.concat(self.cell_array, 1) last_cell = paddle.reshape( last_cell, shape=[-1, self._num_layers, self._hidden_size] ) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) return real_res, last_hidden, last_cell diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index 67c2b8772c..5a310f6bf8 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -291,7 +291,7 @@ class TestTransposeOpError(unittest.TestCase): def test_x_Variable_check(): # the Input(x)'s type must be Variable - fluid.layers.transpose("not_variable", perm=[1, 0, 2]) + paddle.transpose("not_variable", perm=[1, 
0, 2]) self.assertRaises(TypeError, test_x_Variable_check) @@ -300,26 +300,26 @@ class TestTransposeOpError(unittest.TestCase): x1 = fluid.layers.data( name='x1', shape=[10, 5, 3], dtype='int8' ) - fluid.layers.transpose(x1, perm=[1, 0, 2]) + paddle.transpose(x1, perm=[1, 0, 2]) self.assertRaises(TypeError, test_x_dtype_check) def test_perm_list_check(): # Input(perm)'s type must be list - fluid.layers.transpose(x, perm="[1, 0, 2]") + paddle.transpose(x, perm="[1, 0, 2]") self.assertRaises(TypeError, test_perm_list_check) def test_perm_length_and_x_dim_check(): # Input(perm) is the permutation of dimensions of Input(input) # its length should be equal to dimensions of Input(input) - fluid.layers.transpose(x, perm=[1, 0, 2, 3, 4]) + paddle.transpose(x, perm=[1, 0, 2, 3, 4]) self.assertRaises(ValueError, test_perm_length_and_x_dim_check) def test_each_elem_value_check(): # Each element in Input(perm) should be less than Input(x)'s dimension - fluid.layers.transpose(x, perm=[3, 5, 7]) + paddle.transpose(x, perm=[3, 5, 7]) self.assertRaises(ValueError, test_each_elem_value_check) diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index 842d9320da..cf564e771e 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -121,7 +121,7 @@ def multi_head_attention( # permute the dimensions into: # [batch_size, n_head, max_sequence_len, hidden_size_per_head] - return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + return paddle.transpose(x=reshaped, perm=[0, 2, 1, 3]) def __combine_heads(x): """ @@ -133,7 +133,7 @@ def multi_head_attention( if len(x.shape) != 4: raise ValueError("Input(x) should be a 4-D Tensor.") - trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + trans_x = paddle.transpose(x, perm=[0, 2, 1, 3]) # FIXME(guosheng): Decouple the program desc with batch_size. return paddle.reshape( x=trans_x, -- GitLab
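
For reference, the call-site migration applied throughout this patch is a one-line change: drop the removed `fluid.layers.transpose` (deleted from python/paddle/fluid/layers/nn.py above) and call `paddle.transpose` with the same `perm`. The sketch below is not part of the patch; it assumes a Paddle 2.x install where `paddle.transpose` is available, and the tensor shape is illustrative only (it mirrors the example from the removed docstring).

.. code-block:: python

    import paddle

    x = paddle.randn([2, 3, 4])

    # Removed fluid API (old call site):
    #   y = fluid.layers.transpose(x, perm=[1, 0, 2])
    # Replacement used at every call site in this patch:
    y = paddle.transpose(x, perm=[1, 0, 2])
    print(y.shape)  # [3, 2, 4]

The argument list is unchanged (`x`, `perm`, optional `name`), so the patch only rewrites the qualified name at each call site; no perms or shapes are altered.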