From a3ae080aa1eda28277795f049ba37f2df6b8d68a Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Fri, 2 Dec 2022 21:50:11 +0800 Subject: [PATCH] remove softmax api from fluid (#48388) * move softmax to paddle2.0 * fix some bugs * resolve conflict * remove some code * modify code style * fix bugs * fix code * fix move code * fix some bugs * fix code * fix some code * modify the header file * fix bugs * fix some examples * fix mish example * fix code --- python/paddle/fluid/layers/detection.py | 2 +- python/paddle/fluid/layers/nn.py | 164 ++---------------- python/paddle/fluid/layers/rnn.py | 4 +- .../fleet/parallel_dygraph_se_resnext.py | 2 +- .../fleet/parallel_dygraph_transformer.py | 4 +- .../fluid/tests/unittests/dist_transformer.py | 6 +- .../seq2seq_dygraph_model.py | 6 +- .../unittests/dygraph_to_static/test_dict.py | 4 +- .../dygraph_to_static/test_ifelse.py | 2 +- .../dygraph_to_static/test_mobile_net.py | 2 +- .../test_reinforcement_learning.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 2 +- .../transformer_dygraph_model.py | 4 +- .../unittests/ipu/test_dy2static_fp16_ipu.py | 2 +- .../tests/unittests/ipu/test_dy2static_ipu.py | 2 +- .../unittests/ipu/test_modelruntime_ipu.py | 2 +- .../tests/unittests/ipu/test_print_op_ipu.py | 2 +- .../unittests/ipu/test_softmax_op_ipu.py | 2 +- .../test_mkldnn_inplace_fuse_pass.py | 2 +- .../ir/inference/test_trt_activation_pass.py | 2 +- .../ir/inference/test_trt_fc_fuse_pass.py | 19 +- .../ir/inference/test_trt_gather_op.py | 4 +- .../unittests/ir/test_ir_fc_fuse_pass.py | 3 +- .../unittests/npu/test_softmax_op_npu.py | 2 +- .../test_imperative_ocr_attention_model.py | 2 +- .../test_imperative_reinforcement.py | 2 +- .../unittests/test_imperative_se_resnext.py | 4 +- ..._imperative_transformer_sorted_gradient.py | 4 +- .../fluid/tests/unittests/test_layers.py | 6 +- .../fluid/tests/unittests/test_mean_op.py | 2 +- .../tests/unittests/test_recurrent_op.py | 2 +- .../tests/unittests/test_rnn_decode_api.py | 4 +- .../tests/unittests/xpu/test_mean_op_xpu.py | 2 +- 33 files changed, 69 insertions(+), 205 deletions(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index f021ab8f3d..d490b0457d 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -626,7 +626,7 @@ def detection_output( target_box=loc, code_type='decode_center_size', ) - scores = nn.softmax(input=scores) + scores = paddle.nn.functional.softmax(scores) scores = paddle.transpose(scores, perm=[0, 2, 1]) scores.stop_gradient = True nmsed_outs = helper.create_variable_for_type_inference( diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 717c965727..4dab44ebe5 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -68,7 +68,6 @@ __all__ = [ 'linear_chain_crf', 'crf_decoding', 'conv2d', - 'softmax', 'pool2d', 'batch_norm', 'dropout', @@ -145,7 +144,7 @@ def _get_reduce_dim(dim, input): else: raise TypeError( "The type of dim must be int, list, tuple or range, but received {}".format( - type(axis) + type(dim) ) ) if dim is None: @@ -679,7 +678,7 @@ def _pull_gpups_sparse( size(int|list of int): The embedding size parameter of each input, which indicates the size of each embedding vector respectively. dtype(str): The dtype refers to the data type of output tensor. Only supports - float32 now. + float32 now. 
Returns: Variable|list of Variable: The tensor variable storing the embeddings of the \ @@ -742,7 +741,7 @@ def _pull_box_sparse( size(int): The embedding size parameter, which indicates the size of each embedding vector respectively. dtype(str): The dtype refers to the data type of output tensor. Only supports - float32 now. + float32 now. Returns: Variable|list of Variable: The tensor variable storing the embeddings of the \ @@ -1123,147 +1122,6 @@ def dropout( return out -@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax") -def softmax(input, use_cudnn=True, name=None, axis=-1): - r""" - This operator implements the softmax layer. The calculation process is as follows: - - 1. The dimension :attr:`axis` of the ``input`` will be permuted to the last. - - 2. Then the input tensor will be logically flattened to a 2-D matrix. The matrix's - second dimension(row length) is the same as the dimension :attr:`axis` of the input - tensor, and the first dimension(column length) is the product of all other - dimensions of the input tensor. For each row of the matrix, the softmax operator - squashes the K-dimensional(K is the width of the matrix, which is also the size - of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a - K-dimensional vector of real values in the range [0, 1] that add up to 1. - - 3. After the softmax operation is completed, the inverse operations of steps 1 and 2 - are performed to restore the two-dimensional matrix to the same dimension as the ``input``. - - It computes the exponential of the given dimension and the sum of exponential - values of all the other dimensions in the K-dimensional vector input. - Then the ratio of the exponential of the given dimension and the sum of - exponential values of all the other dimensions is the output of the softmax - operator. - - For each row :math:`i` and each column :math:`j` in the matrix, we have: - - .. math:: - - Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j(exp(X[i, j])} - - Example: - - .. code-block:: text - - Case 1: - Input: - X.shape = [2, 3, 4] - X.data = [[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]] - - Attrs: - axis = -1 - - Output: - Out.shape = [2, 3, 4] - Out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], - [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], - [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] - - Case 2: - Input: - X.shape = [2, 3, 4] - X.data = [[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]] - Attrs: - axis = 1 - - Output: - Out.shape = [2, 3, 4] - Out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], - [0.01786798, 0.01786798, 0.04661262, 0.04661262], - [0.97555875, 0.97555875, 0.93623955, 0.93623955]], - [[0.00490169, 0.00490169, 0.00490169, 0.00490169], - [0.26762315, 0.26762315, 0.26762315, 0.26762315], - [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] - - Args: - input (Tensor): The input tensor. A multi-dimension ``Tensor`` with type float32 or float64. - use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn \ - library is installed. To improve performance, set use_cudnn to True by default. - name (str, optional): The default value is None. 
Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . Default: None. - will be named automatically. Default: None. - axis (int, optional): The index of dimension to perform softmax calculations, it should - be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of - input tensor. Default: -1. -1 means the last dimension. - - Returns: - Tensor: ``Tensor`` indicates the output of softmax. The data type and shape are the same as ``input`` . - - Examples: - - .. code-block:: python - - import paddle - import paddle.nn.functional as F - - x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], - [3.0, 4.0, 5.0, 6.0], - [7.0, 8.0, 8.0, 9.0]], - [[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [6.0, 7.0, 8.0, 9.0]]], dtype='float32') - y = F.softmax(x, axis=1) - print(y) - # [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], - # [0.01786798, 0.01786798, 0.04661262, 0.04661262], - # [0.97555870, 0.97555870, 0.93623954, 0.93623954]], - # [[0.00490169, 0.00490169, 0.00490169, 0.00490169], - # [0.26762316, 0.26762316, 0.26762316, 0.26762316], - # [0.72747517, 0.72747517, 0.72747517, 0.72747517]]] - - """ - - if in_dygraph_mode(): - return _C_ops.softmax(input, axis) - - if _non_static_mode(): - return _legacy_C_ops.softmax( - input, 'axis', axis, 'use_cudnn', use_cudnn - ) - - inputs = {"X": [input]} - attrs = {"axis": axis, "use_cudnn": use_cudnn} - - helper = LayerHelper('softmax', **locals()) - check_variable_and_dtype( - input, 'input/x', ['float16', 'float32', 'float64'], 'softmax' - ) - - dtype = helper.input_dtype() - softmax_out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type="softmax", - inputs={"X": input}, - outputs={"Out": softmax_out}, - attrs=attrs, - ) - return softmax_out - - def conv2d( input, num_filters, @@ -1788,7 +1646,7 @@ def pool2d( if pool_padding == "VALID": padding_algorithm = "VALID" pool_padding = [0, 0] - if ceil_mode != False: + if ceil_mode is not False: raise ValueError( "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " "Received ceil_mode: True." @@ -6643,7 +6501,7 @@ def deformable_roi_pooling( ) input_channels = input.shape[1] - if position_sensitive == False: + if position_sensitive is False: output_channels = input_channels else: output_channels = input_channels / pooled_height / pooled_width @@ -6841,11 +6699,11 @@ def mish(x, threshold=20, name=None): .. math:: - out = \\begin{cases} - x \\ast \\tanh(x), \\text{if } x > \\text{threshold} \\\\ - x \\ast \\tanh(e^{x}), \\text{if } x < -\\text{threshold} \\\\ - x \\ast \\tanh(\\ln(1 + e^{x})), \\text{otherwise} - \\end{cases} + out = \\begin{cases} + x \\ast \\tanh(x), \\text{if } x > \\text{threshold} \\\\ + x \\ast \\tanh(e^{x}), \\text{if } x < -\\text{threshold} \\\\ + x \\ast \\tanh(\\ln(1 + e^{x})), \\text{otherwise} + \\end{cases} Args: x (Variable): Input feature, multi-dimensional Tensor. The data type @@ -6867,9 +6725,11 @@ def mish(x, threshold=20, name=None): .. 
code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() DATATYPE='float32' x_data = np.array([i for i in range(1,5)]).reshape([1,1,4]).astype(DATATYPE) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 83c4d6c2cb..60ac537ffc 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -1304,7 +1304,7 @@ class BeamSearchDecoder(Decoder): self.noend_mask_tensor, "float64" ) - step_log_probs = paddle.log(nn.softmax(logits)) + step_log_probs = paddle.log(paddle.nn.functional.softmax(logits)) step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) log_probs = nn.elementwise_add( x=step_log_probs, y=beam_state.log_probs, axis=0 ) @@ -2330,7 +2330,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper): if self.softmax_temperature is not None else outputs ) - probs = nn.softmax(logits) + probs = paddle.nn.functional.softmax(logits) # TODO: remove this stop_gradient. The stop_gradient of sample_ids can # not pass to probs, since sampling_id op does not have corresponding # grad op and thus can not pass. diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py index 13e83741ea..164f1410ed 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_se_resnext.py @@ -354,7 +354,7 @@ class TestSeResNeXt(TestParallelDyGraphRunnerBase): label.stop_gradient = True out = model(img) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) return avg_loss diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index 5cfd8a6078..41c8afd629 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -342,7 +342,7 @@ class MultiHeadAttentionLayer(Layer): ) if attn_bias is not None: product += attn_bias - weights = fluid.layers.softmax(product) + weights = paddle.nn.functional.softmax(product) if self._dropout_rate: weights_droped = fluid.layers.dropout( weights, @@ -849,7 +849,7 @@ class WrapDecoderLayer(Layer): if dec_inputs is None: # Return probs for independent decoder program.
- predict_out = fluid.layers.softmax(predict) + predict_out = paddle.nn.functional.softmax(predict) return predict_out return predict diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 7106c426bc..cb60e1c599 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1177,7 +1177,7 @@ def multi_head_attention( product = layers.matmul(x=scaled_q, y=k, transpose_y=True) if attn_bias: product += attn_bias - weights = layers.softmax(product) + weights = paddle.nn.functional.softmax(product) if dropout_rate: weights = layers.dropout( weights, @@ -1715,7 +1715,7 @@ def wrap_decoder( bias_attr=const_bias_attr, ) if dec_inputs is None: - predict = layers.softmax(predict) + predict = paddle.nn.functional.softmax(predict) return predict @@ -1834,7 +1834,7 @@ def fast_decode( logits = paddle.reshape(logits, (-1, trg_vocab_size)) topk_scores, topk_indices = layers.topk( - input=layers.softmax(logits), k=beam_size + input=paddle.nn.functional.softmax(logits), k=beam_size ) accu_scores = layers.elementwise_add( x=paddle.log(topk_scores), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index bf1dfdcad2..d364b8a1a5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -435,7 +435,9 @@ class BaseModel(fluid.dygraph.Layer): cell_outputs = self._split_batch_beams(step_input) cell_outputs = self.fc(cell_outputs) - step_log_probs = paddle.log(fluid.layers.softmax(cell_outputs)) + step_log_probs = paddle.log( + paddle.nn.functional.softmax(cell_outputs) + ) noend_array = [-self.kinf] * self.tar_vocab_size noend_array[self.beam_end_token] = 0 noend_mask_tensor = to_variable( @@ -703,7 +705,7 @@ class AttentionModel(fluid.dygraph.Layer): attn = paddle.transpose(attn, [1, 0, 2]) attn = paddle.add(attn, mask * 1000000000) attn = paddle.transpose(attn, [1, 0, 2]) - weight = fluid.layers.softmax(attn) + weight = paddle.nn.functional.softmax(attn) weight_memory = fluid.layers.matmul(weight, memory) return weight_memory diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py index 57bd7c2936..742e828aa9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_dict.py @@ -67,7 +67,7 @@ class SubNetWithDict(fluid.dygraph.Layer): cache["k"], cache["v"] = k, v weight = fluid.layers.matmul(x=q, y=k, transpose_y=True) - weight = fluid.layers.softmax(weight) + weight = paddle.nn.functional.softmax(weight) out = fluid.layers.matmul(weight, v) return out @@ -113,7 +113,7 @@ class MainNetWithDict(fluid.dygraph.Layer): # Test to call function defined outside of class. 
def update_cache(cache): for k, val in cache.items(): - cache[k] = fluid.layers.softmax(val) + cache[k] = paddle.nn.functional.softmax(val) return cache diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py index c17bfd2508..8cc543a19f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ifelse.py @@ -308,7 +308,7 @@ class NetWithExternalFunc(fluid.dygraph.Layer): # Test to call function behind caller. def softmax(x): - return fluid.layers.softmax(x) + return paddle.nn.functional.softmax(x) class TestNetWithExternalFunc(TestDygraphIfElseNet): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 4c5e306718..8358c12edc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -535,7 +535,7 @@ def train_mobilenet(args, to_static): out = net(img) t_end = time.time() - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy( input=softmax_out, label=label ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py index b98d9c304d..13aace2003 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_reinforcement_learning.py @@ -48,7 +48,7 @@ class Policy(Layer): x = fluid.layers.relu(x) action_scores = self.affine2(x) - log_prob = fluid.layers.softmax(action_scores, axis=1) + log_prob = paddle.nn.functional.softmax(action_scores, axis=1) return log_prob diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 109fc99754..70ee21713c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -343,7 +343,7 @@ class SeResNeXt(fluid.dygraph.Layer): y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output]) out = self.out(y) - softmax_out = fluid.layers.softmax(out) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index f4c0815884..e6f03170b4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -153,7 +153,7 @@ class MultiHeadAttention(Layer): ) if attn_bias is not None: product += attn_bias - weights = layers.softmax(product) + weights = paddle.nn.functional.softmax(product) if self.dropout_rate: weights = layers.dropout(weights, dropout_prob=self.dropout_rate) out = layers.matmul(weights, v) @@ -840,7 +840,7 @@ class Transformer(Layer): ) caches = map_structure(split_batch_beams, caches) step_log_probs = split_batch_beams( - 
paddle.log(fluid.layers.softmax(logits)) + paddle.log(paddle.nn.functional.softmax(logits)) ) step_log_probs = mask_probs( diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py index 8a13e5abb5..f685eac6d3 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_fp16_ipu.py @@ -33,7 +33,7 @@ class SimpleLayer(paddle.nn.Layer): x = self.conv(x) x = paddle.flatten(x, 1, -1) if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) if self.use_ipu: loss = paddle.incubate.identity_loss(loss, 1) diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index dbdfab2882..4ca2599217 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -48,7 +48,7 @@ class SimpleLayer(paddle.nn.Layer): x = paddle.flatten(x, 1, -1) if target is not None: if self.use_softmax: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) if self.loss_op: loss = self.loss_op(x, target) else: diff --git a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py index 9fda7f780e..2e13687df1 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_modelruntime_ipu.py @@ -32,7 +32,7 @@ class SimpleLayer(paddle.nn.Layer): x = self.conv(x) x = paddle.flatten(x, 1, -1) if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) return x, loss return x diff --git a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py index ccf0a38bbf..782c195c5d 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_print_op_ipu.py @@ -119,7 +119,7 @@ class SimpleLayer(paddle.nn.Layer): print(x) x = paddle.flatten(x, 1, -1) if target is not None: - x = paddle.fluid.layers.softmax(x) + x = paddle.nn.functional.softmax(x) loss = paddle.fluid.layers.cross_entropy(x, target) loss = paddle.incubate.identity_loss(loss, 1) return x, loss diff --git a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py index 485515d7d7..53c7e1ad92 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_softmax_op_ipu.py @@ -47,7 +47,7 @@ class TestBase(IPUOpTest): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32' ) - out = paddle.fluid.layers.softmax(x, **self.attrs) + out = paddle.nn.functional.softmax(x, **self.attrs) self.fetch_list = [out.name] def run_model(self, exec_mode): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py index 47668a42ec..386dcf7b40 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_inplace_fuse_pass.py 
@@ -32,7 +32,7 @@ class MkldnnInplacePassTest(InferencePassTest): conv_out_1 = fluid.layers.conv2d( data, num_filters=3, filter_size=3, bias_attr=False ) - softmax_out = fluid.layers.softmax(conv_out_1) + softmax_out = paddle.nn.functional.softmax(conv_out_1) relu_out = fluid.layers.relu(conv_out_1) eltwise_out = fluid.layers.elementwise_add( softmax_out, relu_out, axis=-1 diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py index 3597f11c55..29393ff96c 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_activation_pass.py @@ -77,7 +77,7 @@ class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest): class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest): def append_act(self, x): - return fluid.layers.softmax(x) + return paddle.nn.functional.softmax(x) class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py index 3f5daf0d92..a3b297a268 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py @@ -17,6 +17,7 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig @@ -31,7 +32,7 @@ class FCFusePassTRTTest(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=128, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 2, 2)).astype("float32") @@ -61,7 +62,7 @@ class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 8)).astype("float32") @@ -89,7 +90,7 @@ class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=32, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((3, 24, 16, 16)).astype("float32") @@ -115,7 +116,7 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128)).astype("float32")} self.enable_trt = True @@ -147,7 +148,7 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} self.enable_trt = True @@ -179,7 +180,7 @@ class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = 
paddle.nn.functional.softmax(fc_out1) self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} self.enable_trt = True @@ -213,7 +214,7 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=1, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 12, 4, 6)).astype("float32") @@ -249,7 +250,7 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=2, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 32)).astype("float32") @@ -285,7 +286,7 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest): fc_out1 = fluid.layers.fc( input=data, size=64, num_flatten_dims=3, act="relu" ) - out = fluid.layers.softmax(input=fc_out1) + out = paddle.nn.functional.softmax(fc_out1) self.feeds = { "data": np.random.random((32, 128, 32, 32)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py index c8b01107eb..3b73ae0744 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py @@ -30,7 +30,7 @@ class TRTGatherTest1(InferencePassTest): data = fluid.data(name='data', shape=[-1, 128], dtype='float32') index = fluid.data(name='index', shape=[-1, 1], dtype='int32') scale_out = paddle.gather(data, index=index) - out = fluid.layers.softmax(input=scale_out) + out = paddle.nn.functional.softmax(scale_out) self.feeds = { "data": np.random.random([self.bs, 128]).astype("float32"), @@ -69,7 +69,7 @@ class TRTGatherTest2(InferencePassTest): data = fluid.data(name='data', shape=[16, 64], dtype='float32') index = fluid.data(name='index', shape=[2], dtype='int32') scale_out = paddle.gather(data, index=index) - out = fluid.layers.softmax(input=scale_out) + out = paddle.nn.functional.softmax(scale_out) self.feeds = { "data": np.random.random([self.bs, 64]).astype("float32"), diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py index 5659ecf3b4..3e958d9d19 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_fc_fuse_pass.py @@ -17,6 +17,7 @@ import unittest import numpy as np from pass_test import PassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core @@ -31,7 +32,7 @@ class FCFusePassTest(PassTest): input=data, size=128, num_flatten_dims=1, act="relu" ) tmp_1 = fluid.layers.fc(input=tmp_0, size=32, num_flatten_dims=1) - tmp_2 = fluid.layers.softmax(input=tmp_1) + tmp_2 = paddle.nn.functional.softmax(tmp_1) self.feeds = {"data": np.random.random((32, 128)).astype("float32")} self.fetch_list = [tmp_0, tmp_1, tmp_2] diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py index 41ccda3dba..2ad4b930f0 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_op_npu.py @@ -79,7 +79,7 @@ class TestSoftmaxNet(unittest.TestCase): prediction = fluid.layers.fc(input=fc_1, size=2) # 4 x 2 - prob = 
fluid.layers.softmax(prediction, axis=1) + prob = paddle.nn.functional.softmax(prediction, axis=1) cost = fluid.layers.cross_entropy(input=prob, label=label) loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 657774b729..8c46a64162 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -310,7 +310,7 @@ class SimpleAttention(fluid.dygraph.Layer): shape=[attention_weight.shape[0], attention_weight.shape[1]], ) - weights_reshape = fluid.layers.softmax(weights_reshape) + weights_reshape = paddle.nn.functional.softmax(weights_reshape) scaled = fluid.layers.elementwise_mul( x=encoder_vec, y=weights_reshape, axis=0 ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py index dfbaae4926..06982a0fc3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_reinforcement.py @@ -41,7 +41,7 @@ class Policy(fluid.dygraph.Layer): x = fluid.layers.dropout(x, self.dropout_ratio) x = fluid.layers.relu(x) action_scores = self.affine2(x) - return fluid.layers.softmax(action_scores, axis=1) + return paddle.nn.functional.softmax(action_scores, axis=1) class TestImperativeMnist(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index 95f912d822..6eb5ab1874 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -376,7 +376,7 @@ class TestImperativeResneXt(unittest.TestCase): label.stop_gradient = True out = se_resnext(img) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy( input=softmax_out, label=label ) @@ -456,7 +456,7 @@ class TestImperativeResneXt(unittest.TestCase): ) label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) + softmax_out = paddle.nn.functional.softmax(out) loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = paddle.mean(x=loss) optimizer.minimize(avg_loss) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index e850905141..a88c31dd3f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -503,7 +503,7 @@ class MultiHeadAttentionLayer(Layer): ) if attn_bias is not None: product += attn_bias - weights = fluid.layers.softmax(product) + weights = paddle.nn.functional.softmax(product) if self._dropout_rate: weights_droped = fluid.layers.dropout( weights, @@ -1013,7 +1013,7 @@ class WrapDecoderLayer(Layer): if dec_inputs is None: # Return probs for independent decoder program.
- predict_out = fluid.layers.softmax(predict) + predict_out = paddle.nn.functional.softmax(predict) return predict_out return predict diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 67cfdfeceb..25b6d0513d 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -2748,7 +2748,7 @@ class TestLayer(LayerTest): data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32") label = fluid.data(name="label", shape=[-1, 1], dtype="int") fc_out = fluid.layers.fc(input=data, size=10) - predict = fluid.layers.softmax(input=fc_out) + predict = paddle.nn.functional.softmax(fc_out) result = paddle.static.accuracy(input=predict, label=label, k=5) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -2764,7 +2764,7 @@ class TestLayer(LayerTest): data = base.to_variable(x) label = base.to_variable(y) fc_out = fluid.layers.fc(data, size=10) - predict = fluid.layers.softmax(fc_out) + predict = paddle.nn.functional.softmax(fc_out) dynamic_out = paddle.static.accuracy( input=predict, label=label, k=5 ) @@ -3056,7 +3056,7 @@ class TestBook(LayerTest): ): data = self._get_data(name='data', shape=[10], dtype='float32') hid = layers.fc(input=data, size=20) - return layers.softmax(hid, axis=1) + return paddle.nn.functional.softmax(hid, axis=1) @prog_scope() def make_nce(self): diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 83f07bf747..33f95b439c 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -89,7 +89,7 @@ class TestMeanOpError(unittest.TestCase): input3 = fluid.layers.data( name='input3', shape=[4], dtype="float16" ) - fluid.layers.softmax(input3) + paddle.nn.functional.softmax(input3) @unittest.skipIf( diff --git a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index 6e01ee1d4f..2b06de33f2 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -617,7 +617,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1): def dot_attention(query, memory): attn = layers.matmul(query, memory, transpose_y=True) - weight = layers.softmax(attn) + weight = paddle.nn.functional.softmax(attn) weight_memory = layers.matmul(weight, memory) return weight_memory, weight diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py index a557fb9df0..3b3539c486 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py @@ -76,7 +76,7 @@ class DecoderCell(layers.RNNCell): ) if encoder_padding_mask is not None: attn_scores = paddle.add(attn_scores, encoder_padding_mask) - attn_scores = layers.softmax(attn_scores) + attn_scores = paddle.nn.functional.softmax(attn_scores) attn_out = paddle.squeeze( layers.matmul(attn_scores, encoder_output), [1] ) @@ -295,7 +295,7 @@ class Seq2SeqModel: decoder_output.sample_ids, dec_seq_lengths, ) - probs = layers.softmax(logits) + probs = paddle.nn.functional.softmax(logits) return probs, samples, sample_length diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py index 6021256f69..22f759b46f 100644 --- 
a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py @@ -99,7 +99,7 @@ class TestMeanOpError(unittest.TestCase): input3 = fluid.layers.data( name='input3', shape=[4], dtype="float16" ) - fluid.layers.softmax(input3) + paddle.nn.functional.softmax(input3) support_types = get_xpu_op_support_types('mean') -- GitLab
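Migration sketch (illustrative only; the input tensor below is made up, while the API substitution mirrors the call-site changes in this patch): callers of the removed fluid.layers.softmax switch to paddle.nn.functional.softmax, which takes the tensor as a positional argument (no input= keyword) and accepts only axis, dtype, and name, so use_cudnn is simply dropped.

    import paddle
    import paddle.nn.functional as F

    x = paddle.to_tensor([[1.0, 2.0, 3.0],
                          [4.0, 5.0, 6.0]], dtype='float32')

    # Before this patch (removed API):
    #   out = fluid.layers.softmax(x, use_cudnn=True, axis=-1)
    # After this patch; use_cudnn has no counterpart in F.softmax:
    out = F.softmax(x, axis=-1)
    print(out)  # rows sum to 1 along the last axis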