From 33173ab4ba623900e7cae51f8373ec7fb260c521 Mon Sep 17 00:00:00 2001
From: yuehuayingxueluo <867460659@qq.com>
Date: Fri, 2 Dec 2022 11:14:11 +0800
Subject: [PATCH] clear fluid apis: square_error_cost (#48029)

* clear fluid apis in fleet and passes
* fix model.py
* fix model.py
* fix cpp_pass.py
* clear loss.py
* change test file
* fix some test_*.py
* fix adaround.py
* fix evaluator.py
* fix CI bug
* fix CI bug
* fix decode.py
* fix detection.py
* fix ci bug
* rm test_sigmoid_cross_entropy_with_logits_op_ipu.py and fix __init__.py
* fix ci bug
* fix ci BUG
---
 python/paddle/fluid/contrib/optimizer.py      |  1 -
 .../contrib/slim/quantization/adaround.py     |  3 +-
 .../test_moving_average_abs_max_scale_op.py   |  2 +-
 .../tests/test_image_classification_fp16.py   |  4 +-
 .../tests/test_multi_precision_fp16_train.py  |  2 +-
 python/paddle/fluid/layers/detection.py       |  2 +-
 python/paddle/fluid/layers/loss.py            | 57 -------------------
 python/paddle/fluid/optimizer.py              | 19 +++++--
 .../fluid/tests/book/test_fit_a_line.py       | 10 +++-
 .../tests/book/test_recommender_system.py     |  4 +-
 .../tests/unittests/auto_checkpoint_utils.py  |  2 +-
 .../tests/unittests/check_nan_inf_base.py     |  2 +-
 .../fleet/hybrid_parallel_pp_embedding.py     |  4 +-
 .../fleet/hybrid_parallel_shared_weight.py    |  4 +-
 .../fleet/parallel_dygraph_transformer.py     |  2 +-
 .../fleet/test_communicator_half_async.py     |  2 +-
 .../fleet/test_communicator_sync.py           |  2 +-
 .../fleet/test_distributed_strategy.py        |  2 +-
 .../fleet/test_fleet_rolemaker_new.py         |  2 +-
 .../fluid/tests/unittests/dist_transformer.py |  2 +-
 .../dygraph_to_static/bert_dygraph_model.py   |  4 +-
 .../seq2seq_dygraph_model.py                  |  4 +-
 .../unittests/dygraph_to_static/test_bmn.py   |  2 +-
 .../dygraph_to_static/test_ptb_lm.py          |  2 +-
 .../transformer_dygraph_model.py              |  2 +-
 .../tests/unittests/ipu/test_dy2static_ipu.py |  2 +-
 .../unittests/mlu/test_huber_loss_op_mlu.py   |  2 +-
 .../unittests/mlu/test_momentum_op_mlu.py     |  4 +-
 .../test_softmax_with_cross_entropy_op_mlu.py |  2 +-
 .../unittests/npu/test_momentum_op_npu.py     |  4 +-
 .../tests/unittests/npu/test_slice_op_npu.py  |  2 +-
 .../test_softmax_with_cross_entropy_op_npu.py |  2 +-
 .../parallel_dygraph_sparse_embedding.py      |  2 +-
 .../fluid/tests/unittests/test_adadelta_op.py |  4 +-
 .../fluid/tests/unittests/test_adam_op.py     |  4 +-
 .../test_adam_optimizer_fp32_fp64.py          |  4 +-
 .../fluid/tests/unittests/test_adamw_op.py    |  4 +-
 .../fluid/tests/unittests/test_backward.py    |  6 +-
 .../fluid/tests/unittests/test_bpr_loss_op.py |  3 +
 .../unittests/test_communicator_async.py      |  2 +-
 .../tests/unittests/test_communicator_geo.py  |  2 +-
 .../unittests/test_communicator_ps_gpu.py     |  2 +-
 .../test_dist_fleet_a_sync_optimizer_async.py |  4 +-
 .../test_dist_fleet_a_sync_optimizer_sync.py  |  2 +-
 .../test_dist_fleet_trainer_desc_config.py    |  2 +-
 .../tests/unittests/test_dist_transpiler.py   | 21 +++----
 .../fluid/tests/unittests/test_downpoursgd.py | 12 +++-
 .../test_eager_deletion_padding_rnn.py        |  2 +-
 .../fluid/tests/unittests/test_exception.py   |  2 +-
 .../unittests/test_executor_check_feed.py     |  2 +-
 .../test_executor_feed_non_tensor.py          |  2 +-
 .../tests/unittests/test_imperative_gnn.py    |  8 ++-
 ..._imperative_lod_tensor_to_selected_rows.py |  2 +-
 .../unittests/test_imperative_ptb_rnn.py      |  2 +-
 .../unittests/test_imperative_save_load.py    |  2 +-
 .../unittests/test_imperative_save_load_v2.py |  2 +-
 ..._imperative_selected_rows_to_lod_tensor.py |  2 +-
 ..._imperative_transformer_sorted_gradient.py |  2 +-
 .../unittests/test_inference_model_io.py      | 24 ++++++--
 ...test_inplace_softmax_with_cross_entropy.py |  3 +-
 .../fluid/tests/unittests/test_lambv2_op.py   |  4 +-
 .../fluid/tests/unittests/test_layers.py      | 26 ++++++---
 .../unittests/test_listen_and_serv_op.py      |  4 +-
 .../unittests/test_lookup_table_v2_op.py      |  2 +-
 .../tests/unittests/test_memory_usage.py      |  2 +-
 .../fluid/tests/unittests/test_momentum_op.py |  8 ++-
 .../unittests/test_network_with_dtype.py      |  4 +-
 .../test_optimizer_in_control_flow.py         |  6 +-
 .../unittests/test_program_prune_backward.py  |  8 ++-
 .../fluid/tests/unittests/test_rmsprop_op.py  |  4 +-
 .../tests/unittests/test_rnn_cell_api.py      |  2 +-
 .../fluid/tests/unittests/test_sgd_op.py      |  2 +-
 .../tests/unittests/test_square_error_cost.py |  9 ++-
 .../tests/unittests/test_static_save_load.py  |  2 +-
 .../unittests/xpu/test_adadelta_op_xpu.py     |  4 +-
 .../tests/unittests/xpu/test_sgd_op_xpu.py    |  2 +-
 .../fluid/transpiler/distribute_transpiler.py |  7 ++-
 77 files changed, 205 insertions(+), 180 deletions(-)
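The whole patch applies one mechanical substitution: call sites of the removed `fluid.layers` loss aliases move to `paddle.nn.functional`. A minimal before/after sketch of the pattern (the tensors here are illustrative, not taken from any file in the patch):

```python
import paddle

input = paddle.to_tensor([1.1, 1.9])
label = paddle.to_tensor([1.0, 2.0])

# Pre-patch spelling (alias removed by this change):
#   cost = paddle.fluid.layers.square_error_cost(input=input, label=label)
# Post-patch spelling used throughout the patch:
cost = paddle.nn.functional.square_error_cost(input=input, label=label)
print(cost)  # [0.01, 0.01] -- elementwise (input - label)^2
```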
diff --git a/python/paddle/fluid/contrib/optimizer.py b/python/paddle/fluid/contrib/optimizer.py
index 31b175ba62..57df115b96 100644
--- a/python/paddle/fluid/contrib/optimizer.py
+++ b/python/paddle/fluid/contrib/optimizer.py
@@ -78,7 +78,6 @@ class Momentum(Optimizer):
             import numpy as np
 
             paddle.enable_static()
-
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py
index 2003380fa1..d6aff8d41c 100644
--- a/python/paddle/fluid/contrib/slim/quantization/adaround.py
+++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py
@@ -16,6 +16,7 @@ import numpy as np
 import time
 import sys
 import logging
+import paddle
 
 import paddle
 import paddle.fluid as fluid
@@ -61,7 +62,7 @@ class AdaRoundLoss:
         self.default_beta_range = default_beta_range
 
     def compute_recon_loss(self, ada_quantized_output, orig_output):
-        square_cost = fluid.layers.square_error_cost(
+        square_cost = paddle.nn.functional.square_error_cost(
             ada_quantized_output, orig_output
         )
         recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1))
diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py
index 8ddca1b354..4184166806 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py
@@ -50,7 +50,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
                 name=fc_tmp.name, dtype=fc_tmp.dtype
             )
             fc_tmp_1 = out_scale(fc_tmp)
-            cross_entropy = fluid.layers.softmax_with_cross_entropy(
+            cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
                 fc_tmp, label
             )
             loss = paddle.mean(cross_entropy)
diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
index ab9ebfa719..362dde4d48 100644
--- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
+++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
@@ -127,7 +127,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
         raise ValueError("%s network is not supported" % net_type)
 
     logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
-    cost, predict = fluid.layers.softmax_with_cross_entropy(
+    cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
         logits, label, return_softmax=True
     )
     avg_cost = paddle.mean(cost)
@@ -509,7 +509,7 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase):
             net = vgg16_bn_drop(image)
             logits = fluid.layers.fc(input=net, size=10, act="softmax")
-            cost, predict = fluid.layers.softmax_with_cross_entropy(
+            cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                 logits, label, return_softmax=True
             )
             avg_cost = paddle.mean(cost)
diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
index ba0f6534ad..4265594f71 100644
--- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
+++ b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py
@@ -110,7 +110,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     net = resnet_cifar10(images)
     logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
-    cost = fluid.layers.softmax_with_cross_entropy(
+    cost = paddle.nn.functional.softmax_with_cross_entropy(
         logits, label, return_softmax=False
     )
     sum_cost = paddle.sum(cost)
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index fa00813146..a3db0f70a6 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -21,7 +21,7 @@ from .layer_function_generator import templatedoc
 from ..layer_helper import LayerHelper
 from ..framework import Variable, _non_static_mode, static_only, in_dygraph_mode
 from .. import core
-from .loss import softmax_with_cross_entropy
+from paddle.fluid.layers import softmax_with_cross_entropy
 from . import tensor
 from . import nn
 from ..data_feeder import check_variable_and_dtype, check_type, check_dtype
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 65a52415f1..1d24489638 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -36,7 +36,6 @@ from paddle import _C_ops, _legacy_C_ops
 
 __all__ = [
     'cross_entropy',
-    'square_error_cost',
     'softmax_with_cross_entropy',
 ]
 
@@ -144,41 +143,6 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
     return out
 
 
-def square_error_cost(input, label):
-    r"""
-
-    Accept input predictions and target label and returns the
-    squared error cost.
-
-    For predictions label, and target label, the equation is:
-
-    .. math::
-
-        Out = (input - label)^2
-
-    Parameters:
-        input (Tensor): Input tensor, the data type should be float32.
-        label (Tensor): Label tensor, the data type should be float32.
-
-    Returns:
-        Tensor, The tensor storing the element-wise squared
-        error difference between input and label.
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle
-            input = paddle.to_tensor([1.1, 1.9])
-            label = paddle.to_tensor([1.0, 2.0])
-            output = paddle.nn.functional.square_error_cost(input, label)
-            print(output)
-            # [0.01, 0.01]
-
-    """
-    return paddle.nn.functional.square_error_cost(input, label)
-
-
 def softmax_with_cross_entropy(
     logits,
     label,
     soft_label=False,
     ignore_index=kIgnoreIndex,
     numeric_stable_mode=True,
     return_softmax=False,
     axis=-1,
 ):
     r"""
-
     This operator implements the cross entropy loss function with softmax. This function
     combines the calculation of the softmax operation and the cross entropy loss function
     to provide a more numerically stable gradient.
-
     Because this operator performs a softmax on logits internally, it expects unscaled
     logits. This operator should not be used with the output of softmax operator since
     that would produce incorrect results.
-
     When the attribute :attr:`soft_label` is set :attr:`False`, this operator
     expects mutually exclusive hard labels, each sample in a batch is in exactly
     one class with a probability of 1.0. Each sample in the batch will have a
     single label.
-
     The equation is as follows:
-
     1) Hard label (one-hot label, so every sample has exactly one class)
-
     .. math::
-
         loss_j = -\\text{logits}_{label_j} +
         \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K
-
     2) Soft label (each sample can have a distribution over all classes)
-
     .. math::
-
         loss_j = -\\sum_{i=0}^{K}\\text{label}_i
         \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K}
         \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K
-
     3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by:
-
     .. math::
-
         max_j &= \\max_{i=0}^{K}{\\text{logits}_i}
 
         log\\_max\\_sum_j &= \\log\\sum_{i=0}^{K}\\exp(logits_i - max_j)
 
         softmax_j &= \\exp(logits_j - max_j - {log\\_max\\_sum}_j)
-
     and then cross entropy loss is calculated by softmax and label.
-
     Args:
         logits (Tensor): A multi-dimension ``Tensor`` , and the data type is float32
             or float64. The input tensor of unscaled log probabilities.
         label (Tensor): The ground truth ``Tensor`` , data type is the same
@@ -258,7 +205,6 @@
         axis (int, optional): The index of dimension to perform softmax calculations.
             It should be in range :math:`[-1, rank - 1]`, while :math:`rank`
             is the rank of input :attr:`logits`. Default: -1.
-
     Returns:
         ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
             `return_softmax` is False, otherwise the tuple \
             (loss, softmax), softmax is in the same shape \
             with input logits and cross entropy loss is in \
             the same shape with input logits except shape \
             in dimension :attr:`axis` as 1.
-
     Examples:
         .. code-block:: python
-
             import paddle
             import numpy as np
-
             data = np.random.rand(128).astype("float32")
             label = np.random.rand(1).astype("int64")
             data = paddle.to_tensor(data)
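The hard-label equation kept in the docstring above can be sanity-checked against a direct log-sum-exp computation; a small sketch with made-up logits (not taken from the patch):

```python
import paddle

logits = paddle.to_tensor([[2.0, 0.5, -1.0]])
label = paddle.to_tensor([[0]], dtype='int64')

loss = paddle.nn.functional.softmax_with_cross_entropy(logits, label)

# Hard-label case from the docstring:
#   loss = -logits[label] + log(sum_i exp(logits_i))
manual = -logits[0, 0] + paddle.logsumexp(logits[0])
assert abs(float(loss) - float(manual)) < 1e-6
```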
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 8c9a940d84..ea10b49e9c 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1441,13 +1441,14 @@ class SGDOptimizer(Optimizer):
             import paddle.fluid as fluid
             import numpy as np
 
+            paddle.enable_static()
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
@@ -1642,13 +1643,14 @@ class MomentumOptimizer(Optimizer):
             import paddle.fluid as fluid
             import numpy as np
 
+            paddle.enable_static()
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 moment_optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
@@ -2219,13 +2221,14 @@ class AdamOptimizer(Optimizer):
             import paddle
             import paddle.fluid as fluid
 
+            paddle.enable_static()
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.data(name='x', shape=[None, 13], dtype='float32')
                 y = fluid.data(name='y', shape=[None, 1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
@@ -2247,13 +2250,14 @@ class AdamOptimizer(Optimizer):
             import paddle.fluid as fluid
             import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
 
+            paddle.enable_static()
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.data(name='x', shape=[None, 13], dtype='float32')
                 y = fluid.data(name='y', shape=[None, 1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 # define beta decay variable
@@ -3276,13 +3280,14 @@ class RMSPropOptimizer(Optimizer):
             import paddle.fluid as fluid
             import numpy as np
 
+            paddle.enable_static()
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 rms_optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
@@ -3493,13 +3498,15 @@ class FtrlOptimizer(Optimizer):
             import paddle.fluid as fluid
             import numpy as np
 
+            paddle.enable_static()
+
             place = fluid.CPUPlace()
             main = fluid.Program()
             with fluid.program_guard(main):
                 x = fluid.layers.data(name='x', shape=[13], dtype='float32')
                 y = fluid.layers.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_cost = fluid.layers.mean(cost)
 
                 ftrl_optimizer = fluid.optimizer.Ftrl(learning_rate=0.1)
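Every optimizer docstring above gains a `paddle.enable_static()` line because the `fluid.Program`/`program_guard` examples only run in static-graph mode, which Paddle 2.x no longer enables by default. A runnable sketch of the pattern the docstrings share, assuming CPU execution:

```python
import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()  # fluid.Program APIs require static-graph mode

place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_cost)

exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())  # initialize fc parameters
x_np = np.random.rand(64, 13).astype('float32')
y_np = np.random.rand(64, 1).astype('float32')
(loss_val,) = exe.run(main, feed={'x': x_np, 'y': y_np}, fetch_list=[avg_cost])
```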
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index a6cca9ceeb..558ce9febe 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -56,16 +56,20 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
         if not pure_bf16:
             with amp.bf16.bf16_guard():
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(
+                    input=y_predict, label=y
+                )
                 avg_cost = paddle.mean(cost)
         else:
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
             with amp.bf16.bf16_guard():
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(
+                    input=y_predict, label=y
+                )
                 avg_cost = paddle.mean(cost)
     else:
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
 
     lr = 5e-3 if use_bf16 else 1e-3
diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py
index 3aacd377dc..89da6135a8 100644
--- a/python/paddle/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/fluid/tests/book/test_recommender_system.py
@@ -167,7 +167,9 @@ def model():
     scale_infer = paddle.scale(x=inference, scale=5.0)
 
     label = layers.data(name='score', shape=[1], dtype='float32')
-    square_cost = layers.square_error_cost(input=scale_infer, label=label)
+    square_cost = paddle.nn.functional.square_error_cost(
+        input=scale_infer, label=label
+    )
     avg_cost = paddle.mean(square_cost)
 
     return scale_infer, avg_cost
diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
index 15d62544d2..cb6f8a0a29 100644
--- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
+++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py
@@ -69,7 +69,7 @@ class AutoCheckpointBase(unittest.TestCase):
         label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
         fc_tmp = fluid.layers.fc(image, size=CLASS_NUM)
-        cross_entropy = fluid.layers.softmax_with_cross_entropy(
+        cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
             fc_tmp, label
         )
         loss = paddle.mean(cross_entropy)
diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
index c6e9a36ddf..e9812d11ba 100644
--- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
+++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py
@@ -63,7 +63,7 @@ def net():
     hidden = fluid.layers.fc(input=hidden, size=400, act="sigmoid")
 
     hidden = fluid.layers.fc(input=hidden, size=3, act=None)
-    cost, y_predict = fluid.layers.softmax_with_cross_entropy(
+    cost, y_predict = paddle.nn.functional.softmax_with_cross_entropy(
         hidden, y, return_softmax=True
     )
     acc_top1 = paddle.static.accuracy(input=y_predict, label=y, k=1)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py
index dfcdd5b130..104aa658ec 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py
@@ -57,7 +57,7 @@ class SimpleNet(Layer):
         fc = fluid.layers.matmul(x_emb, self.softmax_weight)
         fc = fluid.layers.elementwise_add(fc, self.softmax_bias)
         projection = paddle.reshape(fc, shape=[-1, vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=y1, soft_label=False
         )
         return loss.mean()
@@ -106,7 +106,7 @@ class LossNet(Layer):
     def forward(self, args, y1):
         projection, x2 = args
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=y1[0], soft_label=False
         )
         return loss.mean()
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py
index f63d7c9ad3..58c0fe7465 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py
@@ -67,7 +67,7 @@ class SimpleNet(Layer):
 
         projection = paddle.matmul(projection, self.word_embeddings.weight)
 
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=y1, soft_label=False
         )
         return loss.mean()
@@ -120,7 +120,7 @@ class LossNet(Layer):
     def forward(self, args, y1):
         projection = args
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=y1[0], soft_label=False
         )
         return loss.mean()
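Both pipeline-parallel tests reduce a per-token cross entropy to a scalar in their `LossNet`. A minimal dynamic-graph sketch of that reduction (shapes are invented for illustration):

```python
import paddle

batch_times_steps, vocab_size = 8, 10
projection = paddle.randn([batch_times_steps, vocab_size])    # stand-in logits
y1 = paddle.randint(0, vocab_size, [batch_times_steps, 1])    # hard labels

loss = paddle.nn.functional.softmax_with_cross_entropy(
    logits=projection, label=y1, soft_label=False
)
scalar_loss = loss.mean()  # the scalar each LossNet.forward returns
```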
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
index ae747cb465..6792cf2877 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py
@@ -941,7 +941,7 @@ class TransFormer(Layer):
                 epsilon=self._label_smooth_eps,
             )
 
-        cost = fluid.layers.softmax_with_cross_entropy(
+        cost = paddle.nn.functional.softmax_with_cross_entropy(
             logits=predict,
             label=label_out,
             soft_label=True if self._label_smooth_eps else False,
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
index cedfe94448..209233027c 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py
@@ -33,7 +33,7 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase):
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         return avg_cost, x, y
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py
index 7427e6a58f..550fc5db90 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py
@@ -29,7 +29,7 @@ class TestCommunicator(unittest.TestCase):
     def net(self):
         x = fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
         return avg_cost
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
index 54ee7dbdc9..245aa097b6 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py
@@ -273,7 +273,7 @@ class TestDebugInfo(unittest.TestCase):
         x = fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
 
         role = role_maker.UserDefinedRoleMaker(
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
index 16deaa3bbb..96e8425101 100644
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py
@@ -449,7 +449,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
         x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32')
         y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None)
         y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = paddle.fluid.layers.square_error_cost(
+        cost = paddle.nn.functional.square_error_cost(
             input=y_predict, label=y
         )
         avg_cost = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py
index 8a8b013b6b..52b6f674e5 100644
--- a/python/paddle/fluid/tests/unittests/dist_transformer.py
+++ b/python/paddle/fluid/tests/unittests/dist_transformer.py
@@ -1585,7 +1585,7 @@ def transformer(
             epsilon=label_smooth_eps,
         )
 
-    cost = layers.softmax_with_cross_entropy(
+    cost = paddle.nn.functional.softmax_with_cross_entropy(
         logits=paddle.reshape(predict, shape=[-1, trg_vocab_size]),
         label=label,
         soft_label=True if label_smooth_eps else False,
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
index 64d0a8dc73..d45d775829 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py
@@ -410,7 +410,7 @@ class PretrainModelLayer(Layer):
         else:
             fc_out = self.out_fc(mask_trans_feat)
 
-        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
+        mask_lm_loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=fc_out, label=mask_label
         )
         mean_mask_lm_loss = paddle.mean(mask_lm_loss)
@@ -420,7 +420,7 @@ class PretrainModelLayer(Layer):
         (
             next_sent_loss,
             next_sent_softmax,
-        ) = fluid.layers.softmax_with_cross_entropy(
+        ) = paddle.nn.functional.softmax_with_cross_entropy(
            logits=next_sent_fc_out, label=labels, return_softmax=True
         )
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
index 5f22b03cc9..bf1dfdcad2 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py
@@ -294,7 +294,7 @@ class BaseModel(fluid.dygraph.Layer):
         dec_output = paddle.stack(dec_output)
         dec_output = self.fc(self._transpose_batch_time(dec_output))
 
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=dec_output, label=label, soft_label=False
         )
         loss = paddle.squeeze(loss, axis=[2])
@@ -828,7 +828,7 @@ class AttentionModel(fluid.dygraph.Layer):
         dec_output = paddle.stack(dec_output)
         dec_output = self.fc(self._transpose_batch_time(dec_output))
 
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=dec_output, label=label, soft_label=False
         )
         loss = paddle.squeeze(loss, axis=[2])
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
index f8e657499a..0cb3e33304 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
@@ -379,7 +379,7 @@ def bmn_loss_func(
         weights = u_hmask + u_smmask + u_slmask
         weights.stop_gradient = True
 
-        loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
+        loss = paddle.nn.functional.square_error_cost(pred_score, gt_iou_map)
         loss = paddle.multiply(loss, weights)
         loss = 0.5 * paddle.sum(loss) / paddle.sum(weights)
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
index c7135a8ff7..62c6c18346 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py
@@ -216,7 +216,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
 
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
index 209127104b..accf36ff17 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py
@@ -576,7 +576,7 @@ class CrossEntropyCriterion:
                 epsilon=self.label_smooth_eps,
             )
 
-        cost = layers.softmax_with_cross_entropy(
+        cost = paddle.nn.functional.softmax_with_cross_entropy(
             logits=predict,
             label=label_out,
             soft_label=True if self.label_smooth_eps else False,
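The transformer criterions above pair `label_smooth` with `soft_label=True`: the one-hot targets are smoothed first and then scored as a full distribution. A compact sketch of that combination (epsilon and shapes are illustrative, not taken from the patch):

```python
import paddle
import paddle.nn.functional as F

num_tokens, vocab_size, eps = 8, 10, 0.1
predict = paddle.randn([num_tokens, vocab_size])
label = paddle.randint(0, vocab_size, [num_tokens])

# Smooth the one-hot targets, then treat them as soft labels.
label_out = F.label_smooth(F.one_hot(label, vocab_size), epsilon=eps)
cost = F.softmax_with_cross_entropy(
    logits=predict, label=label_out, soft_label=True
)
```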
diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py
index 73ddadc0ac..dbdfab2882 100644
--- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py
+++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py
@@ -220,7 +220,7 @@ class TestWithoutIdentityLoss1(TestBase):
 
 class TestWithoutIdentityLoss2(TestBase):
     def set_op_attrs(self):
-        self.loss_op = paddle.fluid.layers.softmax_with_cross_entropy
+        self.loss_op = paddle.nn.functional.softmax_with_cross_entropy
 
     def set_data_feed(self):
         self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py
index 5e83c7e57d..a286dbf585 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_huber_loss_op_mlu.py
@@ -103,4 +103,4 @@ def TestHuberLossOp3(TestHuberLossOp):
 
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
\ No newline at end of file
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
index 8efc129b68..cb393cbd37 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py
@@ -143,7 +143,7 @@ class TestMomentumV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             rms_optimizer = paddle.optimizer.Momentum(
@@ -268,7 +268,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
index f210ea0b63..c924bdc691 100644
--- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py
@@ -126,7 +126,7 @@ class TestPowNet(unittest.TestCase):
             fc_1 = fluid.layers.fc(input=z, size=128)
             prediction = fluid.layers.fc(input=fc_1, size=2)
 
-            cost = fluid.layers.softmax_with_cross_entropy(prediction, label)
+            cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
             loss = paddle.mean(cost)
             sgd = fluid.optimizer.SGD(learning_rate=0.01)
             sgd.minimize(loss)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
index 9719c5582b..fe0882c733 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py
@@ -111,7 +111,7 @@ class TestMomentumV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             rms_optimizer = paddle.optimizer.Momentum(
@@ -239,7 +239,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
index 5bc1700cc1..adb1b1b269 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -275,7 +275,7 @@ class TestSliceNet(unittest.TestCase):
 
             prediction = paddle.static.nn.fc(z, size=2, activation='softmax')
 
-            cost = paddle.fluid.layers.softmax_with_cross_entropy(
+            cost = paddle.nn.functional.softmax_with_cross_entropy(
                 logits=prediction, label=label
             )
             loss = paddle.mean(cost)
diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
index 487ca61320..f47a0275af 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py
@@ -124,7 +124,7 @@ class TestPowNet(unittest.TestCase):
             fc_1 = fluid.layers.fc(input=z, size=128)
             prediction = fluid.layers.fc(input=fc_1, size=2)
 
-            cost = fluid.layers.softmax_with_cross_entropy(prediction, label)
+            cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
             loss = paddle.mean(cost)
             sgd = fluid.optimizer.SGD(learning_rate=0.01)
             sgd.minimize(loss)
diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
index 9e0ed71d03..7c46efe775 100644
--- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
+++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py
@@ -68,7 +68,7 @@ class SimpleNet(fluid.Layer):
         fc = fluid.layers.matmul(x_emb, self.softmax_weight)
         fc = paddle.add(fc, self.softmax_bias)
         projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py
index 73a3c1e1cb..eb0fec336a 100644
--- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py
@@ -146,7 +146,9 @@ class TestAdadeltaV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)
 
             rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py
index 715b5460ed..6298c923a2 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -941,7 +941,9 @@ class TestAdamOptimizer(unittest.TestCase):
             y_predict = fluid.layers.fc(
                 input=x, size=1, act=None, param_attr=weight_attr
             )
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)
 
             adam = fluid.optimizer.AdamOptimizer(
diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
index 8e43728fb8..79c653cdfb 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py
@@ -33,7 +33,9 @@ def main_test_func(place, dtype):
             x = fluid.data(name='x', shape=[None, 13], dtype=dtype)
             y = fluid.data(name='y', shape=[None, 1], dtype=dtype)
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)
 
             adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py
index 1810a4bea6..405a85235c 100644
--- a/python/paddle/fluid/tests/unittests/test_adamw_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py
@@ -621,7 +621,9 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp):
             fc2_b_mon1 = np.zeros((linear2.bias.shape)).astype("float32")
             fc2_b_mon2 = np.zeros((linear2.bias.shape)).astype("float32")
 
-            cost = fluid.layers.square_error_cost(input=out, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=out, label=y
+            )
             avg_cost = paddle.mean(cost)
 
             simple_lr_fun = partial(
diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py
index c98fd7dff5..a24d49b5ea 100644
--- a/python/paddle/fluid/tests/unittests/test_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_backward.py
@@ -262,7 +262,9 @@ class SimpleNet(BackwardNet):
             name='fc_no_use',
         )
         # loss
-        cost = fluid.layers.square_error_cost(input=predict, label=label)
+        cost = paddle.nn.functional.square_error_cost(
+            input=predict, label=label
+        )
         loss = paddle.mean(cost, name='mean_loss')
 
         return loss
@@ -330,7 +332,7 @@ class TestAppendBackwardWithError(unittest.TestCase):
         y = fluid.data(name='y', shape=[None, 1], dtype='float32')
         x_emb = fluid.embedding(x, size=[100, 256])
         y_predict = fluid.layers.fc(input=x_emb, size=1, name='my_fc')
-        loss = fluid.layers.square_error_cost(input=y_predict, label=y)
+        loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_loss = paddle.mean(loss)
         param_names = [
             param.name
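test_backward.py builds its losses so that gradient ops can be appended programmatically and then inspected per parameter. A minimal sketch of that flow; the `append_backward` call is inferred from the test's purpose and parameter list, not shown in these hunks:

```python
import paddle
import paddle.fluid as fluid

paddle.enable_static()

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.data(name='x', shape=[None, 2], dtype='float32')
    y = fluid.data(name='y', shape=[None, 1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, name='my_fc')
    loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
    avg_loss = paddle.mean(loss)
    # Appends gradient ops for every parameter reachable from avg_loss
    # and returns (parameter, gradient) pairs.
    params_grads = fluid.backward.append_backward(avg_loss)
```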
diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py
index 5bd6dbb2fd..7cd3c98a68 100644
--- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py
@@ -17,6 +17,8 @@ import unittest
 import numpy as np
 from op_test import OpTest, randomize_probability
 
+import paddle
+
 
 class TestBprLossOp1(OpTest):
     """Test BprLoss with discrete one-hot labels."""
@@ -47,4 +49,5 @@ class TestBprLossOp1(OpTest):
 
 
 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py
index 978e0d644c..ebc65cd5ac 100644
--- a/python/paddle/fluid/tests/unittests/test_communicator_async.py
+++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py
@@ -30,7 +30,7 @@ class TestCommunicator(unittest.TestCase):
         x = fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
         return avg_cost
diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py
index 9363e2fe47..073a9018c1 100644
--- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py
+++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py
@@ -49,7 +49,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase):
         y_predict = fluid.layers.fc(input=z, size=1, act=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         return avg_cost, x, x1, y
diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
index fbea8b003c..f0c1e9c885 100644
--- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
+++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py
@@ -55,7 +55,7 @@ class TestCommunicator(unittest.TestCase):
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
         slots_vars = [x, y]
 
-        cost = fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
 
         optimizer = fluid.optimizer.Adam(0.01)
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py
index 4aaf596d57..c80e1a68fd 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py
@@ -47,7 +47,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
 
         strategy = paddle.distributed.fleet.DistributedStrategy()
@@ -85,7 +85,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
 
         strategy = paddle.distributed.fleet.DistributedStrategy()
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py
index 19c0f48e7c..fdaa0a69c8 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py
@@ -40,7 +40,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
 
         strategy = paddle.distributed.fleet.DistributedStrategy()
diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py
index e64b7d8010..b13e2b8171 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py
@@ -41,7 +41,7 @@ class TestDistStrategyTrainerDescConfig(unittest.TestCase):
         x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
         y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
 
-        cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=x, label=y)
         avg_cost = paddle.mean(cost)
 
         strategy = paddle.distributed.fleet.DistributedStrategy()
diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
index 00a4742021..45cdf97236 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py
@@ -47,7 +47,7 @@ class TranspilerTest(unittest.TestCase):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
         sgd_optimizer.minimize(avg_cost)
@@ -302,7 +302,7 @@ class TestLRDecay(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(
             learning_rate=fluid.layers.exponential_decay(
@@ -471,7 +471,7 @@ class TestDecayedAdagrad(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1)
         opt.minimize(avg_cost)
@@ -492,7 +492,7 @@ class TestFtrl(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         opt = fluid.optimizer.Ftrl(learning_rate=0.1)
         opt.minimize(avg_cost)
@@ -513,7 +513,7 @@ class TestLRDecayConditional(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(
             learning_rate=fluid.layers.piecewise_decay(
@@ -579,7 +579,7 @@ class TestL2Decay(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
        )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
 
@@ -616,7 +616,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         base_lr = 1.0
         bd = [1, 10, 20, 30]
@@ -692,7 +692,7 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest):
             bias_attr=False,
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0)
         sgd_optimizer.minimize(avg_cost)
@@ -1134,7 +1134,7 @@ class TestRMSPropOptimizer(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
         optimizer.minimize(avg_cost)
@@ -1167,7 +1167,7 @@ class TestLoadSliceVar(TranspilerTest):
             bias_attr=fluid.ParamAttr(name='fc_b'),
         )
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)
         optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
         optimizer.minimize(avg_cost)
@@ -1452,6 +1452,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase):
             path_table=path_table,
             path_code=path_code,
         )
+        avg_cost = paddle.mean(cost)
         # optimizer
         optimizer = fluid.optimizer.SGD(learning_rate=0.003)
diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py
index 556bdda232..2e15d059db 100644
--- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py
+++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py
@@ -58,7 +58,9 @@ class TestListenAndServOp(unittest.TestCase):
         )
         y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)
 
         ps_param = pslib.PSParameter()
@@ -120,7 +122,9 @@ class TestListenAndServOp(unittest.TestCase):
         )
         y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)
 
         ps_param = pslib.PSParameter()
@@ -180,7 +184,9 @@ class TestListenAndServOp(unittest.TestCase):
         )
         y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)
 
         ps_param = pslib.PSParameter()
diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
index ccdf56e64f..8ba799e84b 100644
--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
@@ -463,7 +463,7 @@ def lm_model(
     projection = paddle.add(projection, softmax_bias)
     projection = paddle.reshape(projection, shape=[-1, vocab_size])
 
-    loss = layers.softmax_with_cross_entropy(
+    loss = paddle.nn.functional.softmax_with_cross_entropy(
         logits=projection, label=y, soft_label=False
     )
diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py
index c627f8688a..45a11656cc 100644
--- a/python/paddle/fluid/tests/unittests/test_exception.py
+++ b/python/paddle/fluid/tests/unittests/test_exception.py
@@ -43,7 +43,7 @@ class TestExceptionNoCStack(unittest.TestCase):
         x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32')
         y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32')
         predict = fluid.layers.fc(input=x, size=1, act=None)
-        loss = fluid.layers.square_error_cost(input=predict, label=y)
+        loss = paddle.nn.functional.square_error_cost(input=predict, label=y)
         avg_loss = paddle.mean(loss)
 
         fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
index 77122f46bd..9696ebcc44 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py
@@ -25,7 +25,7 @@ class TestExecutor(unittest.TestCase):
             y = fluid.data(name="y", shape=[None, 1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
 
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             opt = fluid.optimizer.Adam(learning_rate=lr)
diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
index e2c52d99fd..3d83711777 100644
--- a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py
@@ -27,7 +27,7 @@ class TestExecutor(unittest.TestCase):
             y = fluid.data(name="y", shape=[None, 1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
 
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
             avg_cost = paddle.mean(cost)
 
             opt = fluid.optimizer.Adam(learning_rate=lr)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
index bff393f38d..f62dfe436a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py
@@ -96,7 +96,9 @@ class TestDygraphGNN(unittest.TestCase):
             logits = paddle.reshape(logits, logits.shape[1:])
             # In other example, it's nll with log_softmax. However, paddle's
             # log_loss only supports binary classification now.
-            loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
+            loss = paddle.nn.functional.softmax_with_cross_entropy(
+                logits, labels
+            )
             loss = paddle.sum(loss)
 
             adam = AdamOptimizer(learning_rate=1e-3)
@@ -134,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase):
             logits = paddle.reshape(logits, logits.shape[1:])
             # In other example, it's nll with log_softmax. However, paddle's
             # log_loss only supports binary classification now.
-            loss = fluid.layers.softmax_with_cross_entropy(
+            loss = paddle.nn.functional.softmax_with_cross_entropy(
                 logits, to_variable(labels)
             )
             loss = paddle.sum(loss)
@@ -162,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase):
             logits2 = paddle.reshape(logits2, logits2.shape[1:])
             # In other example, it's nll with log_softmax. However, paddle's
             # log_loss only supports binary classification now.
-            loss2 = fluid.layers.softmax_with_cross_entropy(
+            loss2 = paddle.nn.functional.softmax_with_cross_entropy(
                 logits2, to_variable(labels2)
             )
             loss2 = paddle.sum(loss2)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
index ed5d93961d..76733836dd 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py
@@ -69,7 +69,7 @@ class SimpleNet(fluid.Layer):
         )
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
index c86a802a0a..3980b0dbb2 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
@@ -228,7 +228,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index f9b618cedf..a386e2113f 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -224,7 +224,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
index bd6a6ca22f..19f4616d92 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py
@@ -225,7 +225,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
index 5b53331901..bfba325046 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
@@ -78,7 +78,7 @@ class SimpleNet(fluid.Layer):
             fc, paddle.transpose(self.embedding.weight, perm=[1, 0])
         )
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index 654ebf198b..c99fbcf4e9 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -1099,7 +1099,7 @@ class TransFormer(Layer):
                 epsilon=self._label_smooth_eps,
             )
-        cost = fluid.layers.softmax_with_cross_entropy(
+        cost = paddle.nn.functional.softmax_with_cross_entropy(
             logits=predict,
             label=label_out,
             soft_label=True if self._label_smooth_eps else False,
diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
index 9cf82e16f7..daeae8e472 100644
--- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py
+++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
@@ -59,7 +59,9 @@ class TestBook(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -153,7 +155,9 @@ class TestSaveInferenceModel(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         place = core.CPUPlace()
@@ -209,7 +213,9 @@ class TestInstance(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         place = core.CPUPlace()
@@ -245,7 +251,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -422,7 +430,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -469,7 +479,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
         y_predict = layers.fc(input=x, size=1, act=None)
-        cost = layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
index fb6f04d48f..16477e086d 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
@@ -16,6 +16,7 @@ import unittest

 import numpy as np

+import paddle
 import paddle.fluid as fluid

@@ -48,7 +49,7 @@ class TestSoftmaxWithXe(unittest.TestCase):
             dtype='int64' if not self.soft_label else self.dtype,
             append_batch_size=False,
         )
-        z_d, s_d = fluid.layers.softmax_with_cross_entropy(
+        z_d, s_d = paddle.nn.functional.softmax_with_cross_entropy(
             x_d,
             y_d,
             soft_label=self.soft_label,
diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
index 6b51300810..d3abf54a00 100644
--- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
@@ -126,7 +126,9 @@ class TestLambOpWithCombinedOp(unittest.TestCase):
             x = fluid.layers.data(name='X', shape=[13], dtype='float32')
             y = fluid.layers.data(name='Y', shape=[1], dtype='float32')
             prediction = fluid.layers.fc(input=x, size=1, act=None)
-            loss = fluid.layers.square_error_cost(input=prediction, label=y)
+            loss = paddle.nn.functional.square_error_cost(
+                input=prediction, label=y
+            )
             avg_loss = paddle.mean(loss)
             return avg_loss
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 6079f7636f..1597269b29 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -3037,7 +3037,9 @@ class TestBook(LayerTest):
             x = self._get_data(name='x', shape=[13], dtype='float32')
             y_predict = layers.fc(input=x, size=1, act=None)
             y = self._get_data(name='y', shape=[1], dtype='float32')
-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)
             return avg_cost
@@ -3256,23 +3258,31 @@
         ):
             x = self._get_data(name='x', shape=[16], dtype='float32')
             y = self._get_data(name='label', shape=[1], dtype='int64')
-            loss, softmax = layers.softmax_with_cross_entropy(
+            loss, softmax = paddle.nn.functional.softmax_with_cross_entropy(
                 x, y, return_softmax=True
             )
             self.assertIsNotNone(loss)
             self.assertIsNotNone(softmax)

-            loss = layers.softmax_with_cross_entropy(x, y)
+            loss = paddle.nn.functional.softmax_with_cross_entropy(x, y)
             self.assertIsNotNone(loss)

             x1 = self._get_data(name='x1', shape=[16, 32, 64], dtype='float32')
             y1 = self._get_data(name='label1', shape=[1, 32, 64], dtype='int64')
             y2 = self._get_data(name='label2', shape=[16, 1, 64], dtype='int64')
             y3 = self._get_data(name='label3', shape=[16, 32, 1], dtype='int64')
-            loss1 = layers.softmax_with_cross_entropy(x1, y1, axis=1)
-            loss2 = layers.softmax_with_cross_entropy(x1, y2, axis=2)
-            loss3 = layers.softmax_with_cross_entropy(x1, y3, axis=3)
-            loss4 = layers.softmax_with_cross_entropy(x1, y3, axis=-1)
+            loss1 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y1, axis=1
+            )
+            loss2 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y2, axis=2
+            )
+            loss3 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y3, axis=3
+            )
+            loss4 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y3, axis=-1
+            )
             self.assertIsNotNone(loss1)
             self.assertIsNotNone(loss2)
             self.assertIsNotNone(loss3)
@@ -3694,7 +3704,7 @@
         ):
             x = self._get_data(name="X", shape=[1], dtype="float32")
             y = self._get_data(name="Y", shape=[1], dtype="float32")
-            out = layers.square_error_cost(input=x, label=y)
+            out = paddle.nn.functional.square_error_cost(input=x, label=y)
             return out

     def test_dynamic_lstmp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
index 4c63a4f2a9..66557b8407 100644
--- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
@@ -36,7 +36,7 @@ def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')

     # loss function
-    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
     avg_cost = paddle.mean(cost)

     # optimizer
@@ -73,7 +73,7 @@ def run_pserver_with_empty_block(
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')

     # loss function
-    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
     avg_cost = paddle.mean(cost)

     # optimizer
diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
index 2a74fff41d..cea6858e0d 100644
--- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
@@ -216,7 +216,7 @@ class TestLookupTableIsSparse(unittest.TestCase):
         )
         y = paddle.sum(emb, axis=-1)
-        loss = fluid.layers.square_error_cost(input=y, label=y_)
+        loss = paddle.nn.functional.square_error_cost(input=y, label=y_)
         loss = paddle.mean(loss)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py
index 973dba893d..f1293ea7a7 100644
--- a/python/paddle/fluid/tests/unittests/test_memory_usage.py
+++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py
@@ -30,7 +30,7 @@ def train_simulator(test_batch_size=10):
     y_predict = fluid.layers.fc(input=x, size=1, act=None)
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
     avg_cost = paddle.mean(cost)

     sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py
index 102ef0a5fc..8c9ec6d429 100644
--- a/python/paddle/fluid/tests/unittests/test_momentum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py
@@ -532,7 +532,9 @@ class TestMomentumV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             rms_optimizer = paddle.optimizer.Momentum(
@@ -673,7 +675,9 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
index f230cc66c2..af4ff64c89 100644
--- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
+++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -33,7 +33,9 @@ class TestNetWithDtype(unittest.TestCase):
         x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
         y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
         sgd_optimizer.minimize(avg_cost)
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
index 3294b6f370..df07543fa7 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
@@ -82,7 +82,7 @@ def static(
     def fn_2(opt, avg_loss=None, pred=None, label=None):
         if avg_loss is None:
-            loss = layers.softmax_with_cross_entropy(
+            loss = paddle.nn.functional.softmax_with_cross_entropy(
                 logits=pred, label=label
             )
             avg_loss = paddle.mean(loss, name='mean_softmax_loss')
@@ -108,7 +108,7 @@ def static(
         else:
             loss_1 = layers.cross_entropy(input=prediction, label=label)
             avg_loss_1 = paddle.mean(loss_1)
-            loss_2 = layers.softmax_with_cross_entropy(
+            loss_2 = paddle.nn.functional.softmax_with_cross_entropy(
                 logits=prediction, label=label
             )
             avg_loss_2 = paddle.mean(loss_2)
@@ -193,7 +193,7 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
                 loss.backward()
                 adam.minimize(loss)
             else:
-                softmax_loss = layers.softmax_with_cross_entropy(
+                softmax_loss = paddle.nn.functional.softmax_with_cross_entropy(
                     prediction, var_label
                 )
                 loss = paddle.mean(softmax_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
index d751fd4b90..056afc5ead 100755
--- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
@@ -92,7 +92,9 @@ def cond_net(use_feed=None):
         return avg_loss

     def loss2(pred, label):
-        loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=pred, label=label
+        )
         avg_loss = paddle.mean(loss, name='mean_softmax_loss')
         return avg_loss
@@ -119,7 +121,9 @@ def optimization_in_cond_net(with_optimize=False):
         return avg_loss

     def loss2(opt, pred, label, with_optimize):
-        loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=pred, label=label
+        )
         avg_loss = paddle.mean(loss, name='mean_softmax_loss')
         if with_optimize:
             opt.minimize(avg_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
index 5e3e899eb6..81b75a1513 100644
--- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
@@ -280,7 +280,9 @@ class TestRMSPropV2(unittest.TestCase):
         x = fluid.layers.data(name='x', shape=[13], dtype='float32')
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
index 0d3ccae5bf..6b2383ed56 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
@@ -631,7 +631,7 @@ def def_seq2seq_model(
     )

     # loss
-    loss = layers.softmax_with_cross_entropy(
+    loss = paddle.nn.functional.softmax_with_cross_entropy(
         logits=logits, label=label, soft_label=False
     )
     loss = layers.unsqueeze(loss, axes=[2])
diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py
index 26c4dd18c1..b87d67c713 100644
--- a/python/paddle/fluid/tests/unittests/test_sgd_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py
@@ -204,7 +204,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32')
         out = fluid.layers.l2_normalize(x=emb, axis=-1)
-        cost = fluid.layers.square_error_cost(input=out, label=label)
+        cost = paddle.nn.functional.square_error_cost(input=out, label=label)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
         sgd_optimizer.minimize(avg_cost)
diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py
index 1fd516c050..7828f01b02 100644
--- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py
+++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py
@@ -16,6 +16,7 @@ import unittest

 import numpy as np

+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.layers as layers

@@ -32,7 +33,9 @@ class TestSquareErrorCost(unittest.TestCase):
         input_var = layers.create_tensor(dtype="float32", name="input")
         label_var = layers.create_tensor(dtype="float32", name="label")
-        output = layers.square_error_cost(input=input_var, label=label_var)
+        output = paddle.nn.functional.square_error_cost(
+            input=input_var, label=label_var
+        )

         for use_cuda in (
             [False, True] if core.is_compiled_with_cuda() else [False]
@@ -54,14 +57,14 @@ class TestSquareErrorInvalidInput(unittest.TestCase):
         def test_invalid_input():
             input = [256, 3]
             label = fluid.data(name='label1', shape=[None, 3], dtype='float32')
-            loss = fluid.layers.square_error_cost(input, label)
+            loss = paddle.nn.functional.square_error_cost(input, label)

         self.assertRaises(TypeError, test_invalid_input)

         def test_invalid_label():
             input = fluid.data(name='input2', shape=[None, 3], dtype='float32')
             label = [256, 3]
-            loss = fluid.layers.square_error_cost(input, label)
+            loss = paddle.nn.functional.square_error_cost(input, label)

         self.assertRaises(TypeError, test_invalid_label)
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index f417667a82..a2c44c5fae 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -237,7 +237,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
index afd7a57c36..f42ccf12c5 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
@@ -171,7 +171,9 @@ class XPUTestAdadelta(XPUOpTestWrapper):
         x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
         y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
         y_predict = fluid.layers.fc(input=x, size=1, act=None)
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(
+            input=y_predict, label=y
+        )
         avg_cost = paddle.mean(cost)

         rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
index e9cc501a87..46ecff205f 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
@@ -75,7 +75,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32')
         out = fluid.layers.l2_normalize(x=emb, axis=-1)
-        cost = fluid.layers.square_error_cost(input=out, label=label)
+        cost = paddle.nn.functional.square_error_cost(input=out, label=label)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
         sgd_optimizer.minimize(avg_cost)
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index ee98dc94a6..ebb249903b 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -283,11 +283,16 @@ class DistributeTranspiler:
         Examples:
             .. code-block:: python

+                import paddle
+                import paddle.fluid as fluid
+
+                paddle.enable_static()
+
                 x = fluid.data(name='x', shape=[13], dtype='float32')
                 y = fluid.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_loss = fluid.layers.mean(cost)
                 sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
--
GitLab
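
Reviewer note, appended after the patch and not part of it: every fluid.layers.square_error_cost call site above maps one-to-one onto paddle.nn.functional.square_error_cost. Both return the unreduced element-wise squared difference (input - label) ** 2, which is why each migrated site keeps its explicit paddle.mean(cost). A minimal dygraph sketch of the replacement API; the tensor values are illustrative and not taken from the tests:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    pred = paddle.to_tensor([[0.5], [1.5]], dtype='float32')
    label = paddle.to_tensor([[1.0], [1.0]], dtype='float32')

    # Element-wise (pred - label) ** 2; no reduction is applied.
    cost = F.square_error_cost(input=pred, label=label)
    # The reduction every call site in this patch adds explicitly.
    avg_cost = paddle.mean(cost)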
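Likewise, fluid.layers.softmax_with_cross_entropy is replaced by paddle.nn.functional.softmax_with_cross_entropy with an unchanged call signature (soft_label, axis, return_softmax), so those hunks only change the qualified name. A minimal sketch, assuming the paddle 2.x dygraph API; shapes are illustrative:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    logits = paddle.randn([4, 10])
    # Hard labels: one int64 class index per row, shape [N, 1].
    label = paddle.randint(0, 10, shape=[4, 1], dtype='int64')

    # Fused, numerically stable softmax + cross entropy on raw logits.
    loss = F.softmax_with_cross_entropy(
        logits=logits, label=label, soft_label=False
    )

    # return_softmax=True yields (loss, softmax), as test_layers.py checks.
    loss, softmax = F.softmax_with_cross_entropy(
        logits, label, return_softmax=True
    )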