diff --git a/python/paddle/fluid/contrib/optimizer.py b/python/paddle/fluid/contrib/optimizer.py index 31b175ba62fe1ed535a9ae99f20512c7bf11cefd..57df115b96ee2c502e0ecef10ef0d98a26b6709d 100644 --- a/python/paddle/fluid/contrib/optimizer.py +++ b/python/paddle/fluid/contrib/optimizer.py @@ -78,7 +78,6 @@ class Momentum(Optimizer): import numpy as np paddle.enable_static() - place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): diff --git a/python/paddle/fluid/contrib/slim/quantization/adaround.py b/python/paddle/fluid/contrib/slim/quantization/adaround.py index 2003380fa1a7d46f023b84b1e16871aa0d3bec23..d6aff8d41c69b1e9a53f3bf15a7337a4361797a4 100644 --- a/python/paddle/fluid/contrib/slim/quantization/adaround.py +++ b/python/paddle/fluid/contrib/slim/quantization/adaround.py @@ -61,7 +61,7 @@ class AdaRoundLoss: self.default_beta_range = default_beta_range def compute_recon_loss(self, ada_quantized_output, orig_output): - square_cost = fluid.layers.square_error_cost( + square_cost = paddle.nn.functional.square_error_cost( ada_quantized_output, orig_output ) recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1)) diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py index 8ddca1b354c70985c62c6b643b782465cba518d8..4184166806d4fcd46126d0d366150cd9ef9e0e39 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -50,7 +50,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): name=fc_tmp.name, dtype=fc_tmp.dtype ) fc_tmp_1 = out_scale(fc_tmp) - cross_entropy = fluid.layers.softmax_with_cross_entropy( + cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( fc_tmp, label ) loss = paddle.mean(cross_entropy) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index ab9ebfa71929b73f44a827c5e2f01603afc5630d..362dde4d4816f10b950e60c44c59b48c8d571c73 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -127,7 +127,7 @@ def train(net_type, use_cuda, save_dirname, is_local): raise ValueError("%s network is not supported" % net_type) logits = fluid.layers.fc(input=net, size=classdim, act="softmax") - cost, predict = fluid.layers.softmax_with_cross_entropy( + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=True ) avg_cost = paddle.mean(cost) @@ -509,7 +509,7 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase): net = vgg16_bn_drop(image) logits = fluid.layers.fc(input=net, size=10, act="softmax") - cost, predict = fluid.layers.softmax_with_cross_entropy( + cost, predict = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=True ) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py index ba0f6534adfa55482075918916e8f53bb8c406dd..4265594f71f56fb9dd12bf94db18d2f2a68758ac 100644 --- a/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py +++ 
b/python/paddle/fluid/contrib/tests/test_multi_precision_fp16_train.py @@ -110,7 +110,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""): label = fluid.layers.data(name='label', shape=[1], dtype='int64') net = resnet_cifar10(images) logits = fluid.layers.fc(input=net, size=classdim, act="softmax") - cost = fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits, label, return_softmax=False ) sum_cost = paddle.sum(cost) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index fa00813146862f53c79c628959b4c2c0236d222c..a3db0f70a6cc46aa1dd809776bf407ce5b7a5f3e 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -21,7 +21,7 @@ from .layer_function_generator import templatedoc from ..layer_helper import LayerHelper from ..framework import Variable, _non_static_mode, static_only, in_dygraph_mode from .. import core -from .loss import softmax_with_cross_entropy +from paddle.fluid.layers import softmax_with_cross_entropy from . import tensor from . import nn from ..data_feeder import check_variable_and_dtype, check_type, check_dtype diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 65a52415f15c6bf28e49103594dca11973c87f2b..1d24489638096f9f369ccfad9d11187fa5c25f94 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -36,7 +36,6 @@ from paddle import _C_ops, _legacy_C_ops __all__ = [ 'cross_entropy', - 'square_error_cost', 'softmax_with_cross_entropy', ] @@ -144,41 +143,6 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex): return out -def square_error_cost(input, label): - r""" - - Accept input predictions and target label and returns the - squared error cost. - - For predictions label, and target label, the equation is: - - .. math:: - - Out = (input - label)^2 - - Parameters: - input (Tensor): Input tensor, the data type should be float32. - label (Tensor): Label tensor, the data type should be float32. - - Returns: - Tensor, The tensor storing the element-wise squared - error difference between input and label. - - Examples: - - .. code-block:: python - - import paddle - input = paddle.to_tensor([1.1, 1.9]) - label = paddle.to_tensor([1.0, 2.0]) - output = paddle.nn.functional.square_error_cost(input, label) - print(output) - # [0.01, 0.01] - - """ - return paddle.nn.functional.square_error_cost(input, label) - - def softmax_with_cross_entropy( logits, label, @@ -189,49 +153,32 @@ def softmax_with_cross_entropy( axis=-1, ): r""" - This operator implements the cross entropy loss function with softmax. This function combines the calculation of the softmax operation and the cross entropy loss function to provide a more numerically stable gradient. - Because this operator performs a softmax on logits internally, it expects unscaled logits. This operator should not be used with the output of softmax operator since that would produce incorrect results. - When the attribute :attr:`soft_label` is set :attr:`False`, this operators expects mutually exclusive hard labels, each sample in a batch is in exactly one class with a probability of 1.0. Each sample in the batch will have a single label. - The equation is as follows: - 1) Hard label (one-hot label, so every sample has exactly one class) - .. 
math:: - loss_j = -\\text{logits}_{label_j} + \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K - 2) Soft label (each sample can have a distribution over all classes) - .. math:: - loss_j = -\\sum_{i=0}^{K}\\text{label}_i \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K} \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K - 3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by: - .. math:: - max_j &= \\max_{i=0}^{K}{\\text{logits}_i} - log\\_max\\_sum_j &= \\log\\sum_{i=0}^{K}\\exp(logits_i - max_j) - softmax_j &= \\exp(logits_j - max_j - {log\\_max\\_sum}_j) - and then cross entropy loss is calculated by softmax and label. - Args: logits (Tensor): A multi-dimension ``Tensor`` , and the data type is float32 or float64. The input tensor of unscaled log probabilities. label (Tensor): The ground truth ``Tensor`` , data type is the same @@ -258,7 +205,6 @@ def softmax_with_cross_entropy( axis (int, optional): The index of dimension to perform softmax calculations. It should be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of input :attr:`logits`. Default: -1. - Returns: ``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \ `return_softmax` is False, otherwise the tuple \ @@ -266,13 +212,10 @@ with input logits and cross entropy loss is in \ the same shape with input logits except shape \ in dimension :attr:`axis` as 1. - Examples: .. code-block:: python - import paddle import numpy as np - data = np.random.rand(128).astype("float32") label = np.random.rand(1).astype("int64") data = paddle.to_tensor(data) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 8c9a940d846be9aef8fb7c830ee1172bbd7ca0be..ea10b49e9cc6f4abcf340900f60f2fd239ef0cfd 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1441,13 +1441,15 @@ class SGDOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + import paddle + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) @@ -1642,13 +1644,15 @@ class MomentumOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + import paddle + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) moment_optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) @@ -2219,13 +2223,14 @@ class AdamOptimizer(Optimizer): import paddle import paddle.fluid as fluid + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.data(name='x', shape=[None, 13], dtype='float32') y = fluid.data(name='y', shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) 
- cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) adam_optimizer = fluid.optimizer.AdamOptimizer(0.01) @@ -2247,13 +2252,15 @@ class AdamOptimizer(Optimizer): import paddle.fluid as fluid import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler + import paddle + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.data(name='x', shape=[None, 13], dtype='float32') y = fluid.data(name='y', shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) # define beta decay variable @@ -3276,13 +3283,15 @@ class RMSPropOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + import paddle + paddle.enable_static() place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) rms_optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) @@ -3493,13 +3502,16 @@ class FtrlOptimizer(Optimizer): import paddle.fluid as fluid import numpy as np + import paddle + paddle.enable_static() + place = fluid.CPUPlace() main = fluid.Program() with fluid.program_guard(main): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) ftrl_optimizer = fluid.optimizer.Ftrl(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index a6cca9ceebd6bda8db58276657d6924a15783288..558ce9febe4e5c4c1063fd7a0ce6d6b3a7bff6f6 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -56,16 +56,20 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): if not pure_bf16: with amp.bf16.bf16_guard(): y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) else: y_predict = fluid.layers.fc(input=x, size=1, act=None) with amp.bf16.bf16_guard(): - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) else: y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) lr = 5e-3 if use_bf16 else 1e-3 diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 3aacd377dc0c7ebb76fe299e503c35d20629cf17..89da6135a806d3f85e46607d1d0c13bf570aa57e 100644 --- 
a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -167,7 +167,9 @@ def model(): scale_infer = paddle.scale(x=inference, scale=5.0) label = layers.data(name='score', shape=[1], dtype='float32') - square_cost = layers.square_error_cost(input=scale_infer, label=label) + square_cost = paddle.nn.functional.square_error_cost( + input=scale_infer, label=label + ) avg_cost = paddle.mean(square_cost) return scale_infer, avg_cost diff --git a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py index 15d62544d217a3052a017ce9608f5299fea7fc68..cb6f8a0a29f98359a67644d7f6e88776af6420d7 100644 --- a/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py +++ b/python/paddle/fluid/tests/unittests/auto_checkpoint_utils.py @@ -69,7 +69,7 @@ class AutoCheckpointBase(unittest.TestCase): label = fluid.data(name='label', shape=[-1, 1], dtype='int64') fc_tmp = fluid.layers.fc(image, size=CLASS_NUM) - cross_entropy = fluid.layers.softmax_with_cross_entropy( + cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( fc_tmp, label ) loss = paddle.mean(cross_entropy) diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index c6e9a36ddfe2cce9ac48d6d47ce52cd60732ec84..e9812d11ba7631ca2904664e583a0d5ef2d24e21 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -63,7 +63,7 @@ def net(): hidden = fluid.layers.fc(input=hidden, size=400, act="sigmoid") hidden = fluid.layers.fc(input=hidden, size=3, act=None) - cost, y_predict = fluid.layers.softmax_with_cross_entropy( + cost, y_predict = paddle.nn.functional.softmax_with_cross_entropy( hidden, y, return_softmax=True ) acc_top1 = paddle.static.accuracy(input=y_predict, label=y, k=1) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py index dfcdd5b130f731df091ee0db6b06b8e63eb02159..104aa658ec331974092c52e3c1022d1cc7b28b5e 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_pp_embedding.py @@ -57,7 +57,7 @@ class SimpleNet(Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = fluid.layers.elementwise_add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1, soft_label=False ) return loss.mean() @@ -106,7 +106,7 @@ class LossNet(Layer): def forward(self, args, y1): projection, x2 = args - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1[0], soft_label=False ) return loss.mean() diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py index f63d7c9ad330ee89bcb2eb2eeb0d66efb602938b..58c0fe7465c9183ba3555ccd5339572ac3f47051 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/hybrid_parallel_shared_weight.py @@ -67,7 +67,7 @@ 
class SimpleNet(Layer): projection = paddle.matmul(projection, self.word_embeddings.weight) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1, soft_label=False ) return loss.mean() @@ -120,7 +120,7 @@ class LossNet(Layer): def forward(self, args, y1): projection = args - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y1[0], soft_label=False ) return loss.mean() diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index ae747cb465e7e80c96ff29a4fbdbcbcd391e86ac..6792cf2877fe533eec407602e6fc7f5f136d7ade 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -941,7 +941,7 @@ class TransFormer(Layer): epsilon=self._label_smooth_eps, ) - cost = fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=predict, label=label_out, soft_label=True if self._label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py index cedfe94448b58789c8116f4f1304ff8bccdc81d7..209233027c8a73596a652ad98a282e10412c2f43 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py @@ -33,7 +33,7 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): y_predict = fluid.layers.fc(input=x, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) return avg_cost, x, y diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py index 7427e6a58ff55c03a33b20245bd22c239a280fec..550fc5db902416281c30095f089cdf19e9fa0b81 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_sync.py @@ -29,7 +29,7 @@ class TestCommunicator(unittest.TestCase): def net(self): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py index 54ee7dbdc9d0264839c6dc5c5e317ff40ae11406..245aa097b6aee197b2cca14a0dfb1cc967c61056 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_distributed_strategy.py @@ -273,7 +273,7 @@ class TestDebugInfo(unittest.TestCase): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') 
y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) role = role_maker.UserDefinedRoleMaker( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py index 16deaa3bbb9ee0e79a7781b4b29bd68d38fa5387..96e84251011feb1ac0edf98af6f49d86f6ece260 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_rolemaker_new.py @@ -449,7 +449,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost( + cost = paddle.nn.functional.square_error_cost( input=y_predict, label=y ) avg_cost = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 8a8b013b6b053ad344a96ac0e55a4115808a3e41..52b6f674e5c19e2f4b206e8ad258fb125b34c606 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -1585,7 +1585,7 @@ def transformer( epsilon=label_smooth_eps, ) - cost = layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=paddle.reshape(predict, shape=[-1, trg_vocab_size]), label=label, soft_label=True if label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 64d0a8dc73aa7c806f26f686c3ee71bd09fae68b..d45d775829944fcd0c4b6bd192b790b3855b4bdf 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -410,7 +410,7 @@ class PretrainModelLayer(Layer): else: fc_out = self.out_fc(mask_trans_feat) - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( + mask_lm_loss = paddle.nn.functional.softmax_with_cross_entropy( logits=fc_out, label=mask_label ) mean_mask_lm_loss = paddle.mean(mask_lm_loss) @@ -420,7 +420,7 @@ class PretrainModelLayer(Layer): ( next_sent_loss, next_sent_softmax, - ) = fluid.layers.softmax_with_cross_entropy( + ) = paddle.nn.functional.softmax_with_cross_entropy( logits=next_sent_fc_out, label=labels, return_softmax=True ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 5f22b03cc9ff09892d170b10a4a7cf4f6e41b5d5..bf1dfdcad23887d557e44ca3c13ea977bf7669f5 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -294,7 +294,7 @@ class BaseModel(fluid.dygraph.Layer): dec_output = paddle.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) loss = paddle.squeeze(loss, axis=[2]) @@ -828,7 
+828,7 @@ class AttentionModel(fluid.dygraph.Layer): dec_output = paddle.stack(dec_output) dec_output = self.fc(self._transpose_batch_time(dec_output)) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=dec_output, label=label, soft_label=False ) loss = paddle.squeeze(loss, axis=[2]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index f8e657499a4cd7217c42fb1567ea36a06abe32f2..0cb3e333045f5807481af5825f42b3737a1f1f14 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -379,7 +379,7 @@ def bmn_loss_func( weights = u_hmask + u_smmask + u_slmask weights.stop_gradient = True - loss = fluid.layers.square_error_cost(pred_score, gt_iou_map) + loss = paddle.nn.functional.square_error_cost(pred_score, gt_iou_map) loss = paddle.multiply(loss, weights) loss = 0.5 * paddle.sum(loss) / paddle.sum(weights) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index c7135a8ff781cce141cfa122b99a23fc2e820b29..62c6c18346885e8bcc8401dacdd35199c7ed11cd 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -216,7 +216,7 @@ class PtbModel(fluid.Layer): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 209127104bd4fde4327191f8e81bf2fb8b9eb4cb..accf36ff1791971c9949d9543bf3f9666073f3af 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -576,7 +576,7 @@ class CrossEntropyCriterion: epsilon=self.label_smooth_eps, ) - cost = layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=predict, label=label_out, soft_label=True if self.label_smooth_eps else False, diff --git a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py index 73ddadc0ac4174cacfee284ce5ec8e5e832cb8d9..dbdfab2882503617d20c8bd1b513d6f48d973e60 100644 --- a/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py +++ b/python/paddle/fluid/tests/unittests/ipu/test_dy2static_ipu.py @@ -220,7 +220,7 @@ class TestWithoutIdentityLoss1(TestBase): class TestWithoutIdentityLoss2(TestBase): def set_op_attrs(self): - self.loss_op = paddle.fluid.layers.softmax_with_cross_entropy + self.loss_op = paddle.nn.functional.softmax_with_cross_entropy def set_data_feed(self): self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py index 8efc129b68885b9b8ff3f38d1a19b555c1702fb9..cb393cbd3731122ff35ac31fdd1c3fd4233c1eba 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_momentum_op_mlu.py @@ -143,7 +143,7 @@ class TestMomentumV2(unittest.TestCase): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Momentum( @@ -268,7 +268,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py index f210ea0b633b21edee1f53ae934bc9c25f3a5c22..c924bdc6918700bef75fe43614724c749593f77a 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_softmax_with_cross_entropy_op_mlu.py @@ -126,7 +126,7 @@ class TestPowNet(unittest.TestCase): fc_1 = fluid.layers.fc(input=z, size=128) prediction = fluid.layers.fc(input=fc_1, size=2) - cost = fluid.layers.softmax_with_cross_entropy(prediction, label) + cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label) loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py index 9719c5582bc8c6ff7cb4f6b43d497e0047b98519..fe0882c7330fbf984f3ab96f7f4fa231a3522ce9 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_momentum_op_npu.py @@ -111,7 +111,7 @@ class TestMomentumV2(unittest.TestCase): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Momentum( @@ -239,7 +239,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = 
fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum( diff --git a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py index 5bc1700cc16f46405d1a38417b915ce88351a06b..adb1b1b269b276b0aff8034d9b552fdb53b27c90 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py @@ -275,7 +275,7 @@ class TestSliceNet(unittest.TestCase): prediction = paddle.static.nn.fc(z, size=2, activation='softmax') - cost = paddle.fluid.layers.softmax_with_cross_entropy( + cost = paddle.nn.functional.softmax_with_cross_entropy( logits=prediction, label=label ) loss = paddle.mean(cost) diff --git a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py index 487ca61320e402a3be99ad5d8b7732c2af7237a1..f47a0275af0fd9d3176cb51a67aa1b697e7dfc13 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_softmax_with_cross_entropy_op_npu.py @@ -124,7 +124,7 @@ class TestPowNet(unittest.TestCase): fc_1 = fluid.layers.fc(input=z, size=128) prediction = fluid.layers.fc(input=fc_1, size=2) - cost = fluid.layers.softmax_with_cross_entropy(prediction, label) + cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label) loss = paddle.mean(cost) sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 9e0ed71d03598f7a4b3f7ec87bb4c9ca5c690c34..7c46efe77556f8aa4727fb06a1f29718931d7009 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -68,7 +68,7 @@ class SimpleNet(fluid.Layer): fc = fluid.layers.matmul(x_emb, self.softmax_weight) fc = paddle.add(fc, self.softmax_bias) projection = paddle.reshape(fc, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_adadelta_op.py b/python/paddle/fluid/tests/unittests/test_adadelta_op.py index 73a3c1e1cbf7c028d430abc00ce52cde19d42084..eb0fec336a33ce5e5f8957c4587ded3860d6ca86 100644 --- a/python/paddle/fluid/tests/unittests/test_adadelta_op.py +++ b/python/paddle/fluid/tests/unittests/test_adadelta_op.py @@ -146,7 +146,9 @@ class TestAdadeltaV2(unittest.TestCase): x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1) diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 
715b5460ed2f14d3c46c31d9fc21d876def284a7..6298c923a27e1d35fcf441bb07c0fd10660a8b83 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -941,7 +941,9 @@ class TestAdamOptimizer(unittest.TestCase): y_predict = fluid.layers.fc( input=x, size=1, act=None, param_attr=weight_attr ) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) adam = fluid.optimizer.AdamOptimizer( diff --git a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py index 8e43728fb83da1487221df1ee8487be6fa93f789..79c653cdfb26dcdf8740ddbf137abc7bfbe9c371 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py +++ b/python/paddle/fluid/tests/unittests/test_adam_optimizer_fp32_fp64.py @@ -33,7 +33,9 @@ def main_test_func(place, dtype): x = fluid.data(name='x', shape=[None, 13], dtype=dtype) y = fluid.data(name='y', shape=[None, 1], dtype=dtype) y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) adam_optimizer = fluid.optimizer.AdamOptimizer(0.01) diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 1810a4bea6121b6cf5750153d0c7ecfad9b66477..405a85235cc19accc66901569b76261f65b90195 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -621,7 +621,9 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): fc2_b_mon1 = np.zeros((linear2.bias.shape)).astype("float32") fc2_b_mon2 = np.zeros((linear2.bias.shape)).astype("float32") - cost = fluid.layers.square_error_cost(input=out, label=y) + cost = paddle.nn.functional.square_error_cost( + input=out, label=y + ) avg_cost = paddle.mean(cost) simple_lr_fun = partial( diff --git a/python/paddle/fluid/tests/unittests/test_backward.py b/python/paddle/fluid/tests/unittests/test_backward.py index c98fd7dff5e2b2878f08a2425a67c9ffac195ff6..a24d49b5ead064f4d4c598a9d33ddcf0e1ef7dce 100644 --- a/python/paddle/fluid/tests/unittests/test_backward.py +++ b/python/paddle/fluid/tests/unittests/test_backward.py @@ -262,7 +262,9 @@ class SimpleNet(BackwardNet): name='fc_no_use', ) # loss - cost = fluid.layers.square_error_cost(input=predict, label=label) + cost = paddle.nn.functional.square_error_cost( + input=predict, label=label + ) loss = paddle.mean(cost, name='mean_loss') return loss @@ -330,7 +332,7 @@ class TestAppendBackwardWithError(unittest.TestCase): y = fluid.data(name='y', shape=[None, 1], dtype='float32') x_emb = fluid.embedding(x, size=[100, 256]) y_predict = fluid.layers.fc(input=x_emb, size=1, name='my_fc') - loss = fluid.layers.square_error_cost(input=y_predict, label=y) + loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_loss = paddle.mean(loss) param_names = [ param.name diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py index 5bd6dbb2fda5b648dc8ada5b2a0066f9eb34b5fc..7cd3c98a68634bcfc7716f76676b110e0436f80f 100644 --- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -17,6 +17,8 @@ import unittest 
import numpy as np from op_test import OpTest, randomize_probability +import paddle + class TestBprLossOp1(OpTest): """Test BprLoss with discrete one-hot labels.""" @@ -47,4 +49,5 @@ class TestBprLossOp1(OpTest): if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index 978e0d644c7271855639dea65c036fd98d79850a..ebc65cd5ac240b5ea80abdefa6b188b7d3a0840e 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -30,7 +30,7 @@ class TestCommunicator(unittest.TestCase): x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 9363e2fe4727c7bea633db5ea12b99a4c2f3d84c..073a9018c19009b5ccea95a4ae77bcb53e865e73 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -49,7 +49,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): y_predict = fluid.layers.fc(input=z, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) return avg_cost, x, x1, y diff --git a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py index fbea8b003ceda4ba602f03664096a890a0214b94..f0c1e9c8850d8547094562ad6e1ace089e0a121e 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_ps_gpu.py @@ -55,7 +55,7 @@ class TestCommunicator(unittest.TestCase): y = fluid.layers.data(name='y', shape=[1], dtype='float32') slots_vars = [x, y] - cost = fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.Adam(0.01) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 4aaf596d5709031e086277c521c30af9ad5a6b56..c80e1a68fddaddf489f8ee607caad17c85ca72e8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -47,7 +47,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() @@ -85,7 +85,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost 
= paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index 19c0f48e7c1a2c19cfb932378e52bd624e17b864..fdaa0a69c8d20839e5004cf696a037f527e4d613 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -40,7 +40,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py index e64b7d8010ef59f29ac5507594ff9daf6375e3df..b13e2b8171c8cf92d9e11e12bfbbd085cd7cf9c6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_trainer_desc_config.py @@ -41,7 +41,7 @@ class TestDistStrategyTrainerDescConfig(unittest.TestCase): x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + cost = paddle.nn.functional.square_error_cost(input=x, label=y) avg_cost = paddle.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index 00a47420210eb1b1c2ff9d6f421a39ecf9e9a5d1..45cdf972368749d0be0459decee395e5d4c9dbb9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -47,7 +47,7 @@ class TranspilerTest(unittest.TestCase): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) sgd_optimizer.minimize(avg_cost) @@ -302,7 +302,7 @@ class TestLRDecay(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.exponential_decay( @@ -471,7 +471,7 @@ class TestDecayedAdagrad(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1) opt.minimize(avg_cost) @@ -492,7 +492,7 @@ class 
TestFtrl(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Ftrl(learning_rate=0.1) opt.minimize(avg_cost) @@ -513,7 +513,7 @@ class TestLRDecayConditional(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.piecewise_decay( @@ -579,7 +579,7 @@ class TestL2Decay(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) @@ -616,7 +616,7 @@ class TestL2DecayWithPiecewise(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) base_lr = 1.0 bd = [1, 10, 20, 30] @@ -692,7 +692,7 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest): bias_attr=False, ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0) sgd_optimizer.minimize(avg_cost) @@ -1134,7 +1134,7 @@ class TestRMSPropOptimizer(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) optimizer.minimize(avg_cost) @@ -1167,7 +1167,7 @@ class TestLoadSliceVar(TranspilerTest): bias_attr=fluid.ParamAttr(name='fc_b'), ) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) optimizer = fluid.optimizer.RMSProp(learning_rate=0.1) optimizer.minimize(avg_cost) @@ -1452,6 +1452,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase): path_table=path_table, path_code=path_code, ) + avg_cost = paddle.mean(cost) # optimizer optimizer = fluid.optimizer.SGD(learning_rate=0.003) diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index 556bdda232a2bfacc81e370e08c8264cf32d3fa6..2e15d059db5f30431553621ea17b243d043546bb 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -58,7 +58,9 @@ class TestListenAndServOp(unittest.TestCase): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = 
fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() @@ -120,7 +122,9 @@ class TestListenAndServOp(unittest.TestCase): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() @@ -180,7 +184,9 @@ class TestListenAndServOp(unittest.TestCase): ) y_predict = fluid.layers.fc(input=x_emb, size=1, act=None) y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost( + input=y_predict, label=y + ) avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index ccdf56e64f4900ba169907e7ee7c6089d06e1ebd..8ba799e84bc4c1175ef5e629fc6f481978780ab5 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -463,7 +463,7 @@ def lm_model( projection = paddle.add(projection, softmax_bias) projection = paddle.reshape(projection, shape=[-1, vocab_size]) - loss = layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=y, soft_label=False ) diff --git a/python/paddle/fluid/tests/unittests/test_exception.py b/python/paddle/fluid/tests/unittests/test_exception.py index c627f8688a158014952cae93bf47f4fd3ead1454..45a11656ccf8e537d58f1f4b4c2894910f20a610 100644 --- a/python/paddle/fluid/tests/unittests/test_exception.py +++ b/python/paddle/fluid/tests/unittests/test_exception.py @@ -43,7 +43,7 @@ class TestExceptionNoCStack(unittest.TestCase): x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32') y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32') predict = fluid.layers.fc(input=x, size=1, act=None) - loss = fluid.layers.square_error_cost(input=predict, label=y) + loss = paddle.nn.functional.square_error_cost(input=predict, label=y) avg_loss = paddle.mean(loss) fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_loss) diff --git a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py index 77122f46bdcf5d099e451011a9e9a2c8b6f8e871..9696ebcc4412eb1e893278832b8c6d9c6dacf136 100644 --- a/python/paddle/fluid/tests/unittests/test_executor_check_feed.py +++ b/python/paddle/fluid/tests/unittests/test_executor_check_feed.py @@ -25,7 +25,7 @@ class TestExecutor(unittest.TestCase): y = fluid.data(name="y", shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Adam(learning_rate=lr) diff --git a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py index e2c52d99fd2d80aa4e43963ed0b070ba0aebfc9f..3d8371177705da4ad42b1fee4d014adce026658b 100644 --- 
a/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_executor_feed_non_tensor.py @@ -27,7 +27,7 @@ class TestExecutor(unittest.TestCase): y = fluid.data(name="y", shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y) avg_cost = paddle.mean(cost) opt = fluid.optimizer.Adam(learning_rate=lr) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index bff393f38d1332bee6c258df6166702b55e14d93..f62dfe436a7998249bf5f8799994a554ad6a798d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -96,7 +96,9 @@ class TestDygraphGNN(unittest.TestCase): logits = paddle.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. - loss = fluid.layers.softmax_with_cross_entropy(logits, labels) + loss = paddle.nn.functional.softmax_with_cross_entropy( + logits, labels + ) loss = paddle.sum(loss) adam = AdamOptimizer(learning_rate=1e-3) @@ -134,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase): logits = paddle.reshape(logits, logits.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits, to_variable(labels) ) loss = paddle.sum(loss) @@ -162,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase): logits2 = paddle.reshape(logits2, logits2.shape[1:]) # In other example, it's nll with log_softmax. However, paddle's # log_loss only supports binary classification now. 
- loss2 = fluid.layers.softmax_with_cross_entropy( + loss2 = paddle.nn.functional.softmax_with_cross_entropy( logits2, to_variable(labels2) ) loss2 = paddle.sum(loss2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index ed5d93961d1ae7e303af4a130765a280f1fb4bdc..76733836ddc565782f35a8290db285b7cefb3d93 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -69,7 +69,7 @@ class SimpleNet(fluid.Layer): ) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index c86a802a0a4006997c70c56d3e8cb23673a8de13..3980b0dbb27e660dd5f80e5f2d8ae304e7d4c84e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -228,7 +228,7 @@ class PtbModel(fluid.Layer): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index f9b618cedf1c2ac66a15706a23122f022319a47d..a386e2113fa992578db3b0d9e7258307c31efbac 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -224,7 +224,7 @@ class PtbModel(fluid.Layer): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index bd6a6ca22f5633331b97c0f062f222d182aa437f..19f4616d9270598bf7ca9c5678a8b2321fffd59a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -225,7 +225,7 @@ class PtbModel(fluid.Layer): projection = fluid.layers.matmul(rnn_out, self.softmax_weight) projection = paddle.add(projection, self.softmax_bias) projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( + loss = paddle.nn.functional.softmax_with_cross_entropy( logits=projection, label=label, soft_label=False ) loss = paddle.reshape(loss, shape=[-1, self.num_steps]) diff --git 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
index 5b533319019b838803cb2a98f4ae21fd344c5453..bfba325046ea3f677487388005fc2a7b569c523a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py
@@ -78,7 +78,7 @@ class SimpleNet(fluid.Layer):
             fc, paddle.transpose(self.embedding.weight, perm=[1, 0])
         )
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
index 654ebf198b7d09a0422c78f3b99b4290953bec1b..c99fbcf4e9a84afdf39b9c373b74fd7f7e8feddf 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py
@@ -1099,7 +1099,7 @@ class TransFormer(Layer):
                 epsilon=self._label_smooth_eps,
             )

-        cost = fluid.layers.softmax_with_cross_entropy(
+        cost = paddle.nn.functional.softmax_with_cross_entropy(
             logits=predict,
             label=label_out,
             soft_label=True if self._label_smooth_eps else False,
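The transformer hunk above toggles soft_label depending on label smoothing; with soft_label=True the label must be a probability distribution of the same shape as the logits. A minimal sketch, with shapes invented for illustration:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    logits = paddle.randn([8, 10])
    # a smoothed label distribution; each row sums to 1
    soft_label = F.softmax(paddle.randn([8, 10]), axis=-1)

    cost = F.softmax_with_cross_entropy(
        logits=logits, label=soft_label, soft_label=True
    )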
diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
index 9cf82e16f742f0fd274adfcbe46ed2adde35e6c3..daeae8e472fe2653a4eb77365edd45c20ef4269d 100644
--- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py
+++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
@@ -59,7 +59,9 @@ class TestBook(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -153,7 +155,9 @@ class TestSaveInferenceModel(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             place = core.CPUPlace()
@@ -209,7 +213,9 @@ class TestInstance(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             place = core.CPUPlace()
@@ -245,7 +251,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -422,7 +430,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
@@ -469,7 +479,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
             y_predict = layers.fc(input=x, size=1, act=None)

-            cost = layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
index fb6f04d48f04a630c923609f8067f560db08fc7e..16477e086daf11a39cbb9e6b4053a65ebb1402ca 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_softmax_with_cross_entropy.py
@@ -16,6 +16,7 @@
 import unittest

 import numpy as np
+import paddle
 import paddle.fluid as fluid
@@ -48,7 +49,7 @@ class TestSoftmaxWithXe(unittest.TestCase):
             dtype='int64' if not self.soft_label else self.dtype,
             append_batch_size=False,
         )
-        z_d, s_d = fluid.layers.softmax_with_cross_entropy(
+        z_d, s_d = paddle.nn.functional.softmax_with_cross_entropy(
             x_d,
             y_d,
             soft_label=self.soft_label,
diff --git a/python/paddle/fluid/tests/unittests/test_lambv2_op.py b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
index 6b513008109ec6a4315388ddd4c53b3f6fa7b5dc..d3abf54a00beed1f58f9e09bbc123b1b39da4f2d 100644
--- a/python/paddle/fluid/tests/unittests/test_lambv2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lambv2_op.py
@@ -126,7 +126,9 @@ class TestLambOpWithCombinedOp(unittest.TestCase):
             x = fluid.layers.data(name='X', shape=[13], dtype='float32')
             y = fluid.layers.data(name='Y', shape=[1], dtype='float32')
             prediction = fluid.layers.fc(input=x, size=1, act=None)
-            loss = fluid.layers.square_error_cost(input=prediction, label=y)
+            loss = paddle.nn.functional.square_error_cost(
+                input=prediction, label=y
+            )
             avg_loss = paddle.mean(loss)
         return avg_loss
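For the square_error_cost migrations above, a minimal static-graph sketch of the new call site; the network and names here are hypothetical, not copied from the tests:

.. code-block:: python

    import paddle

    paddle.enable_static()

    x = paddle.static.data(name='x', shape=[None, 13], dtype='float32')
    y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
    y_predict = paddle.static.nn.fc(x=x, size=1)

    # element-wise (y_predict - y) ** 2, reduced to a scalar for the optimizer
    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
    avg_cost = paddle.mean(cost)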
            loss1 = layers.softmax_with_cross_entropy(x1, y1, axis=1)
-            loss2 = layers.softmax_with_cross_entropy(x1, y2, axis=2)
-            loss3 = layers.softmax_with_cross_entropy(x1, y3, axis=3)
-            loss4 = layers.softmax_with_cross_entropy(x1, y3, axis=-1)
+            loss1 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y1, axis=1
+            )
+            loss2 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y2, axis=2
+            )
+            loss3 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y3, axis=3
+            )
+            loss4 = paddle.nn.functional.softmax_with_cross_entropy(
+                x1, y3, axis=-1
+            )
             self.assertIsNotNone(loss1)
             self.assertIsNotNone(loss2)
             self.assertIsNotNone(loss3)
@@ -3694,7 +3704,7 @@ class TestBook(LayerTest):
         ):
             x = self._get_data(name="X", shape=[1], dtype="float32")
             y = self._get_data(name="Y", shape=[1], dtype="float32")
-            out = layers.square_error_cost(input=x, label=y)
+            out = paddle.nn.functional.square_error_cost(input=x, label=y)
             return out

     def test_dynamic_lstmp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
index 4c63a4f2a9e4c1b43bd31a47c1085a436a5ad64a..66557b84079692ed76ad011e9f5904306b35697e 100644
--- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
+++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py
@@ -36,7 +36,7 @@ def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')

         # loss function
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)

         # optimizer
@@ -73,7 +73,7 @@ def run_pserver_with_empty_block(
         y = fluid.layers.data(name='y', shape=[1], dtype='float32')

         # loss function
-        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+        cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
         avg_cost = paddle.mean(cost)

         # optimizer
diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
index 2a74fff41d734f55c2c1d5c50d3584e78289feaa..cea6858e0d4d31aaf3b6f21597bbc8635af63937 100644
--- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py
@@ -216,7 +216,7 @@ class TestLookupTableIsSparse(unittest.TestCase):
             )
             y = paddle.sum(emb, axis=-1)
-            loss = fluid.layers.square_error_cost(input=y, label=y_)
+            loss = paddle.nn.functional.square_error_cost(input=y, label=y_)
             loss = paddle.mean(loss)
             sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
diff --git a/python/paddle/fluid/tests/unittests/test_memory_usage.py b/python/paddle/fluid/tests/unittests/test_memory_usage.py
index 973dba893d4404e6cc31c69db04bb474db0f9b97..f1293ea7a765b80711a8c374b8142f8423031526 100644
--- a/python/paddle/fluid/tests/unittests/test_memory_usage.py
+++ b/python/paddle/fluid/tests/unittests/test_memory_usage.py
@@ -30,7 +30,7 @@ def train_simulator(test_batch_size=10):
     y_predict = fluid.layers.fc(input=x, size=1, act=None)
     y = fluid.layers.data(name='y', shape=[1], dtype='float32')

-    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
     avg_cost = paddle.mean(cost)

     sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
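The test_layers hunk above exercises the axis argument: axis picks the class dimension, and the label keeps size 1 on that dimension. A dygraph sketch with invented shapes:

.. code-block:: python

    import paddle
    import paddle.nn.functional as F

    x = paddle.randn([16, 32, 64])
    y = paddle.randint(0, 32, [16, 1, 64])  # classes live on axis 1

    loss = F.softmax_with_cross_entropy(x, y, axis=1)
    print(loss.shape)  # [16, 1, 64]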
diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py
index 102ef0a5fc5fde186050b8a6b138ecf1c21e3437..8c9ec6d4295e089ffb6c49c6e3eb54e4d8c14aa9 100644
--- a/python/paddle/fluid/tests/unittests/test_momentum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py
@@ -532,7 +532,9 @@ class TestMomentumV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             rms_optimizer = paddle.optimizer.Momentum(
@@ -673,7 +675,9 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
index f230cc66c20a1f3749b054fffca0fa05dba1bf63..af4ff64c894485fc94d49ba3a1a437b61240b83e 100644
--- a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
+++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -33,7 +33,9 @@ class TestNetWithDtype(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
             y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)
             sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
             sgd_optimizer.minimize(avg_cost)
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
index 3294b6f37067ce5baa47701d38f9d44f13961654..df07543fa7acfea034f78f7225528fb293fbcd7d 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py
@@ -82,7 +82,7 @@ def static(
     def fn_2(opt, avg_loss=None, pred=None, label=None):
         if avg_loss is None:
-            loss = layers.softmax_with_cross_entropy(
+            loss = paddle.nn.functional.softmax_with_cross_entropy(
                 logits=pred, label=label
             )
             avg_loss = paddle.mean(loss, name='mean_softmax_loss')
@@ -108,7 +108,7 @@ def static(
         else:
             loss_1 = layers.cross_entropy(input=prediction, label=label)
             avg_loss_1 = paddle.mean(loss_1)
-            loss_2 = layers.softmax_with_cross_entropy(
+            loss_2 = paddle.nn.functional.softmax_with_cross_entropy(
                 logits=prediction, label=label
             )
             avg_loss_2 = paddle.mean(loss_2)
@@ -193,7 +193,7 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
                 loss.backward()
                 adam.minimize(loss)
             else:
-                softmax_loss = layers.softmax_with_cross_entropy(
+                softmax_loss = paddle.nn.functional.softmax_with_cross_entropy(
                     prediction, var_label
                 )
                 loss = paddle.mean(softmax_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
index d751fd4b90d862e413b357ce57fb5d5b4a22f075..056afc5ead83393d474e43b9fc2fe1ddafed76cd 100755
--- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
+++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py
@@ -92,7 +92,9 @@ def cond_net(use_feed=None):
         return avg_loss

     def loss2(pred, label):
-        loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=pred, label=label
+        )
         avg_loss = paddle.mean(loss, name='mean_softmax_loss')
         return avg_loss
@@ -119,7 +121,9 @@ def optimization_in_cond_net(with_optimize=False):
         return avg_loss

     def loss2(opt, pred, label, with_optimize):
-        loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
+            logits=pred, label=label
+        )
         avg_loss = paddle.mean(loss, name='mean_softmax_loss')
         if with_optimize:
             opt.minimize(avg_loss)
diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
index 5e3e899eb67bdf252a94d32c64130fbf2eea91a7..81b75a1513eb89cc46ad494798f49a62890839aa 100644
--- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py
@@ -280,7 +280,9 @@ class TestRMSPropV2(unittest.TestCase):
             x = fluid.layers.data(name='x', shape=[13], dtype='float32')
             y = fluid.layers.data(name='y', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
index 0d3ccae5bfcb4406eba6ea6c5e36ec31c838fa21..6b2383ed56933ebce02a0606604d532b99ac8eff 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py
@@ -631,7 +631,7 @@ def def_seq2seq_model(
     )

     # loss
-    loss = layers.softmax_with_cross_entropy(
+    loss = paddle.nn.functional.softmax_with_cross_entropy(
         logits=logits, label=label, soft_label=False
     )
     loss = layers.unsqueeze(loss, axes=[2])
diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op.py b/python/paddle/fluid/tests/unittests/test_sgd_op.py
index 26c4dd18c13a1dc781e8f0d4f0d6259e843f637e..b87d67c7130476b19cbd20897586c8898aa781f7 100644
--- a/python/paddle/fluid/tests/unittests/test_sgd_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py
@@ -204,7 +204,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32')
         out = fluid.layers.l2_normalize(x=emb, axis=-1)
-        cost = fluid.layers.square_error_cost(input=out, label=label)
+        cost = paddle.nn.functional.square_error_cost(input=out, label=label)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
         sgd_optimizer.minimize(avg_cost)
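Since most of the square_error_cost hunks only swap the namespace, a quick numpy cross-check of the replacement call; the data here is random and purely illustrative:

.. code-block:: python

    import numpy as np
    import paddle

    inp = np.random.rand(8, 3).astype('float32')
    lab = np.random.rand(8, 3).astype('float32')

    out = paddle.nn.functional.square_error_cost(
        paddle.to_tensor(inp), paddle.to_tensor(lab)
    )
    # the op is element-wise (input - label) ** 2
    np.testing.assert_allclose(out.numpy(), (inp - lab) ** 2, rtol=1e-5)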
diff --git a/python/paddle/fluid/tests/unittests/test_square_error_cost.py b/python/paddle/fluid/tests/unittests/test_square_error_cost.py
index 1fd516c0504ad8ee08844c01f00d0727f4e18472..7828f01b02fe61773c63476d605421d7860012ae 100644
--- a/python/paddle/fluid/tests/unittests/test_square_error_cost.py
+++ b/python/paddle/fluid/tests/unittests/test_square_error_cost.py
@@ -16,6 +16,7 @@
 import unittest

 import numpy as np
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import paddle.fluid.layers as layers
@@ -32,7 +33,9 @@ class TestSquareErrorCost(unittest.TestCase):
         input_var = layers.create_tensor(dtype="float32", name="input")
         label_var = layers.create_tensor(dtype="float32", name="label")
-        output = layers.square_error_cost(input=input_var, label=label_var)
+        output = paddle.nn.functional.square_error_cost(
+            input=input_var, label=label_var
+        )

         for use_cuda in (
             [False, True] if core.is_compiled_with_cuda() else [False]
@@ -54,14 +57,14 @@ class TestSquareErrorInvalidInput(unittest.TestCase):
         def test_invalid_input():
             input = [256, 3]
             label = fluid.data(name='label1', shape=[None, 3], dtype='float32')
-            loss = fluid.layers.square_error_cost(input, label)
+            loss = paddle.nn.functional.square_error_cost(input, label)

         self.assertRaises(TypeError, test_invalid_input)

         def test_invalid_label():
             input = fluid.data(name='input2', shape=[None, 3], dtype='float32')
             label = [256, 3]
-            loss = fluid.layers.square_error_cost(input, label)
+            loss = paddle.nn.functional.square_error_cost(input, label)

         self.assertRaises(TypeError, test_invalid_label)
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index f417667a82a024a4ba26d9a1822ce8b542b103a0..a2c44c5fae8faef3a7dec1bd78fc6d531fd014d8 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -237,7 +237,7 @@ class PtbModel(fluid.Layer):
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = paddle.add(projection, self.softmax_bias)
         projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
+        loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False
         )
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
index afd7a57c367006aa206d867477021fa33f67d3bf..f42ccf12c581affde029e03621a2865d8c98bcc4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_adadelta_op_xpu.py
@@ -171,7 +171,9 @@ class XPUTestAdadelta(XPUOpTestWrapper):
             x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
             y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
             y_predict = fluid.layers.fc(input=x, size=1, act=None)
-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+            cost = paddle.nn.functional.square_error_cost(
+                input=y_predict, label=y
+            )
             avg_cost = paddle.mean(cost)

             rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
index e9cc501a87770ab8be5eff80cf65b9d5848d8835..46ecff205f79b60eeb78ef928d073479cadfe6db 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_sgd_op_xpu.py
@@ -75,7 +75,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
         emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32')
         out = fluid.layers.l2_normalize(x=emb, axis=-1)
-        cost = fluid.layers.square_error_cost(input=out, label=label)
+        cost = paddle.nn.functional.square_error_cost(input=out, label=label)
         avg_cost = paddle.mean(cost)
         sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
         sgd_optimizer.minimize(avg_cost)
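test_square_error_cost.py above also asserts that non-Tensor inputs are rejected. A condensed sketch of that behavior in static mode; the data names are hypothetical:

.. code-block:: python

    import paddle

    paddle.enable_static()
    label = paddle.static.data(name='label', shape=[None, 3], dtype='float32')
    try:
        # a plain Python list is not a Variable/Tensor
        paddle.nn.functional.square_error_cost([256, 3], label)
    except TypeError:
        pass  # expected, mirrors assertRaises(TypeError, ...) in the test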
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index ee98dc94a6f1ef5b036b7203a9e50bb8785c7c3c..ebb249903b228cbbb94ee4952f30f4f0cca6a427 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -283,11 +283,16 @@ class DistributeTranspiler:
         Examples:
             .. code-block:: python

+                import paddle
+                import paddle.fluid as fluid
+
+                paddle.enable_static()
+
                 x = fluid.data(name='x', shape=[13], dtype='float32')
                 y = fluid.data(name='y', shape=[1], dtype='float32')
                 y_predict = fluid.layers.fc(input=x, size=1, act=None)
-                cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+                cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
                 avg_loss = fluid.layers.mean(cost)

                 sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)