diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
index c956f287d7b14978e15c26c5e534b6f55776a53f..a4e41147811619b54d5f9cb13861b7244508a975 100644
--- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
+++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py
@@ -77,7 +77,9 @@ class TestDistMnist2x2(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py
index 73e4f7fffc871fa29d060669e0ebc62308716b48..8b910514202ff9d0abad5ba83e18f1c30d84c974 100644
--- a/python/paddle/fluid/tests/unittests/dist_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_ctr.py
@@ -103,7 +103,9 @@ class TestDistCTR2x2(TestDistRunnerBase):
         auc_var, batch_auc_var, auc_states = paddle.static.auc(
             input=predict, label=label
         )
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         inference_program = paddle.fluid.default_main_program().clone()
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
index 3826bbae5caed5fd380b681649a085a54577dc57..59f8f67aca83362a91f127e37638daf8ca8fda77 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py
@@ -154,7 +154,9 @@ class TestDistCTR2x2(FleetDistRunnerBase):
             input=predict, label=label
         )
 
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         self.feeds = datas
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
index 07feafdf77f062cd8b6726f83ec4eafa2496d192..5e909773a61752e5710235d15a05d096534fa8d0 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py
@@ -123,7 +123,9 @@ class TestHeterPipelinePsCTR2x2(FleetDistHeterRunnerBase):
             label = fluid.layers.cast(label, dtype="int64")
             predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
 
-            cost = fluid.layers.cross_entropy(input=predict, label=label)
+            cost = paddle.nn.functional.cross_entropy(
+                input=predict, label=label, reduction='none', use_softmax=False
+            )
             avg_cost = paddle.mean(x=cost)
 
             fluid.layers.Print(avg_cost, message="avg_cost")
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
index 7c98169433b0b89c2dbeb1fa6bdefe1aad6d7725..e45d2c93a869f6807572ca74b774909a3374db7a 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py
@@ -79,7 +79,9 @@ class TestFleetMetaOptimizerPrecision(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
index e46173735a96796e3598a45c4a67b5a61881b599..1a23b86549e0396149ef0d2b1470367aa81cf4bd 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py
@@ -79,7 +79,9 @@ class TestFleetMetaOptimizerFuseAllReducePrecision(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
index 1868f81dd12986df9d70ea98313557e06e0ca934..b22403a6fa8cda9b4ef9b8b6b8b12a0a2cae190c 100644
--- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
+++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py
@@ -140,7 +140,9 @@ class TestDistCTR2x2(FleetDistRunnerBase):
         acc = paddle.static.accuracy(input=predict, label=label)
         auc_var, _, _ = paddle.static.auc(input=predict, label=label)
 
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         self.feeds = datas
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py
index 819b959a1fa8d32b72d98a1d40f34b8c31be2757..4510b57391bebee005ebf1ae6ea55240ece2b4cb 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist.py
@@ -78,7 +78,9 @@ class TestDistMnist2x2(TestDistRunnerBase):
 
         # Train program
        predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
index aa963ab012bc05f4bdb3fc2ab58e853afb4833ea..cac46996edd4a35a98addcbffd8506f89b1c709b 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
@@ -43,7 +43,9 @@ class TestDistMnist2x2(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py
index ad0b25e8ea15acf6868059f37d73faa47aae79a2..e4af13fa89b066e02a9f026f3d6a11223a5536c1 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py
@@ -37,7 +37,9 @@ class TestDistMnist2x2(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py
index b886ad8953461b9984cf2b8e017aa6dac76f408e..dc2fc10450bf6e6532e68926d7851f888ad4f1d6 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist_lars.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist_lars.py
@@ -34,7 +34,9 @@ class TestDistMnist2x2(TestDistRunnerBase):
 
         # Train program
         predict = cnn_model(images)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         # Evaluator
diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
index c4dd1de92d2dd58c8f246cc8c8b695bad2903602..2b8ee5c62ac30d484e38d6a5cb64b677e03d3b34 100644
--- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py
@@ -214,7 +214,9 @@ class DistSeResneXt2x2(TestDistRunnerBase):
         # Train program
         model = SE_ResNeXt(layers=50)
         out = model.net(input=image, class_dim=102)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=out, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
         acc_top1 = paddle.static.accuracy(input=out, label=label, k=1)
diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py
index 73e153164b3e8f9340728488d54a0ebffce62646..591106ab1ff1c46acf06635cbee3ad255930ced8 100644
--- a/python/paddle/fluid/tests/unittests/dist_text_classification.py
+++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py
@@ -132,7 +132,9 @@ class TestDistTextClassification2x2(TestDistRunnerBase):
 
         # Train program
         predict = conv_net(data, dict_dim)
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
         acc = paddle.static.accuracy(input=predict, label=label)
         inference_program = fluid.default_main_program().clone()
diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
index 7c93d710c9ab69a8c59b9d5870c6461fe06482ca..1c7ecd1036ccf0c43dcff60ee126feeb353402ac 100644
--- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
+++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py
@@ -112,7 +112,9 @@ def net(batch_size=4, lr=0.01):
         label = fluid.layers.cast(label, dtype="int64")
         predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
 
-        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        cost = paddle.nn.functional.cross_entropy(
+            input=predict, label=label, reduction='none', use_softmax=False
+        )
         avg_cost = paddle.mean(x=cost)
 
     return datas, avg_cost
diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py
index dd9e9956065342038e868f35371cc3edc58e64ba..d12d07f6944dc2336022636c1c8fd9a496c15a9e 100644
--- a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py
+++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py
@@ -95,7 +95,9 @@ class MNIST(fluid.dygraph.Layer):
         x = self._simple_img_conv_pool_2(x)
         x = paddle.reshape(x, shape=[-1, self.pool_2_shape])
         cost = self._fc(x)
-        loss = fluid.layers.cross_entropy(self.act(cost), label)
+        loss = paddle.nn.functional.cross_entropy(
+            self.act(cost), label, reduction='none', use_softmax=False
+        )
         avg_loss = paddle.mean(loss)
         return avg_loss
 
diff --git a/python/paddle/fluid/tests/unittests/seresnext_net.py b/python/paddle/fluid/tests/unittests/seresnext_net.py
index 8dcccfa643dd55629f655a71798e0281ad0edbcd..5df8d39ed1b37dbaa3009762e388d4bed88c96f2 100644
--- a/python/paddle/fluid/tests/unittests/seresnext_net.py
+++ b/python/paddle/fluid/tests/unittests/seresnext_net.py
@@ -170,7 +170,9 @@ def SE_ResNeXt50Small(use_feed):
     )
     # Classifier layer:
     prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax')
-    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    loss = paddle.nn.functional.cross_entropy(
+        input=prediction, label=label, reduction='none', use_softmax=False
+    )
     loss = paddle.mean(loss)
     return loss
 
diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py
index 9d124ee5092009b915e8beaf747e30d26dd0c802..747cde082743a93ff8ec09e96484e35fefb87c7a 100644
--- a/python/paddle/fluid/tests/unittests/simple_nets.py
+++ b/python/paddle/fluid/tests/unittests/simple_nets.py
@@ -30,7 +30,9 @@ def simple_fc_net_with_inputs(img, label, class_num=10):
             ),
         )
     prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
-    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    loss = paddle.nn.functional.cross_entropy(
+        input=prediction, label=label, reduction='none', use_softmax=False
+    )
     loss = paddle.mean(loss)
     return loss
 
@@ -56,7 +58,9 @@ def batchnorm_fc_with_inputs(img, label, class_num=10):
 
         hidden = paddle.static.nn.batch_norm(input=hidden)
     prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
-    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    loss = paddle.nn.functional.cross_entropy(
+        input=prediction, label=label, reduction='none', use_softmax=False
+    )
     loss = paddle.mean(loss)
     return loss
 
@@ -93,7 +97,9 @@ def bow_net(
     fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
     fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
     prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
-    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    cost = paddle.nn.functional.cross_entropy(
+        input=prediction, label=label, reduction='none', use_softmax=False
+    )
     avg_cost = paddle.mean(x=cost)
 
     return avg_cost
diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py
index a338d31f78fde1ee73d95880471077aa9d338df1..b51d20eb7e723af495872e06aa4dd00b856e0648 100644
--- a/python/paddle/fluid/tests/unittests/transformer_model.py
+++ b/python/paddle/fluid/tests/unittests/transformer_model.py
@@ -594,6 +594,8 @@ def transformer(
     )
     predict = paddle.nn.functional.softmax(predict)
 
-    cost = layers.cross_entropy(input=predict, label=gold)
+    cost = paddle.nn.functional.cross_entropy(
+        input=predict, label=gold, reduction='none', use_softmax=False
+    )
     weighted_cost = cost * weights
     return paddle.sum(weighted_cost)