From 0c1cb5e3304970db2c74b1922601207b5bf3865b Mon Sep 17 00:00:00 2001 From: kangguangli Date: Thu, 15 Dec 2022 14:13:09 +0800 Subject: [PATCH] [remove fluid.layers.cross_entropy] remove sample code and api (#49018) * remove reference in sample code for `fluid.layers.cross_entropy` * fix error * fix doc codestyle and remove api `layers.cross_entropy` && `layers.cross_entropy2` * fix unittest test_fleet_hybrid_meta_optimizer && test_fleet_sharding_meta_optimizer --- python/paddle/fluid/clip.py | 7 +- python/paddle/fluid/contrib/layers/nn.py | 5 +- python/paddle/fluid/io.py | 15 ++- python/paddle/fluid/layers/loss.py | 102 ------------------ python/paddle/fluid/optimizer.py | 61 ++++++++--- python/paddle/fluid/reader.py | 34 +++++- python/paddle/fluid/regularizer.py | 14 ++- .../fleet/test_fleet_hybrid_meta_optimizer.py | 20 ++-- .../test_fleet_sharding_meta_optimizer.py | 82 +++++++------- .../tests/unittests/dist_sharding_save.py | 7 +- .../fluid/tests/unittests/dist_word2vec.py | 7 +- .../unittests/fleet_meta_optimizer_base.py | 14 ++- 12 files changed, 178 insertions(+), 190 deletions(-) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 287ddc96219..d753b66fd7a 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -133,13 +133,14 @@ class ErrorClipByValue(BaseErrorClipAttr): predict = fluid.layers.fc( input=hidden2, size=10, act='softmax') label = fluid.layers.data(name='y', shape=[1], dtype='int64') - cost = fluid.layers.cross_entropy(input=predict, label=label) + cost = paddle.nn.functional.cross_entropy(input=predict, label=label, reduction='none', use_softmax=False) avg_cost = paddle.mean(cost) prog_clip = prog.clone() prog_clip.block(0).var(hidden1.name)._set_error_clip( fluid.clip.ErrorClipByValue( - max=CLIP_MAX, min=CLIP_MIN) - ) + max=CLIP_MAX, min=CLIP_MIN + ) + ) """ def __init__(self, max, min=None): diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index bd170b348ae..b6c4688dac3 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -1998,7 +1998,10 @@ def fused_bn_add_act( data_layout='NHWC') fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn) prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = paddle.nn.functional.cross_entropy( + input=prediction, label=y, + reduction='none', use_softmax=False + ) loss = paddle.mean(loss) sgd = fluid.optimizer.SGD(learning_rate=0.001) sgd = fluid.contrib.mixed_precision.decorate( diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 5e301d834b3..1e07431f7bb 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -513,7 +513,10 @@ def save_params(executor, dirname, main_program=None, filename=None): feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) predict = fluid.layers.fc(input=image, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) + loss = paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) avg_loss = paddle.mean(loss) exe = fluid.Executor(fluid.CPUPlace()) @@ -748,7 +751,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None): feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) predict = fluid.layers.fc(input=image, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) + loss = paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) avg_loss = paddle.mean(loss) exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) @@ -1380,7 +1386,10 @@ def save_inference_model( feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) predict = fluid.layers.fc(input=image, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=predict, label=label) + loss = paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) avg_loss = paddle.mean(loss) exe = fluid.Executor(fluid.CPUPlace()) diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 1d244896380..87b534aa8f0 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -35,114 +35,12 @@ import warnings from paddle import _C_ops, _legacy_C_ops __all__ = [ - 'cross_entropy', 'softmax_with_cross_entropy', ] kIgnoreIndex = -100 -def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex): - r""" - :alias_main: paddle.nn.functional.cross_entropy - :alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy - :old_api: paddle.fluid.layers.cross_entropy - - This operator computes the cross entropy between input and label. It - supports both hard-label and and soft-label cross entropy computation. - - 1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]` - is the hard label of each sample. - - .. math:: - - output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index - - 2. Soft-label cross entropy: if soft_label=True, :math:`label[i_1, i_2, ..., i_k, j]` - is the soft label of each sample corresponding to the j-th class. - - .. math:: - - output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j]) - - Args: - input (Variable): a multidimensional Tensor with shape - :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is - the class number. The data type should be float32 or float64. - label (Variable): label value corresponding to input. If - soft_label=False, the dimension of label should be :math:`[N_1, N_2, ..., N_k]` - or :math:`[N_1, N_2, ..., N_k, 1]` , and its data type should be int64, - and the value must be inside [0, D). If soft_label=True, the shape, - data type of label should be the same with input, and the sum of - soft label value of each sample should be 1. - soft_label (bool): indicate whether label is soft. Default False, meaning that - the label is hard. If soft_label=True, the label is soft. - ignore_index (int): specify an ignorable label value. The ignored label would be - omitted when computing. If it is a negative integer, no label would - be ignored. Only valid when soft_label=False. Default -100. - - Returns: - A Variable holding Tensor representing the cross entropy, whose data type is the same with input. - If soft_label=False, the shape of output is the same with label. - If soft_label=True, the shape of output is :math:`[N_1, N_2, ..., N_k, 1]` . - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - class_num = 7 - x = fluid.data(name='x', shape=[None, 3, 10], dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - predict = fluid.layers.fc(input=x, size=class_num, act='softmax') - cost = fluid.layers.cross_entropy(input=predict, label=label) - """ - if not soft_label: - return cross_entropy2(input, label, ignore_index) - - if _non_static_mode(): - return _legacy_C_ops.cross_entropy( - input, label, "soft_label", soft_label, "ignore_index", ignore_index - ) - - inputs = {'X': [input], 'Label': [label]} - attrs = {"soft_label": soft_label, "ignore_index": ignore_index} - - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy' - ) - helper = LayerHelper('cross_entropy', **locals()) - out = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='cross_entropy', inputs=inputs, outputs={'Y': [out]}, attrs=attrs - ) - return out - - -def cross_entropy2(input, label, ignore_index=kIgnoreIndex): - if _non_static_mode(): - loss, _, _ = _legacy_C_ops.cross_entropy2( - input, label, 'ignore_index', ignore_index - ) - return loss - - inputs = {'X': [input], 'Label': [label]} - attrs = {'ignore_index': ignore_index} - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy2' - ) - helper = LayerHelper('cross_entropy2', **locals()) - out = helper.create_variable_for_type_inference(dtype=input.dtype) - xshape = helper.create_variable_for_type_inference(dtype=input.dtype) - match_x = helper.create_variable_for_type_inference(dtype=input.dtype) - helper.append_op( - type='cross_entropy2', - inputs=inputs, - outputs={'Y': [out], 'MatchX': [match_x], 'XShape': [xshape]}, - attrs=attrs, - ) - return out - - def softmax_with_cross_entropy( logits, label, diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 79f2cde1b1b..af085a357e6 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -6408,8 +6408,12 @@ class RecomputeOptimizer(Optimizer): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + + paddle.enable_static() + def gen_data(): return {"x": np.random.random(size=(32, 32)).astype('float32'), "y": np.random.randint(2, size=(32, 1)).astype('int64')} @@ -6417,8 +6421,11 @@ class RecomputeOptimizer(Optimizer): print(input_x) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') @@ -6491,8 +6498,11 @@ class RecomputeOptimizer(Optimizer): def mlp(input_x, input_y, hid_dim=128, label_dim=2): fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') @@ -6526,14 +6536,20 @@ class RecomputeOptimizer(Optimizer): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework + paddle.enable_static() + def mlp(input_x, input_y, hid_dim=128, label_dim=2): fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction @@ -7015,13 +7031,19 @@ class RecomputeOptimizer(Optimizer): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() + def mlp(input_x, input_y, hid_dim=128, label_dim=2): fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction @@ -7091,13 +7113,19 @@ class RecomputeOptimizer(Optimizer): params_grads (list): list of (param, grad) pair to do optimization. Examples: .. code-block:: python + import paddle import paddle.fluid as fluid + paddle.enable_static() + def mlp(input_x, input_y, hid_dim=128, label_dim=2): fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') @@ -7190,7 +7218,10 @@ class LookaheadOptimizer: x = fluid.layers.data(name='x', shape=[2], dtype='float32') label = fluid.layers.data(name="label", shape=[1], dtype="int64") y = fluid.layers.fc(input=[x], size=2, act="softmax") - loss = fluid.layers.cross_entropy(input=y, label=label) + loss = paddle.nn.functional.cross_entropy( + input=y, label=label, + reduction='none', use_softmax=False + ) loss = paddle.mean(x=loss) sgd = fluid.optimizer.SGD(learning_rate=0.01) optimizer = fluid.optimizer.LookaheadOptimizer(sgd, @@ -7357,6 +7388,7 @@ class GradientMergeOptimizer: Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np @@ -7367,8 +7399,11 @@ class GradientMergeOptimizer: def mlp(input_x, input_y, hid_dim=128, label_dim=2): fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') - cost = fluid.layers.cross_entropy(input=prediction, label=input_y) - sum_cost = fluid.layers.reduce_mean(cost) + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=input_y, + reduction='none', use_softmax=False + ) + sum_cost = paddle.mean(cost) return sum_cost, fc_1, prediction input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 356a68c9004..66427426cbb 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -1684,6 +1684,8 @@ class PyReader(DataLoaderBase): import paddle.fluid as fluid import numpy as np + paddle.enable_static() + EPOCH_NUM = 3 ITER_NUM = 5 BATCH_SIZE = 3 @@ -1691,7 +1693,10 @@ class PyReader(DataLoaderBase): def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) + return paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) def reader_creator_random_image_and_label(height, width): def reader(): @@ -1738,6 +1743,8 @@ class PyReader(DataLoaderBase): import paddle.fluid as fluid import numpy as np + paddle.enable_static() + EPOCH_NUM = 3 ITER_NUM = 5 BATCH_SIZE = 10 @@ -1745,7 +1752,10 @@ class PyReader(DataLoaderBase): def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) + return paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) def reader_creator_random_image(height, width): def reader(): @@ -1939,7 +1949,10 @@ class PyReader(DataLoaderBase): def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) + return paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) def random_image_and_label_generator(height, width): def generator(): @@ -1994,6 +2007,8 @@ class PyReader(DataLoaderBase): import paddle.fluid as fluid import numpy as np + paddle.enable_static() + EPOCH_NUM = 3 ITER_NUM = 15 BATCH_SIZE = 3 @@ -2001,7 +2016,10 @@ class PyReader(DataLoaderBase): def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) + return paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) def random_image_and_label_generator(height, width): def generator(): @@ -2051,9 +2069,12 @@ class PyReader(DataLoaderBase): Example: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() + EPOCH_NUM = 3 ITER_NUM = 15 BATCH_SIZE = 3 @@ -2061,7 +2082,10 @@ class PyReader(DataLoaderBase): def network(image, label): # User-defined network, here is an example of softmax regression. predict = fluid.layers.fc(input=image, size=10, act='softmax') - return fluid.layers.cross_entropy(input=predict, label=label) + return paddle.nn.functional.cross_entropy( + input=predict, label=label, + reduction='none', use_softmax=False + ) def random_image_and_label_generator(height, width): def generator(): diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index 17afccde8a2..900ba20cf68 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -78,7 +78,10 @@ class L2DecayRegularizer(WeightDecayRegularizer): label = fluid.layers.data(name='label', shape=[1], dtype='int64') hidden = fluid.layers.fc(input=data, size=128, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = paddle.nn.functional.cross_entropy( + input=prediction, label=label, + reduction='none', use_softmax=False + ) avg_loss = paddle.mean(loss) optimizer = fluid.optimizer.Adagrad( learning_rate=1e-4, @@ -94,7 +97,7 @@ class L2DecayRegularizer(WeightDecayRegularizer): l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) - x = fluid.layers.uniform_random([3,4]) + x = paddle.uniform([3,4]) # set L1 regularization in fluid.ParamAttr w_param = fluid.ParamAttr(regularizer=l1) @@ -194,7 +197,10 @@ class L1DecayRegularizer(WeightDecayRegularizer): label = fluid.layers.data(name='label', shape=[1], dtype='int64') hidden = fluid.layers.fc(input=data, size=128, act='relu') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = paddle.nn.functional.cross_entropy( + input=prediction, label=label, + reduction='none', use_softmax=False + ) avg_loss = paddle.mean(loss) optimizer = fluid.optimizer.Adagrad( learning_rate=1e-4, @@ -209,7 +215,7 @@ class L1DecayRegularizer(WeightDecayRegularizer): paddle.enable_static() l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) - x = fluid.layers.uniform_random([3,4]) + x = paddle.uniform([3,4]) # set L1 regularization in fluid.ParamAttr w_param = fluid.ParamAttr(regularizer=l1) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py index 46c0b4fc58f..eee12356708 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py @@ -118,11 +118,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -286,11 +286,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -442,13 +442,13 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -640,7 +640,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -650,7 +650,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -833,7 +833,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -845,7 +845,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'cast', diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py index e1a4bcce65c..d59c074c03f 100755 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py @@ -78,11 +78,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -171,13 +171,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -268,11 +268,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -377,13 +377,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -498,13 +498,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -599,11 +599,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -686,11 +686,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -763,7 +763,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', ], ) @@ -925,11 +925,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -1034,11 +1034,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -1178,11 +1178,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -1399,13 +1399,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -1666,13 +1666,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'elementwise_add', 'softmax', 'cast', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'cast', 'softmax_grad', 'elementwise_add_grad', @@ -1897,13 +1897,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -2199,13 +2199,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -2414,13 +2414,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -2639,7 +2639,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -2649,7 +2649,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -2836,7 +2836,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -2846,7 +2846,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -3021,7 +3021,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -3031,7 +3031,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', @@ -3160,13 +3160,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'mul', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'coalesce_tensor', 'coalesce_tensor', 'fill_constant', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'mul_grad', @@ -3302,7 +3302,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'cast', 'elementwise_add', 'softmax', - 'cross_entropy2', + 'softmax_with_cross_entropy', 'reduce_mean', 'elementwise_mul', 'coalesce_tensor', @@ -3312,7 +3312,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): 'fill_constant', 'elementwise_mul_grad', 'reduce_mean_grad', - 'cross_entropy_grad2', + 'softmax_with_cross_entropy_grad', 'softmax_grad', 'elementwise_add_grad', 'cast', diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py index 8be67c3680d..81b5733bb90 100755 --- a/python/paddle/fluid/tests/unittests/dist_sharding_save.py +++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py @@ -50,8 +50,11 @@ def runtime_main(): prediction = paddle.fluid.layers.fc( input=[fc_2], size=2, act='softmax' ) - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y + cost = paddle.nn.functional.cross_entropy( + input=prediction, + label=input_y, + reduction='none', + use_softmax=False, ) avg_cost = paddle.mean(x=cost) diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py index ed3a83a5b42..33ab8c7a363 100644 --- a/python/paddle/fluid/tests/unittests/dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py @@ -95,8 +95,11 @@ class TestDistWord2vec2x2(TestDistRunnerBase): initializer=fluid.initializer.Constant(value=0.1) ), ) - cost = fluid.layers.cross_entropy( - input=predict_word, label=words[4] + cost = paddle.nn.functional.cross_entropy( + input=predict_word, + label=words[4], + reduction='none', + use_softmax=False, ) avg_cost = paddle.mean(cost) return avg_cost, predict_word diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py index 225bff65114..040e566f051 100755 --- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py +++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py @@ -69,8 +69,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc( input=[fc_2], size=2, act='softmax' ) - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y + cost = paddle.nn.functional.cross_entropy( + input=prediction, + label=input_y, + reduction='none', + use_softmax=False, ) avg_cost = paddle.mean(x=cost) @@ -104,8 +107,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): prediction = paddle.fluid.layers.fc( input=[input_x], size=2, act='softmax' ) - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y + cost = paddle.nn.functional.cross_entropy( + input=prediction, + label=input_y, + reduction='none', + use_softmax=False, ) avg_cost = paddle.mean(x=cost) -- GitLab