diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 287ddc9621964626e48c5053b238701ef9980526..d753b66fd7aa931387d12adea1c2cc8f0a59e5e5 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -133,13 +133,14 @@ class ErrorClipByValue(BaseErrorClipAttr):
             predict = fluid.layers.fc(
                 input=hidden2, size=10, act='softmax')
             label = fluid.layers.data(name='y', shape=[1], dtype='int64')
-            cost = fluid.layers.cross_entropy(input=predict, label=label)
+            cost = paddle.nn.functional.cross_entropy(input=predict, label=label, reduction='none', use_softmax=False)
             avg_cost = paddle.mean(cost)
             prog_clip = prog.clone()
             prog_clip.block(0).var(hidden1.name)._set_error_clip(
                 fluid.clip.ErrorClipByValue(
-                    max=CLIP_MAX, min=CLIP_MIN)
-                )
+                    max=CLIP_MAX, min=CLIP_MIN
+                )
+            )
     """

     def __init__(self, max, min=None):
diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index bd170b348ae620247b36a4a07f00716739c26d0e..b6c4688dac3feedfd51612ba6aa393bbc857bc56 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -1998,7 +1998,10 @@ def fused_bn_add_act(
                 data_layout='NHWC')
             fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn)
             prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax')
-            loss = fluid.layers.cross_entropy(input=prediction, label=y)
+            loss = paddle.nn.functional.cross_entropy(
+                input=prediction, label=y,
+                reduction='none', use_softmax=False
+            )
             loss = paddle.mean(loss)
             sgd = fluid.optimizer.SGD(learning_rate=0.001)
             sgd = fluid.contrib.mixed_precision.decorate(
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 5e301d834b3469f733485970b2d6b666bda71574..1e07431f7bbe8c958cdbb168faad165df41ce337 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -513,7 +513,10 @@ def save_params(executor, dirname, main_program=None, filename=None):

            feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
            predict = fluid.layers.fc(input=image, size=10, act='softmax')
-           loss = fluid.layers.cross_entropy(input=predict, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=predict, label=label,
+               reduction='none', use_softmax=False
+           )
            avg_loss = paddle.mean(loss)

            exe = fluid.Executor(fluid.CPUPlace())
@@ -748,7 +751,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):

            feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
            predict = fluid.layers.fc(input=image, size=10, act='softmax')
-           loss = fluid.layers.cross_entropy(input=predict, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=predict, label=label,
+               reduction='none', use_softmax=False
+           )
            avg_loss = paddle.mean(loss)
            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(fluid.default_startup_program())
@@ -1380,7 +1386,10 @@ def save_inference_model(

            feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
            predict = fluid.layers.fc(input=image, size=10, act='softmax')
-           loss = fluid.layers.cross_entropy(input=predict, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=predict, label=label,
+               reduction='none', use_softmax=False
+           )
            avg_loss = paddle.mean(loss)

            exe = fluid.Executor(fluid.CPUPlace())
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 1d24489638096f9f369ccfad9d11187fa5c25f94..87b534aa8f0f4926aeab9d4d2f31f1cc32630ddc 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -35,114 +35,12 @@ import warnings
 from paddle import _C_ops, _legacy_C_ops

 __all__ = [
-    'cross_entropy',
     'softmax_with_cross_entropy',
 ]

 kIgnoreIndex = -100


-def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
-    r"""
-    :alias_main: paddle.nn.functional.cross_entropy
-    :alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy
-    :old_api: paddle.fluid.layers.cross_entropy
-
-    This operator computes the cross entropy between input and label. It
-    supports both hard-label and and soft-label cross entropy computation.
-
-    1. Hard-label cross entropy: if soft_label=False, :math:`label[i_1, i_2, ..., i_k]`
-       is the hard label of each sample.
-
-        .. math::
-
-           output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index
-
-    2. Soft-label cross entropy: if soft_label=True, :math:`label[i_1, i_2, ..., i_k, j]`
-       is the soft label of each sample corresponding to the j-th class.
-
-        .. math::
-
-           output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])
-
-    Args:
-        input (Variable): a multidimensional Tensor with shape
-            :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
-            the class number. The data type should be float32 or float64.
-        label (Variable): label value corresponding to input. If
-            soft_label=False, the dimension of label should be :math:`[N_1, N_2, ..., N_k]`
-            or :math:`[N_1, N_2, ..., N_k, 1]` , and its data type should be int64,
-            and the value must be inside [0, D). If soft_label=True, the shape,
-            data type of label should be the same with input, and the sum of
-            soft label value of each sample should be 1.
-        soft_label (bool): indicate whether label is soft. Default False, meaning that
-            the label is hard. If soft_label=True, the label is soft.
-        ignore_index (int): specify an ignorable label value. The ignored label would be
-            omitted when computing. If it is a negative integer, no label would
-            be ignored. Only valid when soft_label=False. Default -100.
-
-    Returns:
-        A Variable holding Tensor representing the cross entropy, whose data type is the same with input.
-        If soft_label=False, the shape of output is the same with label.
-        If soft_label=True, the shape of output is :math:`[N_1, N_2, ..., N_k, 1]` .
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            class_num = 7
-            x = fluid.data(name='x', shape=[None, 3, 10], dtype='float32')
-            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
-            cost = fluid.layers.cross_entropy(input=predict, label=label)
-    """
-    if not soft_label:
-        return cross_entropy2(input, label, ignore_index)
-
-    if _non_static_mode():
-        return _legacy_C_ops.cross_entropy(
-            input, label, "soft_label", soft_label, "ignore_index", ignore_index
-        )
-
-    inputs = {'X': [input], 'Label': [label]}
-    attrs = {"soft_label": soft_label, "ignore_index": ignore_index}
-
-    check_variable_and_dtype(
-        input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy'
-    )
-    helper = LayerHelper('cross_entropy', **locals())
-    out = helper.create_variable_for_type_inference(dtype=input.dtype)
-    helper.append_op(
-        type='cross_entropy', inputs=inputs, outputs={'Y': [out]}, attrs=attrs
-    )
-    return out
-
-
-def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
-    if _non_static_mode():
-        loss, _, _ = _legacy_C_ops.cross_entropy2(
-            input, label, 'ignore_index', ignore_index
-        )
-        return loss
-
-    inputs = {'X': [input], 'Label': [label]}
-    attrs = {'ignore_index': ignore_index}
-    check_variable_and_dtype(
-        input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy2'
-    )
-    helper = LayerHelper('cross_entropy2', **locals())
-    out = helper.create_variable_for_type_inference(dtype=input.dtype)
-    xshape = helper.create_variable_for_type_inference(dtype=input.dtype)
-    match_x = helper.create_variable_for_type_inference(dtype=input.dtype)
-    helper.append_op(
-        type='cross_entropy2',
-        inputs=inputs,
-        outputs={'Y': [out], 'MatchX': [match_x], 'XShape': [xshape]},
-        attrs=attrs,
-    )
-    return out
-
-
 def softmax_with_cross_entropy(
     logits,
     label,
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 79f2cde1b1b6c2acf68095b05c1e4f53f1025b77..af085a357e6f4ebd7f96d98dbafec873c55fa197 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -6408,8 +6408,12 @@ class RecomputeOptimizer(Optimizer):
         Examples:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
                 import numpy as np
+
+                paddle.enable_static()
+
                 def gen_data():
                     return {"x": np.random.random(size=(32, 32)).astype('float32'),
                     "y": np.random.randint(2, size=(32, 1)).astype('int64')}
@@ -6417,8 +6421,11 @@ class RecomputeOptimizer(Optimizer):
                     print(input_x)
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction
                 input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
                 input_y = fluid.layers.data(name="y", shape=[1], dtype='int64')
@@ -6491,8 +6498,11 @@ class RecomputeOptimizer(Optimizer):
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction

                 input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
@@ -6526,14 +6536,20 @@ class RecomputeOptimizer(Optimizer):
         Examples:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
                 import paddle.fluid.framework as framework

+                paddle.enable_static()
+
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction
@@ -7015,13 +7031,19 @@ class RecomputeOptimizer(Optimizer):
         Examples:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
+
+                paddle.enable_static()

                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction
@@ -7091,13 +7113,19 @@ class RecomputeOptimizer(Optimizer):
             params_grads (list): list of (param, grad) pair to do optimization.
         Examples:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
+
+                paddle.enable_static()

                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction

                 input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
@@ -7190,7 +7218,10 @@ class LookaheadOptimizer:
            x = fluid.layers.data(name='x', shape=[2], dtype='float32')
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            y = fluid.layers.fc(input=[x], size=2, act="softmax")
-           loss = fluid.layers.cross_entropy(input=y, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=y, label=label,
+               reduction='none', use_softmax=False
+           )
            loss = paddle.mean(x=loss)
            sgd = fluid.optimizer.SGD(learning_rate=0.01)
            optimizer = fluid.optimizer.LookaheadOptimizer(sgd,
@@ -7357,6 +7388,7 @@ class GradientMergeOptimizer:
         Examples:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
                 import numpy as np
@@ -7367,8 +7399,11 @@ class GradientMergeOptimizer:
                 def mlp(input_x, input_y, hid_dim=128, label_dim=2):
                     fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
                     prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
-                    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
-                    sum_cost = fluid.layers.reduce_mean(cost)
+                    cost = paddle.nn.functional.cross_entropy(
+                        input=prediction, label=input_y,
+                        reduction='none', use_softmax=False
+                    )
+                    sum_cost = paddle.mean(cost)
                     return sum_cost, fc_1, prediction

                 input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py
index 356a68c9004195e7c6cf8bbfdaf6bb99017723a4..66427426cbbfff9821935e639ebaa129cf9a59b9 100644
--- a/python/paddle/fluid/reader.py
+++ b/python/paddle/fluid/reader.py
@@ -1684,6 +1684,8 @@ class PyReader(DataLoaderBase):
             import paddle.fluid as fluid
             import numpy as np

+            paddle.enable_static()
+
             EPOCH_NUM = 3
             ITER_NUM = 5
             BATCH_SIZE = 3
@@ -1691,7 +1693,10 @@ class PyReader(DataLoaderBase):
             def network(image, label):
                 # User-defined network, here is an example of softmax regression.
                 predict = fluid.layers.fc(input=image, size=10, act='softmax')
-                return fluid.layers.cross_entropy(input=predict, label=label)
+                return paddle.nn.functional.cross_entropy(
+                    input=predict, label=label,
+                    reduction='none', use_softmax=False
+                )

             def reader_creator_random_image_and_label(height, width):
                 def reader():
@@ -1738,6 +1743,8 @@ class PyReader(DataLoaderBase):
             import paddle.fluid as fluid
             import numpy as np

+            paddle.enable_static()
+
             EPOCH_NUM = 3
             ITER_NUM = 5
             BATCH_SIZE = 10
@@ -1745,7 +1752,10 @@ class PyReader(DataLoaderBase):
             def network(image, label):
                 # User-defined network, here is an example of softmax regression.
                 predict = fluid.layers.fc(input=image, size=10, act='softmax')
-                return fluid.layers.cross_entropy(input=predict, label=label)
+                return paddle.nn.functional.cross_entropy(
+                    input=predict, label=label,
+                    reduction='none', use_softmax=False
+                )

             def reader_creator_random_image(height, width):
                 def reader():
@@ -1939,7 +1949,10 @@ class PyReader(DataLoaderBase):
             def network(image, label):
                 # User-defined network, here is an example of softmax regression.
                 predict = fluid.layers.fc(input=image, size=10, act='softmax')
-                return fluid.layers.cross_entropy(input=predict, label=label)
+                return paddle.nn.functional.cross_entropy(
+                    input=predict, label=label,
+                    reduction='none', use_softmax=False
+                )

             def random_image_and_label_generator(height, width):
                 def generator():
@@ -1994,6 +2007,8 @@ class PyReader(DataLoaderBase):
             import paddle.fluid as fluid
             import numpy as np

+            paddle.enable_static()
+
             EPOCH_NUM = 3
             ITER_NUM = 15
             BATCH_SIZE = 3
@@ -2001,7 +2016,10 @@ class PyReader(DataLoaderBase):
             def network(image, label):
                 # User-defined network, here is an example of softmax regression.
                 predict = fluid.layers.fc(input=image, size=10, act='softmax')
-                return fluid.layers.cross_entropy(input=predict, label=label)
+                return paddle.nn.functional.cross_entropy(
+                    input=predict, label=label,
+                    reduction='none', use_softmax=False
+                )

             def random_image_and_label_generator(height, width):
                 def generator():
@@ -2051,9 +2069,12 @@ class PyReader(DataLoaderBase):
         Example:
             .. code-block:: python

+                import paddle
                 import paddle.fluid as fluid
                 import numpy as np

+                paddle.enable_static()
+
                 EPOCH_NUM = 3
                 ITER_NUM = 15
                 BATCH_SIZE = 3
@@ -2061,7 +2082,10 @@ class PyReader(DataLoaderBase):
                 def network(image, label):
                     # User-defined network, here is an example of softmax regression.
                     predict = fluid.layers.fc(input=image, size=10, act='softmax')
-                    return fluid.layers.cross_entropy(input=predict, label=label)
+                    return paddle.nn.functional.cross_entropy(
+                        input=predict, label=label,
+                        reduction='none', use_softmax=False
+                    )

                 def random_image_and_label_generator(height, width):
                     def generator():
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 17afccde8a229e79153815f596d66170b0de4fd9..900ba20cf6884bba77bc83155d3ea65a3b4e1495 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -78,7 +78,10 @@ class L2DecayRegularizer(WeightDecayRegularizer):
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            hidden = fluid.layers.fc(input=data, size=128, act='relu')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-           loss = fluid.layers.cross_entropy(input=prediction, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=prediction, label=label,
+               reduction='none', use_softmax=False
+           )
            avg_loss = paddle.mean(loss)
            optimizer = fluid.optimizer.Adagrad(
                learning_rate=1e-4,
@@ -94,7 +97,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):

            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
-           x = fluid.layers.uniform_random([3,4])
+           x = paddle.uniform([3,4])

            # set L1 regularization in fluid.ParamAttr
            w_param = fluid.ParamAttr(regularizer=l1)
@@ -194,7 +197,10 @@ class L1DecayRegularizer(WeightDecayRegularizer):
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            hidden = fluid.layers.fc(input=data, size=128, act='relu')
            prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-           loss = fluid.layers.cross_entropy(input=prediction, label=label)
+           loss = paddle.nn.functional.cross_entropy(
+               input=prediction, label=label,
+               reduction='none', use_softmax=False
+           )
            avg_loss = paddle.mean(loss)
            optimizer = fluid.optimizer.Adagrad(
                learning_rate=1e-4,
@@ -209,7 +215,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
            paddle.enable_static()
            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
-           x = fluid.layers.uniform_random([3,4])
+           x = paddle.uniform([3,4])

            # set L1 regularization in fluid.ParamAttr
            w_param = fluid.ParamAttr(regularizer=l1)
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py
index 46c0b4fc58f65b6c1e46a5d2c0de60b3b6b63704..eee1235670805f8d66b8206bbdd954129adfba97 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_hybrid_meta_optimizer.py
@@ -118,11 +118,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -286,11 +286,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -442,13 +442,13 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -640,7 +640,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -650,7 +650,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -833,7 +833,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -845,7 +845,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'cast',
diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py
index e1a4bcce65c9247c683411ec3b8b570b8058063f..d59c074c03f11dd5ce9acc635216a417e7437f07 100755
--- a/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_fleet_sharding_meta_optimizer.py
@@ -78,11 +78,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -171,13 +171,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -268,11 +268,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -377,13 +377,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -498,13 +498,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -599,11 +599,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -686,11 +686,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -763,7 +763,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
             ],
         )
@@ -925,11 +925,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -1034,11 +1034,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -1178,11 +1178,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -1399,13 +1399,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -1666,13 +1666,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'elementwise_add',
                 'softmax',
                 'cast',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'cast',
                 'softmax_grad',
                 'elementwise_add_grad',
@@ -1897,13 +1897,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -2199,13 +2199,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -2414,13 +2414,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -2639,7 +2639,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -2649,7 +2649,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -2836,7 +2836,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -2846,7 +2846,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -3021,7 +3021,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -3031,7 +3031,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
@@ -3160,13 +3160,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'mul',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'coalesce_tensor',
                 'coalesce_tensor',
                 'fill_constant',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'mul_grad',
@@ -3302,7 +3302,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'cast',
                 'elementwise_add',
                 'softmax',
-                'cross_entropy2',
+                'softmax_with_cross_entropy',
                 'reduce_mean',
                 'elementwise_mul',
                 'coalesce_tensor',
@@ -3312,7 +3312,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
                 'fill_constant',
                 'elementwise_mul_grad',
                 'reduce_mean_grad',
-                'cross_entropy_grad2',
+                'softmax_with_cross_entropy_grad',
                 'softmax_grad',
                 'elementwise_add_grad',
                 'cast',
diff --git a/python/paddle/fluid/tests/unittests/dist_sharding_save.py b/python/paddle/fluid/tests/unittests/dist_sharding_save.py
index 8be67c3680de45841f43a28a3fcc16beafc017d6..81b5733bb9068a7658b7a68e8918f20ac41edd14 100755
--- a/python/paddle/fluid/tests/unittests/dist_sharding_save.py
+++ b/python/paddle/fluid/tests/unittests/dist_sharding_save.py
@@ -50,8 +50,11 @@ def runtime_main():
            prediction = paddle.fluid.layers.fc(
                input=[fc_2], size=2, act='softmax'
            )
-           cost = paddle.fluid.layers.cross_entropy(
-               input=prediction, label=input_y
+           cost = paddle.nn.functional.cross_entropy(
+               input=prediction,
+               label=input_y,
+               reduction='none',
+               use_softmax=False,
            )
            avg_cost = paddle.mean(x=cost)
diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py
index ed3a83a5b42a9fd67233db5d85d29dcc4de3fdd6..33ab8c7a363b27f8ca9def470884eedc21d6afcb 100644
--- a/python/paddle/fluid/tests/unittests/dist_word2vec.py
+++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py
@@ -95,8 +95,11 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
                    initializer=fluid.initializer.Constant(value=0.1)
                ),
            )
-           cost = fluid.layers.cross_entropy(
-               input=predict_word, label=words[4]
+           cost = paddle.nn.functional.cross_entropy(
+               input=predict_word,
+               label=words[4],
+               reduction='none',
+               use_softmax=False,
            )
            avg_cost = paddle.mean(cost)
            return avg_cost, predict_word
diff --git a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
index 225bff65114bd06cfdb6ad22abd936813a99093a..040e566f0512713091850a5efcc0ff564b1f46be 100755
--- a/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
+++ b/python/paddle/fluid/tests/unittests/fleet_meta_optimizer_base.py
@@ -69,8 +69,11 @@ class TestFleetMetaOptimizer(unittest.TestCase):
                prediction = paddle.fluid.layers.fc(
                    input=[fc_2], size=2, act='softmax'
                )
-               cost = paddle.fluid.layers.cross_entropy(
-                   input=prediction, label=input_y
+               cost = paddle.nn.functional.cross_entropy(
+                   input=prediction,
+                   label=input_y,
+                   reduction='none',
+                   use_softmax=False,
                )
                avg_cost = paddle.mean(x=cost)
@@ -104,8 +107,11 @@ class TestFleetMetaOptimizer(unittest.TestCase):
                prediction = paddle.fluid.layers.fc(
                    input=[input_x], size=2, act='softmax'
                )
-               cost = paddle.fluid.layers.cross_entropy(
-                   input=prediction, label=input_y
+               cost = paddle.nn.functional.cross_entropy(
+                   input=prediction,
+                   label=input_y,
+                   reduction='none',
+                   use_softmax=False,
                )
                avg_cost = paddle.mean(x=cost)
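
Reviewer note (appended after the patch, not part of it): every call-site change above follows one pattern — `fluid.layers.cross_entropy(input, label)` becomes `paddle.nn.functional.cross_entropy(input, label, reduction='none', use_softmax=False)`. The two extra arguments are what keep behavior unchanged: the removed op consumed already-softmaxed probabilities and returned an unreduced per-sample loss, while the functional API defaults to applying softmax itself and taking the mean. The snippet below is a minimal dygraph sketch of that equivalence; the probability values are illustrative only, and it assumes a Paddle 2.x install.

    # Check that the replacement call reproduces the old hard-label
    # semantics: loss[i] = -log(prob[i, label[i]]), with no softmax
    # applied to the input and no reduction over the batch.
    import numpy as np
    import paddle
    import paddle.nn.functional as F

    # Rows are already softmax-normalized, as the output of
    # fluid.layers.fc(act='softmax') would be.
    probs = paddle.to_tensor(
        [[0.7, 0.2, 0.1],
         [0.1, 0.8, 0.1]], dtype='float32'
    )
    label = paddle.to_tensor([0, 1], dtype='int64')

    # use_softmax=False: treat `input` as probabilities, like the old op;
    # reduction='none': keep the per-sample loss instead of its mean.
    loss = F.cross_entropy(probs, label, reduction='none', use_softmax=False)

    expected = -np.log(np.array([0.7, 0.8], dtype='float32'))
    assert np.allclose(loss.numpy().reshape(-1), expected, atol=1e-6)

The unit-test updates are consistent with this: once the test networks build their loss through the functional API, the static graph no longer contains the `cross_entropy2`/`cross_entropy_grad2` ops, and the expected op lists switch to `softmax_with_cross_entropy`/`softmax_with_cross_entropy_grad`, which appears to be the operator the functional API lowers to in static graphs, judging by these expected-op changes.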