未验证 提交 0c1cb5e3 编写于 作者: K kangguangli 提交者: GitHub

[remove fluid.layers.cross_entropy] remove sample code and api (#49018)

* remove reference in sample code for `fluid.layers.cross_entropy`

* fix error

* fix doc codestyle and remove api `layers.cross_entropy` && `layers.cross_entropy2`

* fix unittest test_fleet_hybrid_meta_optimizer && test_fleet_sharding_meta_optimizer
上级 f665a6a5
...@@ -133,12 +133,13 @@ class ErrorClipByValue(BaseErrorClipAttr): ...@@ -133,12 +133,13 @@ class ErrorClipByValue(BaseErrorClipAttr):
predict = fluid.layers.fc( predict = fluid.layers.fc(
input=hidden2, size=10, act='softmax') input=hidden2, size=10, act='softmax')
label = fluid.layers.data(name='y', shape=[1], dtype='int64') label = fluid.layers.data(name='y', shape=[1], dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label) cost = paddle.nn.functional.cross_entropy(input=predict, label=label, reduction='none', use_softmax=False)
avg_cost = paddle.mean(cost) avg_cost = paddle.mean(cost)
prog_clip = prog.clone() prog_clip = prog.clone()
prog_clip.block(0).var(hidden1.name)._set_error_clip( prog_clip.block(0).var(hidden1.name)._set_error_clip(
fluid.clip.ErrorClipByValue( fluid.clip.ErrorClipByValue(
max=CLIP_MAX, min=CLIP_MIN) max=CLIP_MAX, min=CLIP_MIN
)
) )
""" """
......
...@@ -1998,7 +1998,10 @@ def fused_bn_add_act( ...@@ -1998,7 +1998,10 @@ def fused_bn_add_act(
data_layout='NHWC') data_layout='NHWC')
fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn) fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn)
prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax') prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=y) loss = paddle.nn.functional.cross_entropy(
input=prediction, label=y,
reduction='none', use_softmax=False
)
loss = paddle.mean(loss) loss = paddle.mean(loss)
sgd = fluid.optimizer.SGD(learning_rate=0.001) sgd = fluid.optimizer.SGD(learning_rate=0.001)
sgd = fluid.contrib.mixed_precision.decorate( sgd = fluid.contrib.mixed_precision.decorate(
......
...@@ -513,7 +513,10 @@ def save_params(executor, dirname, main_program=None, filename=None): ...@@ -513,7 +513,10 @@ def save_params(executor, dirname, main_program=None, filename=None):
feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=predict, label=label) loss = paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
exe = fluid.Executor(fluid.CPUPlace()) exe = fluid.Executor(fluid.CPUPlace())
...@@ -748,7 +751,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None): ...@@ -748,7 +751,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=predict, label=label) loss = paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
exe = fluid.Executor(fluid.CPUPlace()) exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -1380,7 +1386,10 @@ def save_inference_model( ...@@ -1380,7 +1386,10 @@ def save_inference_model(
feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace()) feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=predict, label=label) loss = paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
exe = fluid.Executor(fluid.CPUPlace()) exe = fluid.Executor(fluid.CPUPlace())
......
...@@ -35,114 +35,12 @@ import warnings ...@@ -35,114 +35,12 @@ import warnings
from paddle import _C_ops, _legacy_C_ops from paddle import _C_ops, _legacy_C_ops
__all__ = [ __all__ = [
'cross_entropy',
'softmax_with_cross_entropy', 'softmax_with_cross_entropy',
] ]
kIgnoreIndex = -100 kIgnoreIndex = -100
def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
    r"""
    :alias_main: paddle.nn.functional.cross_entropy
    :alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy
    :old_api: paddle.fluid.layers.cross_entropy

    Compute the cross entropy between ``input`` and ``label``. Both
    hard-label and soft-label cross entropy are supported.

    1. Hard-label cross entropy: when soft_label=False,
       :math:`label[i_1, i_2, ..., i_k]` is the hard label of each sample.

        .. math::

           output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index

    2. Soft-label cross entropy: when soft_label=True,
       :math:`label[i_1, i_2, ..., i_k, j]` is the soft label of each sample
       for the j-th class.

        .. math::

           output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])

    Args:
        input (Variable): a multidimensional Tensor of shape
            :math:`[N_1, N_2, ..., N_k, D]`, where the last dimension D is
            the class number. The data type should be float32 or float64.
        label (Variable): label value corresponding to input. When
            soft_label=False, its shape should be :math:`[N_1, N_2, ..., N_k]`
            or :math:`[N_1, N_2, ..., N_k, 1]`, its data type should be int64,
            and every value must lie in [0, D). When soft_label=True, its
            shape and data type should match input, and the soft label
            values of each sample should sum to 1.
        soft_label (bool): whether the label is soft. Default False, meaning
            the label is hard. If soft_label=True, the label is soft.
        ignore_index (int): a label value to ignore. Samples carrying this
            label are omitted from the computation. If it is a negative
            integer, no label is ignored. Only valid when soft_label=False.
            Default -100.

    Returns:
        A Variable holding a Tensor with the cross entropy, whose data type
        matches input.
        If soft_label=False, the output has the same shape as label.
        If soft_label=True, the output shape is :math:`[N_1, N_2, ..., N_k, 1]` .

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            class_num = 7
            x = fluid.data(name='x', shape=[None, 3, 10], dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
    """
    if soft_label:
        # Soft labels are served by the `cross_entropy` op directly.
        if _non_static_mode():
            return _legacy_C_ops.cross_entropy(
                input,
                label,
                "soft_label",
                soft_label,
                "ignore_index",
                ignore_index,
            )

        # NOTE: every local defined up to the LayerHelper call is forwarded
        # through **locals(), so the local names here are kept as they were.
        inputs = {'X': [input], 'Label': [label]}
        attrs = {"soft_label": soft_label, "ignore_index": ignore_index}
        check_variable_and_dtype(
            input, 'input', ['float16', 'float32', 'float64'], 'cross_entropy'
        )
        helper = LayerHelper('cross_entropy', **locals())
        out = helper.create_variable_for_type_inference(dtype=input.dtype)
        helper.append_op(
            type='cross_entropy',
            inputs=inputs,
            outputs={'Y': [out]},
            attrs=attrs,
        )
        return out

    # Hard labels are delegated to the dedicated `cross_entropy2` path.
    return cross_entropy2(input, label, ignore_index)
def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
    """Hard-label cross entropy backed by the ``cross_entropy2`` op.

    Args:
        input: value fed to the op as ``X``; in static mode its dtype is
            checked against float16/float32/float64.
        label: value fed to the op as ``Label``.
        ignore_index: forwarded to the op as the ``ignore_index`` attribute.

    Returns:
        In non-static mode, the first of the three results returned by
        ``_legacy_C_ops.cross_entropy2``. In static mode, the variable bound
        to the op's ``Y`` output.
    """
    if not _non_static_mode():
        # NOTE: every local defined up to the LayerHelper call is forwarded
        # through **locals(), so the local names here are kept as they were.
        inputs = {'X': [input], 'Label': [label]}
        attrs = {'ignore_index': ignore_index}
        check_variable_and_dtype(
            input,
            'input',
            ['float16', 'float32', 'float64'],
            'cross_entropy2',
        )
        helper = LayerHelper('cross_entropy2', **locals())

        # The three output variables are created in the same order as before:
        # Y first, then XShape, then MatchX.
        out = helper.create_variable_for_type_inference(dtype=input.dtype)
        x_shape_var = helper.create_variable_for_type_inference(
            dtype=input.dtype
        )
        match_x_var = helper.create_variable_for_type_inference(
            dtype=input.dtype
        )
        op_outputs = {
            'Y': [out],
            'MatchX': [match_x_var],
            'XShape': [x_shape_var],
        }
        helper.append_op(
            type='cross_entropy2',
            inputs=inputs,
            outputs=op_outputs,
            attrs=attrs,
        )
        return out

    # Non-static mode: the op yields three results and only the first is returned.
    loss, _, _ = _legacy_C_ops.cross_entropy2(
        input, label, 'ignore_index', ignore_index
    )
    return loss
def softmax_with_cross_entropy( def softmax_with_cross_entropy(
logits, logits,
label, label,
......
...@@ -6408,8 +6408,12 @@ class RecomputeOptimizer(Optimizer): ...@@ -6408,8 +6408,12 @@ class RecomputeOptimizer(Optimizer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
def gen_data(): def gen_data():
return {"x": np.random.random(size=(32, 32)).astype('float32'), return {"x": np.random.random(size=(32, 32)).astype('float32'),
"y": np.random.randint(2, size=(32, 1)).astype('int64')} "y": np.random.randint(2, size=(32, 1)).astype('int64')}
...@@ -6417,8 +6421,11 @@ class RecomputeOptimizer(Optimizer): ...@@ -6417,8 +6421,11 @@ class RecomputeOptimizer(Optimizer):
print(input_x) print(input_x)
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') input_y = fluid.layers.data(name="y", shape=[1], dtype='int64')
...@@ -6491,8 +6498,11 @@ class RecomputeOptimizer(Optimizer): ...@@ -6491,8 +6498,11 @@ class RecomputeOptimizer(Optimizer):
def mlp(input_x, input_y, hid_dim=128, label_dim=2): def mlp(input_x, input_y, hid_dim=128, label_dim=2):
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
...@@ -6526,14 +6536,20 @@ class RecomputeOptimizer(Optimizer): ...@@ -6526,14 +6536,20 @@ class RecomputeOptimizer(Optimizer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.framework as framework import paddle.fluid.framework as framework
paddle.enable_static()
def mlp(input_x, input_y, hid_dim=128, label_dim=2): def mlp(input_x, input_y, hid_dim=128, label_dim=2):
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
...@@ -7015,13 +7031,19 @@ class RecomputeOptimizer(Optimizer): ...@@ -7015,13 +7031,19 @@ class RecomputeOptimizer(Optimizer):
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
paddle.enable_static()
def mlp(input_x, input_y, hid_dim=128, label_dim=2): def mlp(input_x, input_y, hid_dim=128, label_dim=2):
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
...@@ -7091,13 +7113,19 @@ class RecomputeOptimizer(Optimizer): ...@@ -7091,13 +7113,19 @@ class RecomputeOptimizer(Optimizer):
params_grads (list): list of (param, grad) pair to do optimization. params_grads (list): list of (param, grad) pair to do optimization.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
paddle.enable_static()
def mlp(input_x, input_y, hid_dim=128, label_dim=2): def mlp(input_x, input_y, hid_dim=128, label_dim=2):
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
...@@ -7190,7 +7218,10 @@ class LookaheadOptimizer: ...@@ -7190,7 +7218,10 @@ class LookaheadOptimizer:
x = fluid.layers.data(name='x', shape=[2], dtype='float32') x = fluid.layers.data(name='x', shape=[2], dtype='float32')
label = fluid.layers.data(name="label", shape=[1], dtype="int64") label = fluid.layers.data(name="label", shape=[1], dtype="int64")
y = fluid.layers.fc(input=[x], size=2, act="softmax") y = fluid.layers.fc(input=[x], size=2, act="softmax")
loss = fluid.layers.cross_entropy(input=y, label=label) loss = paddle.nn.functional.cross_entropy(
input=y, label=label,
reduction='none', use_softmax=False
)
loss = paddle.mean(x=loss) loss = paddle.mean(x=loss)
sgd = fluid.optimizer.SGD(learning_rate=0.01) sgd = fluid.optimizer.SGD(learning_rate=0.01)
optimizer = fluid.optimizer.LookaheadOptimizer(sgd, optimizer = fluid.optimizer.LookaheadOptimizer(sgd,
...@@ -7357,6 +7388,7 @@ class GradientMergeOptimizer: ...@@ -7357,6 +7388,7 @@ class GradientMergeOptimizer:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
...@@ -7367,8 +7399,11 @@ class GradientMergeOptimizer: ...@@ -7367,8 +7399,11 @@ class GradientMergeOptimizer:
def mlp(input_x, input_y, hid_dim=128, label_dim=2): def mlp(input_x, input_y, hid_dim=128, label_dim=2):
fc_1 = fluid.layers.fc(input=input_x, size=hid_dim) fc_1 = fluid.layers.fc(input=input_x, size=hid_dim)
prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax') prediction = fluid.layers.fc(input=[fc_1], size=label_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=input_y) cost = paddle.nn.functional.cross_entropy(
sum_cost = fluid.layers.reduce_mean(cost) input=prediction, label=input_y,
reduction='none', use_softmax=False
)
sum_cost = paddle.mean(cost)
return sum_cost, fc_1, prediction return sum_cost, fc_1, prediction
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_x = fluid.layers.data(name="x", shape=[32], dtype='float32')
......
...@@ -1684,6 +1684,8 @@ class PyReader(DataLoaderBase): ...@@ -1684,6 +1684,8 @@ class PyReader(DataLoaderBase):
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
EPOCH_NUM = 3 EPOCH_NUM = 3
ITER_NUM = 5 ITER_NUM = 5
BATCH_SIZE = 3 BATCH_SIZE = 3
...@@ -1691,7 +1693,10 @@ class PyReader(DataLoaderBase): ...@@ -1691,7 +1693,10 @@ class PyReader(DataLoaderBase):
def network(image, label): def network(image, label):
# User-defined network, here is an example of softmax regression. # User-defined network, here is an example of softmax regression.
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label) return paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
def reader_creator_random_image_and_label(height, width): def reader_creator_random_image_and_label(height, width):
def reader(): def reader():
...@@ -1738,6 +1743,8 @@ class PyReader(DataLoaderBase): ...@@ -1738,6 +1743,8 @@ class PyReader(DataLoaderBase):
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
EPOCH_NUM = 3 EPOCH_NUM = 3
ITER_NUM = 5 ITER_NUM = 5
BATCH_SIZE = 10 BATCH_SIZE = 10
...@@ -1745,7 +1752,10 @@ class PyReader(DataLoaderBase): ...@@ -1745,7 +1752,10 @@ class PyReader(DataLoaderBase):
def network(image, label): def network(image, label):
# User-defined network, here is an example of softmax regression. # User-defined network, here is an example of softmax regression.
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label) return paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
def reader_creator_random_image(height, width): def reader_creator_random_image(height, width):
def reader(): def reader():
...@@ -1939,7 +1949,10 @@ class PyReader(DataLoaderBase): ...@@ -1939,7 +1949,10 @@ class PyReader(DataLoaderBase):
def network(image, label): def network(image, label):
# User-defined network, here is an example of softmax regression. # User-defined network, here is an example of softmax regression.
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label) return paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
def random_image_and_label_generator(height, width): def random_image_and_label_generator(height, width):
def generator(): def generator():
...@@ -1994,6 +2007,8 @@ class PyReader(DataLoaderBase): ...@@ -1994,6 +2007,8 @@ class PyReader(DataLoaderBase):
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
EPOCH_NUM = 3 EPOCH_NUM = 3
ITER_NUM = 15 ITER_NUM = 15
BATCH_SIZE = 3 BATCH_SIZE = 3
...@@ -2001,7 +2016,10 @@ class PyReader(DataLoaderBase): ...@@ -2001,7 +2016,10 @@ class PyReader(DataLoaderBase):
def network(image, label): def network(image, label):
# User-defined network, here is an example of softmax regression. # User-defined network, here is an example of softmax regression.
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label) return paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
def random_image_and_label_generator(height, width): def random_image_and_label_generator(height, width):
def generator(): def generator():
...@@ -2051,9 +2069,12 @@ class PyReader(DataLoaderBase): ...@@ -2051,9 +2069,12 @@ class PyReader(DataLoaderBase):
Example: Example:
.. code-block:: python .. code-block:: python
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
paddle.enable_static()
EPOCH_NUM = 3 EPOCH_NUM = 3
ITER_NUM = 15 ITER_NUM = 15
BATCH_SIZE = 3 BATCH_SIZE = 3
...@@ -2061,7 +2082,10 @@ class PyReader(DataLoaderBase): ...@@ -2061,7 +2082,10 @@ class PyReader(DataLoaderBase):
def network(image, label): def network(image, label):
# User-defined network, here is an example of softmax regression. # User-defined network, here is an example of softmax regression.
predict = fluid.layers.fc(input=image, size=10, act='softmax') predict = fluid.layers.fc(input=image, size=10, act='softmax')
return fluid.layers.cross_entropy(input=predict, label=label) return paddle.nn.functional.cross_entropy(
input=predict, label=label,
reduction='none', use_softmax=False
)
def random_image_and_label_generator(height, width): def random_image_and_label_generator(height, width):
def generator(): def generator():
......
...@@ -78,7 +78,10 @@ class L2DecayRegularizer(WeightDecayRegularizer): ...@@ -78,7 +78,10 @@ class L2DecayRegularizer(WeightDecayRegularizer):
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = fluid.layers.fc(input=data, size=128, act='relu') hidden = fluid.layers.fc(input=data, size=128, act='relu')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label,
reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
optimizer = fluid.optimizer.Adagrad( optimizer = fluid.optimizer.Adagrad(
learning_rate=1e-4, learning_rate=1e-4,
...@@ -94,7 +97,7 @@ class L2DecayRegularizer(WeightDecayRegularizer): ...@@ -94,7 +97,7 @@ class L2DecayRegularizer(WeightDecayRegularizer):
l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
x = fluid.layers.uniform_random([3,4]) x = paddle.uniform([3,4])
# set L1 regularization in fluid.ParamAttr # set L1 regularization in fluid.ParamAttr
w_param = fluid.ParamAttr(regularizer=l1) w_param = fluid.ParamAttr(regularizer=l1)
...@@ -194,7 +197,10 @@ class L1DecayRegularizer(WeightDecayRegularizer): ...@@ -194,7 +197,10 @@ class L1DecayRegularizer(WeightDecayRegularizer):
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = fluid.layers.fc(input=data, size=128, act='relu') hidden = fluid.layers.fc(input=data, size=128, act='relu')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label,
reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
optimizer = fluid.optimizer.Adagrad( optimizer = fluid.optimizer.Adagrad(
learning_rate=1e-4, learning_rate=1e-4,
...@@ -209,7 +215,7 @@ class L1DecayRegularizer(WeightDecayRegularizer): ...@@ -209,7 +215,7 @@ class L1DecayRegularizer(WeightDecayRegularizer):
paddle.enable_static() paddle.enable_static()
l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1) l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1) l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
x = fluid.layers.uniform_random([3,4]) x = paddle.uniform([3,4])
# set L1 regularization in fluid.ParamAttr # set L1 regularization in fluid.ParamAttr
w_param = fluid.ParamAttr(regularizer=l1) w_param = fluid.ParamAttr(regularizer=l1)
......
...@@ -118,11 +118,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -118,11 +118,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -286,11 +286,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -286,11 +286,11 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -442,13 +442,13 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -442,13 +442,13 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -640,7 +640,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -640,7 +640,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -650,7 +650,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -650,7 +650,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -833,7 +833,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -833,7 +833,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -845,7 +845,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer): ...@@ -845,7 +845,7 @@ class TestFleetHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'cast', 'cast',
......
...@@ -78,11 +78,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -78,11 +78,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -171,13 +171,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -171,13 +171,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -268,11 +268,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -268,11 +268,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -377,13 +377,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -377,13 +377,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -498,13 +498,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -498,13 +498,13 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -599,11 +599,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -599,11 +599,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -686,11 +686,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -686,11 +686,11 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -763,7 +763,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): ...@@ -763,7 +763,7 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
], ],
) )
...@@ -925,11 +925,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -925,11 +925,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -1034,11 +1034,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -1034,11 +1034,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -1178,11 +1178,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -1178,11 +1178,11 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -1399,13 +1399,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -1399,13 +1399,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -1666,13 +1666,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -1666,13 +1666,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cast', 'cast',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'cast', 'cast',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
...@@ -1897,13 +1897,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -1897,13 +1897,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -2199,13 +2199,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2199,13 +2199,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -2414,13 +2414,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2414,13 +2414,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -2639,7 +2639,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2639,7 +2639,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -2649,7 +2649,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2649,7 +2649,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -2836,7 +2836,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2836,7 +2836,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -2846,7 +2846,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -2846,7 +2846,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -3021,7 +3021,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -3021,7 +3021,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -3031,7 +3031,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -3031,7 +3031,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
...@@ -3160,13 +3160,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -3160,13 +3160,13 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'mul', 'mul',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'coalesce_tensor', 'coalesce_tensor',
'coalesce_tensor', 'coalesce_tensor',
'fill_constant', 'fill_constant',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'mul_grad', 'mul_grad',
...@@ -3302,7 +3302,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -3302,7 +3302,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'cast', 'cast',
'elementwise_add', 'elementwise_add',
'softmax', 'softmax',
'cross_entropy2', 'softmax_with_cross_entropy',
'reduce_mean', 'reduce_mean',
'elementwise_mul', 'elementwise_mul',
'coalesce_tensor', 'coalesce_tensor',
...@@ -3312,7 +3312,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer): ...@@ -3312,7 +3312,7 @@ class TestFleetShardingHybridOptimizer(TestFleetMetaOptimizer):
'fill_constant', 'fill_constant',
'elementwise_mul_grad', 'elementwise_mul_grad',
'reduce_mean_grad', 'reduce_mean_grad',
'cross_entropy_grad2', 'softmax_with_cross_entropy_grad',
'softmax_grad', 'softmax_grad',
'elementwise_add_grad', 'elementwise_add_grad',
'cast', 'cast',
......
...@@ -50,8 +50,11 @@ def runtime_main(): ...@@ -50,8 +50,11 @@ def runtime_main():
prediction = paddle.fluid.layers.fc( prediction = paddle.fluid.layers.fc(
input=[fc_2], size=2, act='softmax' input=[fc_2], size=2, act='softmax'
) )
cost = paddle.fluid.layers.cross_entropy( cost = paddle.nn.functional.cross_entropy(
input=prediction, label=input_y input=prediction,
label=input_y,
reduction='none',
use_softmax=False,
) )
avg_cost = paddle.mean(x=cost) avg_cost = paddle.mean(x=cost)
......
...@@ -95,8 +95,11 @@ class TestDistWord2vec2x2(TestDistRunnerBase): ...@@ -95,8 +95,11 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
initializer=fluid.initializer.Constant(value=0.1) initializer=fluid.initializer.Constant(value=0.1)
), ),
) )
cost = fluid.layers.cross_entropy( cost = paddle.nn.functional.cross_entropy(
input=predict_word, label=words[4] input=predict_word,
label=words[4],
reduction='none',
use_softmax=False,
) )
avg_cost = paddle.mean(cost) avg_cost = paddle.mean(cost)
return avg_cost, predict_word return avg_cost, predict_word
......
...@@ -69,8 +69,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): ...@@ -69,8 +69,11 @@ class TestFleetMetaOptimizer(unittest.TestCase):
prediction = paddle.fluid.layers.fc( prediction = paddle.fluid.layers.fc(
input=[fc_2], size=2, act='softmax' input=[fc_2], size=2, act='softmax'
) )
cost = paddle.fluid.layers.cross_entropy( cost = paddle.nn.functional.cross_entropy(
input=prediction, label=input_y input=prediction,
label=input_y,
reduction='none',
use_softmax=False,
) )
avg_cost = paddle.mean(x=cost) avg_cost = paddle.mean(x=cost)
...@@ -104,8 +107,11 @@ class TestFleetMetaOptimizer(unittest.TestCase): ...@@ -104,8 +107,11 @@ class TestFleetMetaOptimizer(unittest.TestCase):
prediction = paddle.fluid.layers.fc( prediction = paddle.fluid.layers.fc(
input=[input_x], size=2, act='softmax' input=[input_x], size=2, act='softmax'
) )
cost = paddle.fluid.layers.cross_entropy( cost = paddle.nn.functional.cross_entropy(
input=prediction, label=input_y input=prediction,
label=input_y,
reduction='none',
use_softmax=False,
) )
avg_cost = paddle.mean(x=cost) avg_cost = paddle.mean(x=cost)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册