Unverified commit 33173ab4 authored by yuehuayingxueluo, committed by GitHub

clear fluid apis: square_error_cost (#48029)

* clear fluid apis in fleet and passes

* fix model.py

* fix model.py

* fix cpp_pass.py

* clear loss.py

* change test file

* fix some test_*.py

* fix adaround.py

* fix evaluator.py

* fix CI bug

* fix CI bug

* fix decode.py

* fix detection.py

* fix ci bug

* rm test_sigmoid_cross_entropy_with_logits_op_ipu.py and fix __init__.py

* fix ci bug

* fix ci BUG
Parent f71de378
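Every hunk below applies the same mechanical rewrite: calls to the deprecated fluid.layers.square_error_cost (and, at many call sites, fluid.layers.softmax_with_cross_entropy) are redirected to their paddle.nn.functional equivalents. A minimal before/after sketch of the pattern, assuming a Paddle 2.x static-graph setup (names are illustrative, not taken from this diff):

import paddle
import paddle.nn.functional as F

paddle.enable_static()
main = paddle.static.Program()
with paddle.static.program_guard(main):
    x = paddle.static.data(name='x', shape=[None, 13], dtype='float32')
    y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
    y_predict = paddle.static.nn.fc(x, size=1)
    # old: cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    cost = F.square_error_cost(input=y_predict, label=y)  # element-wise (input - label)^2
    avg_cost = paddle.mean(cost)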
......@@ -78,7 +78,6 @@ class Momentum(Optimizer):
import numpy as np
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
......
......@@ -16,6 +16,7 @@ import numpy as np
import time
import sys
import logging
import paddle
import paddle.fluid as fluid
......@@ -61,7 +62,7 @@ class AdaRoundLoss:
self.default_beta_range = default_beta_range
def compute_recon_loss(self, ada_quantized_output, orig_output):
square_cost = fluid.layers.square_error_cost(
square_cost = paddle.nn.functional.square_error_cost(
ada_quantized_output, orig_output
)
recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1))
......
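For context, the reconstruction loss computed above is the batch mean of each sample's summed squared error between the AdaRound-quantized output and the original output. A small eager-mode sketch of the same computation (tensor values are illustrative):

import paddle

ada_quantized_output = paddle.to_tensor([[1.1, 1.9], [0.5, 0.7]])
orig_output = paddle.to_tensor([[1.0, 2.0], [0.4, 0.8]])
square_cost = paddle.nn.functional.square_error_cost(ada_quantized_output, orig_output)
recon_loss = paddle.mean(paddle.sum(square_cost, axis=-1))  # per-sample SSE, then batch mean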
......@@ -50,7 +50,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
name=fc_tmp.name, dtype=fc_tmp.dtype
)
fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = fluid.layers.softmax_with_cross_entropy(
cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
fc_tmp, label
)
loss = paddle.mean(cross_entropy)
......
......@@ -127,7 +127,7 @@ def train(net_type, use_cuda, save_dirname, is_local):
raise ValueError("%s network is not supported" % net_type)
logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
cost, predict = fluid.layers.softmax_with_cross_entropy(
cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
logits, label, return_softmax=True
)
avg_cost = paddle.mean(cost)
......@@ -509,7 +509,7 @@ class TestAmpWithNonIterableDataLoader(unittest.TestCase):
net = vgg16_bn_drop(image)
logits = fluid.layers.fc(input=net, size=10, act="softmax")
cost, predict = fluid.layers.softmax_with_cross_entropy(
cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
logits, label, return_softmax=True
)
avg_cost = paddle.mean(cost)
......
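Several call sites above pass return_softmax=True, in which case the functional API returns a (loss, softmax) pair rather than the loss alone. A hedged sketch of that calling convention (shapes are illustrative):

import paddle
import paddle.nn.functional as F

logits = paddle.uniform([4, 10])               # unscaled logits for a batch of 4
label = paddle.randint(0, 10, shape=[4, 1])    # hard class indices
cost, predict = F.softmax_with_cross_entropy(logits, label, return_softmax=True)
# cost: [4, 1] per-sample cross entropy; predict: [4, 10] softmax probabilities
avg_cost = paddle.mean(cost)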
......@@ -110,7 +110,7 @@ def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
net = resnet_cifar10(images)
logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
cost = fluid.layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits, label, return_softmax=False
)
sum_cost = paddle.sum(cost)
......
......@@ -21,7 +21,7 @@ from .layer_function_generator import templatedoc
from ..layer_helper import LayerHelper
from ..framework import Variable, _non_static_mode, static_only, in_dygraph_mode
from .. import core
from .loss import softmax_with_cross_entropy
from paddle.fluid.layers import softmax_with_cross_entropy
from . import tensor
from . import nn
from ..data_feeder import check_variable_and_dtype, check_type, check_dtype
......
......@@ -36,7 +36,6 @@ from paddle import _C_ops, _legacy_C_ops
__all__ = [
'cross_entropy',
'square_error_cost',
'softmax_with_cross_entropy',
]
......@@ -144,41 +143,6 @@ def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
return out
def square_error_cost(input, label):
r"""
Accepts input predictions and target labels and returns the
squared error cost.
For prediction ``input`` and target ``label``, the equation is:
.. math::
Out = (input - label)^2
Parameters:
input (Tensor): Input tensor, the data type should be float32.
label (Tensor): Label tensor, the data type should be float32.
Returns:
Tensor, The tensor storing the element-wise squared
error difference between input and label.
Examples:
.. code-block:: python
import paddle
input = paddle.to_tensor([1.1, 1.9])
label = paddle.to_tensor([1.0, 2.0])
output = paddle.nn.functional.square_error_cost(input, label)
print(output)
# [0.01, 0.01]
"""
return paddle.nn.functional.square_error_cost(input, label)
def softmax_with_cross_entropy(
logits,
label,
......@@ -189,49 +153,32 @@ def softmax_with_cross_entropy(
axis=-1,
):
r"""
This operator implements the cross entropy loss function with softmax. This function
combines the calculation of the softmax operation and the cross entropy loss function
to provide a more numerically stable gradient.
Because this operator performs a softmax on logits internally, it expects
unscaled logits. This operator should not be used with the output of
the softmax operator, since that would produce incorrect results.
When the attribute :attr:`soft_label` is set to :attr:`False`, this operator
expects mutually exclusive hard labels: each sample in a batch is in exactly
one class with a probability of 1.0. Each sample in the batch will have a
single label.
The equation is as follows:
1) Hard label (one-hot label, so every sample has exactly one class)
.. math::
loss_j = -\\text{logits}_{label_j} +
\\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K
2) Soft label (each sample can have a distribution over all classes)
.. math::
loss_j = -\\sum_{i=0}^{K}\\text{label}_i
\\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K}
\\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K
3) If :attr:`numeric_stable_mode` is :attr:`True`, softmax is calculated first by:
.. math::
max_j &= \\max_{i=0}^{K}{\\text{logits}_i}
log\\_max\\_sum_j &= \\log\\sum_{i=0}^{K}\\exp(logits_i - max_j)
softmax_j &= \\exp(logits_j - max_j - {log\\_max\\_sum}_j)
and then cross entropy loss is calculated by softmax and label.
Args:
logits (Tensor): A multi-dimension ``Tensor`` , and the data type is float32 or float64. The input tensor of unscaled log probabilities.
label (Tensor): The ground truth ``Tensor`` , data type is the same
......@@ -258,7 +205,6 @@ def softmax_with_cross_entropy(
axis (int, optional): The index of dimension to perform softmax calculations. It
should be in range :math:`[-1, rank - 1]`, while :math:`rank`
is the rank of input :attr:`logits`. Default: -1.
Returns:
``Tensor`` or Tuple of two ``Tensor`` : Return the cross entropy loss if \
`return_softmax` is False, otherwise the tuple \
......@@ -266,13 +212,10 @@ def softmax_with_cross_entropy(
with input logits and cross entropy loss is in \
the same shape with input logits except shape \
in dimension :attr:`axis` as 1.
Examples:
.. code-block:: python
import paddle
import numpy as np
data = np.random.rand(128).astype("float32")
label = np.random.rand(1).astype("int64")
data = paddle.to_tensor(data)
......
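The removed docstrings pin down both losses exactly; a short eager-mode check against the stated equations (values illustrative, assuming a Paddle 2.x install):

import paddle
import paddle.nn.functional as F

# square_error_cost: Out = (input - label)^2, element-wise
input = paddle.to_tensor([1.1, 1.9])
label = paddle.to_tensor([1.0, 2.0])
print(F.square_error_cost(input, label))    # ~[0.01, 0.01]

# softmax_with_cross_entropy, hard label:
# loss_j = -logits_{label_j} + log(sum_i exp(logits_i))
logits = paddle.to_tensor([[2.0, 1.0, 0.1]])
hard_label = paddle.to_tensor([[0]], dtype='int64')
loss = F.softmax_with_cross_entropy(logits, hard_label)
manual = -logits[0, 0] + paddle.log(paddle.sum(paddle.exp(logits[0])))
print(loss, manual)                         # both ~0.417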
......@@ -1441,13 +1441,14 @@ class SGDOptimizer(Optimizer):
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
......@@ -1642,13 +1643,14 @@ class MomentumOptimizer(Optimizer):
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
moment_optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
......@@ -2219,13 +2221,14 @@ class AdamOptimizer(Optimizer):
import paddle
import paddle.fluid as fluid
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.data(name='x', shape=[None, 13], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
......@@ -2247,13 +2250,14 @@ class AdamOptimizer(Optimizer):
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.data(name='x', shape=[None, 13], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
# define beta decay variable
......@@ -3276,13 +3280,14 @@ class RMSPropOptimizer(Optimizer):
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
rms_optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
......@@ -3493,13 +3498,15 @@ class FtrlOptimizer(Optimizer):
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
ftrl_optimizer = fluid.optimizer.Ftrl(learning_rate=0.1)
......
......@@ -56,16 +56,20 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
if not pure_bf16:
with amp.bf16.bf16_guard():
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
else:
y_predict = fluid.layers.fc(input=x, size=1, act=None)
with amp.bf16.bf16_guard():
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
else:
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
lr = 5e-3 if use_bf16 else 1e-3
......
......@@ -167,7 +167,9 @@ def model():
scale_infer = paddle.scale(x=inference, scale=5.0)
label = layers.data(name='score', shape=[1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label)
square_cost = paddle.nn.functional.square_error_cost(
input=scale_infer, label=label
)
avg_cost = paddle.mean(square_cost)
return scale_infer, avg_cost
......
......@@ -69,7 +69,7 @@ class AutoCheckpointBase(unittest.TestCase):
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
fc_tmp = fluid.layers.fc(image, size=CLASS_NUM)
cross_entropy = fluid.layers.softmax_with_cross_entropy(
cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
fc_tmp, label
)
loss = paddle.mean(cross_entropy)
......
......@@ -63,7 +63,7 @@ def net():
hidden = fluid.layers.fc(input=hidden, size=400, act="sigmoid")
hidden = fluid.layers.fc(input=hidden, size=3, act=None)
cost, y_predict = fluid.layers.softmax_with_cross_entropy(
cost, y_predict = paddle.nn.functional.softmax_with_cross_entropy(
hidden, y, return_softmax=True
)
acc_top1 = paddle.static.accuracy(input=y_predict, label=y, k=1)
......
......@@ -57,7 +57,7 @@ class SimpleNet(Layer):
fc = fluid.layers.matmul(x_emb, self.softmax_weight)
fc = fluid.layers.elementwise_add(fc, self.softmax_bias)
projection = paddle.reshape(fc, shape=[-1, vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=y1, soft_label=False
)
return loss.mean()
......@@ -106,7 +106,7 @@ class LossNet(Layer):
def forward(self, args, y1):
projection, x2 = args
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=y1[0], soft_label=False
)
return loss.mean()
......
......@@ -67,7 +67,7 @@ class SimpleNet(Layer):
projection = paddle.matmul(projection, self.word_embeddings.weight)
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=y1, soft_label=False
)
return loss.mean()
......@@ -120,7 +120,7 @@ class LossNet(Layer):
def forward(self, args, y1):
projection = args
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=y1[0], soft_label=False
)
return loss.mean()
......
......@@ -941,7 +941,7 @@ class TransFormer(Layer):
epsilon=self._label_smooth_eps,
)
cost = fluid.layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits=predict,
label=label_out,
soft_label=True if self._label_smooth_eps else False,
......
......@@ -33,7 +33,7 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase):
y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
return avg_cost, x, y
......
......@@ -29,7 +29,7 @@ class TestCommunicator(unittest.TestCase):
def net(self):
x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
return avg_cost
......
......@@ -273,7 +273,7 @@ class TestDebugInfo(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
role = role_maker.UserDefinedRoleMaker(
......
......@@ -449,7 +449,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase):
x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None)
y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = paddle.fluid.layers.square_error_cost(
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
......
......@@ -1585,7 +1585,7 @@ def transformer(
epsilon=label_smooth_eps,
)
cost = layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits=paddle.reshape(predict, shape=[-1, trg_vocab_size]),
label=label,
soft_label=True if label_smooth_eps else False,
......
......@@ -410,7 +410,7 @@ class PretrainModelLayer(Layer):
else:
fc_out = self.out_fc(mask_trans_feat)
mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
mask_lm_loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=fc_out, label=mask_label
)
mean_mask_lm_loss = paddle.mean(mask_lm_loss)
......@@ -420,7 +420,7 @@ class PretrainModelLayer(Layer):
(
next_sent_loss,
next_sent_softmax,
) = fluid.layers.softmax_with_cross_entropy(
) = paddle.nn.functional.softmax_with_cross_entropy(
logits=next_sent_fc_out, label=labels, return_softmax=True
)
......
......@@ -294,7 +294,7 @@ class BaseModel(fluid.dygraph.Layer):
dec_output = paddle.stack(dec_output)
dec_output = self.fc(self._transpose_batch_time(dec_output))
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=dec_output, label=label, soft_label=False
)
loss = paddle.squeeze(loss, axis=[2])
......@@ -828,7 +828,7 @@ class AttentionModel(fluid.dygraph.Layer):
dec_output = paddle.stack(dec_output)
dec_output = self.fc(self._transpose_batch_time(dec_output))
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=dec_output, label=label, soft_label=False
)
loss = paddle.squeeze(loss, axis=[2])
......
......@@ -379,7 +379,7 @@ def bmn_loss_func(
weights = u_hmask + u_smmask + u_slmask
weights.stop_gradient = True
loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
loss = paddle.nn.functional.square_error_cost(pred_score, gt_iou_map)
loss = paddle.multiply(loss, weights)
loss = 0.5 * paddle.sum(loss) / paddle.sum(weights)
......
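In the BMN loss above, the squared error map is masked by precomputed sample weights before being normalized by the weight mass. A hedged sketch of just that weighting step (the mask here is an illustrative stand-in for u_hmask + u_smmask + u_slmask):

import paddle

pred_score = paddle.uniform([2, 100, 100])
gt_iou_map = paddle.uniform([2, 100, 100], min=0.0, max=1.0)
weights = paddle.cast(gt_iou_map > 0.3, 'float32')  # illustrative mask
weights.stop_gradient = True
loss = paddle.nn.functional.square_error_cost(pred_score, gt_iou_map)
loss = paddle.multiply(loss, weights)
loss = 0.5 * paddle.sum(loss) / paddle.sum(weights)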
......@@ -216,7 +216,7 @@ class PtbModel(fluid.Layer):
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = paddle.add(projection, self.softmax_bias)
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -576,7 +576,7 @@ class CrossEntropyCriterion:
epsilon=self.label_smooth_eps,
)
cost = layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits=predict,
label=label_out,
soft_label=True if self.label_smooth_eps else False,
......
......@@ -220,7 +220,7 @@ class TestWithoutIdentityLoss1(TestBase):
class TestWithoutIdentityLoss2(TestBase):
def set_op_attrs(self):
self.loss_op = paddle.fluid.layers.softmax_with_cross_entropy
self.loss_op = paddle.nn.functional.softmax_with_cross_entropy
def set_data_feed(self):
self.data = paddle.uniform((8, 3, 10, 10), dtype='float32')
......
......@@ -143,7 +143,7 @@ class TestMomentumV2(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.Momentum(
......@@ -268,7 +268,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
......
......@@ -126,7 +126,7 @@ class TestPowNet(unittest.TestCase):
fc_1 = fluid.layers.fc(input=z, size=128)
prediction = fluid.layers.fc(input=fc_1, size=2)
cost = fluid.layers.softmax_with_cross_entropy(prediction, label)
cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
......
......@@ -111,7 +111,7 @@ class TestMomentumV2(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.Momentum(
......@@ -239,7 +239,7 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
......
......@@ -275,7 +275,7 @@ class TestSliceNet(unittest.TestCase):
prediction = paddle.static.nn.fc(z, size=2, activation='softmax')
cost = paddle.fluid.layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits=prediction, label=label
)
loss = paddle.mean(cost)
......
......@@ -124,7 +124,7 @@ class TestPowNet(unittest.TestCase):
fc_1 = fluid.layers.fc(input=z, size=128)
prediction = fluid.layers.fc(input=fc_1, size=2)
cost = fluid.layers.softmax_with_cross_entropy(prediction, label)
cost = paddle.nn.functional.softmax_with_cross_entropy(prediction, label)
loss = paddle.mean(cost)
sgd = fluid.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
......
......@@ -68,7 +68,7 @@ class SimpleNet(fluid.Layer):
fc = fluid.layers.matmul(x_emb, self.softmax_weight)
fc = paddle.add(fc, self.softmax_bias)
projection = paddle.reshape(fc, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -146,7 +146,9 @@ class TestAdadeltaV2(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
......
......@@ -941,7 +941,9 @@ class TestAdamOptimizer(unittest.TestCase):
y_predict = fluid.layers.fc(
input=x, size=1, act=None, param_attr=weight_attr
)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
adam = fluid.optimizer.AdamOptimizer(
......
......@@ -33,7 +33,9 @@ def main_test_func(place, dtype):
x = fluid.data(name='x', shape=[None, 13], dtype=dtype)
y = fluid.data(name='y', shape=[None, 1], dtype=dtype)
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
......
......@@ -621,7 +621,9 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp):
fc2_b_mon1 = np.zeros((linear2.bias.shape)).astype("float32")
fc2_b_mon2 = np.zeros((linear2.bias.shape)).astype("float32")
cost = fluid.layers.square_error_cost(input=out, label=y)
cost = paddle.nn.functional.square_error_cost(
input=out, label=y
)
avg_cost = paddle.mean(cost)
simple_lr_fun = partial(
......
......@@ -262,7 +262,9 @@ class SimpleNet(BackwardNet):
name='fc_no_use',
)
# loss
cost = fluid.layers.square_error_cost(input=predict, label=label)
cost = paddle.nn.functional.square_error_cost(
input=predict, label=label
)
loss = paddle.mean(cost, name='mean_loss')
return loss
......@@ -330,7 +332,7 @@ class TestAppendBackwardWithError(unittest.TestCase):
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
x_emb = fluid.embedding(x, size=[100, 256])
y_predict = fluid.layers.fc(input=x_emb, size=1, name='my_fc')
loss = fluid.layers.square_error_cost(input=y_predict, label=y)
loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_loss = paddle.mean(loss)
param_names = [
param.name
......
......@@ -17,6 +17,8 @@ import unittest
import numpy as np
from op_test import OpTest, randomize_probability
import paddle
class TestBprLossOp1(OpTest):
"""Test BprLoss with discrete one-hot labels."""
......@@ -47,4 +49,5 @@ class TestBprLossOp1(OpTest):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -30,7 +30,7 @@ class TestCommunicator(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[1], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
return avg_cost
......
......@@ -49,7 +49,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase):
y_predict = fluid.layers.fc(input=z, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
return avg_cost, x, x1, y
......
......@@ -55,7 +55,7 @@ class TestCommunicator(unittest.TestCase):
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
slots_vars = [x, y]
cost = fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
optimizer = fluid.optimizer.Adam(0.01)
......
......@@ -47,7 +47,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
strategy = paddle.distributed.fleet.DistributedStrategy()
......@@ -85,7 +85,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
strategy = paddle.distributed.fleet.DistributedStrategy()
......
......@@ -40,7 +40,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
strategy = paddle.distributed.fleet.DistributedStrategy()
......
......@@ -41,7 +41,7 @@ class TestDistStrategyTrainerDescConfig(unittest.TestCase):
x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32')
y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = paddle.fluid.layers.square_error_cost(input=x, label=y)
cost = paddle.nn.functional.square_error_cost(input=x, label=y)
avg_cost = paddle.mean(cost)
strategy = paddle.distributed.fleet.DistributedStrategy()
......
......@@ -47,7 +47,7 @@ class TranspilerTest(unittest.TestCase):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
sgd_optimizer.minimize(avg_cost)
......@@ -302,7 +302,7 @@ class TestLRDecay(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
......@@ -471,7 +471,7 @@ class TestDecayedAdagrad(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
opt = fluid.optimizer.DecayedAdagrad(learning_rate=0.1)
opt.minimize(avg_cost)
......@@ -492,7 +492,7 @@ class TestFtrl(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
opt = fluid.optimizer.Ftrl(learning_rate=0.1)
opt.minimize(avg_cost)
......@@ -513,7 +513,7 @@ class TestLRDecayConditional(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.piecewise_decay(
......@@ -579,7 +579,7 @@ class TestL2Decay(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
......@@ -616,7 +616,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
base_lr = 1.0
bd = [1, 10, 20, 30]
......@@ -692,7 +692,7 @@ class TestEmptyPserverOptimizeBlocks(TranspilerTest):
bias_attr=False,
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1.0)
sgd_optimizer.minimize(avg_cost)
......@@ -1134,7 +1134,7 @@ class TestRMSPropOptimizer(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
optimizer.minimize(avg_cost)
......@@ -1167,7 +1167,7 @@ class TestLoadSliceVar(TranspilerTest):
bias_attr=fluid.ParamAttr(name='fc_b'),
)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
optimizer = fluid.optimizer.RMSProp(learning_rate=0.1)
optimizer.minimize(avg_cost)
......@@ -1452,6 +1452,7 @@ class TestRemoteHsigmoid(TestDistLookupTableBase):
path_table=path_table,
path_code=path_code,
)
avg_cost = paddle.mean(cost)
# optimizer
optimizer = fluid.optimizer.SGD(learning_rate=0.003)
......
......@@ -58,7 +58,9 @@ class TestListenAndServOp(unittest.TestCase):
)
y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
ps_param = pslib.PSParameter()
......@@ -120,7 +122,9 @@ class TestListenAndServOp(unittest.TestCase):
)
y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
ps_param = pslib.PSParameter()
......@@ -180,7 +184,9 @@ class TestListenAndServOp(unittest.TestCase):
)
y_predict = fluid.layers.fc(input=x_emb, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
ps_param = pslib.PSParameter()
......
......@@ -463,7 +463,7 @@ def lm_model(
projection = paddle.add(projection, softmax_bias)
projection = paddle.reshape(projection, shape=[-1, vocab_size])
loss = layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=y, soft_label=False
)
......
......@@ -43,7 +43,7 @@ class TestExceptionNoCStack(unittest.TestCase):
x = fluid.layers.data(name='X', shape=[-1, 13], dtype='float32')
y = fluid.layers.data(name='Y', shape=[-1, 1], dtype='float32')
predict = fluid.layers.fc(input=x, size=1, act=None)
loss = fluid.layers.square_error_cost(input=predict, label=y)
loss = paddle.nn.functional.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)
fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_loss)
......
......@@ -25,7 +25,7 @@ class TestExecutor(unittest.TestCase):
y = fluid.data(name="y", shape=[None, 1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
opt = fluid.optimizer.Adam(learning_rate=lr)
......
......@@ -27,7 +27,7 @@ class TestExecutor(unittest.TestCase):
y = fluid.data(name="y", shape=[None, 1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
opt = fluid.optimizer.Adam(learning_rate=lr)
......
......@@ -96,7 +96,9 @@ class TestDygraphGNN(unittest.TestCase):
logits = paddle.reshape(logits, logits.shape[1:])
# In other example, it's nll with log_softmax. However, paddle's
# log_loss only supports binary classification now.
loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits, labels
)
loss = paddle.sum(loss)
adam = AdamOptimizer(learning_rate=1e-3)
......@@ -134,7 +136,7 @@ class TestDygraphGNN(unittest.TestCase):
logits = paddle.reshape(logits, logits.shape[1:])
# In other example, it's nll with log_softmax. However, paddle's
# log_loss only supports binary classification now.
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits, to_variable(labels)
)
loss = paddle.sum(loss)
......@@ -162,7 +164,7 @@ class TestDygraphGNN(unittest.TestCase):
logits2 = paddle.reshape(logits2, logits2.shape[1:])
# In other example, it's nll with log_softmax. However, paddle's
# log_loss only supports binary classification now.
loss2 = fluid.layers.softmax_with_cross_entropy(
loss2 = paddle.nn.functional.softmax_with_cross_entropy(
logits2, to_variable(labels2)
)
loss2 = paddle.sum(loss2)
......
......@@ -69,7 +69,7 @@ class SimpleNet(fluid.Layer):
)
projection = paddle.add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -228,7 +228,7 @@ class PtbModel(fluid.Layer):
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = paddle.add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -224,7 +224,7 @@ class PtbModel(fluid.Layer):
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = paddle.add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -225,7 +225,7 @@ class PtbModel(fluid.Layer):
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = paddle.add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -78,7 +78,7 @@ class SimpleNet(fluid.Layer):
fc, paddle.transpose(self.embedding.weight, perm=[1, 0])
)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -1099,7 +1099,7 @@ class TransFormer(Layer):
epsilon=self._label_smooth_eps,
)
cost = fluid.layers.softmax_with_cross_entropy(
cost = paddle.nn.functional.softmax_with_cross_entropy(
logits=predict,
label=label_out,
soft_label=True if self._label_smooth_eps else False,
......
......@@ -59,7 +59,9 @@ class TestBook(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
......@@ -153,7 +155,9 @@ class TestSaveInferenceModel(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
place = core.CPUPlace()
......@@ -209,7 +213,9 @@ class TestInstance(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
place = core.CPUPlace()
......@@ -245,7 +251,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
......@@ -422,7 +430,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
......@@ -469,7 +479,9 @@ class TestSaveInferenceModelNew(unittest.TestCase):
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
......
......@@ -16,6 +16,7 @@ import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
......@@ -48,7 +49,7 @@ class TestSoftmaxWithXe(unittest.TestCase):
dtype='int64' if not self.soft_label else self.dtype,
append_batch_size=False,
)
z_d, s_d = fluid.layers.softmax_with_cross_entropy(
z_d, s_d = paddle.nn.functional.softmax_with_cross_entropy(
x_d,
y_d,
soft_label=self.soft_label,
......
......@@ -126,7 +126,9 @@ class TestLambOpWithCombinedOp(unittest.TestCase):
x = fluid.layers.data(name='X', shape=[13], dtype='float32')
y = fluid.layers.data(name='Y', shape=[1], dtype='float32')
prediction = fluid.layers.fc(input=x, size=1, act=None)
loss = fluid.layers.square_error_cost(input=prediction, label=y)
loss = paddle.nn.functional.square_error_cost(
input=prediction, label=y
)
avg_loss = paddle.mean(loss)
return avg_loss
......
......@@ -3037,7 +3037,9 @@ class TestBook(LayerTest):
x = self._get_data(name='x', shape=[13], dtype='float32')
y_predict = layers.fc(input=x, size=1, act=None)
y = self._get_data(name='y', shape=[1], dtype='float32')
cost = layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
return avg_cost
......@@ -3256,23 +3258,31 @@ class TestBook(LayerTest):
):
x = self._get_data(name='x', shape=[16], dtype='float32')
y = self._get_data(name='label', shape=[1], dtype='int64')
loss, softmax = layers.softmax_with_cross_entropy(
loss, softmax = paddle.nn.functional.softmax_with_cross_entropy(
x, y, return_softmax=True
)
self.assertIsNotNone(loss)
self.assertIsNotNone(softmax)
loss = layers.softmax_with_cross_entropy(x, y)
loss = paddle.nn.functional.softmax_with_cross_entropy(x, y)
self.assertIsNotNone(loss)
x1 = self._get_data(name='x1', shape=[16, 32, 64], dtype='float32')
y1 = self._get_data(name='label1', shape=[1, 32, 64], dtype='int64')
y2 = self._get_data(name='label2', shape=[16, 1, 64], dtype='int64')
y3 = self._get_data(name='label3', shape=[16, 32, 1], dtype='int64')
loss1 = layers.softmax_with_cross_entropy(x1, y1, axis=1)
loss2 = layers.softmax_with_cross_entropy(x1, y2, axis=2)
loss3 = layers.softmax_with_cross_entropy(x1, y3, axis=3)
loss4 = layers.softmax_with_cross_entropy(x1, y3, axis=-1)
loss1 = paddle.nn.functional.softmax_with_cross_entropy(
x1, y1, axis=1
)
loss2 = paddle.nn.functional.softmax_with_cross_entropy(
x1, y2, axis=2
)
loss3 = paddle.nn.functional.softmax_with_cross_entropy(
x1, y3, axis=3
)
loss4 = paddle.nn.functional.softmax_with_cross_entropy(
x1, y3, axis=-1
)
self.assertIsNotNone(loss1)
self.assertIsNotNone(loss2)
self.assertIsNotNone(loss3)
......@@ -3694,7 +3704,7 @@ class TestBook(LayerTest):
):
x = self._get_data(name="X", shape=[1], dtype="float32")
y = self._get_data(name="Y", shape=[1], dtype="float32")
out = layers.square_error_cost(input=x, label=y)
out = paddle.nn.functional.square_error_cost(input=x, label=y)
return out
def test_dynamic_lstmp(self):
......
......@@ -36,7 +36,7 @@ def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id):
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
# loss function
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
# optimizer
......@@ -73,7 +73,7 @@ def run_pserver_with_empty_block(
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
# loss function
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
# optimizer
......
......@@ -216,7 +216,7 @@ class TestLookupTableIsSparse(unittest.TestCase):
)
y = paddle.sum(emb, axis=-1)
loss = fluid.layers.square_error_cost(input=y, label=y_)
loss = paddle.nn.functional.square_error_cost(input=y, label=y_)
loss = paddle.mean(loss)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
......
......@@ -30,7 +30,7 @@ def train_simulator(test_batch_size=10):
y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
......
......@@ -532,7 +532,9 @@ class TestMomentumV2(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.Momentum(
......@@ -673,7 +675,9 @@ class TestMomentumOpWithDecayAPI(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
momentum_optimizer = paddle.fluid.contrib.optimizer.Momentum(
......
......@@ -33,7 +33,9 @@ class TestNetWithDtype(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)
......
......@@ -82,7 +82,7 @@ def static(
def fn_2(opt, avg_loss=None, pred=None, label=None):
if avg_loss is None:
loss = layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=pred, label=label
)
avg_loss = paddle.mean(loss, name='mean_softmax_loss')
......@@ -108,7 +108,7 @@ def static(
else:
loss_1 = layers.cross_entropy(input=prediction, label=label)
avg_loss_1 = paddle.mean(loss_1)
loss_2 = layers.softmax_with_cross_entropy(
loss_2 = paddle.nn.functional.softmax_with_cross_entropy(
logits=prediction, label=label
)
avg_loss_2 = paddle.mean(loss_2)
......@@ -193,7 +193,7 @@ def dynamic(train_data, use_cuda=False, use_parallel_exe=False):
loss.backward()
adam.minimize(loss)
else:
softmax_loss = layers.softmax_with_cross_entropy(
softmax_loss = paddle.nn.functional.softmax_with_cross_entropy(
prediction, var_label
)
loss = paddle.mean(softmax_loss)
......
......@@ -92,7 +92,9 @@ def cond_net(use_feed=None):
return avg_loss
def loss2(pred, label):
loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=pred, label=label
)
avg_loss = paddle.mean(loss, name='mean_softmax_loss')
return avg_loss
......@@ -119,7 +121,9 @@ def optimization_in_cond_net(with_optimize=False):
return avg_loss
def loss2(opt, pred, label, with_optimize):
loss = fluid.layers.softmax_with_cross_entropy(logits=pred, label=label)
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=pred, label=label
)
avg_loss = paddle.mean(loss, name='mean_softmax_loss')
if with_optimize:
opt.minimize(avg_loss)
......
......@@ -280,7 +280,9 @@ class TestRMSPropV2(unittest.TestCase):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
......
......@@ -631,7 +631,7 @@ def def_seq2seq_model(
)
# loss
loss = layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=logits, label=label, soft_label=False
)
loss = layers.unsqueeze(loss, axes=[2])
......
......@@ -204,7 +204,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
emb = fluid.embedding(input=data, size=(10000000, 150), dtype='float32')
out = fluid.layers.l2_normalize(x=emb, axis=-1)
cost = fluid.layers.square_error_cost(input=out, label=label)
cost = paddle.nn.functional.square_error_cost(input=out, label=label)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)
......
......@@ -16,6 +16,7 @@ import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
......@@ -32,7 +33,9 @@ class TestSquareErrorCost(unittest.TestCase):
input_var = layers.create_tensor(dtype="float32", name="input")
label_var = layers.create_tensor(dtype="float32", name="label")
output = layers.square_error_cost(input=input_var, label=label_var)
output = paddle.nn.functional.square_error_cost(
input=input_var, label=label_var
)
for use_cuda in (
[False, True] if core.is_compiled_with_cuda() else [False]
......@@ -54,14 +57,14 @@ class TestSquareErrorInvalidInput(unittest.TestCase):
def test_invalid_input():
input = [256, 3]
label = fluid.data(name='label1', shape=[None, 3], dtype='float32')
loss = fluid.layers.square_error_cost(input, label)
loss = paddle.nn.functional.square_error_cost(input, label)
self.assertRaises(TypeError, test_invalid_input)
def test_invalid_label():
input = fluid.data(name='input2', shape=[None, 3], dtype='float32')
label = [256, 3]
loss = fluid.layers.square_error_cost(input, label)
loss = paddle.nn.functional.square_error_cost(input, label)
self.assertRaises(TypeError, test_invalid_label)
......
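As the test above exercises, the functional API type-checks its arguments in static-graph mode; passing a plain Python list where a Tensor is expected raises TypeError. A minimal sketch:

import paddle

paddle.enable_static()
label = paddle.static.data(name='label1', shape=[None, 3], dtype='float32')
try:
    paddle.nn.functional.square_error_cost([256, 3], label)  # input is a list, not a Tensor
except TypeError as e:
    print('raised as expected:', e)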
......@@ -237,7 +237,7 @@ class PtbModel(fluid.Layer):
projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
projection = paddle.add(projection, self.softmax_bias)
projection = paddle.reshape(projection, shape=[-1, self.vocab_size])
loss = fluid.layers.softmax_with_cross_entropy(
loss = paddle.nn.functional.softmax_with_cross_entropy(
logits=projection, label=label, soft_label=False
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
......
......@@ -171,7 +171,9 @@ class XPUTestAdadelta(XPUOpTestWrapper):
x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(
input=y_predict, label=y
)
avg_cost = paddle.mean(cost)
rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
......
......@@ -75,7 +75,7 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
emb = fluid.embedding(input=data, size=(10000, 150), dtype='float32')
out = fluid.layers.l2_normalize(x=emb, axis=-1)
cost = fluid.layers.square_error_cost(input=out, label=label)
cost = paddle.nn.functional.square_error_cost(input=out, label=label)
avg_cost = paddle.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)
......
......@@ -283,11 +283,16 @@ class DistributeTranspiler:
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
paddle.enable_static()
x = fluid.data(name='x', shape=[13], dtype='float32')
y = fluid.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
cost = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
avg_loss = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
......