diff --git a/python/paddle/amp/grad_scaler.py b/python/paddle/amp/grad_scaler.py
index 21ccc01640c8260b83e17f56bf9587b3a1ec1ae8..0e43e5a6a17fe45ec1104bc198e0a7725b30ca1c 100644
--- a/python/paddle/amp/grad_scaler.py
+++ b/python/paddle/amp/grad_scaler.py
@@ -56,7 +56,7 @@ class GradScaler(AmpScaler):
             data = paddle.rand([10, 3, 32, 32])
             with paddle.amp.auto_cast():
                 conv = model(data)
-                loss = paddle.fluid.layers.reduce_mean(conv)
+                loss = paddle.mean(conv)
             scaled = scaler.scale(loss)  # scale the loss
             scaled.backward()  # do backward
             scaler.minimize(optimizer, scaled)  # update parameters
@@ -96,7 +96,7 @@ class GradScaler(AmpScaler):
             data = paddle.rand([10, 3, 32, 32])
             with paddle.amp.auto_cast():
                 conv = model(data)
-                loss = paddle.fluid.layers.reduce_mean(conv)
+                loss = paddle.mean(conv)
             scaled = scaler.scale(loss)  # scale the loss
             scaled.backward()  # do backward
             scaler.minimize(optimizer, scaled)  # update parameters
@@ -128,7 +128,7 @@ class GradScaler(AmpScaler):
             data = paddle.rand([10, 3, 32, 32])
             with paddle.amp.auto_cast():
                 conv = model(data)
-                loss = paddle.fluid.layers.reduce_mean(conv)
+                loss = paddle.mean(conv)
             scaled = scaler.scale(loss)  # scale the loss
             scaled.backward()  # do backward
             scaler.minimize(optimizer, scaled)  # update parameters
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
index 347f3f0d794e5120bd300d4a7db36a7559ba7177..ec57057164f61c412493904ded99c018b06de8e6 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py
@@ -105,7 +105,7 @@ class ReduceMeanLayer(object):
         """
         operation
         """
-        mean = paddle.fluid.layers.reduce_mean(input)
+        mean = paddle.mean(input)
         return mean
 
 
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
index ef5a5878e0cf4fbe905a3a529aee12d7862bacb7..2c74e5b221f7e5df5574dbeb285be385b1f0ef85 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm_v2.py
@@ -187,7 +187,7 @@ class PtbModel(paddle.nn.Layer):
         loss = paddle.nn.functional.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False)
         loss = paddle.reshape(loss, shape=[-1, self.num_steps])
-        loss = paddle.fluid.layers.reduce_mean(loss, dim=[0])
+        loss = paddle.mean(loss, axis=[0])
         loss = paddle.fluid.layers.reduce_sum(loss)
 
         return loss, last_hidden, last_cell
diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
index f9ae3cda671e83a67fe7971bcd9eb390d6b51c9d..0cdc413c2f68cc021ab020bdccaedd98c5b91b1f 100644
--- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
+++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
@@ -414,7 +414,7 @@ class TestLRScheduler(unittest.TestCase):
             for batch_id in range(2):
                 x = paddle.to_tensor(x)
                 out = linear(x)
-                loss = paddle.fluid.layers.reduce_mean(out)
+                loss = paddle.mean(out)
                 loss.backward()
                 adam.step()
                 adam.clear_grad()
diff --git a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py
index 5829ba624ebd4e51b961ea0ce370f95780e019e5..3e1dd4ef57320d9e23486177dfb10e0333c4ac8c 100644
--- a/python/paddle/fluid/tests/unittests/test_retain_graph.py
+++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py
@@ -73,8 +73,8 @@ class TestRetainGraph(unittest.TestCase):
             fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1)
             disc_interpolates = netD(fake_AB)
 
-            outs = paddle.fluid.layers.fill_constant(disc_interpolates.shape,
-                                                     disc_interpolates.dtype, 1.0)
+            outs = paddle.fluid.layers.fill_constant(
+                disc_interpolates.shape, disc_interpolates.dtype, 1.0)
             gradients = paddle.grad(
                 outputs=disc_interpolates,
                 inputs=fake_AB,
@@ -85,9 +85,9 @@ class TestRetainGraph(unittest.TestCase):
 
             gradients = paddle.reshape(gradients[0], [real_data.shape[0], -1])
 
-            gradient_penalty = paddle.fluid.layers.reduce_mean((paddle.norm(
-                gradients + 1e-16, 2, 1) - constant)**
-                                                               2) * lambda_gp  # added eps
+            gradient_penalty = paddle.mean((paddle.norm(gradients + 1e-16, 2, 1)
+                                            - constant)**
+                                           2) * lambda_gp  # added eps
             return gradient_penalty, gradients
         else:
             return 0.0, None
@@ -113,7 +113,8 @@ class TestRetainGraph(unittest.TestCase):
         fake_AB = paddle.concat((realA, fakeB), 1)
         G_pred_fake = d(fake_AB.detach())
 
-        false_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape, 'float32', 0.0)
+        false_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape,
+                                                         'float32', 0.0)
 
         G_gradient_penalty, _ = self.cal_gradient_penalty(
             d, realA, fakeB, lambda_gp=10.0)
@@ -125,7 +126,8 @@ class TestRetainGraph(unittest.TestCase):
         optim_g.clear_gradients()
         fake_AB = paddle.concat((realA, fakeB), 1)
         G_pred_fake = d(fake_AB)
-        true_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape, 'float32', 1.0)
+        true_target = paddle.fluid.layers.fill_constant(G_pred_fake.shape,
+                                                        'float32', 1.0)
 
         loss_g = l1_criterion(fakeB, realB) + gan_criterion(G_pred_fake,
                                                             true_target)
diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py
index ab2c0fe905bfd9463d191a2ae11720ed1d94482a..3fc8155d87f36397a0685eaa2c087b0b0e9b07f4 100644
--- a/python/paddle/optimizer/lr.py
+++ b/python/paddle/optimizer/lr.py
@@ -229,7 +229,7 @@ class NoamDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -325,7 +325,7 @@ class PiecewiseDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -407,7 +407,7 @@ class NaturalExpDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -485,7 +485,7 @@ class InverseTimeDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -580,7 +580,7 @@ class PolynomialDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -695,7 +695,7 @@ class LinearWarmup(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -798,7 +798,7 @@ class ExponentialDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -885,7 +885,7 @@ class MultiStepDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -992,7 +992,7 @@ class StepDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -1086,7 +1086,7 @@ class LambdaDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -1184,7 +1184,7 @@ class ReduceOnPlateau(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
@@ -1390,7 +1390,7 @@ class CosineAnnealingDecay(LRScheduler):
                 for batch_id in range(2):
                     x = paddle.uniform([10, 10])
                     out = linear(x)
-                    loss = paddle.fluid.layers.reduce_mean(out)
+                    loss = paddle.mean(out)
                     loss.backward()
                     sgd.step()
                     sgd.clear_gradients()
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index eeedbbdd1b18f25b67654cd3a45f735412bbaa6f..9f857680ca9e13f9aa601ebb0f7afa58e98541a9 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -377,7 +377,7 @@ class Optimizer(object):
             linear = paddle.nn.Linear(10, 10)
             inp = paddle.to_tensor(inp)
             out = linear(inp)
-            loss = paddle.fluid.layers.reduce_mean(out)
+            loss = paddle.mean(out)
 
             bd = [2, 4, 6, 8]
             value = [0.2, 0.4, 0.6, 0.8, 1.0]
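
Reviewer note: every hunk above is the same mechanical migration, replacing the deprecated
paddle.fluid.layers.reduce_mean with the Paddle 2.x API paddle.mean (the keyword dim becomes
axis); the remaining changes only re-wrap fill_constant calls to the line-length limit. A
minimal sketch of the equivalence, assuming Paddle 2.x in dynamic-graph mode (tensor names
here are illustrative, not part of the patch):

    import paddle

    x = paddle.rand([4, 8])

    # Old fluid API (deprecated), kept as a comment for reference only:
    #   loss = paddle.fluid.layers.reduce_mean(x, dim=[0])

    # New 2.x API: same reduction, with dim renamed to axis.
    loss = paddle.mean(x, axis=[0])   # mean over axis 0 -> shape [8]
    total = paddle.mean(x)            # full reduction when axis is omitted

    print(loss.shape, total.shape)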