diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 7eb3a5659654ab151a0234df3e226a973139345e..03bf1e1a58276b40c7b8f680074566df65d075b4 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -630,9 +630,7 @@ class Fleet(object): print("loss:", loss.numpy()) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad() @@ -842,9 +840,7 @@ class Fleet(object): print("loss:", loss.numpy()) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad() @@ -903,9 +899,7 @@ class Fleet(object): print("loss:", loss.numpy()) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad() diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py index fab391e9fdf693e4c0e56d891105f7aa93192af0..16b031e116acdc2e06696740a0406daac783c58f 100644 --- a/python/paddle/distributed/parallel.py +++ b/python/paddle/distributed/parallel.py @@ -92,9 +92,7 @@ def init_parallel_env(): labels = paddle.randn([10, 1], 'float32') loss = loss_fn(outputs, labels) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad() diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py index ca35a3c92594a394664587d5413de4c21a7d3a02..fda898799f4fc88c96012ce629022e0ec86c2c6b 100644 --- a/python/paddle/distributed/spawn.py +++ b/python/paddle/distributed/spawn.py @@ -314,9 +314,7 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options): if print_result is True: print("loss:", loss.numpy()) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad() diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index d810709e670c46743308fbc9e5b01dc7f28192f0..28670aa1b038bc3656bdc308dcb5d41d851a55a4 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -397,9 +397,7 @@ class DataParallel(layers.Layer): labels = paddle.randn([10, 1], 'float32') loss = loss_fn(outputs, labels) - loss = dp_layer.scale_loss(loss) loss.backward() - dp_layer.apply_collective_grads() adam.step() adam.clear_grad()