From ed31dac6eb75094eb5f3331d6826a605210f5958 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Wed, 14 Oct 2020 10:03:53 +0800
Subject: [PATCH] remove scale loss and coll grads, test=document_fix (#27874)

---
 python/paddle/distributed/fleet/base/fleet_base.py | 6 ------
 python/paddle/distributed/parallel.py              | 2 --
 python/paddle/distributed/spawn.py                 | 2 --
 python/paddle/fluid/dygraph/parallel.py            | 2 --
 4 files changed, 12 deletions(-)

diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py
index 7eb3a565965..03bf1e1a582 100644
--- a/python/paddle/distributed/fleet/base/fleet_base.py
+++ b/python/paddle/distributed/fleet/base/fleet_base.py
@@ -630,9 +630,7 @@ class Fleet(object):
 
                 print("loss:", loss.numpy())
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
@@ -842,9 +840,7 @@ class Fleet(object):
 
                 print("loss:", loss.numpy())
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
@@ -903,9 +899,7 @@ class Fleet(object):
 
                 print("loss:", loss.numpy())
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
diff --git a/python/paddle/distributed/parallel.py b/python/paddle/distributed/parallel.py
index fab391e9fdf..16b031e116a 100644
--- a/python/paddle/distributed/parallel.py
+++ b/python/paddle/distributed/parallel.py
@@ -92,9 +92,7 @@ def init_parallel_env():
                 labels = paddle.randn([10, 1], 'float32')
                 loss = loss_fn(outputs, labels)
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
diff --git a/python/paddle/distributed/spawn.py b/python/paddle/distributed/spawn.py
index ca35a3c9259..fda898799f4 100644
--- a/python/paddle/distributed/spawn.py
+++ b/python/paddle/distributed/spawn.py
@@ -314,9 +314,7 @@ def spawn(func, args=(), nprocs=-1, join=True, daemon=False, **options):
                 if print_result is True:
                     print("loss:", loss.numpy())
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py
index d810709e670..28670aa1b03 100644
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -397,9 +397,7 @@ class DataParallel(layers.Layer):
                 labels = paddle.randn([10, 1], 'float32')
                 loss = loss_fn(outputs, labels)
 
-                loss = dp_layer.scale_loss(loss)
                 loss.backward()
-                dp_layer.apply_collective_grads()
 
                 adam.step()
                 adam.clear_grad()
-- 
GitLab
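
For reference, below is a minimal sketch of the simplified data-parallel training loop that the edited docstrings illustrate after this patch: the explicit dp_layer.scale_loss(loss) and dp_layer.apply_collective_grads() calls are gone, and only loss.backward() followed by the optimizer step remains. The LinearNet definition, layer sizes, and learning rate are illustrative assumptions and are not taken from the patch itself; the APIs used (paddle.distributed.init_parallel_env, paddle.DataParallel, paddle.distributed.spawn, paddle.optimizer.Adam) are the ones referenced by the modified files.

    # Sketch of the post-patch example pattern (assumed model and hyperparameters).
    import paddle
    import paddle.nn as nn
    import paddle.optimizer as opt
    import paddle.distributed as dist

    class LinearNet(nn.Layer):
        # Small two-layer network used purely for illustration.
        def __init__(self):
            super(LinearNet, self).__init__()
            self._linear1 = nn.Linear(10, 10)
            self._linear2 = nn.Linear(10, 1)

        def forward(self, x):
            return self._linear2(self._linear1(x))

    def train():
        # 1. Initialize the parallel environment for this process.
        dist.init_parallel_env()

        # 2. Wrap the model with DataParallel and build loss/optimizer.
        layer = LinearNet()
        dp_layer = paddle.DataParallel(layer)
        loss_fn = nn.MSELoss()
        adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

        # 3. One training step: no scale_loss / apply_collective_grads calls;
        #    gradient synchronization happens inside loss.backward().
        inputs = paddle.randn([10, 10], 'float32')
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')
        loss = loss_fn(outputs, labels)

        loss.backward()

        adam.step()
        adam.clear_grad()

    if __name__ == '__main__':
        # Launch one training process per visible device.
        dist.spawn(train)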