From a1d9a14e89f6ae6734f6be47b539327868a7ef91 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 28 Dec 2020 21:10:29 -0600
Subject: [PATCH] support grad accumulated across batch (#29942)

---
 .../fluid/imperative/gradient_accumulator.h   |  1 +
 .../test_complex_grad_accumulated.py          | 37 +++++++++++++++----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/imperative/gradient_accumulator.h b/paddle/fluid/imperative/gradient_accumulator.h
index ab5ec52fb2..e2dabc06a7 100644
--- a/paddle/fluid/imperative/gradient_accumulator.h
+++ b/paddle/fluid/imperative/gradient_accumulator.h
@@ -45,6 +45,7 @@ class GradientAccumulator {
       inner_var_ = std::make_shared<VariableWrapper>(var->Name());
       inner_var_->SetType(var->Type());
       inner_var_->SetDataType(var->DataType());
+      inner_var_->SetForwardDataType(var->ForwardDataType());
       inner_var_->InnerSetOverridedStopGradient(
           var->InnerOverridedStopGradient());
       VLOG(6) << " Create inner grad var for (" << var->Name()
diff --git a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py
index 106b9fe15a..ac29272ab0 100644
--- a/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py
+++ b/python/paddle/fluid/tests/unittests/test_complex_grad_accumulated.py
@@ -41,7 +41,6 @@ class Optimization_ex1(paddle.nn.Layer):
             np.random.random((4, 4)).astype(dtype) + np.random.random(
                 (4, 4)).astype(dtype) * 1j,
             stop_gradient=False)
-        print(self.A)
 
     def forward(self, mode=1):
         jj = paddle.to_tensor(np.array([1j]).astype(np.complex64))
@@ -70,31 +69,55 @@ class TestComplexGradAccumulated(unittest.TestCase):
         self.devices = ['cpu']
         if core.is_compiled_with_cuda():
             self.devices.append('gpu')
+        self.iter = 3
+        self.learning_rate = 0.5
         self.dtypes = ['float32', 'float64']
         self.theta_size = [4, 4]
 
-    def run_backward(self, device, dtype, mode):
+    def train(self, device, dtype, mode):
         paddle.set_device(device)
 
         myLayer = Optimization_ex1(self.theta_size, dtype)
+        optimizer = paddle.optimizer.SGD(learning_rate=self.learning_rate,
+                                         parameters=myLayer.parameters())
 
-        loss = myLayer(mode)
-        loss.backward()
+        for iter in range(self.iter):
+            loss = myLayer(mode)
+            loss.backward()
+
+            optimizer.step()
+            optimizer.clear_grad()
+
+    def train_no_clear_grad(self, device, dtype, mode):
+        paddle.set_device(device)
+
+        myLayer = Optimization_ex1(self.theta_size, dtype)
+        optimizer = paddle.optimizer.SGD(learning_rate=self.learning_rate,
+                                         parameters=myLayer.parameters())
+
+        for iter in range(self.iter):
+            loss = myLayer(mode)
+            loss.backward()
+
+            optimizer.step()
 
     def test_case_one_step(self):
         for dev in self.devices:
             for dtype in self.dtypes:
-                self.run_backward(dev, dtype, 1)
+                self.train(dev, dtype, 1)
+                self.train_no_clear_grad(dev, dtype, 1)
 
     def test_case_two_step(self):
         for dev in self.devices:
             for dtype in self.dtypes:
-                self.run_backward(dev, dtype, 2)
+                self.train(dev, dtype, 2)
+                self.train_no_clear_grad(dev, dtype, 2)
 
     def test_case_non_param(self):
         for dev in self.devices:
             for dtype in self.dtypes:
-                self.run_backward(dev, dtype, 3)
+                self.train(dev, dtype, 3)
+                self.train_no_clear_grad(dev, dtype, 3)
 
 
 if __name__ == '__main__':
--
GitLab