From 3f1169fec2da4d51f9988857b8dea202e1e02b9d Mon Sep 17 00:00:00 2001 From: WangXi Date: Wed, 4 Dec 2019 16:24:28 +0800 Subject: [PATCH] Fix dgc clip & rampup step, test=release/1.6 (#21519) --- paddle/fluid/operators/dgc_op.h | 7 ++++--- python/paddle/fluid/optimizer.py | 4 ++-- python/paddle/fluid/tests/unittests/test_dgc_optimizer.py | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h index 65aaf47472d..1736fc36f64 100644 --- a/paddle/fluid/operators/dgc_op.h +++ b/paddle/fluid/operators/dgc_op.h @@ -28,7 +28,7 @@ inline float get_period_sparcity(const std::vector& sparsity, size_t idx = static_cast(cur_step * sparsity.size() / rampup_steps); if (idx >= sparsity.size()) { - return 0.999; + idx = sparsity.size() - 1; } PADDLE_ENFORCE_LT(idx, sparsity.size()); @@ -102,8 +102,9 @@ class DGCOpKernel : public framework::OpKernel { } float ratio = - 1 - get_period_sparcity(sparsity, static_cast(*current_step), - rampup_step); + 1 - get_period_sparcity( + sparsity, static_cast(*current_step - rampup_begin_step), + rampup_step); PADDLE_ENFORCE_GE(ratio, 0.0); PADDLE_ENFORCE_LT(ratio, 1.0); int k = static_cast(g->numel() * ratio); diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 168afd4a52c..8d66a810b0b 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -947,6 +947,7 @@ class DGCMomentumOptimizer(Optimizer): self._momentum = momentum self._use_nesterov = bool(use_nesterov) + assert rampup_begin_step >= 0, "rampup_begin_step must >= 0" self._rampup_begin_step = rampup_begin_step self._rampup_step = rampup_step self._sparsity = sparsity @@ -963,8 +964,7 @@ class DGCMomentumOptimizer(Optimizer): self._local_grad_clip_norm = local_grad_clip_norm self._num_trainers = num_trainers - self._clip_norm = local_grad_clip_norm / (num_trainers * - num_trainers) + self._clip_norm = local_grad_clip_norm * (num_trainers**-0.5) self._get_dgc_regularization_param() diff --git a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py index 996b6ae6ea4..07dda4d5942 100644 --- a/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_dgc_optimizer.py @@ -67,6 +67,8 @@ class TestDGCMomentumOptimizer(unittest.TestCase): learning_rate=learning_rate, momentum=0.2, rampup_begin_step=0, + local_grad_clip_norm=1.0, + num_trainers=2, regularization=regularization) mean_out = block.create_var( dtype="float32", shape=[1], lod_level=0, name="mean.out") -- GitLab