From 0342f0124929ce7d7af6d28ed9c3cf013cd8bf16 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 2 Apr 2019 12:03:10 +0800 Subject: [PATCH] Fix dgc bug. (#16602) --- paddle/fluid/operators/dgc_clip_by_norm_op.h | 27 +++++++++++--------- python/paddle/fluid/optimizer.py | 4 +-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/operators/dgc_clip_by_norm_op.h b/paddle/fluid/operators/dgc_clip_by_norm_op.h index bd22d16f7..197bf59b2 100644 --- a/paddle/fluid/operators/dgc_clip_by_norm_op.h +++ b/paddle/fluid/operators/dgc_clip_by_norm_op.h @@ -24,18 +24,21 @@ class DGCClipByNormKernel : public ClipByNormKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto rampup_begin_step = context.Attr("rampup_begin_step"); - if (static_cast(rampup_begin_step) >= 0) { - auto current_step_tensor = - context.Input("current_step"); - auto* current_step = current_step_tensor->data(); - - if (static_cast(*current_step) < - static_cast(rampup_begin_step)) { - VLOG(10) << "current_step:" << *current_step - << " < rampup_begin_step:" << rampup_begin_step - << " so does't use dgc_clip_by_norm"; - return; - } + if (static_cast(rampup_begin_step) < 0) { + return; + } + + auto current_step_tensor = context.Input("current_step"); + auto* current_step = current_step_tensor->data(); + + VLOG(10) << "current_step:" << *current_step + << ", rampup_begin_step:" << rampup_begin_step; + + if (static_cast(*current_step) < static_cast(rampup_begin_step)) { + VLOG(10) << "current_step:" << *current_step + << " < rampup_begin_step:" << rampup_begin_step + << " so does't use dgc_clip_by_norm"; + return; } return ClipByNormKernel::Compute(context); diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 79accabe8..7e6e37116 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -832,7 +832,7 @@ class DGCMomentumOptimizer(MomentumOptimizer): type=x.type, name=name, dtype=x.dtype, persistable=False) helper.append_op( - type="clip_by_norm", + type="dgc_clip_by_norm", inputs={"X": x, "current_step": self._global_step_var}, attrs={ @@ -845,7 +845,7 @@ class DGCMomentumOptimizer(MomentumOptimizer): def _append_clip_norm(self, grad_var, clip_norm): with grad_var.block.program._backward_role_guard(): return self._clip_by_norm( - x=grad_var, max_norm=clip_norm, name=grad_var.name + "@DGC") + x=grad_var, max_norm=clip_norm, name=grad_var.name) def _dgc_op(self, param_var, clip_var, grad_var, u_var, v_var, k_var, encoded_var): -- GitLab