From 82367481e20cde30406e3ccd99e0b49ff31f2726 Mon Sep 17 00:00:00 2001
From: xiemoyuan <71377852+xiemoyuan@users.noreply.github.com>
Date: Fri, 5 Feb 2021 17:09:59 +0800
Subject: [PATCH] Fix DGU bug (#5272)

---
 PaddleNLP/examples/dialogue/dgu/main.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/PaddleNLP/examples/dialogue/dgu/main.py b/PaddleNLP/examples/dialogue/dgu/main.py
index b7a12664..352f0ef0 100644
--- a/PaddleNLP/examples/dialogue/dgu/main.py
+++ b/PaddleNLP/examples/dialogue/dgu/main.py
@@ -128,22 +128,13 @@ def train(args, model, train_data_loader, dev_data_loader, metric, rank):
         max_train_steps=max_train_steps)
     lr_scheduler = LambdaDecay(args.learning_rate, factor_fn)
     optimizer = AdamW(
-        learning_rate=lr_scheduler,
-        parameters=model.parameters(),
-        weight_decay=args.weight_decay,
-        apply_decay_param_fun=lambda x: x in [
-            params.name for params in model.parameters()
-            if not any(nd in params.name for nd in ['bias', 'norm'])],
-        grad_clip=nn.ClipGradByGlobalNorm(args.max_grad_norm)
-    )
-    optimizer = paddle.optimizer.AdamW(
         learning_rate=lr_scheduler,
         parameters=model.parameters(),
         weight_decay=args.weight_decay,
         apply_decay_param_fun=lambda x: x in [
             p.name for n, p in model.named_parameters()
-            if not any(nd in n for nd in ["bias", "norm"])
-        ])
+            if not any(nd in n for nd in ["bias", "norm"])],
+        grad_clip=nn.ClipGradByGlobalNorm(args.max_grad_norm))
     loss_fn = DGULossFunction(args.task_name)
 
     load_ckpt(args, model, optimizer)
--
GitLab
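
Note: the net effect of the patch is that train() builds a single AdamW optimizer that both skips weight decay for bias/normalization parameters and clips gradients by global norm, instead of creating two optimizers where the second silently dropped the grad_clip setting. The snippet below is a minimal, self-contained sketch of that pattern; the toy model and the hyperparameter values are placeholders for illustration, not code taken from the repository.

# Minimal sketch of the optimizer setup the patch keeps (the model and the
# hyperparameter values here are assumed placeholders, not the repo's code).
import paddle
import paddle.nn as nn
from paddle.optimizer.lr import LambdaDecay

model = nn.Linear(16, 2)  # stand-in for the DGU model
lr_scheduler = LambdaDecay(learning_rate=5e-5, lr_lambda=lambda step: 1.0)

# Parameters whose names contain "bias" or "norm" are excluded from weight decay.
decay_params = [
    p.name for n, p in model.named_parameters()
    if not any(nd in n for nd in ["bias", "norm"])
]

# A single AdamW instance now carries both the decay filter and the
# global-norm gradient clipping that the removed duplicate used to discard.
optimizer = paddle.optimizer.AdamW(
    learning_rate=lr_scheduler,
    parameters=model.parameters(),
    weight_decay=0.01,
    apply_decay_param_fun=lambda name: name in decay_params,
    grad_clip=nn.ClipGradByGlobalNorm(1.0))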