From d0299b811a9658d3e4d73132b9291fa939dbe6fd Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 22 Oct 2019 09:51:32 +0800 Subject: [PATCH] Fix div zero error in lr decay (#3694) * Fix div zero error in lr decay * Update readme --- .../BERT/README.md | 2 +- .../BERT/model/classifier.py | 4 ++-- .../BERT/optimization.py | 23 +++++++++++++++---- .../BERT/run_squad.py | 6 ++--- .../BERT/train.py | 2 +- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/PaddleNLP/language_representations_kit/BERT/README.md b/PaddleNLP/language_representations_kit/BERT/README.md index 626f382a..7ed1d28b 100644 --- a/PaddleNLP/language_representations_kit/BERT/README.md +++ b/PaddleNLP/language_representations_kit/BERT/README.md @@ -70,7 +70,7 @@ ``` ## 安装 -本项目依赖于 Paddle Fluid **1.5.1** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。 +本项目依赖于 Paddle Fluid **1.6.0** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。 ## 预训练 diff --git a/PaddleNLP/language_representations_kit/BERT/model/classifier.py b/PaddleNLP/language_representations_kit/BERT/model/classifier.py index 8b9fea2b..03bfb7aa 100644 --- a/PaddleNLP/language_representations_kit/BERT/model/classifier.py +++ b/PaddleNLP/language_representations_kit/BERT/model/classifier.py @@ -26,13 +26,13 @@ def create_model(args, bert_config, num_labels, is_prediction=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1]], 'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'], 'lod_levels': [0, 0, 0, 0, 0], } inputs = [ - fluid.layers.data( + fluid.data( name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], diff --git a/PaddleNLP/language_representations_kit/BERT/optimization.py b/PaddleNLP/language_representations_kit/BERT/optimization.py index c0795018..0771ab77 100644 --- a/PaddleNLP/language_representations_kit/BERT/optimization.py +++ b/PaddleNLP/language_representations_kit/BERT/optimization.py @@ -73,9 +73,10 @@ def optimization(loss, .noam_decay(1/(warmup_steps *(learning_rate ** 2)), warmup_steps) else: - printf( - "WARNING: noam decay should have postive warmup steps, using " - "constant learning rate instead!") + print( + "WARNING: noam decay of learning rate should have postive warmup " + "steps but given {}, using constant learning rate instead!" + .format(warmup_steps)) scheduled_lr = fluid.layers.create_global_var( name=fluid.unique_name.generate("learning_rate"), shape=[1], @@ -83,8 +84,20 @@ def optimization(loss, dtype='float32', persistable=True) elif scheduler == 'linear_warmup_decay': - scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, - num_train_steps) + if warmup_steps > 0: + scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, + num_train_steps) + else: + print( + "WARNING: linear warmup decay of learning rate should have " + "postive warmup steps but given {}, use constant learning rate " + "instead!".format(warmup_steps)) + scheduled_lr = fluid.layers.create_global_var( + name=fluid.unique_name.generate("learning_rate"), + shape=[1], + value=learning_rate, + dtype='float32', + persistable=True) else: raise ValueError("Unkown learning rate scheduler, should be " "'noam_decay' or 'linear_warmup_decay'") diff --git a/PaddleNLP/language_representations_kit/BERT/run_squad.py b/PaddleNLP/language_representations_kit/BERT/run_squad.py index af0a706c..fc3659b6 100644 --- a/PaddleNLP/language_representations_kit/BERT/run_squad.py +++ b/PaddleNLP/language_representations_kit/BERT/run_squad.py @@ -111,7 +111,7 @@ def create_model(bert_config, is_training=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1], [None, 1]], 'dtypes': [ 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'], 'lod_levels': [0, 0, 0, 0, 0, 0], @@ -120,13 +120,13 @@ def create_model(bert_config, is_training=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1]], 'dtypes': [ 'int64', 'int64', 'int64', 'float32', 'int64'], 'lod_levels': [0, 0, 0, 0, 0], } - inputs = [fluid.layers.data(name=input_fields['names'][i], + inputs = [fluid.data(name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] diff --git a/PaddleNLP/language_representations_kit/BERT/train.py b/PaddleNLP/language_representations_kit/BERT/train.py index 686e5281..f9a1fb49 100644 --- a/PaddleNLP/language_representations_kit/BERT/train.py +++ b/PaddleNLP/language_representations_kit/BERT/train.py @@ -105,7 +105,7 @@ def create_model(bert_config): 'lod_levels': [0, 0, 0, 0, 0, 0, 0], } - inputs = [fluid.layers.data(name=input_fields['names'][i], + inputs = [fluid.data(name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] -- GitLab