diff --git a/PaddleNLP/language_representations_kit/BERT/README.md b/PaddleNLP/language_representations_kit/BERT/README.md index 626f382aa37994b72ffc92921bf6b7a77f761278..7ed1d28bca0f5454995b6e67951894b7430c20bc 100644 --- a/PaddleNLP/language_representations_kit/BERT/README.md +++ b/PaddleNLP/language_representations_kit/BERT/README.md @@ -70,7 +70,7 @@ ``` ## 安装 -本项目依赖于 Paddle Fluid **1.5.1** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。 +本项目依赖于 Paddle Fluid **1.6.0** 及以上版本,请参考[安装指南](http://www.paddlepaddle.org/#quick-start)进行安装。如果需要进行 TensorFlow 模型到 Paddle Fluid 参数的转换,则需要同时安装 TensorFlow 1.12。 ## 预训练 diff --git a/PaddleNLP/language_representations_kit/BERT/model/classifier.py b/PaddleNLP/language_representations_kit/BERT/model/classifier.py index 8b9fea2b7171b8e6dd123a4eb27fbf75b563328c..03bfb7aa1504253399b5bdd4d1f4fe9c41cad27b 100644 --- a/PaddleNLP/language_representations_kit/BERT/model/classifier.py +++ b/PaddleNLP/language_representations_kit/BERT/model/classifier.py @@ -26,13 +26,13 @@ def create_model(args, bert_config, num_labels, is_prediction=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1]], 'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'], 'lod_levels': [0, 0, 0, 0, 0], } inputs = [ - fluid.layers.data( + fluid.data( name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], diff --git a/PaddleNLP/language_representations_kit/BERT/optimization.py b/PaddleNLP/language_representations_kit/BERT/optimization.py index c0795018a883832b49660024aaa0ccac489c4a11..0771ab77922dea90104ff67ab201bfb307212637 100644 --- a/PaddleNLP/language_representations_kit/BERT/optimization.py +++ b/PaddleNLP/language_representations_kit/BERT/optimization.py @@ -73,9 +73,10 @@ def optimization(loss, .noam_decay(1/(warmup_steps *(learning_rate ** 2)), warmup_steps) else: - printf( - "WARNING: noam decay should have postive warmup steps, using " - "constant learning rate instead!") + print( + "WARNING: noam decay of learning rate should have postive warmup " + "steps but given {}, using constant learning rate instead!" + .format(warmup_steps)) scheduled_lr = fluid.layers.create_global_var( name=fluid.unique_name.generate("learning_rate"), shape=[1], @@ -83,8 +84,20 @@ def optimization(loss, dtype='float32', persistable=True) elif scheduler == 'linear_warmup_decay': - scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, - num_train_steps) + if warmup_steps > 0: + scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, + num_train_steps) + else: + print( + "WARNING: linear warmup decay of learning rate should have " + "postive warmup steps but given {}, use constant learning rate " + "instead!".format(warmup_steps)) + scheduled_lr = fluid.layers.create_global_var( + name=fluid.unique_name.generate("learning_rate"), + shape=[1], + value=learning_rate, + dtype='float32', + persistable=True) else: raise ValueError("Unkown learning rate scheduler, should be " "'noam_decay' or 'linear_warmup_decay'") diff --git a/PaddleNLP/language_representations_kit/BERT/run_squad.py b/PaddleNLP/language_representations_kit/BERT/run_squad.py index af0a706ca4296cb4f5899aee205288b01825bb00..fc3659b61eeb7d06c252a61f83c99fbe3adfea97 100644 --- a/PaddleNLP/language_representations_kit/BERT/run_squad.py +++ b/PaddleNLP/language_representations_kit/BERT/run_squad.py @@ -111,7 +111,7 @@ def create_model(bert_config, is_training=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'start_positions', 'end_positions'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1], [None, 1]], 'dtypes': [ 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'], 'lod_levels': [0, 0, 0, 0, 0, 0], @@ -120,13 +120,13 @@ def create_model(bert_config, is_training=False): input_fields = { 'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'], 'shapes': [[None, None], [None, None], [None, None], - [-1, args.max_seq_len, 1], [-1, 1]], + [None, args.max_seq_len, 1], [None, 1]], 'dtypes': [ 'int64', 'int64', 'int64', 'float32', 'int64'], 'lod_levels': [0, 0, 0, 0, 0], } - inputs = [fluid.layers.data(name=input_fields['names'][i], + inputs = [fluid.data(name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))] diff --git a/PaddleNLP/language_representations_kit/BERT/train.py b/PaddleNLP/language_representations_kit/BERT/train.py index 686e5281a11795ed7f08c6ecf9a5662cf809e112..f9a1fb49ef4fda9957f27e90b7a18cea6f6d4e4b 100644 --- a/PaddleNLP/language_representations_kit/BERT/train.py +++ b/PaddleNLP/language_representations_kit/BERT/train.py @@ -105,7 +105,7 @@ def create_model(bert_config): 'lod_levels': [0, 0, 0, 0, 0, 0, 0], } - inputs = [fluid.layers.data(name=input_fields['names'][i], + inputs = [fluid.data(name=input_fields['names'][i], shape=input_fields['shapes'][i], dtype=input_fields['dtypes'][i], lod_level=input_fields['lod_levels'][i]) for i in range(len(input_fields['names']))]