Commit d812ee8c authored by Hongyu Li, committed by Yibing Liu

Minor bug fixed (#2488)

Parent 36d21984
@@ -50,7 +50,7 @@ The combined files will be saved in `./data/train/mrqa-combined.raw.json` and `.
To get better performance than the official baseline, we provide a pretrained model, **ERNIE**, for fine-tuning. To download the ERNIE parameters, run
```
-sh download_pre_train_model.sh
+sh download_pretrained_model.sh
```
The pretrained model parameters and config files will be saved in `./ernie_model`.
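The fine-tuning scripts in this commit read `params`, `vocab.txt`, and `bert_config.json` from `./ernie_model`, so a quick layout check after the download can catch a broken fetch early. A minimal sketch (not part of the repo):
```
import os

# Files the fine-tuning scripts below expect under ./ernie_model.
expected = ["params", "vocab.txt", "bert_config.json"]
missing = [f for f in expected
           if not os.path.exists(os.path.join("ernie_model", f))]
if missing:
    raise RuntimeError("ernie_model is incomplete, missing: %s" % missing)
```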
@@ -73,7 +73,7 @@ Where `parameters_to_restore` is the model parameters used in the evaluation (
| Model | HotpotQA | NaturalQ | NewsQA | SearchQA | SQuAD | TriviaQA | Macro-F1 |
| :------------- | :---------: | :----------: | :---------: | :----------: | :---------: | :----------: |:----------: |
-| baseline + EMA | 82.3/66.8 | 81.6/70.0 | 73.1/57.9 | 85.1/79.1 | 93.3/87.1 | 79.0/73.4 | 82.4 |
+| baseline + EMA | 81.4/65.5 | 81.6/70.0 | 73.1/57.9 | 85.1/79.1 | 93.3/87.1 | 79.0/73.4 | 82.4 |
| baseline w/o EMA | 82.4/66.9 | 81.7/69.9 | 73.0/57.8 | 85.1/79.2 | 93.4/87.2 | 79.0/73.4 | 82.4 |
##### out-of-domain dev (F1/EM)
......
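For reference, the F1/EM pairs above are the standard extractive-QA metrics: EM is exact match of the normalized answer string, and F1 is token-level overlap between prediction and gold answer. A simplified sketch (the official MRQA evaluation additionally strips articles and punctuation):
```
from collections import Counter

def exact_match(pred, gold):
    return float(pred.strip().lower() == gold.strip().lower())

def token_f1(pred, gold):
    pred_toks, gold_toks = pred.lower().split(), gold.lower().split()
    overlap = sum((Counter(pred_toks) & Counter(gold_toks)).values())
    if overlap == 0:
        return 0.0
    precision = overlap / float(len(pred_toks))
    recall = overlap / float(len(gold_toks))
    return 2 * precision * recall / (precision + recall)
```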
@@ -24,7 +24,7 @@ export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0
# path of the pretrained model
-BERT_BASE_PATH=ernie_model
+ERNIE_BASE_PATH=ernie_model
# path to save checkpoint
CHECKPOINT_PATH=output/
mkdir -p $CHECKPOINT_PATH
@@ -36,9 +36,9 @@ DATA_PATH_dev=data/dev
python -u src/run_mrqa.py --use_cuda true\
--batch_size 4 \
--in_tokens false \
---init_pretraining_params ${BERT_BASE_PATH}/params \
+--init_pretraining_params ${ERNIE_BASE_PATH}/params \
--checkpoints ${CHECKPOINT_PATH} \
---vocab_path ${BERT_BASE_PATH}/vocab.txt \
+--vocab_path ${ERNIE_BASE_PATH}/vocab.txt \
--do_train true \
--do_predict true \
--save_steps 10000 \
@@ -46,7 +46,7 @@ python -u src/run_mrqa.py --use_cuda true\
--weight_decay 0.01 \
--epoch 2 \
--max_seq_len 512 \
---bert_config_path ${BERT_BASE_PATH}/bert_config.json \
+--bert_config_path ${ERNIE_BASE_PATH}/bert_config.json \
--predict_file ${DATA_PATH_dev}/mrqa-combined.raw.json \
--do_lower_case true \
--doc_stride 128 \
......
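In both scripts, `--max_seq_len 512` caps the input length while `--doc_stride 128` controls how passages longer than the cap are split into overlapping windows, each window starting 128 tokens after the previous one. A rough sketch of that windowing (the real reader also reserves room for the question and special tokens):
```
def split_into_windows(doc_tokens, max_tokens=512, doc_stride=128):
    """Yield (offset, window) pairs of overlapping token spans."""
    start = 0
    while True:
        yield start, doc_tokens[start:start + max_tokens]
        if start + max_tokens >= len(doc_tokens):
            break
        start += doc_stride
```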
@@ -17,11 +17,14 @@
set -xe
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0
# path of the pretrained model
-BERT_BASE_PATH=ernie_model
+ERNIE_BASE_PATH=ernie_model
# path to save checkpoint
CHECKPOINT_PATH=output/
mkdir -p $CHECKPOINT_PATH
@@ -34,10 +37,10 @@ DATA_PATH_dev=data/dev
python -u src/run_mrqa.py --use_cuda true\
--batch_size 8 \
--in_tokens false \
---init_pretraining_params ${BERT_BASE_PATH}/params \
+--init_pretraining_params ${ERNIE_BASE_PATH}/params \
--init_checkpoint ${PATH_init_checkpoint} \
--checkpoints ${CHECKPOINT_PATH} \
---vocab_path ${BERT_BASE_PATH}/vocab.txt \
+--vocab_path ${ERNIE_BASE_PATH}/vocab.txt \
--do_train false \
--do_predict true \
--save_steps 10000 \
@@ -45,7 +48,7 @@ python -u src/run_mrqa.py --use_cuda true\
--weight_decay 0.01 \
--epoch 2 \
--max_seq_len 512 \
---bert_config_path ${BERT_BASE_PATH}/bert_config.json \
+--bert_config_path ${ERNIE_BASE_PATH}/bert_config.json \
--predict_file ${DATA_PATH_dev}/mrqa-combined.raw.json \
--do_lower_case true \
--doc_stride 128 \
......
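The only substantive differences from the fine-tuning script are `--do_train false` and the extra `--init_checkpoint ${PATH_init_checkpoint}`: a prediction-only run restores full fine-tuned weights from a checkpoint, whereas a fresh fine-tuning run initializes from the pretrained ERNIE parameters alone. In the reference BERT/ERNIE fine-tuning code the checkpoint, when given, takes precedence; an illustrative sketch of that precedence (names are not the repo's actual code):
```
def choose_init_source(init_checkpoint, init_pretraining_params):
    # A full checkpoint (fine-tuned weights, e.g. for prediction) wins
    # over pretrained-only parameters (fresh fine-tuning).
    if init_checkpoint:
        return "checkpoint", init_checkpoint
    return "pretraining_params", init_pretraining_params
```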
@@ -241,7 +241,7 @@ def train(args):
if args.random_seed is not None:
startup_prog.random_seed = args.random_seed
-if args.do_train:
+if args.do_train:
build_strategy = fluid.BuildStrategy()
print("estimating runtime number of examples...")
num_train_examples = processor.estimate_runtime_examples(args.train_file, sample_rate=args.sample_rate)
@@ -277,7 +277,7 @@ def train(args):
pyreader_name='train_reader',
bert_config=bert_config,
is_training=True)
train_pyreader.decorate_tensor_provider(train_data_generator)
scheduled_lr = optimization(
@@ -291,9 +291,9 @@
scheduler=args.lr_scheduler,
use_fp16=args.use_fp16,
loss_scaling=args.loss_scaling)
-loss.persistable = True
-num_seqs.persistable = True
+loss.persistable = True
+num_seqs.persistable = True
ema = fluid.optimizer.ExponentialMovingAverage(args.ema_decay)
ema.update()
@@ -312,7 +312,7 @@ def train(args):
print("Theoretical memory usage in training: %.3f - %.3f %s" %
(lower_mem, upper_mem, unit))
-if args.do_predict:
+if args.do_predict:
build_strategy = fluid.BuildStrategy()
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
@@ -321,7 +321,7 @@ def train(args):
pyreader_name='test_reader',
bert_config=bert_config,
is_training=False)
if 'ema' not in dir():
ema = fluid.optimizer.ExponentialMovingAverage(args.ema_decay)
@@ -441,11 +441,11 @@ def train(args):
if args.use_ema:
with ema.apply(exe):
-predict(exe, test_prog, test_pyreader, [
+predict(exe, test_compiled_program, test_pyreader, [
unique_ids.name, start_logits.name, end_logits.name, num_seqs.name
], processor, prefix='ema_')
else:
-predict(exe, test_prog, test_pyreader, [
+predict(exe, test_compiled_program, test_pyreader, [
unique_ids.name, start_logits.name, end_logits.name, num_seqs.name
], processor)
......
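Two things are going on in the Python changes above. First, `ExponentialMovingAverage` keeps a shadow copy of each trainable parameter that is refreshed after every optimizer step (`ema.update()`) and temporarily swapped in for evaluation (`with ema.apply(exe):`); that swap is what the `baseline + EMA` rows in the README measure. Second, the actual bug fix: `predict` now receives `test_compiled_program` (the test program wrapped with the build strategy) instead of the raw `test_prog`, so prediction runs through the same compiled executor path as training. A framework-free sketch of the EMA update rule, with the decay corresponding to `args.ema_decay`:
```
class ShadowEMA(object):
    """Illustrative sketch of parameter EMA, not the fluid implementation."""

    def __init__(self, params, decay=0.9999):
        self.decay = decay
        self.shadow = dict(params)  # shadow copy of each parameter

    def update(self, params):
        # shadow = decay * shadow + (1 - decay) * current, per parameter
        for name, value in params.items():
            self.shadow[name] = (self.decay * self.shadow[name] +
                                 (1.0 - self.decay) * value)
```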