Commit d812ee8c authored by Hongyu Li, committed by Yibing Liu

Minor bug fixed (#2488)

Parent 36d21984
@@ -50,7 +50,7 @@ The combined files will be saved in `./data/train/mrqa-combined.raw.json` and `.
To get better performance than the official baseline, we provide a pretrained model, **ERNIE**, for fine-tuning. To download the ERNIE parameters, run
```
-sh download_pre_train_model.sh
+sh download_pretrained_model.sh
```
The pretrained model parameters and config files will be saved in `./ernie_model`.
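The fine-tuning scripts in this commit read `params`, `vocab.txt`, and `bert_config.json` from `./ernie_model`, so a quick layout check after the download can catch a broken fetch early. A minimal sketch (not part of the repo):
```
import os

# Files the fine-tuning scripts below expect under ./ernie_model.
expected = ["params", "vocab.txt", "bert_config.json"]
missing = [f for f in expected
           if not os.path.exists(os.path.join("ernie_model", f))]
if missing:
    raise RuntimeError("ernie_model is incomplete, missing: %s" % missing)
```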
@@ -73,7 +73,7 @@ Where `parameters_to_restore` is the model parameters used in the evaluation (
| Model | HotpotQA | NaturalQ | NewsQA | SearchQA | SQuAD | TriviaQA | Macro-F1 |
| :------------- | :---------: | :----------: | :---------: | :----------: | :---------: | :----------: |:----------: |
-| baseline + EMA | 82.3/66.8 | 81.6/70.0 | 73.1/57.9 | 85.1/79.1 | 93.3/87.1 | 79.0/73.4 | 82.4 |
+| baseline + EMA | 81.4/65.5 | 81.6/70.0 | 73.1/57.9 | 85.1/79.1 | 93.3/87.1 | 79.0/73.4 | 82.4 |
| baseline w/o EMA | 82.4/66.9 | 81.7/69.9 | 73.0/57.8 | 85.1/79.2 | 93.4/87.2 | 79.0/73.4 | 82.4 |
##### out-of-domain dev (F1/EM)
......
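For reference, the F1/EM pairs above are the standard extractive-QA metrics: EM is exact match of the normalized answer string, and F1 is token-level overlap between prediction and gold answer. A simplified sketch (the official MRQA evaluation additionally strips articles and punctuation):
```
from collections import Counter

def exact_match(pred, gold):
    return float(pred.strip().lower() == gold.strip().lower())

def token_f1(pred, gold):
    pred_toks, gold_toks = pred.lower().split(), gold.lower().split()
    overlap = sum((Counter(pred_toks) & Counter(gold_toks)).values())
    if overlap == 0:
        return 0.0
    precision = overlap / float(len(pred_toks))
    recall = overlap / float(len(gold_toks))
    return 2 * precision * recall / (precision + recall)
```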
@@ -24,7 +24,7 @@ export FLAGS_eager_delete_tensor_gb=1
export CUDA_VISIBLE_DEVICES=0
# path of the pretrained model
-BERT_BASE_PATH=ernie_model
+ERNIE_BASE_PATH=ernie_model
# path to save checkpoint
CHECKPOINT_PATH=output/
mkdir -p $CHECKPOINT_PATH
@@ -36,9 +36,9 @@ DATA_PATH_dev=data/dev
python -u src/run_mrqa.py --use_cuda true\
--batch_size 4 \
--in_tokens false \
---init_pretraining_params ${BERT_BASE_PATH}/params \
+--init_pretraining_params ${ERNIE_BASE_PATH}/params \
--checkpoints ${CHECKPOINT_PATH} \
---vocab_path ${BERT_BASE_PATH}/vocab.txt \
+--vocab_path ${ERNIE_BASE_PATH}/vocab.txt \
--do_train true \
--do_predict true \
--save_steps 10000 \
@@ -46,7 +46,7 @@ python -u src/run_mrqa.py --use_cuda true\
--weight_decay 0.01 \
--epoch 2 \
--max_seq_len 512 \
---bert_config_path ${BERT_BASE_PATH}/bert_config.json \
+--bert_config_path ${ERNIE_BASE_PATH}/bert_config.json \
--predict_file ${DATA_PATH_dev}/mrqa-combined.raw.json \
--do_lower_case true \
--doc_stride 128 \
......
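In both scripts, `--max_seq_len 512` caps the input length while `--doc_stride 128` controls how passages longer than the cap are split into overlapping windows, each window starting 128 tokens after the previous one. A rough sketch of that windowing (the real reader also reserves room for the question and special tokens):
```
def split_into_windows(doc_tokens, max_tokens=512, doc_stride=128):
    """Yield (offset, window) pairs of overlapping token spans."""
    start = 0
    while True:
        yield start, doc_tokens[start:start + max_tokens]
        if start + max_tokens >= len(doc_tokens):
            break
        start += doc_stride
```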
@@ -17,11 +17,14 @@
set -xe
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1
# set CUDA_VISIBLE_DEVICES
export CUDA_VISIBLE_DEVICES=0
# path of the pretrained model
-BERT_BASE_PATH=ernie_model
+ERNIE_BASE_PATH=ernie_model
# path to save checkpoint
CHECKPOINT_PATH=output/
mkdir -p $CHECKPOINT_PATH
@@ -34,10 +37,10 @@ DATA_PATH_dev=data/dev
python -u src/run_mrqa.py --use_cuda true\
--batch_size 8 \
--in_tokens false \
---init_pretraining_params ${BERT_BASE_PATH}/params \
+--init_pretraining_params ${ERNIE_BASE_PATH}/params \
--init_checkpoint ${PATH_init_checkpoint} \
--checkpoints ${CHECKPOINT_PATH} \
---vocab_path ${BERT_BASE_PATH}/vocab.txt \
+--vocab_path ${ERNIE_BASE_PATH}/vocab.txt \
--do_train false \
--do_predict true \
--save_steps 10000 \
@@ -45,7 +48,7 @@ python -u src/run_mrqa.py --use_cuda true\
--weight_decay 0.01 \
--epoch 2 \
--max_seq_len 512 \
---bert_config_path ${BERT_BASE_PATH}/bert_config.json \
+--bert_config_path ${ERNIE_BASE_PATH}/bert_config.json \
--predict_file ${DATA_PATH_dev}/mrqa-combined.raw.json \
--do_lower_case true \
--doc_stride 128 \
......
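The only substantive differences from the fine-tuning script are `--do_train false` and the extra `--init_checkpoint ${PATH_init_checkpoint}`: a prediction-only run restores full fine-tuned weights from a checkpoint, whereas a fresh fine-tuning run initializes from the pretrained ERNIE parameters alone. In the reference BERT/ERNIE fine-tuning code the checkpoint, when given, takes precedence; an illustrative sketch of that precedence (names are not the repo's actual code):
```
def choose_init_source(init_checkpoint, init_pretraining_params):
    # A full checkpoint (fine-tuned weights, e.g. for prediction) wins
    # over pretrained-only parameters (fresh fine-tuning).
    if init_checkpoint:
        return "checkpoint", init_checkpoint
    return "pretraining_params", init_pretraining_params
```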
@@ -241,7 +241,7 @@ def train(args):
if args.random_seed is not None:
startup_prog.random_seed = args.random_seed
-if args.do_train:
+if args.do_train:
build_strategy = fluid.BuildStrategy()
print("estimating runtime number of examples...")
num_train_examples = processor.estimate_runtime_examples(args.train_file, sample_rate=args.sample_rate)
@@ -277,7 +277,7 @@ def train(args):
pyreader_name='train_reader',
bert_config=bert_config,
is_training=True)
train_pyreader.decorate_tensor_provider(train_data_generator)
scheduled_lr = optimization(
@@ -291,9 +291,9 @@
scheduler=args.lr_scheduler,
use_fp16=args.use_fp16,
loss_scaling=args.loss_scaling)
-loss.persistable = True
-num_seqs.persistable = True
+loss.persistable = True
+num_seqs.persistable = True
ema = fluid.optimizer.ExponentialMovingAverage(args.ema_decay)
ema.update()
@@ -312,7 +312,7 @@ def train(args):
print("Theoretical memory usage in training: %.3f - %.3f %s" %
(lower_mem, upper_mem, unit))
-if args.do_predict:
+if args.do_predict:
build_strategy = fluid.BuildStrategy()
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
@@ -321,7 +321,7 @@ def train(args):
pyreader_name='test_reader',
bert_config=bert_config,
is_training=False)
if 'ema' not in dir():
ema = fluid.optimizer.ExponentialMovingAverage(args.ema_decay)
@@ -441,11 +441,11 @@ def train(args):
if args.use_ema:
with ema.apply(exe):
-predict(exe, test_prog, test_pyreader, [
+predict(exe, test_compiled_program, test_pyreader, [
unique_ids.name, start_logits.name, end_logits.name, num_seqs.name
], processor, prefix='ema_')
else:
-predict(exe, test_prog, test_pyreader, [
+predict(exe, test_compiled_program, test_pyreader, [
unique_ids.name, start_logits.name, end_logits.name, num_seqs.name
], processor)
......
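Two things are going on in the Python changes above. First, `ExponentialMovingAverage` keeps a shadow copy of each trainable parameter that is refreshed after every optimizer step (`ema.update()`) and temporarily swapped in for evaluation (`with ema.apply(exe):`); that swap is what the `baseline + EMA` rows in the README measure. Second, the actual bug fix: `predict` now receives `test_compiled_program` (the test program wrapped with the build strategy) instead of the raw `test_prog`, so prediction runs through the same compiled executor path as training. A framework-free sketch of the EMA update rule, with the decay corresponding to `args.ema_decay`:
```
class ShadowEMA(object):
    """Illustrative sketch of parameter EMA, not the fluid implementation."""

    def __init__(self, params, decay=0.9999):
        self.decay = decay
        self.shadow = dict(params)  # shadow copy of each parameter

    def update(self, params):
        # shadow = decay * shadow + (1 - decay) * current, per parameter
        for name, value in params.items():
            self.shadow[name] = (self.decay * self.shadow[name] +
                                 (1.0 - self.decay) * value)
```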