未验证 提交 d1115696 编写于 作者: G Guo Sheng 提交者: GitHub

Fix single data file in BERT pre-training. (#5127)

上级 63e1893b
...@@ -131,7 +131,7 @@ python -u ./run_glue.py \ ...@@ -131,7 +131,7 @@ python -u ./run_glue.py \
| Task | Metric | Result | | Task | Metric | Result |
|:-----:|:----------------------------:|:-----------------:| |:-----:|:----------------------------:|:-----------------:|
| SST-2 | Accuracy | 0.92660 | | SST-2 | Accuracy | 0.92660 |
| QNLI | Accuracy | 0.91781 | | QNLI | Accuracy | 0.91707 |
| CoLA | Matthews corr | 0.59557 | | CoLA | Matthews corr | 0.59557 |
| MRPC | F1/Accuracy | 0.91667/0.88235 | | MRPC | F1/Accuracy | 0.91667/0.88235 |
| STS-B | Pearson/Spearman corr | 0.88847/0.88350 | | STS-B | Pearson/Spearman corr | 0.88847/0.88350 |
......
...@@ -340,13 +340,13 @@ def do_train(args): ...@@ -340,13 +340,13 @@ def do_train(args):
train_data_loader, _ = create_pretraining_dataset( train_data_loader, _ = create_pretraining_dataset(
data_file, args.max_predictions_per_seq, shared_file_list, args, data_file, args.max_predictions_per_seq, shared_file_list, args,
worker_init) worker_init)
# TODO(guosheng): better way to process single file # TODO(guosheng): better way to process single file
if f_start_id + 1 == len(files): single_file = True single_file = True if f_start_id + 1 == len(files) else False
for f_id in range(f_start_id, len(files)): for f_id in range(f_start_id, len(files)):
if not single_file: if not single_file and f_id == f_start_id:
continue continue
if paddle.distributed.get_world_size() > num_files: if paddle.distributed.get_world_size() > num_files:
data_file = files[(f_id * paddle.distributed.get_world_size() + data_file = files[(f_id * paddle.distributed.get_world_size() +
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册