From 82562dad34e5ed0038c0f6493a4a74f13d8fdd0b Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Mon, 11 Mar 2019 06:35:49 +0000
Subject: [PATCH] Use default num_iteration_per_drop_scope

---
 BERT/README.md         | 3 +--
 BERT/run_classifier.py | 2 --
 BERT/run_squad.py      | 1 -
 BERT/train.py          | 1 -
 4 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/BERT/README.md b/BERT/README.md
index 6af28df..527bd22 100644
--- a/BERT/README.md
+++ b/BERT/README.md
@@ -151,8 +151,7 @@ python -u run_classifier.py --task_name ${TASK_NAME} \
        --max_seq_len 512 \
        --bert_config_path ${BERT_BASE_PATH}/bert_config.json \
        --learning_rate 1e-4 \
-       --skip_steps 10 \
-       --num_iteration_per_drop_scope 1
+       --skip_steps 10
 ```
 
 Here, `chinese_L-12_H-768_A-12` is the converted Chinese pre-trained model. Note that BERT on PaddlePaddle supports two ways of building a batch of data; the `in_tokens` flag changes the meaning of the `batch_size` argument: if `in_tokens` is `true`, batches are built by token count, otherwise by example count. Training loss, training speed, and other information are printed during training, and after training finishes, results on the validation set are reported as shown below:
diff --git a/BERT/run_classifier.py b/BERT/run_classifier.py
index 1453ef7..01cc70f 100644
--- a/BERT/run_classifier.py
+++ b/BERT/run_classifier.py
@@ -76,7 +76,6 @@ data_g.add_arg("random_seed", int, 0, "Random seed.")
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
 run_type_g.add_arg("use_fast_executor", bool, False, "If set, use fast parallel executor (in experiment).")
-run_type_g.add_arg("num_iteration_per_drop_scope", int, 10, "Iteration intervals to drop scope.")
 run_type_g.add_arg("task_name", str, None,
                    "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
 run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
@@ -248,7 +247,6 @@ def main(args):
     if args.use_fast_executor:
         exec_strategy.use_experimental_executor = True
     exec_strategy.num_threads = dev_count
-    exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
 
     train_exe = fluid.ParallelExecutor(
         use_cuda=args.use_cuda,
diff --git a/BERT/run_squad.py b/BERT/run_squad.py
index 07ce1c1..efc0d9c 100644
--- a/BERT/run_squad.py
+++ b/BERT/run_squad.py
@@ -344,7 +344,6 @@ def train(args):
     if args.use_fast_executor:
         exec_strategy.use_experimental_executor = True
     exec_strategy.num_threads = dev_count
-    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)
 
     train_exe = fluid.ParallelExecutor(
         use_cuda=args.use_cuda,
diff --git a/BERT/train.py b/BERT/train.py
index 64c751c..9b0698d 100644
--- a/BERT/train.py
+++ b/BERT/train.py
@@ -313,7 +313,6 @@ def train(args):
     if args.use_fast_executor:
         exec_strategy.use_experimental_executor = True
     exec_strategy.num_threads = dev_count
-    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)
 
     build_strategy = fluid.BuildStrategy()
     build_strategy.remove_unnecessary_lock = False
--
GitLab
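
Note for readers of this patch: the sketch below shows the executor setup that remains after these deletions, with `num_iteration_per_drop_scope` left at the framework default instead of being tied to `skip_steps` or a CLI flag. It is a minimal sketch assuming the PaddlePaddle 1.x `fluid` API these scripts use; the tiny one-layer program, the `dev_count` fallback, and the variable names are illustrative stand-ins, not code taken from the repo.

```python
import paddle.fluid as fluid

# Illustrative stand-in program; the real scripts build the BERT model here.
x = fluid.layers.data(name="x", shape=[1], dtype="float32")
loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

use_cuda = fluid.core.is_compiled_with_cuda()
dev_count = fluid.core.get_cuda_device_count() if use_cuda else 1

place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
fluid.Executor(place).run(fluid.default_startup_program())

exec_strategy = fluid.ExecutionStrategy()
exec_strategy.use_experimental_executor = True  # mirrors --use_fast_executor
exec_strategy.num_threads = dev_count           # one thread per device, as in the scripts
# num_iteration_per_drop_scope is deliberately left unset after this patch,
# so the framework default decides how often temporary scopes are dropped.

train_exe = fluid.ParallelExecutor(
    use_cuda=use_cuda,
    loss_name=loss.name,
    exec_strategy=exec_strategy)
```

Dropping the explicit setting removes the coupling between scope cleanup and `skip_steps` (or a user-facing flag), letting the framework choose the interval without per-script tuning.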