Use default num_iteration_per_drop_scope

82562dad · Yibing Liu · root · 23bf59ef · 82562dad · 82562dad
隐藏空白更改
内联并排

Showing 4 changed files with 1 addition and 6 deletions

BERT/README.md BERT/README.md +1 -2

BERT/run_classifier.py BERT/run_classifier.py +0 -2

BERT/run_squad.py BERT/run_squad.py +0 -1

BERT/train.py BERT/train.py +0 -1

未找到文件。
--- a/BERT/README.md
+++ b/BERT/README.md
@@ -151,8 +151,7 @@ python -u run_classifier.py --task_name ${TASK_NAME} \
                   --max_seq_len 512 \
                   --bert_config_path ${BERT_BASE_PATH}/bert_config.json \
                   --learning_rate 1e-4 \
-                   --skip_steps 10 \
+                   --skip_steps 10
-                   --num_iteration_per_drop_scope 1
 ```
 这里的 `chinese_L-12_H-768_A-12` 即是转换后的中文预训练模型。需要注意的是，BERT on PaddlePaddle 支持按两种方式构建一个 batch 的数据，`in_tokens` 参数影响 `batch_size` 参数的意义，如果 `in_tokens` 为 `true` 则按照 token 个数构建 batch, 如不设定则按照 example 个数来构建 batch. 训练过程中会输出训练误差、训练速度等信息，训练结束后会输出如下所示的在验证集上的测试结果：

--- a/BERT/run_classifier.py
+++ b/BERT/run_classifier.py
@@ -76,7 +76,6 @@ data_g.add_arg("random_seed",   int,  0,     "Random seed.")
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
 run_type_g.add_arg("use_cuda",                     bool,   True,  "If set, use GPU for training.")
 run_type_g.add_arg("use_fast_executor",            bool,   False, "If set, use fast parallel executor (in experiment).")
-run_type_g.add_arg("num_iteration_per_drop_scope", int,    10,    "Iteration intervals to drop scope.")
 run_type_g.add_arg("task_name",                    str,    None,
                   "The name of task to perform fine-tuning, should be in {'xnli', 'mnli', 'cola', 'mrpc'}.")
 run_type_g.add_arg("do_train",                     bool,   True,  "Whether to perform training.")
@@ -248,7 +247,6 @@ def main(args):
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
-        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,

--- a/BERT/run_squad.py
+++ b/BERT/run_squad.py
@@ -344,7 +344,6 @@ def train(args):
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
-        exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)
        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,

--- a/BERT/train.py
+++ b/BERT/train.py
@@ -313,7 +313,6 @@ def train(args):
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = dev_count
-    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False