From 1a672a1c652e1e7139b7666fd074b78f8d76c651 Mon Sep 17 00:00:00 2001
From: Yibing Liu
Date: Tue, 5 Mar 2019 09:25:10 +0000
Subject: [PATCH] Some tiny adjustments

---
 BERT/README.md             | 4 ++--
 BERT/predict_classifier.py | 4 ++--
 BERT/run_classifier.py     | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/BERT/README.md b/BERT/README.md
index 84e05b2..6f122ca 100644
--- a/BERT/README.md
+++ b/BERT/README.md
@@ -12,7 +12,7 @@
 - Supports multi-GPU fine-tuning of BERT
 - Provides a BERT prediction demo for easy deployment to production environments on a variety of hardware
 
-2)Supports FP16/FP32 mixed-precision training and fine-tuning, which saves GPU memory and speeds up training;
+2) Supports FP16/FP32 mixed-precision training and fine-tuning, which saves GPU memory and speeds up training;
 
 3) Provides the open-source [BERT pre-trained models](https://github.com/google-research/bert), converted into the Paddle Fluid parameter format, for download and fine-tuning on downstream tasks, including the following models:
 
@@ -155,7 +155,7 @@ python -u run_classifier.py --task_name ${TASK_NAME} \
        --num_iteration_per_drop_scope 1
 ```
 
-Here, `chinese_L-12_H-768_A-12` is the converted Chinese pre-trained model. Note that BERT supports two ways of building a batch of data: the `in_tokens` flag changes the meaning of the `batch_size` argument. If `in_tokens` is `true`, batches are built by token count; otherwise they are built by example count. Training loss, training speed, and other information are printed during training, and evaluation results on the dev set like the following are printed when training finishes:
+Here, `chinese_L-12_H-768_A-12` is the converted Chinese pre-trained model. Note that BERT on PaddlePaddle supports two ways of building a batch of data: the `in_tokens` flag changes the meaning of the `batch_size` argument. If `in_tokens` is `true`, batches are built by token count; otherwise they are built by example count. Training loss, training speed, and other information are printed during training, and evaluation results on the dev set like the following are printed when training finishes:
 
 ```
 [dev evaluation] ave loss: 0.622958, ave acc: 0.770281, elapsed time: 8.946956 s
diff --git a/BERT/predict_classifier.py b/BERT/predict_classifier.py
index 3b64499..fe813ff 100644
--- a/BERT/predict_classifier.py
+++ b/BERT/predict_classifier.py
@@ -109,13 +109,13 @@ def main(args):
 
     # ParallelExecutor is designed to drop small batches (mostly the last batch),
     # so using ParallelExecutor may leave some data unpredicted;
-    # if prediction of each and every example is needed, use Executor instead
+    # if prediction of each and every example is needed, please use Executor instead
     predict_exe = fluid.ParallelExecutor(
         use_cuda=args.use_cuda, main_program=predict_prog)
 
     predict_pyreader.decorate_tensor_provider(
         processor.data_generator(
-            batch_size=args.batch_size, phase='test', epoch=1))
+            batch_size=args.batch_size, phase='test', epoch=1, shuffle=False))
 
     predict_pyreader.start()
     all_results = []
diff --git a/BERT/run_classifier.py b/BERT/run_classifier.py
index fd5e307..5ba2ca9 100644
--- a/BERT/run_classifier.py
+++ b/BERT/run_classifier.py
@@ -65,7 +65,7 @@ data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data process
 data_g.add_arg("data_dir", str, None, "Path to training data.")
 data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
 data_g.add_arg("max_seq_len", int, 512, "Number of words in the longest sequence.")
-data_g.add_arg("batch_size", int, 8192, "Total number of examples in a training batch. See also --in_tokens.")
+data_g.add_arg("batch_size", int, 32, "Total number of examples in a training batch. See also --in_tokens.")
 data_g.add_arg("in_tokens", bool, False,
               "If set, the batch size will be the maximum number of tokens in one batch. "
               "Otherwise, it will be the maximum number of examples in one batch.")
--
GitLab
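
The README paragraph changed above describes the two batching modes selected by `in_tokens`. Below is a minimal, framework-free sketch of the idea, assuming a simple greedy grouping; `build_batches` and its padded-token budget are illustrative, not the repository's actual reader code:

```python
def build_batches(examples, batch_size, in_tokens=False):
    """Group `examples` (lists of token ids) into batches.

    With in_tokens=True, `batch_size` is a per-batch token budget that
    counts padding up to the longest sequence in the batch; otherwise
    it is simply the number of examples per batch.
    """
    batches, current, max_len = [], [], 0
    for ex in examples:
        if in_tokens:
            # Padded size of the batch if `ex` were to join it.
            if current and max(max_len, len(ex)) * (len(current) + 1) > batch_size:
                batches.append(current)
                current, max_len = [], 0
            current.append(ex)
            max_len = max(max_len, len(ex))
        elif len(current) == batch_size:
            batches.append(current)
            current = [ex]
        else:
            current.append(ex)
    if current:
        batches.append(current)
    return batches
```

This also explains the default change in `run_classifier.py`: with `in_tokens` defaulting to `False`, `batch_size` counts examples, so 32 is a sensible default where a token budget of 8192 was not.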
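
The comment in `predict_classifier.py` points out that `fluid.ParallelExecutor` drops small batches (usually the last one), so a plain `fluid.Executor` is needed when every example must be predicted. Here is a sketch of that alternative using the usual Fluid 1.x pyreader loop; it assumes the surrounding script's `args`, `predict_prog`, and `predict_pyreader`, and the output variable `probs` is hypothetical:

```python
import paddle.fluid as fluid

# A single-device Executor runs every batch, including the small trailing
# one, so no example goes unpredicted (unlike ParallelExecutor).
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)

predict_pyreader.start()
all_results = []
while True:
    try:
        # `probs` stands in for the model's prediction output variable.
        results = exe.run(program=predict_prog, fetch_list=[probs.name])
        all_results.extend(results[0])
    except fluid.core.EOFException:
        # The pyreader raises EOFException once the generator is
        # exhausted; reset it before leaving the loop.
        predict_pyreader.reset()
        break
```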