diff --git a/BERT/train.py b/BERT/train.py
index c8dc1157217a23fa58e20682c67cd75f9c7169ad..aaf2a8758beee71f3fb74cf5207bc06457a01350 100644
--- a/BERT/train.py
+++ b/BERT/train.py
@@ -313,13 +313,18 @@ def train(args):
     exec_strategy.num_threads = dev_count
     exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

-    train_exe = fluid.ParallelExecutor(
-        use_cuda=args.use_cuda,
-        loss_name=total_loss.name,
-        exec_strategy=exec_strategy,
-        main_program=train_program,
-        num_trainers=nccl2_num_trainers,
-        trainer_id=nccl2_trainer_id)
+    # use_ngraph is for CPU only, please refer to README_ngraph.md for details
+    use_ngraph = os.getenv('FLAGS_use_ngraph')
+    if not use_ngraph:
+        train_exe = fluid.ParallelExecutor(
+            use_cuda=args.use_cuda,
+            loss_name=total_loss.name,
+            exec_strategy=exec_strategy,
+            main_program=train_program,
+            num_trainers=nccl2_num_trainers,
+            trainer_id=nccl2_trainer_id)
+    else:
+        train_exe = exe

     if args.validation_set_dir and args.validation_set_dir != "":
         predict = predict_wrapper(
@@ -345,17 +350,30 @@ def train(args):
             skip_steps = args.skip_steps * nccl2_num_trainers

             if nccl2_trainer_id != 0:
-                train_exe.run(fetch_list=[])
+                if use_ngraph:
+                    train_exe.run(fetch_list=[], program=train_program)
+                else:
+                    train_exe.run(fetch_list=[])
                 continue

             if steps % skip_steps != 0:
-                train_exe.run(fetch_list=[])
+                if use_ngraph:
+                    train_exe.run(fetch_list=[], program=train_program)
+                else:
+                    train_exe.run(fetch_list=[])
+
             else:
-                each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
-                    fetch_list=[
-                        next_sent_acc.name, mask_lm_loss.name, total_loss.name,
-                        scheduled_lr.name
-                    ])
+                if use_ngraph:
+                    each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
+                        fetch_list=[
+                            next_sent_acc.name, mask_lm_loss.name, total_loss.name,
+                            scheduled_lr.name], program=train_program)
+                else:
+                    each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
+                        fetch_list=[
+                            next_sent_acc.name, mask_lm_loss.name, total_loss.name,
+                            scheduled_lr.name])
+
                 acc.extend(each_next_acc)
                 lm_cost.extend(each_mask_lm_cost)
                 cost.extend(each_total_cost)
@@ -398,7 +416,6 @@ def train(args):
             train_pyreader.reset()
             break

-
 if __name__ == '__main__':
     print_arguments(args)
     if args.do_test:
diff --git a/ELMo/README.md b/ELMo/README.md
index ffb97acec4c96b9836ccd65e836308252c48a2bc..24bb2f57a64393c2a058587b44c3f628869f821c 100755
--- a/ELMo/README.md
+++ b/ELMo/README.md
@@ -20,7 +20,7 @@ ELMo(Embeddings from Language Models) 是重要的通用语义表示模型之一

 | Task | 评估指标 | Baseline | +ELMo |
 | :------| :------: | :------: |:------: |
 | [LAC](https://github.com/baidu/lac) | F1 | 87.3% | **88.4%** |
-| [阅读理解](github.com/PaddlePaddle/models/tree/develop/PaddleNLP/machine_reading_comprehension) | Rouge-L | 39.4% | **40.4%** |
+| [阅读理解](https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/unarchived/machine_reading_comprehension) | Rouge-L | 39.4% | **40.4%** |

 **Note**:
diff --git a/ELMo/train.py b/ELMo/train.py
index c8726f84962d4111d6dbbff39d20db0faa5ef0bb..c062df304c08e52736f3526e1da8ee06456c8773 100755
--- a/ELMo/train.py
+++ b/ELMo/train.py
@@ -555,6 +555,7 @@ def train_loop(args,
                 valid_ppl = eval(vocab, infer_progs, dev_count, logger, args)
                 logger.info("valid ppl {}".format(valid_ppl))
             if batch_id > 0 and batch_id % args.save_interval == 0:
+                epoch_id = int(batch_id / n_batches_per_epoch)
                 model_path = os.path.join(args.para_save_dir,
                                           str(batch_id + epoch_id))
                 if not os.path.isdir(model_path):
diff --git a/ERNIE/finetune/classifier.py b/ERNIE/finetune/classifier.py
index 48139c8d6da84b8bf86f7a361eefd6df6987bc29..950c550944e4c3cb3fb2e3f6091b8a45aaeee08e 100644
--- a/ERNIE/finetune/classifier.py
+++ b/ERNIE/finetune/classifier.py
@@ -156,7 +156,7 @@ def evaluate(exe, test_program, test_pyreader, graph_vars, eval_phase):
         outputs = exe.run(fetch_list=train_fetch_list)
         ret = {"loss": np.mean(outputs[0]), "accuracy": np.mean(outputs[1])}
         if "learning_rate" in graph_vars:
-            ret["learning_rate"] = float(outputs[4][0])
+            ret["learning_rate"] = float(outputs[3][0])
         return ret

     test_pyreader.start()
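For context on the `BERT/train.py` change above: `FLAGS_use_ngraph` is read with `os.getenv`, which returns the raw string (or `None` when unset), and the patch branches on its truthiness. A minimal sketch of that gating, assuming only what the hunk shows (the executor objects are stand-ins here; consult README_ngraph.md in the repository for the supported setup):

```python
import os

# The patch tests `if not use_ngraph`, so the branch is decided purely by
# string truthiness: unset or "" -> ParallelExecutor; any non-empty value,
# even "0" or "false", takes the nGraph/CPU path.
use_ngraph = os.getenv('FLAGS_use_ngraph')

if not use_ngraph:
    executor_kind = "fluid.ParallelExecutor (GPU / multi-card path)"
else:
    executor_kind = "plain fluid.Executor `exe` (nGraph CPU path)"

print(executor_kind)
```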
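And for the `ERNIE/finetune/classifier.py` fix: in that file's `evaluate()`, the train-phase fetch list is built as loss, accuracy, num_seqs, with the learning rate appended last when `graph_vars` carries it, so the fetched learning rate lives at index 3 and the old `outputs[4][0]` indexed past the end of `outputs`. A small sketch of that indexing, assuming this fetch-list layout (inferred from the file, not quoted in the hunk):

```python
# Assumed layout of train_fetch_list in evaluate() (assumption: three base
# entries plus learning_rate appended when present in graph_vars).
train_fetch_list = ["loss", "accuracy", "num_seqs"]
train_fetch_list.append("learning_rate")

# exe.run(fetch_list=...) returns one array per fetched name, in order,
# so the learning rate is outputs[3]; outputs[4] would be out of range.
assert train_fetch_list.index("learning_rate") == 3
```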