diff --git a/PaddleNLP/benchmark/transformer/README.md b/PaddleNLP/benchmark/transformer/README.md index 0ddecbfdc3f499dbab4e3595a6503df755cf8583..b4318c7fe6790d6e976899d6ac2c55223d433a9f 100644 --- a/PaddleNLP/benchmark/transformer/README.md +++ b/PaddleNLP/benchmark/transformer/README.md @@ -55,7 +55,7 @@ dataset = WMT14ende.get_datasets(mode="train", transform_func=transform_func) ``` shell cd static/ export CUDA_VISIBLE_DEVICES=0 -python3 train.py +python3 train.py --config ../configs/transformer.base.yaml ``` 需要注意的是,单卡下的超参设置与多卡下的超参设置有些不同,单卡执行需要修改 `configs/transformer.big.yaml` 或是 `configs/transformer.base.yaml` 中: @@ -67,7 +67,7 @@ python3 train.py ``` shell cd dygraph/ export CUDA_VISIBLE_DEVICES=0 -python3 train.py +python3 train.py --config ../configs/transformer.base.yaml ``` 需要注意的是,单卡下的超参设置与多卡下的超参设置有些不同,单卡执行需要修改 `configs/transformer.big.yaml` 或是 `configs/transformer.base.yaml` 中: @@ -85,7 +85,7 @@ python3 train.py ``` shell cd static/ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python3 train.py +python3 train.py --config ../configs/transformer.base.yaml ``` 使用 PE 的方式启动单机多卡需要设置 `configs/transformer.big.yaml` 或是 `configs/transformer.base.yaml` 中 `is_distributed` 参数为 `False`。 @@ -94,7 +94,7 @@ python3 train.py ``` shell cd static/ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" train.py +python3 -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" train.py --config ../configs/transformer.base.yaml ``` 使用 fleet 的方式启动单机多卡需要设置 `configs/transformer.big.yaml` 或是 `configs/transformer.base.yaml` 中 `is_distributed` 参数为 `True`。 @@ -104,7 +104,7 @@ python -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" train.py ``` shell cd dygraph/ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py +python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py --config ../configs/transformer.base.yaml ``` ### 模型推断 @@ -116,7 +116,7 @@ python3 -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py # setting visible devices for prediction cd static/ export CUDA_VISIBLE_DEVICES=0 -python predict.py --config ./configs/transformer.base.yaml +python3 predict.py --config ../configs/transformer.base.yaml ``` 由 `predict_file` 指定的文件中文本的翻译结果会输出到 `output_file` 指定的文件。执行预测时需要设置 `init_from_params` 来给出模型所在目录,更多参数的使用可以在 `configs/transformer.big.yaml` 和 `configs/transformer.base.yaml` 文件中查阅注释说明并进行更改设置。如果执行不提供 `--config` 选项,程序将默认使用 big model 的配置。 @@ -128,7 +128,7 @@ python predict.py --config ./configs/transformer.base.yaml # setting visible devices for prediction cd dygraph/ export CUDA_VISIBLE_DEVICES=0 -python predict.py --config ./configs/transformer.base.yaml +python3 predict.py --config ../configs/transformer.base.yaml ``` 由 `predict_file` 指定的文件中文本的翻译结果会输出到 `output_file` 指定的文件。执行预测时需要设置 `init_from_params` 来给出模型所在目录,更多参数的使用可以在 `configs/transformer.big.yaml` 和 `configs/transformer.base.yaml` 文件中查阅注释说明并进行更改设置。如果执行不提供 `--config` 选项,程序将默认使用 big model 的配置。 diff --git a/PaddleNLP/benchmark/transformer/dygraph/train.py b/PaddleNLP/benchmark/transformer/dygraph/train.py index bb1a83a77638e41007c32e521d020935f22f20e6..58424f063317f191985fcc877521cbd941a1a32a 100644 --- a/PaddleNLP/benchmark/transformer/dygraph/train.py +++ b/PaddleNLP/benchmark/transformer/dygraph/train.py @@ -16,7 +16,7 @@ from paddlenlp.transformers import TransformerModel, CrossEntropyCriterion, posi sys.path.append("../") import reader -from utils.record import AverageStatistical +from util.record import AverageStatistical FORMAT = '%(asctime)s-%(levelname)s: %(message)s' logging.basicConfig(level=logging.INFO, format=FORMAT) diff --git a/PaddleNLP/benchmark/transformer/static/predict.py b/PaddleNLP/benchmark/transformer/static/predict.py index 27ccf1d6610acde575caf86664e730c602945291..0ba42e6e02aae7f53ed054b6bedc7df2f0e015b7 100644 --- a/PaddleNLP/benchmark/transformer/static/predict.py +++ b/PaddleNLP/benchmark/transformer/static/predict.py @@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, output_eos=False): return seq -def do_train(args): +def do_predict(args): paddle.enable_static() if args.use_gpu: place = paddle.set_device("gpu:0") @@ -118,4 +118,4 @@ if __name__ == "__main__": args = AttrDict(yaml.safe_load(f)) pprint(args) - do_train(args) + do_predict(args) diff --git a/PaddleNLP/benchmark/transformer/static/train.py b/PaddleNLP/benchmark/transformer/static/train.py index 84a00af605c68b39fe9bc1ec878463837b974330..23d5a332e43b1706091c2fefbedded3ccc00de82 100644 --- a/PaddleNLP/benchmark/transformer/static/train.py +++ b/PaddleNLP/benchmark/transformer/static/train.py @@ -17,7 +17,7 @@ from paddlenlp.transformers import TransformerModel, CrossEntropyCriterion sys.path.append("../") import reader -from utils.record import AverageStatistical +from util.record import AverageStatistical FORMAT = '%(asctime)s-%(levelname)s: %(message)s' logging.basicConfig(level=logging.INFO, format=FORMAT) @@ -40,10 +40,12 @@ def do_train(args): if args.is_distributed: fleet.init(is_collective=True) gpu_id = int(os.getenv("FLAGS_selected_gpus", "0")) - places = paddle.CUDAPlace(gpu_id) if args.use_gpu else paddle.static.cpu_places() + places = paddle.CUDAPlace( + gpu_id) if args.use_gpu else paddle.static.cpu_places() trainer_count = 1 if args.use_gpu else len(places) else: - places = paddle.static.cuda_places() if args.use_gpu else paddle.static.cpu_places() + places = paddle.static.cuda_places( + ) if args.use_gpu else paddle.static.cpu_places() trainer_count = len(places) # Set seed for CE @@ -110,24 +112,24 @@ def do_train(args): 'init_loss_scaling': args.scale_loss, } - optimizer = fleet.distributed_optimizer(optimizer, strategy=dist_strategy) + optimizer = fleet.distributed_optimizer( + optimizer, strategy=dist_strategy) optimizer.minimize(avg_cost) if args.is_distributed: exe = paddle.static.Executor(places) else: exe = paddle.static.Executor() - build_strategy = paddle.static.BuildStrategy() - exec_strategy = paddle.static.ExecutionStrategy() + build_strategy = paddle.static.BuildStrategy() + exec_strategy = paddle.static.ExecutionStrategy() - compiled_train_program = paddle.static.CompiledProgram( - train_program).with_data_parallel( - loss_name=avg_cost.name, - build_strategy=build_strategy, + compiled_train_program = paddle.static.CompiledProgram( + train_program).with_data_parallel( + loss_name=avg_cost.name, + build_strategy=build_strategy, exec_strategy=exec_strategy) exe.run(startup_program) - # the best cross-entropy value with label smoothing loss_normalizer = -( (1. - args.label_smooth_eps) * np.log( @@ -220,6 +222,10 @@ def do_train(args): step_idx += 1 batch_start = time.time() + if args.save_model and dist.get_rank() == 0: + model_path = os.path.join(args.save_model, "step_final", "transformer") + paddle.static.save(train_program, model_path) + paddle.disable_static() diff --git a/PaddleNLP/benchmark/transformer/utils/distributed_utils.py b/PaddleNLP/benchmark/transformer/util/distributed_utils.py similarity index 100% rename from PaddleNLP/benchmark/transformer/utils/distributed_utils.py rename to PaddleNLP/benchmark/transformer/util/distributed_utils.py diff --git a/PaddleNLP/benchmark/transformer/utils/record.py b/PaddleNLP/benchmark/transformer/util/record.py similarity index 100% rename from PaddleNLP/benchmark/transformer/utils/record.py rename to PaddleNLP/benchmark/transformer/util/record.py