From 094905234d65d369fa7017c4e3614d9256de6742 Mon Sep 17 00:00:00 2001
From: liu zhengxi <380185688@qq.com>
Date: Fri, 5 Feb 2021 14:03:44 +0800
Subject: [PATCH] Transformer dy2sta and inference support (#5209)

* dy2sta and inference support

* delete useless code

* update dir

* sys

* delete useless import
---
 .../machine_translation/transformer/README.md |  23 +++-
 .../transformer/configs/transformer.base.yaml |   2 +
 .../transformer/configs/transformer.big.yaml  |   2 +
 .../transformer/deploy/python/inference.py    | 123 ++++++++++++++++++
 .../transformer/export_model.py               |  85 ++++++++++++
 .../machine_translation/transformer/reader.py |  11 ++
 6 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 PaddleNLP/examples/machine_translation/transformer/deploy/python/inference.py
 create mode 100644 PaddleNLP/examples/machine_translation/transformer/export_model.py

diff --git a/PaddleNLP/examples/machine_translation/transformer/README.md b/PaddleNLP/examples/machine_translation/transformer/README.md
index c747370f..dd58ee2f 100644
--- a/PaddleNLP/examples/machine_translation/transformer/README.md
+++ b/PaddleNLP/examples/machine_translation/transformer/README.md
@@ -98,6 +98,8 @@ python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" train.py --config .
 
 ## Model Inference
 
+### Prediction with the Dynamic Graph Model
+
 Taking the English-German translation data as an example, once training has finished, the following command translates the text in the specified file:
 
 ``` sh
@@ -106,10 +108,29 @@ export CUDA_VISIBLE_DEVICES=0
 python predict.py --config ./configs/transformer.base.yaml
 ```
 
-The translation of the text in the file specified by `predict_file` is written to the file specified by `output_file`. When running prediction, set `init_from_params` to the directory containing the model; the remaining options are documented in the comments of `configs/transformer.big.yaml` and `configs/transformer.base.yaml` and can be changed there. If `--config` is not provided, the big model configuration is used by default.
+The translation results are written to the file specified by `output_file`. When running prediction, set `init_from_params` to the directory containing the model; the remaining options are documented in the comments of `configs/transformer.big.yaml` and `configs/transformer.base.yaml` and can be changed there. If `--config` is not provided, the big model configuration is used by default.
 
 Note that prediction currently supports a single GPU only: the model evaluation that follows translation depends on the order in which results are written to the output file, and writing results in a specified order is not yet supported in the multi-GPU case.
 
+### Exporting a Static Graph Model and Predicting with the Inference Engine
+
+The Transformer example can also convert a trained dynamic graph checkpoint into a static graph model, and provides a matching way to run prediction with the inference engine. Usage is as follows.
+
+First, run the dynamic-to-static conversion: the `export_model.py` script turns the dynamic graph checkpoint into a static graph model and saves it as an inference model.
+
+``` sh
+python export_model.py --config ./configs/transformer.base.yaml
+```
+
+By default the model is saved under `infer_model/`. To save it to a custom path, change the `inference_model_dir` option in the config files under `configs/`.
+
+Then, for the converted static graph model, `inference.py` runs high-performance prediction with the inference engine. Because the saved static graph model already contains the network structure, the given config file is only used by the `reader` and to choose the prediction device (gpu/cpu/xpu); the model-structure options no longer take effect.
+
+``` sh
+python deploy/python/inference.py --config ./configs/transformer.base.yaml
+```
+
+The translation results are likewise saved to `predict.txt`; change `output_file` in the config file to customize the file the results are written to.
 
 ## Model Evaluation

diff --git a/PaddleNLP/examples/machine_translation/transformer/configs/transformer.base.yaml b/PaddleNLP/examples/machine_translation/transformer/configs/transformer.base.yaml
index b1683a0a..a28dd5d8 100644
--- a/PaddleNLP/examples/machine_translation/transformer/configs/transformer.base.yaml
+++ b/PaddleNLP/examples/machine_translation/transformer/configs/transformer.base.yaml
@@ -10,6 +10,8 @@ init_from_pretrain_model: ""
 init_from_params: "./trained_models/step_final/"
 # The directory for saving model
 save_model: "trained_models"
+# The directory for saving the inference model
+inference_model_dir: "infer_model"
 # Set seed for CE or debug
 random_seed: None
 # The file to output the translation results of predict_file to.
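Before moving on, a quick way to smoke-test the exported files (a minimal sketch, not part of this patch: it assumes `export_model.py` has already produced the default `infer_model/transformer` prefix, relies on the standard `paddle.jit.load` API, and the batch contents are made up):

``` python
import paddle

# Reload the exported static graph program saved by export_model.py.
model = paddle.jit.load("infer_model/transformer")
model.eval()

# A dummy batch of token ids with shape [batch_size, seq_len]; real inputs
# come from reader.create_infer_loader.
src_word = paddle.randint(low=0, high=1000, shape=[2, 8], dtype="int64")
finished_seq = model(src_word)
print(finished_seq.shape)
```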
diff --git a/PaddleNLP/examples/machine_translation/transformer/configs/transformer.big.yaml b/PaddleNLP/examples/machine_translation/transformer/configs/transformer.big.yaml
index 17fb9e0b..1e240227 100644
--- a/PaddleNLP/examples/machine_translation/transformer/configs/transformer.big.yaml
+++ b/PaddleNLP/examples/machine_translation/transformer/configs/transformer.big.yaml
@@ -10,6 +10,8 @@ init_from_pretrain_model: ""
 init_from_params: "./trained_models/step_final/"
 # The directory for saving model
 save_model: "trained_models"
+# The directory for saving the inference model
+inference_model_dir: "infer_model"
 # Set seed for CE or debug
 random_seed: None
 # The file to output the translation results of predict_file to.

diff --git a/PaddleNLP/examples/machine_translation/transformer/deploy/python/inference.py b/PaddleNLP/examples/machine_translation/transformer/deploy/python/inference.py
new file mode 100644
index 00000000..89bfbf89
--- /dev/null
+++ b/PaddleNLP/examples/machine_translation/transformer/deploy/python/inference.py
@@ -0,0 +1,123 @@
+import os
+import sys
+
+import argparse
+import numpy as np
+import yaml
+from attrdict import AttrDict
+from pprint import pprint
+
+import paddle
+from paddle import inference
+
+sys.path.append("../../")
+import reader
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--config",
+        default="./configs/transformer.big.yaml",
+        type=str,
+        help="Path of the config file.")
+    args = parser.parse_args()
+    return args
+
+
+def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, output_eos=False):
+    """
+    Post-process the decoded sequence: truncate at the first <eos> and
+    drop <bos>/<eos> unless output_bos/output_eos request them.
+    """
+    eos_pos = len(seq) - 1
+    for i, idx in enumerate(seq):
+        if idx == eos_idx:
+            eos_pos = i
+            break
+    seq = [
+        idx for idx in seq[:eos_pos + 1]
+        if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx)
+    ]
+    return seq
+
+
+class Predictor(object):
+    def __init__(self, predictor, input_handles, output_handles):
+        self.predictor = predictor
+        self.input_handles = input_handles
+        self.output_handles = output_handles
+
+    @classmethod
+    def create_predictor(cls, args, config=None):
+        if config is None:
+            config = inference.Config(
+                os.path.join(args.inference_model_dir, "transformer.pdmodel"),
+                os.path.join(args.inference_model_dir, "transformer.pdiparams"))
+            if args.use_gpu:
+                config.enable_use_gpu(100, 0)
+            elif args.use_xpu:
+                config.enable_xpu(100)
+            else:
+                # CPU settings, e.g. enable_mkldnn,
+                # set_cpu_math_library_num_threads
+                config.disable_gpu()
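+                # Hypothetical CPU tuning, not enabled in this patch; both
+                # calls are existing paddle.inference.Config APIs:
+                # config.enable_mkldnn()
+                # config.set_cpu_math_library_num_threads(4)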
+            # Use zero-copy tensors instead of feed/fetch ops.
+            config.switch_use_feed_fetch_ops(False)
+        predictor = inference.create_predictor(config)
+        input_handles = [
+            predictor.get_input_handle(name)
+            for name in predictor.get_input_names()
+        ]
+        output_handles = [
+            predictor.get_output_handle(name)
+            for name in predictor.get_output_names()
+        ]
+        return cls(predictor, input_handles, output_handles)
+
+    def predict_batch(self, data):
+        for input_field, input_handle in zip(data, self.input_handles):
+            input_handle.copy_from_cpu(input_field.numpy() if isinstance(
+                input_field, paddle.Tensor) else input_field)
+        self.predictor.run()
+        output = [
+            output_handle.copy_to_cpu() for output_handle in self.output_handles
+        ]
+        return output
+
+    def predict(self, test_loader):
+        outputs = []
+        for data in test_loader:
+            output = self.predict_batch(data)
+            outputs.append(output)
+        return outputs
+
+
+def do_inference(args):
+    # Define data loader
+    test_loader, to_tokens = reader.create_infer_loader(args)
+
+    predictor = Predictor.create_predictor(args)
+    sequence_outputs = predictor.predict(test_loader)
+
+    with open(args.output_file, "w") as f:
+        for finished_sequence in sequence_outputs:
+            # [batch, seq_len, beam] -> [batch, beam, seq_len]
+            finished_sequence = finished_sequence[0].transpose([0, 2, 1])
+            for ins in finished_sequence:
+                for beam_idx, beam in enumerate(ins):
+                    if beam_idx >= args.n_best:
+                        break
+                    id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
+                    word_list = to_tokens(id_list)
+                    sequence = " ".join(word_list) + "\n"
+                    f.write(sequence)
+
+
+if __name__ == "__main__":
+    ARGS = parse_args()
+    yaml_file = ARGS.config
+    with open(yaml_file, 'rt') as f:
+        args = AttrDict(yaml.safe_load(f))
+        pprint(args)
+
+    do_inference(args)
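For intuition about `post_process_seq` above: the decoded ids are cut at the first `<eos>`, and `<bos>`/`<eos>` are dropped unless `output_bos`/`output_eos` request them. A minimal sketch with made-up token ids, assuming the function as defined in `inference.py` above is in scope:

``` python
# Hypothetical vocabulary: 0 = <bos>, 1 = <eos>. Everything after the
# first <eos> is discarded, then <bos>/<eos> themselves are stripped.
seq = [0, 101, 17, 23, 1, 9, 9]
print(post_process_seq(seq, bos_idx=0, eos_idx=1))  # -> [101, 17, 23]
```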
") + args = parser.parse_args() + return args + + +def do_export(args): + # Adapt vocabulary size + reader.adapt_vocab_size(args) + # Define model + transformer = InferTransformerModel( + src_vocab_size=args.src_vocab_size, + trg_vocab_size=args.trg_vocab_size, + max_length=args.max_length + 1, + n_layer=args.n_layer, + n_head=args.n_head, + d_model=args.d_model, + d_inner_hid=args.d_inner_hid, + dropout=args.dropout, + weight_sharing=args.weight_sharing, + bos_id=args.bos_idx, + eos_id=args.eos_idx, + beam_size=args.beam_size, + max_out_len=args.max_out_len) + + # Load the trained model + assert args.init_from_params, ( + "Please set init_from_params to load the infer model.") + + model_dict = paddle.load( + os.path.join(args.init_from_params, "transformer.pdparams")) + + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["encoder.pos_encoder.weight"] = position_encoding_init( + args.max_length + 1, args.d_model) + model_dict["decoder.pos_encoder.weight"] = position_encoding_init( + args.max_length + 1, args.d_model) + transformer.load_dict(model_dict) + # Set evaluate mode + transformer.eval() + + # Convert dygraph model to static graph model + transformer = paddle.jit.to_static( + transformer, + input_spec=[ + # src_word + paddle.static.InputSpec( + shape=[None, None], dtype="int64") + ]) + + # Save converted static graph model + paddle.jit.save(transformer, + os.path.join(args.inference_model_dir, "transformer")) + logger.info("Transformer has been saved to {}".format( + args.inference_model_dir)) + + +if __name__ == "__main__": + ARGS = parse_args() + yaml_file = ARGS.config + with open(yaml_file, 'rt') as f: + args = AttrDict(yaml.safe_load(f)) + pprint(args) + + do_export(args) diff --git a/PaddleNLP/examples/machine_translation/transformer/reader.py b/PaddleNLP/examples/machine_translation/transformer/reader.py index a53d7579..dc820165 100644 --- a/PaddleNLP/examples/machine_translation/transformer/reader.py +++ b/PaddleNLP/examples/machine_translation/transformer/reader.py @@ -110,6 +110,17 @@ def create_infer_loader(args): return data_loader, trg_vocab.to_tokens +def adapt_vocab_size(args): + root = None if args.root == "None" else args.root + (src_vocab, trg_vocab) = WMT14ende.get_vocab(root=root) + padding_vocab = ( + lambda x: (x + args.pad_factor - 1) // args.pad_factor * args.pad_factor + ) + + args.src_vocab_size = padding_vocab(len(src_vocab)) + args.trg_vocab_size = padding_vocab(len(trg_vocab)) + + def prepare_train_input(insts, bos_idx, eos_idx, pad_idx): """ Put all padded data needed by training into a list. -- GitLab