PaddlePaddle / ERNIE
Commit 507e0916
Authored on Oct 12, 2019 by chenxuyi

use infer_classifier instead of predict_classifier

Parent 331f6f05
Showing 4 changed files with 23 additions and 200 deletions.
README.md (+7 -11)
README.zh.md (+9 -12)
infer_classifyer.py (+7 -5)
predict_classifier.py (+0 -172)
README.md
...

@@ -979,17 +979,13 @@ when finished running this script, `cls_emb.npy` and `top_layer_emb.npy` will be

Take the classification task as an example; here is the script for batch prediction:
```
-python -u predict_classifier.py \
-       --use_cuda true \
-       --batch_size 32 \
-       --vocab_path ${MODEL_PATH}/vocab.txt \
-       --init_checkpoint "./checkpoints/step_100" \
-       --do_lower_case true \
-       --max_seq_len 128 \
-       --ernie_config_path ${MODEL_PATH}/ernie_config.json \
-       --do_predict true \
-       --predict_set ${TASK_DATA_PATH}/lcqmc/test.tsv \
-       --num_labels 2
+python -u infer_classifyer.py \
+    --ernie_config_path ${MODEL_PATH}/ernie_config.json \
+    --init_checkpoint "./checkpoints/step_100" \
+    --save_inference_model_path ./saved_model \
+    --predict_set ${TASK_DATA_PATH}/xnli/test.tsv \
+    --vocab_path ${MODEL_PATH}/vocab.txt \
+    --num_labels 3
```
Argument `init_checkpoint` is the path of the model, `predict_set` is the path of the test file, and `num_labels` is the number of target labels.
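Since the new `infer_classifyer.py` prints one tab-separated row of class probabilities per example (see the diff of that file below), the predicted label can be recovered downstream with a couple of NumPy calls. A minimal sketch, assuming the script's stdout was redirected to a hypothetical `probs.tsv`:

```
import numpy as np

# probs.tsv is a hypothetical capture of infer_classifyer.py's stdout:
# one line per example, tab-separated class probabilities.
probs = np.loadtxt("probs.tsv", delimiter="\t", ndmin=2)  # (num_examples, num_labels)
labels = probs.argmax(axis=1)                             # predicted class per example
for i, (row, label) in enumerate(zip(probs, labels)):
    print("example {}: label={} probs={}".format(i, label, row))
```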
...
README.zh.md
...

@@ -935,7 +935,7 @@ ERNIE provides a toolkit for model compression and acceleration through data distillation

After finetuning, only a few steps are needed to generate an inference_model; PaddlePaddle can load the generated model in a production environment and run prediction efficiently.

### Generating the inference_model
```
-When running the `classify_infer.py` or `predict_classifier.py` script, specify `--save_inference_model_path` to export an inference_model to the given location.
+When running the `infer_classifyer.py` script, specify `--save_inference_model_path` to export an inference_model to the given location.
```
If you finetune with `propeller`, `BestInferenceExporter` will select the best model according to the evaluation metric during finetuning and export it as an inference_model. For the `propeller` finetuning workflow, see `propeller_xnli_demo.ipynb`.
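For context on how such an exported inference_model is consumed, the repository's own code (see the deleted `predict_classifier.py` below) loads it back with `fluid.io.load_inference_model`. A minimal loading sketch; the directory name is an assumption pieced together from the `--save_inference_model_path` and `--init_checkpoint` values in the example command:

```
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
# Assumed path: predict_classifier.py appended '_inference_model' to the
# checkpoint name under save_inference_model_path.
model_dir = "./saved_model/step_100_inference_model"
infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(model_dir, exe)
# feed_names lists the input variables (src_ids, sent_ids, pos_ids, input_mask, ...);
# fetch_targets holds the class-probability output.
```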
...

@@ -985,17 +985,14 @@ python -u ernie_encoder.py \

Taking the classification task as an example, here is a script for batch prediction; usage is as follows:
```
-python -u predict_classifier.py \
-       --use_cuda true \
-       --batch_size 32 \
-       --vocab_path ${MODEL_PATH}/vocab.txt \
-       --init_checkpoint "./checkpoints/step_100" \
-       --do_lower_case true \
-       --max_seq_len 128 \
-       --ernie_config_path ${MODEL_PATH}/ernie_config.json \
-       --do_predict true \
-       --predict_set ${TASK_DATA_PATH}/lcqmc/test.tsv \
-       --num_labels 2
+python -u infer_classifyer.py \
+    --ernie_config_path ${MODEL_PATH}/ernie_config.json \
+    --init_checkpoint "./checkpoints/step_100" \
+    --save_inference_model_path ./saved_model \
+    --predict_set ${TASK_DATA_PATH}/xnli/test.tsv \
+    --vocab_path ${MODEL_PATH}/vocab.txt \
+    --num_labels 3
```
In actual use, specify the model for prediction via `init_checkpoint`, the data file to predict via `predict_set`, and the number of classes via `num_labels`.

...
classify_infer.py → infer_classifyer.py

...

@@ -22,6 +22,7 @@ import os
```
 import time
 import argparse
 import numpy as np
+import logging
 import multiprocessing

 # NOTE(paddle-dev): All of these flags should be
```
...

@@ -40,7 +41,7 @@ from reader.task_reader import ClassifyReader
```
 from model.ernie import ErnieConfig
 from finetune.classifier import create_model
-from utils.args import print_arguments, check_cuda, prepare_logger
+from utils.args import print_arguments, check_cuda, prepare_logger, ArgumentGroup
 from utils.init import init_pretraining_params
 from finetune_args import parser
```
...

@@ -129,6 +130,9 @@ def main(args):
```
     if not args.use_cuda:
         log.info("disable gpu")
         config.disable_gpu()
+    else:
+        log.info("using gpu")
+        config.enable_use_gpu(1024)

     # Create PaddlePredictor
     predictor = create_paddle_predictor(config)
```
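The added `else` branch appears to matter because the native `AnalysisConfig` predictor defaults to CPU; GPU use must be requested explicitly, and `enable_use_gpu(1024)` asks for a 1024 MB initial GPU memory pool. A sketch of the surrounding setup as implied by the hunk; the config construction and model path are assumptions, since they are not shown in this diff:

```
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

use_cuda = True  # mirrors the --use_cuda flag
# Assumed: the config is built from the exported inference_model directory.
config = AnalysisConfig("./saved_model/step_100_inference_model")
if not use_cuda:
    config.disable_gpu()         # CPU inference
else:
    config.enable_use_gpu(1024)  # 1024 MB initial GPU memory pool
predictor = create_paddle_predictor(config)
```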
...

@@ -158,12 +162,10 @@ def main(args):
```
         # parse outputs
         output = outputs[0]
-        log.info(output.name)
         output_data = output.data.float_data()
-        #assert len(output_data) == args.num_labels * args.batch_size
-        batch_result = np.array(output_data).reshape(output.shape)
+        batch_result = np.array(output_data).reshape((-1, args.num_labels))
         for single_example_probs in batch_result:
-            log.info("{} example\t{}".format(index, single_example_probs))
+            print('\t'.join(map(str, single_example_probs.tolist())))
             index += 1

     log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(index / total_time, total_time, index, args.batch_size))
```
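The reshape change makes the row count follow the data actually returned rather than a recorded output shape, which stays correct when the final batch holds fewer than `batch_size` examples (note the deleted assert about `num_labels * batch_size`). A quick illustration of NumPy's inferred dimension:

```
import numpy as np

num_labels = 3
# A final batch with only 2 examples instead of a full batch:
output_data = [0.1, 0.7, 0.2,
               0.6, 0.3, 0.1]
batch_result = np.array(output_data).reshape((-1, num_labels))
print(batch_result.shape)  # (2, 3): row count inferred from the data
for single_example_probs in batch_result:
    print('\t'.join(map(str, single_example_probs.tolist())))
```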
...
predict_classifier.py
Deleted (file mode 100644 → 0)
```
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Load classifier's checkpoint to do prediction or save inference model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import argparse
import numpy as np
import multiprocessing

# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, it would
# not take any effect.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'  # enable gc

import paddle.fluid as fluid

from reader.task_reader import ClassifyReader
from model.ernie import ErnieConfig
from finetune.classifier import create_model
from utils.args import ArgumentGroup, print_arguments
from utils.init import init_pretraining_params
from finetune_args import parser

# yapf: disable
parser = argparse.ArgumentParser(__doc__)
model_g = ArgumentGroup(parser, "model", "options to init, resume and save model.")
model_g.add_arg("ernie_config_path", str, None, "Path to the json file for ernie model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("save_inference_model_path", str, "inference_model", "If set, save the inference model to this path.")
model_g.add_arg("use_fp16", bool, False, "Whether to resume parameters from fp16 checkpoint.")
model_g.add_arg("num_labels", int, 2, "num labels for classify")
model_g.add_arg("ernie_version", str, "1.0", "ernie_version")

data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options.")
data_g.add_arg("predict_set", str, None, "Predict set file")
data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
data_g.add_arg("label_map_config", str, None, "Label_map_config json file.")
data_g.add_arg("max_seq_len", int, 128, "Number of words of the longest seqence.")
data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training. see also --in_tokens.")
data_g.add_arg("do_lower_case", bool, True,
               "Whether to lower case the input text. Should be True for uncased models and False for cased models.")

run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
run_type_g.add_arg("do_prediction", bool, True, "Whether to do prediction on test set.")

args = parser.parse_args()
# yapf: enable.


def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=False,
        is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
                ernie_version=args.ernie_version)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError("args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(
        model_path,
        feed_target_names, [probs],
        exe,
        main_program=predict_prog)

    print("load inference model from %s" % model_path)
    infer_program, feed_target_names, probs = fluid.io.load_inference_model(
        model_path, exe)

    src_ids = feed_target_names[0]
    sent_ids = feed_target_names[1]
    pos_ids = feed_target_names[2]
    input_mask = feed_target_names[3]
    if args.ernie_version == "2.0":
        task_ids = feed_target_names[4]

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    for sample in predict_data_generator():
        src_ids_data = sample[0]
        sent_ids_data = sample[1]
        pos_ids_data = sample[2]
        task_ids_data = sample[3]
        input_mask_data = sample[4]
        if args.ernie_version == "1.0":
            output = exe.run(
                infer_program,
                feed={src_ids: src_ids_data,
                      sent_ids: sent_ids_data,
                      pos_ids: pos_ids_data,
                      input_mask: input_mask_data},
                fetch_list=probs)
        elif args.ernie_version == "2.0":
            output = exe.run(
                infer_program,
                feed={src_ids: src_ids_data,
                      sent_ids: sent_ids_data,
                      pos_ids: pos_ids_data,
                      task_ids: task_ids_data,
                      input_mask: input_mask_data},
                fetch_list=probs)
        else:
            raise ValueError("ernie_version must be 1.0 or 2.0")
        for single_result in output[0]:
            print("example_index:{}\t{}".format(index, single_result))
            index += 1


if __name__ == '__main__':
    print_arguments(args)
    main(args)
```