Unverified commit 3d695fcc, authored by xiaoting, committed by GitHub

Merge branch 'dygraph' into add_test_serving

@@ -141,6 +141,7 @@ Train:
      img_mode: BGR
      channel_first: False
  - DetLabelEncode: # Class handling label
+ - CopyPaste:
  - IaaAugment:
      augmenter_args:
        - { 'type': Fliplr, 'args': { 'p': 0.5 } }
......
@@ -68,8 +68,7 @@ Loss:
      ohem_ratio: 3
  - DistillationDBLoss:
      weight: 1.0
-     model_name_list: ["Student", "Teacher"]
-     # key: maps
+     model_name_list: ["Student"]
      name: DBLoss
      balance_loss: true
      main_loss_type: DiceLoss
@@ -116,6 +115,7 @@ Train:
      img_mode: BGR
      channel_first: False
  - DetLabelEncode: # Class handling label
+ - CopyPaste:
  - IaaAugment:
      augmenter_args:
        - { 'type': Fliplr, 'args': { 'p': 0.5 } }
......
@@ -118,6 +118,7 @@ Train:
      img_mode: BGR
      channel_first: False
  - DetLabelEncode: # Class handling label
+ - CopyPaste:
  - IaaAugment:
      augmenter_args:
        - { 'type': Fliplr, 'args': { 'p': 0.5 } }
......
@@ -94,7 +94,7 @@ Eval:
    label_file_list: [./train_data/total_text/test/test.txt]
    transforms:
      - DecodeImage: # load image
-         img_mode: RGB
+         img_mode: BGR
          channel_first: False
      - E2ELabelEncodeTest:
      - E2EResizeForTest:
......
@@ -16,7 +16,7 @@ Focal Loss comes from the paper "Focal Loss for Dense Object Detection" ...

As the figure above shows, when γ > 0 the modulating factor (1-y')^γ gives easily classified samples a smaller loss weight, so the network focuses on hard, misclassified samples. The factor γ controls how fast the weight of easy samples decays: with γ = 0 the loss reduces to the ordinary cross-entropy loss, and the larger γ is, the stronger the effect of the modulating factor. Experiments found γ = 2 to work best. The balancing factor α compensates for the imbalance between positive and negative samples; the paper uses α = 0.25.

For the classic CTC algorithm, suppose the probability that a feature sequence (f<sub>1</sub>, f<sub>2</sub>, ..., f<sub>t</sub>) decodes to the label after CTC decoding is y'; then the probability that the decoding result is not the label is (1-y'). It is easy to see that the CTC loss and y' satisfy the relationship shown below:

<div align="center">
<img src="./equation_ctcloss.png" width = "250" />
</div>
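Combining the two pieces above — the focal modulating factor and the relation between the CTC loss and y' — gives the Focal-CTC idea. Below is a minimal sketch of that combination; the class name and default values are illustrative, not the ppocr implementation:

```python
import paddle
import paddle.nn as nn


class FocalCTCLossSketch(nn.Layer):
    """Reweight the per-sample CTC loss with alpha * (1 - y')**gamma,
    where y' = exp(-ctc_loss) is the probability of decoding the label."""

    def __init__(self, alpha=0.25, gamma=2.0, blank=0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ctc = nn.CTCLoss(blank=blank, reduction='none')

    def forward(self, log_probs, labels, input_lengths, label_lengths):
        # log_probs: [T, B, C]; per-sample CTC loss is -log(y')
        ctc_loss = self.ctc(log_probs, labels, input_lengths, label_lengths)
        y_prime = paddle.exp(-ctc_loss)            # probability of the label sequence
        weight = self.alpha * paddle.pow(1.0 - y_prime, self.gamma)
        return paddle.mean(weight * ctc_loss)
```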
@@ -38,7 +38,7 @@ A-CTC Loss is short for CTC Loss + ACE Loss, where ACE Loss comes from the paper "Aggrega...

<div align="center">
<img src="./rec_algo_compare.png" width = "1000" />
</div>

Although ACE Loss can indeed handle 2D predictions, as the figure above shows, and has advantages in memory footprint and inference speed, in practice we found that using ACE Loss alone gives worse recognition accuracy than CTC Loss. We therefore tried combining the two, keeping CTC Loss as the main loss and treating ACE Loss as an auxiliary supervision loss. This paid off: on our internal experimental dataset, recognition accuracy improves by about 1% compared with using CTC Loss alone.

A_CTC Loss is defined as follows:

<div align="center">
<img src="./equation_a_ctc.png" width = "300" />

@@ -47,7 +47,7 @@ A_CTC Loss is defined as follows:

In the experiments, λ = 0.1. The ACE loss implementation is available in [ace_loss.py](../../ppocr/losses/ace_loss.py).
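Written out, the combination is simply a weighted sum of the two losses. A minimal sketch follows; the class name is illustrative, and it assumes the wrapped CTC loss returns a dict keyed "loss" (the ACELoss shown later in this diff returns "loss_ace"):

```python
import paddle.nn as nn


class ACTCLossSketch(nn.Layer):
    """A-CTC: CTC as the main loss, ACE as an auxiliary loss weighted by lambda = 0.1."""

    def __init__(self, ctc_loss, ace_loss, ace_weight=0.1):
        super().__init__()
        self.ctc_loss = ctc_loss      # e.g. a ppocr CTCLoss instance (assumed to return {"loss": ...})
        self.ace_loss = ace_loss      # e.g. a ppocr ACELoss instance (returns {"loss_ace": ...})
        self.ace_weight = ace_weight

    def forward(self, predicts, batch):
        loss = self.ctc_loss(predicts, batch)["loss"]
        loss = loss + self.ace_weight * self.ace_loss(predicts, batch)["loss_ace"]
        return {"loss": loss}
```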
## 3. C-CTC Loss

C-CTC Loss is short for CTC Loss + Center Loss, where Center Loss comes from the paper "A Discriminative Feature Learning Approach for Deep Face Recognition". It was first used for face recognition, to enlarge inter-class distance and reduce intra-class distance, and is one of the earlier and more widely used algorithms in metric learning.

In Chinese OCR recognition, analysis of bad cases showed that a major difficulty is the large number of visually similar characters, which are easily misrecognized. This led us to ask whether the metric-learning idea could be borrowed to enlarge the inter-class distance between similar characters and thus improve recognition accuracy. However, metric learning is mainly used for image classification, where each training sample has a single fixed label, whereas OCR recognition is essentially a sequence recognition task with no explicit alignment between features and labels, so how to combine the two remains a direction worth exploring.

After trying ArcMargin, CosMargin and other methods, we finally found that Center Loss helps to further improve recognition accuracy. C_CTC Loss is defined as follows:
<div align="center">
......
# Environment Setup

Windows and Mac users are recommended to use Anaconda to set up the Python environment; Linux users are recommended to use Docker.

Recommended environment:
- PaddlePaddle >= 2.0.0 (2.1.2)
- python3.7
- CUDA10.1 / CUDA10.2
- CUDNN 7.6

Users who are already familiar with Python environments can skip directly to step 2 and install PaddlePaddle.

* [1. Python Environment Setup](#1)
@@ -294,11 +301,12 @@ cd /home/Projects
# A docker container only needs to be created on the first run; later runs can skip this command
# Create a docker container named ppocr and map the current directory to the /paddle directory of the container

# If you want to use docker in a CPU-only environment, create the container with docker instead of nvidia-docker
sudo docker run --name ppocr -v $PWD:/paddle --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash

# If you use CUDA10, run the following command to create the container, setting the docker shared memory shm-size to 64G (at least 32G is recommended)
# For CUDA11 + CUDNN8, the image registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda11.2-cudnn8 is recommended
sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash

# ctrl+P+Q exits the docker container; re-enter it with the following command
sudo docker container exec -it ppocr /bin/bash
@@ -321,8 +329,3 @@ python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
For other version requirements, follow the instructions in the [official PaddlePaddle installation guide](https://www.paddlepaddle.org.cn/install/quick).
# Environment Preparation
Recommended working environment:
- PaddlePaddle >= 2.0.0 (2.1.2)
- python3.7
- CUDA10.1 / CUDA10.2
- CUDNN 7.6
* [1. Python Environment Setup](#1)
  + [1.1 Windows](#1.1)
  + [1.2 Mac](#1.2)
  + [1.3 Linux](#1.3)
* [2. Install PaddlePaddle 2.0](#2)

<a name="1"></a>

## 1. Python Environment Setup
@@ -202,7 +209,6 @@ Linux users can choose to run either Anaconda or Docker. If you are familiar wit ...
<img src="../install/linux/anaconda_download.png" akt="anaconda download" width="800" align="center"/>

- Select the appropriate version for your operating system
- Type `uname -m` in the terminal to check the command set used by your system
@@ -309,7 +315,18 @@ cd /home/Projects
# Create a docker container named ppocr and map the current directory to the /paddle directory of the container
# If using CPU, use docker instead of nvidia-docker to create docker
sudo docker run --name ppocr -v $PWD:/paddle --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash
# If using GPU, use nvidia-docker to create docker
# docker image registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda11.2-cudnn8 is recommended for CUDA11.2 + CUDNN8.
sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash
```
You can also visit [DockerHub](https://hub.docker.com/r/paddlepaddle/paddle/tags/) to get the image that fits your machine.
```
# ctrl+P+Q to exit docker, to re-enter docker using the following command:
sudo docker container exec -it ppocr /bin/bash
```
<a name="2"></a>
@@ -329,4 +346,3 @@ python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
For more software version requirements, please follow the instructions in the [Installation Document](https://www.paddlepaddle.org.cn/install/quick).
doc/joinus.PNG (image updated: 193.9 KB → 188.2 KB)
@@ -32,6 +32,7 @@ class ACELoss(nn.Layer):
    def __call__(self, predicts, batch):
        if isinstance(predicts, (list, tuple)):
            predicts = predicts[-1]
        B, N = predicts.shape[:2]
        div = paddle.to_tensor([N]).astype('float32')
@@ -42,9 +43,7 @@ class ACELoss(nn.Layer):
        length = batch[2].astype("float32")
        batch = batch[3].astype("float32")
        batch[:, 0] = paddle.subtract(div, length)
        batch = paddle.divide(batch, div)
        loss = self.loss_func(aggregation_preds, batch)
        return {"loss_ace": loss}
@@ -27,7 +27,6 @@ class CenterLoss(nn.Layer):
    """
    Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
    """
    def __init__(self,
                 num_classes=6625,
                 feat_dim=96,
@@ -37,8 +36,7 @@ class CenterLoss(nn.Layer):
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.centers = paddle.randn(
-           shape=[self.num_classes, self.feat_dim]).astype(
-               "float64") #random center
+           shape=[self.num_classes, self.feat_dim]).astype("float64")
        if init_center:
            assert os.path.exists(
@@ -60,22 +58,23 @@ class CenterLoss(nn.Layer):
        batch_size = feats_reshape.shape[0]
-       #calc feat * feat
-       dist1 = paddle.sum(paddle.square(feats_reshape), axis=1, keepdim=True)
-       dist1 = paddle.expand(dist1, [batch_size, self.num_classes])
-       #dist2 of centers
-       dist2 = paddle.sum(paddle.square(self.centers), axis=1,
-                          keepdim=True) #num_classes
-       dist2 = paddle.expand(dist2,
-                             [self.num_classes, batch_size]).astype("float64")
-       dist2 = paddle.transpose(dist2, [1, 0])
-       #first x * x + y * y
-       distmat = paddle.add(dist1, dist2)
-       tmp = paddle.matmul(feats_reshape,
-                           paddle.transpose(self.centers, [1, 0]))
-       distmat = distmat - 2.0 * tmp
+       #calc l2 distance between feats and centers
+       square_feat = paddle.sum(paddle.square(feats_reshape),
+                                axis=1,
+                                keepdim=True)
+       square_feat = paddle.expand(square_feat, [batch_size, self.num_classes])
+       square_center = paddle.sum(paddle.square(self.centers),
+                                  axis=1,
+                                  keepdim=True)
+       square_center = paddle.expand(
+           square_center, [self.num_classes, batch_size]).astype("float64")
+       square_center = paddle.transpose(square_center, [1, 0])
+       distmat = paddle.add(square_feat, square_center)
+       feat_dot_center = paddle.matmul(feats_reshape,
+                                       paddle.transpose(self.centers, [1, 0]))
+       distmat = distmat - 2.0 * feat_dot_center
        #generate the mask
        classes = paddle.arange(self.num_classes).astype("int64")
@@ -83,7 +82,8 @@ class CenterLoss(nn.Layer):
            paddle.unsqueeze(label, 1), (batch_size, self.num_classes))
        mask = paddle.equal(
            paddle.expand(classes, [batch_size, self.num_classes]),
-           label).astype("float64") #get mask
+           label).astype("float64")
        dist = paddle.multiply(distmat, mask)
        loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
        return {'loss_center': loss}
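The refactored block above uses the expansion ||f − c||² = ||f||² + ||c||² − 2·f·c to get all feature-to-center squared distances at once. A small self-contained check of that identity (names and sizes here are illustrative, not taken from the ppocr code):

```python
import paddle

B, D, K = 4, 96, 10                      # batch size, feature dim, number of centers
feats = paddle.randn([B, D], dtype="float64")
centers = paddle.randn([K, D], dtype="float64")

# expansion: ||f||^2 + ||c||^2 - 2 f.c, computed for every (feature, center) pair
square_feat = paddle.sum(paddle.square(feats), axis=1, keepdim=True)       # [B, 1]
square_center = paddle.sum(paddle.square(centers), axis=1, keepdim=True)   # [K, 1]
distmat = square_feat + paddle.transpose(square_center, [1, 0])            # [B, K]
distmat = distmat - 2.0 * paddle.matmul(feats, paddle.transpose(centers, [1, 0]))

# should match the direct pairwise computation
direct = paddle.sum(paddle.square(feats.unsqueeze(1) - centers.unsqueeze(0)), axis=2)
print(paddle.allclose(distmat, direct))   # True, up to floating-point tolerance
```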
@@ -9,11 +9,14 @@ from paddle import nn
class SARLoss(nn.Layer):
    def __init__(self, **kwargs):
        super(SARLoss, self).__init__()
-       self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="mean", ignore_index=96)
+       self.loss_func = paddle.nn.loss.CrossEntropyLoss(
+           reduction="mean", ignore_index=92)

    def forward(self, predicts, batch):
-       predict = predicts[:, :-1, :]  # ignore last index of outputs to be in same seq_len with targets
-       label = batch[1].astype("int64")[:, 1:]  # ignore first index of target in loss calculation
+       predict = predicts[:, :
+                          -1, :]  # ignore last index of outputs to be in same seq_len with targets
+       label = batch[1].astype(
+           "int64")[:, 1:]  # ignore first index of target in loss calculation
        batch_size, num_steps, num_classes = predict.shape[0], predict.shape[
            1], predict.shape[2]
        assert len(label.shape) == len(list(predict.shape)) - 1, \
......
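The slicing reformatted above shifts the decoder outputs and the targets by one step so they align, and the change of `ignore_index` from 96 to 92 excludes the padding token from the loss. A tiny illustration with hypothetical token indices:

```python
import paddle
import paddle.nn as nn

pad_idx = 92                                         # padding index excluded from the loss
T, C = 5, 93                                         # decoder steps, number of classes
logits = paddle.rand([1, T, C])                      # [batch, seq_len, classes]
target = paddle.to_tensor([[91, 10, 11, 12, pad_idx]])  # hypothetical <BOS>, chars, <PAD>

predict = logits[:, :-1, :]       # drop the last output step
label = target[:, 1:]             # drop the leading start token

loss_fn = nn.CrossEntropyLoss(reduction="mean", ignore_index=pad_idx)
loss = loss_fn(predict.reshape([-1, C]), label.reshape([-1]))
```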
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,26 +16,17 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import numpy as np
-import paddle
-from paddle import ParamAttr
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
-from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
-from paddle.nn.initializer import KaimingNormal
 import math
 import numpy as np
 import paddle
-from paddle import ParamAttr, reshape, transpose, concat, split
+from paddle import ParamAttr, reshape, transpose
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
 from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
 from paddle.nn.initializer import KaimingNormal
-import math
-from paddle.nn.functional import hardswish, hardsigmoid
 from paddle.regularizer import L2Decay
+from paddle.nn.functional import hardswish, hardsigmoid
 class ConvBNLayer(nn.Layer):
......
@@ -51,7 +51,7 @@ class EncoderWithFC(nn.Layer):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        weight_attr, bias_attr = get_para_bias_attr(
-           l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
+           l2_decay=0.00001, k=in_channels)
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
......
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
:
;
<
=
>
?
@
[
\
]
_
`
~
\ No newline at end of file
# C++ inference test

The main program for the C++ inference test is `test_cpp.sh`, which tests model inference based on the C++ prediction library.

## Summary of test conclusions

| Algorithm | Model | Device | Batch size | MKL-DNN | CPU multi-threading | TensorRT | Offline quantization |
| ---- | ---- | ---- | ---- | ---- | ---- | ----| --- |
| DB |ch_ppocr_mobile_v2.0_det| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| DB |ch_ppocr_server_v2.0_det| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| CRNN |ch_ppocr_mobile_v2.0_rec| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| CRNN |ch_ppocr_server_v2.0_rec| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
|PP-OCR|ch_ppocr_server_v2.0 | CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
|PP-OCR|ch_ppocr_server_v2.0 | CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |

## 1. Functional test

First run `prepare.sh` to prepare the data and models, then run `test_cpp.sh`; log files with the `cpp_infer_*.log` suffix are produced under the ```tests/output``` directory.

```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt

# Usage 1:
bash tests/test_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt
# Usage 2: run prediction on a specified GPU card; the third argument is the GPU id
bash tests/test_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
```

## 2. Precision test

The compare_results.py script checks whether the model predictions match the expected results. The main steps are:
- extract the predicted coordinates from the log;
- load the saved coordinates from the local reference file;
- compare the two and raise an error when the difference exceeds the configured tolerance (a minimal sketch of this check follows the parameter list below).

### Usage

Run:
```shell
python3.7 tests/compare_results.py --gt_file=./tests/results/cpp_*.txt --log_file=./tests/output/cpp_*.log --atol=1e-3 --rtol=1e-3
```

Parameters:
- gt_file: path of the previously saved reference results; *.txt files are matched automatically, and the files are stored under tests/result/ by default
- log_file: prediction log saved by the infer mode of the tests/test.sh script; it contains the prediction results (text boxes, recognized text, categories, etc.), and infer_*.log patterns are also accepted
- atol: absolute tolerance
- rtol: relative tolerance
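The comparison described above boils down to a NumPy tolerance check; the sketch below is illustrative only and is not the actual compare_results.py code:

```python
import numpy as np


def boxes_match(gt_boxes, pred_boxes, atol=1e-3, rtol=1e-3):
    """Compare predicted text-box coordinates against the saved ground truth.
    Raises an AssertionError when any coordinate exceeds the tolerances."""
    gt = np.array(gt_boxes, dtype="float64")
    pred = np.array(pred_boxes, dtype="float64")
    assert gt.shape == pred.shape, "number of boxes differs"
    assert np.allclose(gt, pred, atol=atol, rtol=rtol), "precision check failed"
    return True


# example: one quadrilateral box parsed from the log and from the gt file
boxes_match([[[10, 10], [100, 10], [100, 40], [10, 40]]],
            [[[10, 10], [100, 10], [100, 40.0002], [10, 40]]])
```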
### Output

A normal run looks like this:

<img src="compare_right.png" width="1000">

Output when the results do not match:

<img src="compare_wrong.png" width="1000">
# Python functional test

The main program for the Python functional test is `test_python.sh`, which tests basic Python-based functions such as model training, evaluation and inference, including pruning, quantization and distillation.

## Summary of test conclusions

- Training:

| Algorithm | Model | Single machine, single GPU | Single machine, multi-GPU | Multi-machine, multi-GPU | Model compression (single machine, multi-GPU) |
| :---- | :---- | :---- | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |
| DB | ch_ppocr_server_v2.0_det| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |
| CRNN | ch_ppocr_mobile_v2.0_rec| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |
| CRNN | ch_ppocr_server_v2.0_rec| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |
|PP-OCR| ch_ppocr_mobile_v2.0| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |
|PP-OCR| ch_ppocr_server_v2.0| normal training <br> mixed precision | normal training <br> mixed precision | normal training <br> mixed precision | normal training: FPGM pruning, PACT quantization |

- Inference:

| Algorithm | Model | Device | Batch size | MKL-DNN | CPU multi-threading | TensorRT | Offline quantization |
| ---- | ---- | ---- | ---- | ---- | ---- | ----| --- |
| DB |ch_ppocr_mobile_v2.0_det| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| DB |ch_ppocr_server_v2.0_det| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| CRNN |ch_ppocr_mobile_v2.0_rec| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
| CRNN |ch_ppocr_server_v2.0_rec| CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
|PP-OCR|ch_ppocr_server_v2.0 | CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |
|PP-OCR|ch_ppocr_server_v2.0 | CPU/GPU | 1/6 | Supported | Supported | fp32/fp16/int8 | Supported |

## 1. Install dependencies

- Install PaddlePaddle >= 2.0
- Install the PaddleOCR dependencies
```
pip3 install -r ../requirements.txt
```
- Install autolog (the tool that standardizes log output)
```
git clone https://github.com/LDOUBLEV/AutoLog
cd AutoLog
pip3 install -r requirements.txt
python3 setup.py bdist_wheel
pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
```

## 2. Functional test

First run `prepare.sh` to prepare the data and models, then run `test_python.sh`; log files in the `infer_*.log` format are produced under the ```tests/output``` directory.

test_python.sh supports five run modes. Each mode uses a different amount of data and is used to test either speed or accuracy:

- Mode 1: lite_train_infer, train with a small amount of data; used to quickly verify that the training-to-inference pipeline runs, without checking accuracy or speed;
```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
bash tests/test_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer'
```
- Mode 2: whole_infer, train with a small amount of data and predict on a moderate amount of data; used to verify that the trained model can run inference and that the inference speed is reasonable;
```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer'
bash tests/test_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer'
```
- Mode 3: infer, no training, predict on the full dataset; used to run evaluation and dynamic-to-static export of the released models and to check the inference time and accuracy of the inference model;
```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer'
# Usage 1:
bash tests/test_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer'
# Usage 2: run prediction on a specified GPU card; the third argument is the GPU id
bash tests/test_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer' '1'
```
- Mode 4: whole_train_infer, CE: train and predict on the full dataset; used to verify training accuracy, prediction accuracy and prediction speed;
```shell
bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
bash tests/test.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer'
```
- Mode 5: klquant_infer, test offline (KL) quantization;
```shell
bash tests/test_python.sh tests/configs/ppocr_det_mobile_params.txt 'klquant_infer'
```

## 3. Precision test

The compare_results.py script checks whether the model predictions match the expected results. The main steps are:
- extract the predicted coordinates from the log;
- load the saved coordinates from the local reference file;
- compare the two and raise an error when the difference exceeds the configured tolerance.

### Usage

Run:
```shell
python3.7 tests/compare_results.py --gt_file=./tests/results/python_*.txt --log_file=./tests/output/python_*.log --atol=1e-3 --rtol=1e-3
```

Parameters:
- gt_file: path of the previously saved reference results; *.txt files are matched automatically, and the files are stored under tests/result/ by default
- log_file: prediction log saved by the infer mode of the tests/test.sh script; it contains the prediction results (text boxes, recognized text, categories, etc.), and infer_*.log patterns are also accepted
- atol: absolute tolerance
- rtol: relative tolerance

### Output

A normal run looks like this:

<img src="compare_right.png" width="1000">

Output when the results do not match:

<img src="compare_wrong.png" width="1000">
# Inference and deployment guide

Besides basic model training and prediction, PaddlePaddle provides high-performance inference and deployment tools for multiple devices and platforms. This document is a navigation page for inference and deployment of all PaddleOCR models: it lets users look up which deployment paths have been verified for each model and run a one-click test.

<div align="center">
<img src="docs/guide.png" width="1000">
</div>

The verified combinations are summarized below. A filled cell means the combination can be tested with this tool in one click; an empty cell means support is in progress.

| Algorithm paper | Model | Model type | Python training & inference | Others |
| :--- | :--- | :---- | :-------- | :---- |
| DB |ch_ppocr_mobile_v2.0_det | Detection | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: Python, C++ / ARM CPU |
| DB |ch_ppocr_server_v2.0_det | Detection | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: Python, C++ / ARM CPU |
| DB |ch_PP-OCRv2_det | Detection |
| CRNN |ch_ppocr_mobile_v2.0_rec | Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: Python, C++ / ARM CPU |
| CRNN |ch_ppocr_server_v2.0_rec | Recognition | Supported | Paddle Inference: C++ <br> Paddle Serving: Python, C++ <br> Paddle-Lite: Python, C++ / ARM CPU |
| CRNN |ch_PP-OCRv2_rec | Recognition |
| DB |det_mv3_db_v2.0 | Detection |
| DB |det_r50_vd_db_v2.0 | Detection |
| EAST |det_mv3_east_v2.0 | Detection |
| EAST |det_r50_vd_east_v2.0 | Detection |
| PSENet |det_mv3_pse_v2.0 | Detection |
| PSENet |det_r50_vd_pse_v2.0 | Detection |
| SAST |det_r50_vd_sast_totaltext_v2.0 | Detection |
| Rosetta|rec_mv3_none_none_ctc_v2.0 | Recognition |
| Rosetta|rec_r34_vd_none_none_ctc_v2.0 | Recognition |
| CRNN |rec_mv3_none_bilstm_ctc_v2.0 | Recognition |
| CRNN |rec_r34_vd_none_bilstm_ctc_v2.0| Recognition |
| StarNet|rec_mv3_tps_bilstm_ctc_v2.0 | Recognition |
| StarNet|rec_r34_vd_tps_bilstm_ctc_v2.0 | Recognition |
| RARE |rec_mv3_tps_bilstm_att_v2.0 | Recognition |
| RARE |rec_r34_vd_tps_bilstm_att_v2.0 | Recognition |
| SRN |rec_r50fpn_vd_none_srn | Recognition |
| NRTR |rec_mtb_nrtr | Recognition |
| SAR |rec_r31_sar | Recognition |
| PGNet |rec_r34_vd_none_none_ctc_v2.0 | End-to-end |

## Using the one-click test tool

### Directory layout

```shell
tests/
├── configs/                                 # configuration files
│   ├── det_mv3_db.yml                       # yml for training the mobile ppocr detection model
│   ├── det_r50_vd_db.yml                    # yml for training the server ppocr detection model
│   ├── rec_icdar15_r34_train.yml            # yml for training the server ppocr recognition model
│   ├── ppocr_sys_mobile_params.txt          # parameters for the mobile ppocr detection + recognition pipeline test
│   ├── ppocr_det_mobile_params.txt          # parameters for the mobile ppocr detection model test
│   ├── ppocr_rec_mobile_params.txt          # parameters for the mobile ppocr recognition model test
│   ├── ppocr_sys_server_params.txt          # parameters for the server ppocr detection + recognition pipeline test
│   ├── ppocr_det_server_params.txt          # parameters for the server ppocr detection model test
│   ├── ppocr_rec_server_params.txt          # parameters for the server ppocr recognition model test
│   ├── ...
├── results/                                 # reference results used for precision comparison
│   ├── ppocr_det_mobile_results_fp32.txt     # saved fp32 results of the mobile ppocr detection model
│   ├── ppocr_det_mobile_results_fp16.txt     # saved fp16 results of the mobile ppocr detection model
│   ├── ppocr_det_mobile_results_fp32_cpp.txt # saved fp32 results of the mobile ppocr detection model, C++ inference
│   ├── ppocr_det_mobile_results_fp16_cpp.txt # saved fp16 results of the mobile ppocr detection model, C++ inference
│   ├── ...
├── prepare.sh                               # downloads the data and models needed by the test_*.sh scripts
├── test_python.sh                           # main program for testing Python training and inference
├── test_cpp.sh                              # main program for testing C++ inference
├── test_serving.sh                          # main program for testing Paddle Serving deployment
├── test_lite.sh                             # main program for testing Paddle-Lite deployment
├── compare_results.py                       # checks whether the predictions in the logs match the saved reference results within tolerance
└── readme.md                                # documentation
```

### Test workflow

The tool tests which functions are supported and whether the prediction results match. The workflow is:

<div align="center">
<img src="docs/test.png" width="800">
</div>

1. Run prepare.sh to prepare the data and models needed for the test;
2. Run the test script `test_*.sh` for the function you want to test to produce logs; the logs show whether each configuration ran successfully;
3. Use `compare_results.py` to compare the predictions in the logs against the reference results stored under results/ and check whether the accuracy is within the expected tolerance.

There are four main test programs:

- `test_python.sh`: tests basic Python-based functions such as training, evaluation and inference, including pruning, quantization and distillation.
- `test_cpp.sh`: tests C++-based model inference.
- `test_serving.sh`: tests service deployment based on Paddle Serving.
- `test_lite.sh`: tests on-device inference deployment based on Paddle-Lite.

Each test covers several parameter combinations (GPU/CPU, mkldnn, TensorRT, ...); follow the links below for details and tutorials:

[test_python usage](docs/test_python.md)
[test_cpp usage](docs/test_cpp.md)
[test_serving usage](docs/test_serving.md)
[test_lite usage](docs/test_lite.md)
#!/bin/bash
FILENAME=$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer', 'serving_infer', 'klquant_infer']
MODE=$2
if [ ${MODE} = "cpp_infer" ]; then
dataline=$(awk 'NR==67, NR==81{print}' $FILENAME)
elif [ ${MODE} = "serving_infer" ]; then
dataline=$(awk 'NR==52, NR==66{print}' $FILENAME)
elif [ ${MODE} = "klquant_infer" ]; then
dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
else
dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
fi
# parser params
IFS=$'\n'
lines=(${dataline})
function func_parser_key(){
strs=$1
IFS=":"
array=(${strs})
tmp=${array[0]}
echo ${tmp}
}
function func_parser_value(){
strs=$1
IFS=":"
array=(${strs})
tmp=${array[1]}
echo ${tmp}
}
function func_set_params(){
key=$1
value=$2
if [ ${key} = "null" ];then
echo " "
elif [[ ${value} = "null" ]] || [[ ${value} = " " ]] || [ ${#value} -le 0 ];then
echo " "
else
echo "${key}=${value}"
fi
}
function func_parser_params(){
strs=$1
IFS=":"
array=(${strs})
key=${array[0]}
tmp=${array[1]}
IFS="|"
res=""
for _params in ${tmp[*]}; do
IFS="="
array=(${_params})
mode=${array[0]}
value=${array[1]}
if [[ ${mode} = ${MODE} ]]; then
IFS="|"
#echo $(func_set_params "${mode}" "${value}")
echo $value
break
fi
IFS="|"
done
echo ${res}
}
function status_check(){
last_status=$1 # the exit code
run_command=$2
run_log=$3
if [ $last_status -eq 0 ]; then
echo -e "\033[33m Run successfully with command - ${run_command}! \033[0m" | tee -a ${run_log}
else
echo -e "\033[33m Run failed with command - ${run_command}! \033[0m" | tee -a ${run_log}
fi
}
IFS=$'\n'
# The training params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")
trainer_list=$(func_parser_value "${lines[14]}")
trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
trainer_key1=$(func_parser_key "${lines[19]}")
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")
save_infer_key=$(func_parser_key "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parser inference
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")
# parser serving
if [ ${MODE} = "klquant_infer" ]; then
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[1]}")
infer_export_list=$(func_parser_value "${lines[2]}")
infer_is_quant=$(func_parser_value "${lines[3]}")
# parser inference
inference_py=$(func_parser_value "${lines[4]}")
use_gpu_key=$(func_parser_key "${lines[5]}")
use_gpu_list=$(func_parser_value "${lines[5]}")
use_mkldnn_key=$(func_parser_key "${lines[6]}")
use_mkldnn_list=$(func_parser_value "${lines[6]}")
cpu_threads_key=$(func_parser_key "${lines[7]}")
cpu_threads_list=$(func_parser_value "${lines[7]}")
batch_size_key=$(func_parser_key "${lines[8]}")
batch_size_list=$(func_parser_value "${lines[8]}")
use_trt_key=$(func_parser_key "${lines[9]}")
use_trt_list=$(func_parser_value "${lines[9]}")
precision_key=$(func_parser_key "${lines[10]}")
precision_list=$(func_parser_value "${lines[10]}")
infer_model_key=$(func_parser_key "${lines[11]}")
image_dir_key=$(func_parser_key "${lines[12]}")
infer_img_dir=$(func_parser_value "${lines[12]}")
save_log_key=$(func_parser_key "${lines[13]}")
benchmark_key=$(func_parser_key "${lines[14]}")
benchmark_value=$(func_parser_value "${lines[14]}")
infer_key1=$(func_parser_key "${lines[15]}")
infer_value1=$(func_parser_value "${lines[15]}")
fi
# parser serving
if [ ${MODE} = "server_infer" ]; then
trans_model_py=$(func_parser_value "${lines[1]}")
infer_model_dir_key=$(func_parser_key "${lines[2]}")
infer_model_dir_value=$(func_parser_value "${lines[2]}")
model_filename_key=$(func_parser_key "${lines[3]}")
model_filename_value=$(func_parser_value "${lines[3]}")
params_filename_key=$(func_parser_key "${lines[4]}")
params_filename_value=$(func_parser_value "${lines[4]}")
serving_server_key=$(func_parser_key "${lines[5]}")
serving_server_value=$(func_parser_value "${lines[5]}")
serving_client_key=$(func_parser_key "${lines[6]}")
serving_client_value=$(func_parser_value "${lines[6]}")
serving_dir_value=$(func_parser_value "${lines[7]}")
web_service_py=$(func_parser_value "${lines[8]}")
web_use_gpu_key=$(func_parser_key "${lines[9]}")
web_use_gpu_list=$(func_parser_value "${lines[9]}")
web_use_mkldnn_key=$(func_parser_key "${lines[10]}")
web_use_mkldnn_list=$(func_parser_value "${lines[10]}")
web_cpu_threads_key=$(func_parser_key "${lines[11]}")
web_cpu_threads_list=$(func_parser_value "${lines[11]}")
web_use_trt_key=$(func_parser_key "${lines[12]}")
web_use_trt_list=$(func_parser_value "${lines[12]}")
web_precision_key=$(func_parser_key "${lines[13]}")
web_precision_list=$(func_parser_value "${lines[13]}")
pipeline_py=$(func_parser_value "${lines[14]}")
fi
if [ ${MODE} = "cpp_infer" ]; then
# parser cpp inference model
cpp_infer_model_dir_list=$(func_parser_value "${lines[1]}")
cpp_infer_is_quant=$(func_parser_value "${lines[2]}")
# parser cpp inference
inference_cmd=$(func_parser_value "${lines[3]}")
cpp_use_gpu_key=$(func_parser_key "${lines[4]}")
cpp_use_gpu_list=$(func_parser_value "${lines[4]}")
cpp_use_mkldnn_key=$(func_parser_key "${lines[5]}")
cpp_use_mkldnn_list=$(func_parser_value "${lines[5]}")
cpp_cpu_threads_key=$(func_parser_key "${lines[6]}")
cpp_cpu_threads_list=$(func_parser_value "${lines[6]}")
cpp_batch_size_key=$(func_parser_key "${lines[7]}")
cpp_batch_size_list=$(func_parser_value "${lines[7]}")
cpp_use_trt_key=$(func_parser_key "${lines[8]}")
cpp_use_trt_list=$(func_parser_value "${lines[8]}")
cpp_precision_key=$(func_parser_key "${lines[9]}")
cpp_precision_list=$(func_parser_value "${lines[9]}")
cpp_infer_model_key=$(func_parser_key "${lines[10]}")
cpp_image_dir_key=$(func_parser_key "${lines[11]}")
cpp_infer_img_dir=$(func_parser_value "${lines[12]}")
cpp_infer_key1=$(func_parser_key "${lines[13]}")
cpp_infer_value1=$(func_parser_value "${lines[13]}")
cpp_benchmark_key=$(func_parser_key "${lines[14]}")
cpp_benchmark_value=$(func_parser_value "${lines[14]}")
fi
LOG_PATH="./tests/output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results.log"
function func_inference(){
IFS='|'
_python=$1
_script=$2
_model_dir=$3
_log_path=$4
_img_dir=$5
_flag_quant=$6
# inference
for use_gpu in ${use_gpu_list[*]}; do
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
for use_mkldnn in ${use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
continue
fi
for threads in ${cpu_threads_list[*]}; do
for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
for use_trt in ${use_trt_list[*]}; do
for precision in ${precision_list[*]}; do
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${precision_key}" "${precision}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
function func_serving(){
IFS='|'
_python=$1
_script=$2
_model_dir=$3
# pdserving
set_dirname=$(func_set_params "${infer_model_dir_key}" "${infer_model_dir_value}")
set_model_filename=$(func_set_params "${model_filename_key}" "${model_filename_value}")
set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}")
set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}")
trans_model_cmd="${python} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}"
eval $trans_model_cmd
cd ${serving_dir_value}
echo $PWD
unset https_proxy
unset http_proxy
for use_gpu in ${web_use_gpu_list[*]}; do
echo ${use_gpu}
if [ ${use_gpu} = "null" ]; then
for use_mkldnn in ${web_use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ]; then
continue
fi
for threads in ${web_cpu_threads_list[*]}; do
_save_log_path="${_log_path}/server_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_1.log"
set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}")
web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &>${_save_log_path} &"
eval $web_service_cmd
sleep 2s
pipeline_cmd="${python} ${pipeline_py}"
eval $pipeline_cmd
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${pipeline_cmd}" "${status_log}"
PID=$!
kill $PID
sleep 2s
ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
done
done
elif [ ${use_gpu} = "0" ]; then
for use_trt in ${web_use_trt_list[*]}; do
for precision in ${web_precision_list[*]}; do
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
continue
fi
_save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${web_precision_key}" "${precision}")
web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} &>${_save_log_path} & "
eval $web_service_cmd
sleep 2s
pipeline_cmd="${python} ${pipeline_py}"
eval $pipeline_cmd
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${pipeline_cmd}" "${status_log}"
PID=$!
kill $PID
sleep 2s
ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
function func_cpp_inference(){
IFS='|'
_script=$1
_model_dir=$2
_log_path=$3
_img_dir=$4
_flag_quant=$5
# inference
for use_gpu in ${cpp_use_gpu_list[*]}; do
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
for use_mkldnn in ${cpp_use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
continue
fi
for threads in ${cpp_cpu_threads_list[*]}; do
for batch_size in ${cpp_batch_size_list[*]}; do
_save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
set_cpu_threads=$(func_set_params "${cpp_cpu_threads_key}" "${threads}")
set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${cpp_infer_key1}" "${cpp_infer_value1}")
command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${cpp_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
for use_trt in ${cpp_use_trt_list[*]}; do
for precision in ${cpp_precision_list[*]}; do
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${cpp_batch_size_list[*]}; do
_save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
set_tensorrt=$(func_set_params "${cpp_use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${cpp_precision_key}" "${precision}")
set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${cpp_infer_key1}" "${cpp_infer_value1}")
command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
infer_run_exports=(${infer_export_list})
infer_quant_flag=(${infer_is_quant})
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
save_infer_dir=$(dirname $infer_model)
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
echo ${infer_run_exports[Count]}
echo $export_cmd
eval $export_cmd
status_export=$?
status_check $status_export "${export_cmd}" "${status_log}"
else
save_infer_dir=${infer_model}
fi
#run inference
is_quant=${infer_quant_flag[Count]}
func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
elif [ ${MODE} = "cpp_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
infer_quant_flag=(${cpp_infer_is_quant})
for infer_model in ${cpp_infer_model_dir_list[*]}; do
#run inference
is_quant=${infer_quant_flag[Count]}
func_cpp_inference "${inference_cmd}" "${infer_model}" "${LOG_PATH}" "${cpp_infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
elif [ ${MODE} = "serving_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
#run serving
func_serving "${web_service_cmd}"
else
IFS="|"
export Count=0
USE_GPU_KEY=(${train_use_gpu_value})
for gpu in ${gpu_list[*]}; do
use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
run_export=${pact_export}
flag_quant=True
elif [ ${trainer} = "${fpgm_key}" ]; then
run_train=${fpgm_trainer}
run_export=${fpgm_export}
elif [ ${trainer} = "${distill_key}" ]; then
run_train=${distill_trainer}
run_export=${distill_export}
elif [ ${trainer} = ${trainer_key1} ]; then
run_train=${trainer_value1}
run_export=${export_value1}
elif [[ ${trainer} = ${trainer_key2} ]]; then
run_train=${trainer_value2}
run_export=${export_value2}
else
run_train=${norm_trainer}
run_export=${norm_export}
fi
if [ ${run_train} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${use_gpu}")
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
# load pretrain from norm training if current trainer is pact or fpgm trainer
if [ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]; then
set_pretrain="${load_norm_train_model}"
fi
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
elif [ ${#gpu} -le 15 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
# save norm trained models to set pretrain for pact training and fpgm training
if [ ${trainer} = ${trainer_norm} ]; then
load_norm_train_model=${set_eval_pretrain}
fi
# run eval
if [ ${eval_py} != "null" ]; then
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
status_check $? "${eval_cmd}" "${status_log}"
fi
# run export model
if [ ${run_export} != "null" ]; then
# run export model
save_infer_path="${save_log}"
set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key}"
eval $export_cmd
status_check $? "${export_cmd}" "${status_log}"
#run inference
eval $env
save_infer_path="${save_log}"
func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
done # done with: for autocast in ${autocast_list[*]}; do
done # done with: for gpu in ${gpu_list[*]}; do
fi # end if [ ${MODE} = "infer" ]; then
@@ -2,7 +2,14 @@
 source tests/common_func.sh
 FILENAME=$1
-dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
+# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
+MODE=$2
+if [ ${MODE} = "klquant_infer" ]; then
+    dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
+else
+    dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
+fi
 # parser params
 IFS=$'\n'
@@ -84,6 +91,35 @@ benchmark_value=$(func_parser_value "${lines[49]}")
 infer_key1=$(func_parser_key "${lines[50]}")
 infer_value1=$(func_parser_value "${lines[50]}")
# parser klquant_infer
if [ ${MODE} = "klquant_infer" ]; then
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[1]}")
infer_export_list=$(func_parser_value "${lines[2]}")
infer_is_quant=$(func_parser_value "${lines[3]}")
# parser inference
inference_py=$(func_parser_value "${lines[4]}")
use_gpu_key=$(func_parser_key "${lines[5]}")
use_gpu_list=$(func_parser_value "${lines[5]}")
use_mkldnn_key=$(func_parser_key "${lines[6]}")
use_mkldnn_list=$(func_parser_value "${lines[6]}")
cpu_threads_key=$(func_parser_key "${lines[7]}")
cpu_threads_list=$(func_parser_value "${lines[7]}")
batch_size_key=$(func_parser_key "${lines[8]}")
batch_size_list=$(func_parser_value "${lines[8]}")
use_trt_key=$(func_parser_key "${lines[9]}")
use_trt_list=$(func_parser_value "${lines[9]}")
precision_key=$(func_parser_key "${lines[10]}")
precision_list=$(func_parser_value "${lines[10]}")
infer_model_key=$(func_parser_key "${lines[11]}")
image_dir_key=$(func_parser_key "${lines[12]}")
infer_img_dir=$(func_parser_value "${lines[12]}")
save_log_key=$(func_parser_key "${lines[13]}")
benchmark_key=$(func_parser_key "${lines[14]}")
benchmark_value=$(func_parser_value "${lines[14]}")
infer_key1=$(func_parser_key "${lines[15]}")
infer_value1=$(func_parser_value "${lines[15]}")
fi
LOG_PATH="./tests/output" LOG_PATH="./tests/output"
mkdir -p ${LOG_PATH} mkdir -p ${LOG_PATH}
...@@ -107,7 +143,7 @@ function func_inference(){ ...@@ -107,7 +143,7 @@ function func_inference(){
fi fi
for threads in ${cpu_threads_list[*]}; do for threads in ${cpu_threads_list[*]}; do
for batch_size in ${batch_size_list[*]}; do for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log" _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
...@@ -135,7 +171,7 @@ function func_inference(){ ...@@ -135,7 +171,7 @@ function func_inference(){
continue continue
fi fi
for batch_size in ${batch_size_list[*]}; do for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
...@@ -158,16 +194,148 @@ function func_inference(){ ...@@ -158,16 +194,148 @@ function func_inference(){
done done
} }
if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
# set cuda device GPUID=$3
GPUID=$2 if [ ${#GPUID} -le 0 ];then
if [ ${#GPUID} -le 0 ];then
env=" " env=" "
else else
env="export CUDA_VISIBLE_DEVICES=${GPUID}" env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi fi
set CUDA_VISIBLE_DEVICES # set CUDA_VISIBLE_DEVICES
eval $env eval $env
export Count=0
IFS="|"
infer_run_exports=(${infer_export_list})
infer_quant_flag=(${infer_is_quant})
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
save_infer_dir=$(dirname $infer_model)
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
echo ${infer_run_exports[Count]}
echo $export_cmd
eval $export_cmd
status_export=$?
status_check $status_export "${export_cmd}" "${status_log}"
else
save_infer_dir=${infer_model}
fi
#run inference
is_quant=${infer_quant_flag[Count]}
func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
else
IFS="|"
export Count=0
USE_GPU_KEY=(${train_use_gpu_value})
for gpu in ${gpu_list[*]}; do
use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
run_export=${pact_export}
flag_quant=True
elif [ ${trainer} = "${fpgm_key}" ]; then
run_train=${fpgm_trainer}
run_export=${fpgm_export}
elif [ ${trainer} = "${distill_key}" ]; then
run_train=${distill_trainer}
run_export=${distill_export}
elif [ ${trainer} = ${trainer_key1} ]; then
run_train=${trainer_value1}
run_export=${export_value1}
elif [[ ${trainer} = ${trainer_key2} ]]; then
run_train=${trainer_value2}
run_export=${export_value2}
else
run_train=${norm_trainer}
run_export=${norm_export}
fi
if [ ${run_train} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${use_gpu}")
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
# load pretrain from norm training if current trainer is pact or fpgm trainer
if [ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]; then
set_pretrain="${load_norm_train_model}"
fi
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
elif [ ${#gpu} -le 15 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
# save norm trained models to set pretrain for pact training and fpgm training
if [ ${trainer} = ${trainer_norm} ]; then
load_norm_train_model=${set_eval_pretrain}
fi
# run eval
if [ ${eval_py} != "null" ]; then
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
status_check $? "${eval_cmd}" "${status_log}"
fi
# run export model
if [ ${run_export} != "null" ]; then
# run export model
save_infer_path="${save_log}"
set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key}"
eval $export_cmd
status_check $? "${export_cmd}" "${status_log}"
#run inference
eval $env
save_infer_path="${save_log}"
func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
done # done with: for autocast in ${autocast_list[*]}; do
done # done with: for gpu in ${gpu_list[*]}; do
fi # end if [ ${MODE} = "infer" ]; then
echo "################### run test ###################"
@@ -141,7 +141,6 @@ if __name__ == "__main__":
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
-           img = img[:, :, ::-1]
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
......
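The removed line had converted the freshly read image from BGR to RGB; since `cv2.imread` already returns BGR and the detection configs earlier in this diff use `img_mode: BGR`, dropping it avoids a double conversion. A minimal illustration of the channel orders involved (synthetic data, illustrative only):

```python
import numpy as np

# cv2.imread returns images in BGR channel order; simulate one here
img_bgr = np.zeros((2, 2, 3), dtype=np.uint8)
img_bgr[..., 0] = 255                     # blue channel

img_rgb = img_bgr[:, :, ::-1]             # the removed line produced RGB by flipping channels

# the two arrays differ only in channel order
assert np.array_equal(img_bgr[..., 0], img_rgb[..., 2])
```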