Merge pull request #7978 from WenmuZhou/tipc3

[TIPC] fix pact bug in slanet

Merge pull request #7978 from WenmuZhou/tipc3
[TIPC] fix pact bug in slanet
533f276a · zhoujun · GitHub · 5f8eca85 · 4cf04cbe · 533f276a
6 changed files
--- a/ppstructure/kie/requirements.txt
+++ b/ppstructure/kie/requirements.txt
@@ -4,4 +4,4 @@ seqeval
 pypandoc
 attrdict
 python_docx
-https://paddleocr.bj.bcebos.com/ppstructure/whl/paddlenlp-2.3.0.dev0-py3-none-any.whl
+paddlenlp>=2.4.1
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
@@ -68,6 +68,7 @@ def build_pre_process_list(args):

 class TableStructurer(object):
    def __init__(self, args):
+        self.args = args
        self.use_onnx = args.use_onnx
        pre_process_list = build_pre_process_list(args)
        if args.table_algorithm not in ['TableMaster']:
@@ -89,8 +90,31 @@ class TableStructurer(object):
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'table', logger)

+        if args.benchmark:
+            import auto_log
+            pid = os.getpid()
+            gpu_id = utility.get_infer_gpuid()
+            self.autolog = auto_log.AutoLogger(
+                model_name="table",
+                model_precision=args.precision,
+                batch_size=1,
+                data_shape="dynamic",
+                save_path=None,  #args.save_log_path,
+                inference_config=self.config,
+                pids=pid,
+                process_name=None,
+                gpu_ids=gpu_id if args.use_gpu else None,
+                time_keys=[
+                    'preprocess_time', 'inference_time', 'postprocess_time'
+                ],
+                warmup=0,
+                logger=logger)
+
    def __call__(self, img):
        starttime = time.time()
+        if self.args.benchmark:
+            self.autolog.times.start()
+
        ori_im = img.copy()
        data = {'image': img}
        data = transform(data, self.preprocess_op)
@@ -99,6 +123,8 @@ class TableStructurer(object):
            return None, 0
        img = np.expand_dims(img, axis=0)
        img = img.copy()
+        if self.args.benchmark:
+            self.autolog.times.stamp()
        if self.use_onnx:
            input_dict = {}
            input_dict[self.input_tensor.name] = img
@@ -110,6 +136,8 @@ class TableStructurer(object):
            for output_tensor in self.output_tensors:
                output = output_tensor.copy_to_cpu()
                outputs.append(output)
+            if self.args.benchmark:
+                self.autolog.times.stamp()

        preds = {}
        preds['structure_probs'] = outputs[1]
@@ -125,6 +153,8 @@ class TableStructurer(object):
            '<html>', '<body>', '<table>'
        ] + structure_str_list + ['</table>', '</body>', '</html>']
        elapse = time.time() - starttime
+        if self.args.benchmark:
+            self.autolog.times.end(stamp=True)
        return (structure_str_list, bbox_list), elapse


@@ -164,6 +194,8 @@ def main(args):
                total_time += elapse
            count += 1
            logger.info("Predict time of {}: {}".format(image_file, elapse))
+    if args.benchmark:
+        table_structurer.autolog.report()


 if __name__ == "__main__":

--- a/ppstructure/table/predict_table.py
+++ b/ppstructure/table/predict_table.py
@@ -14,7 +14,6 @@

 import os
 import sys
-import subprocess

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
@@ -58,48 +57,28 @@ def expand(pix, det_box, shape):

 class TableSystem(object):
    def __init__(self, args, text_detector=None, text_recognizer=None):
+        self.args = args
        if not args.show_log:
            logger.setLevel(logging.INFO)
-
-        self.text_detector = predict_det.TextDetector(
-            args) if text_detector is None else text_detector
-        self.text_recognizer = predict_rec.TextRecognizer(
-            args) if text_recognizer is None else text_recognizer
-
+        args.benchmark = False
+        self.text_detector = predict_det.TextDetector(copy.deepcopy(
+            args)) if text_detector is None else text_detector
+        self.text_recognizer = predict_rec.TextRecognizer(copy.deepcopy(
+            args)) if text_recognizer is None else text_recognizer
+        args.benchmark = True
        self.table_structurer = predict_strture.TableStructurer(args)
        if args.table_algorithm in ['TableMaster']:
            self.match = TableMasterMatcher()
        else:
            self.match = TableMatch(filter_ocr_result=True)

-        self.benchmark = args.benchmark
        self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
            args, 'table', logger)
-        if args.benchmark:
-            import auto_log
-            pid = os.getpid()
-            gpu_id = utility.get_infer_gpuid()
-            self.autolog = auto_log.AutoLogger(
-                model_name="table",
-                model_precision=args.precision,
-                batch_size=1,
-                data_shape="dynamic",
-                save_path=None,  #args.save_log_path,
-                inference_config=self.config,
-                pids=pid,
-                process_name=None,
-                gpu_ids=gpu_id if args.use_gpu else None,
-                time_keys=[
-                    'preprocess_time', 'inference_time', 'postprocess_time'
-                ],
-                warmup=0,
-                logger=logger)

    def __call__(self, img, return_ocr_result_in_table=False):
        result = dict()
        time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0}
        start = time.time()
-
        structure_res, elapse = self._structure(copy.deepcopy(img))
        result['cell_bbox'] = structure_res[1].tolist()
        time_dict['table'] = elapse
@@ -118,24 +97,16 @@ class TableSystem(object):
        toc = time.time()
        time_dict['match'] = toc - tic
        result['html'] = pred_html
-        if self.benchmark:
-            self.autolog.times.end(stamp=True)
        end = time.time()
        time_dict['all'] = end - start
-        if self.benchmark:
-            self.autolog.times.stamp()
        return result, time_dict

    def _structure(self, img):
-        if self.benchmark:
-            self.autolog.times.start()
        structure_res, elapse = self.table_structurer(copy.deepcopy(img))
        return structure_res, elapse

    def _ocr(self, img):
        h, w = img.shape[:2]
-        if self.benchmark:
-            self.autolog.times.stamp()
        dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img))
        dt_boxes = sorted_boxes(dt_boxes)

@@ -233,12 +204,13 @@ def main(args):
    f_html.close()

    if args.benchmark:
-        text_sys.autolog.report()
+        table_sys.table_structurer.autolog.report()


 if __name__ == "__main__":
    args = parse_args()
    if args.use_mp:
+        import subprocess
        p_list = []
        total_process_num = args.total_process_num
        for process_id in range(total_process_num):

--- a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
@@ -7,14 +7,14 @@ Global.auto_cast:fp32
 Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
 Global.save_model_dir:./output/
 Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
-Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
+Architecture.Backbone.pretrained:pretrain_models/ser_LayoutXLM_xfun_zh
 train_model_name:latest
 train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
 null:null
 ##
 trainer:pact_train
 norm_train:null
-pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.eval_batch_step=[2000,10]
 fpgm_train:null
 distill_train:null
 null:null

--- a/test_tipc/configs/slanet/train_pact_infer_python.txt
+++ b/test_tipc/configs/slanet/train_pact_infer_python.txt
@@ -34,7 +34,7 @@ distill_export:null
 export1:null
 export2:null
 ##
-infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer
+infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_infer
 infer_export:null
 infer_quant:True
 inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer  --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table

--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
    python_name=${array[0]}
    ${python_name} -m pip install -r requirements.txt
    ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
+    ${python_name} -m pip install paddleslim==2.3.4
    # pretrain lite train data
    wget -nc -P  ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams  --no-check-certificate
    wget -nc -P ./pretrain_models/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar  --no-check-certificate
@@ -260,7 +261,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
        cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
    fi
-    if [ ${model_name} == "layoutxlm_ser" ]; then
+    if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
        ${python_name} -m pip install -r ppstructure/kie/requirements.txt
        ${python_name} -m pip install opencv-python -U
        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate