diff --git a/ppstructure/kie/requirements.txt b/ppstructure/kie/requirements.txt index 11fa98da1bff7a1863d8a077ca73435d15072523..6cfcba764190fd46f98b76c27e93db6f4fa36c45 100644 --- a/ppstructure/kie/requirements.txt +++ b/ppstructure/kie/requirements.txt @@ -4,4 +4,4 @@ seqeval pypandoc attrdict python_docx -https://paddleocr.bj.bcebos.com/ppstructure/whl/paddlenlp-2.3.0.dev0-py3-none-any.whl +paddlenlp>=2.4.1 diff --git a/ppstructure/table/predict_table.py b/ppstructure/table/predict_table.py index aeec66deca62f648df249a5833dbfa678d2da612..fdf611b7ffc049fc745b86233ee127337eaf5f90 100644 --- a/ppstructure/table/predict_table.py +++ b/ppstructure/table/predict_table.py @@ -58,6 +58,7 @@ def expand(pix, det_box, shape): class TableSystem(object): def __init__(self, args, text_detector=None, text_recognizer=None): + self.args = args if not args.show_log: logger.setLevel(logging.INFO) @@ -99,13 +100,18 @@ class TableSystem(object): result = dict() time_dict = {'det': 0, 'rec': 0, 'table': 0, 'all': 0, 'match': 0} start = time.time() - + if self.args.benchmark: + self.autolog.times.start() structure_res, elapse = self._structure(copy.deepcopy(img)) + if self.benchmark: + self.autolog.times.stamp() result['cell_bbox'] = structure_res[1].tolist() time_dict['table'] = elapse dt_boxes, rec_res, det_elapse, rec_elapse = self._ocr( copy.deepcopy(img)) + if self.benchmark: + self.autolog.times.stamp() time_dict['det'] = det_elapse time_dict['rec'] = rec_elapse @@ -118,24 +124,18 @@ class TableSystem(object): toc = time.time() time_dict['match'] = toc - tic result['html'] = pred_html - if self.benchmark: - self.autolog.times.end(stamp=True) end = time.time() time_dict['all'] = end - start if self.benchmark: - self.autolog.times.stamp() + self.autolog.times.end(stamp=True) return result, time_dict def _structure(self, img): - if self.benchmark: - self.autolog.times.start() structure_res, elapse = self.table_structurer(copy.deepcopy(img)) return structure_res, elapse def _ocr(self, img): h, w = img.shape[:2] - if self.benchmark: - self.autolog.times.stamp() dt_boxes, det_elapse = self.text_detector(copy.deepcopy(img)) dt_boxes = sorted_boxes(dt_boxes) @@ -233,7 +233,7 @@ def main(args): f_html.close() if args.benchmark: - text_sys.autolog.report() + table_sys.autolog.report() if __name__ == "__main__": diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt index fbf2a880269fba4596908def0980cb778a9281e3..c19b4b73a9fb8cc3b253d932f932479f3d706082 100644 --- a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt +++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt @@ -7,14 +7,14 @@ Global.auto_cast:fp32 Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8 -Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh +Architecture.Backbone.pretrained:pretrain_models/ser_LayoutXLM_xfun_zh train_model_name:latest train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg null:null ## trainer:pact_train norm_train:null -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.eval_batch_step=[2000,10] fpgm_train:null distill_train:null null:null diff --git a/test_tipc/configs/slanet/train_pact_infer_python.txt b/test_tipc/configs/slanet/train_pact_infer_python.txt index 42ed0cf5995d17d5fd55d2f35f0659f8e3defecb..98546afa696a0f04d3cbf800542c18352b55dee9 100644 --- a/test_tipc/configs/slanet/train_pact_infer_python.txt +++ b/test_tipc/configs/slanet/train_pact_infer_python.txt @@ -34,7 +34,7 @@ distill_export:null export1:null export2:null ## -infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer +infer_model:./inference/en_ppstructure_mobile_v2.0_SLANet_infer infer_export:null infer_quant:True inference:ppstructure/table/predict_table.py --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --det_limit_side_len=736 --det_limit_type=min --output ./output/table diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 62cda1d8a5b7c75ee6b22ef9e81f467d06907559..da6dfecad0dd1c43cc712ede767c77c8467b4e34 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -146,6 +146,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then python_name=${array[0]} ${python_name} -m pip install -r requirements.txt ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl + ${python_name} -m pip install paddleslim==2.3.4 # pretrain lite train data wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate @@ -260,7 +261,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ fi - if [ ${model_name} == "layoutxlm_ser" ]; then + if [[ ${model_name} =~ "layoutxlm_ser" ]]; then ${python_name} -m pip install -r ppstructure/kie/requirements.txt ${python_name} -m pip install opencv-python -U wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate