提交 c647a6da 编写于 作者: 文幕地方's avatar 文幕地方

add re to ppstructure system

上级 12ddc34b
...@@ -567,6 +567,7 @@ class PPStructure(StructureSystem): ...@@ -567,6 +567,7 @@ class PPStructure(StructureSystem):
assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format( assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format(
SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version) SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version)
params.use_gpu = check_gpu(params.use_gpu) params.use_gpu = check_gpu(params.use_gpu)
params.mode = 'structure'
if not params.show_log: if not params.show_log:
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
# 基于Python预测引擎推理 # 基于Python预测引擎推理
- [1. 版面信息抽取](#1) - [1. 版面信息抽取](#1-版面信息抽取)
- [1.1 版面分析+表格识别](#1.1) - [1.1 版面分析+表格识别](#11-版面分析表格识别)
- [1.2 版面分析](#1.2) - [1.2 版面分析](#12-版面分析)
- [1.3 表格识别](#1.3) - [1.3 表格识别](#13-表格识别)
- [2. 关键信息抽取](#2) - [2. 关键信息抽取](#2-关键信息抽取)
- [2.1 SER](#21-ser)
- [2.2 RE+SER](#22-reser)
<a name="1"></a> <a name="1"></a>
## 1. 版面信息抽取 ## 1. 版面信息抽取
...@@ -70,6 +72,8 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \ ...@@ -70,6 +72,8 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
<a name="2"></a> <a name="2"></a>
## 2. 关键信息抽取 ## 2. 关键信息抽取
### 2.1 SER
```bash ```bash
cd ppstructure cd ppstructure
...@@ -87,3 +91,27 @@ python3 kie/predict_kie_token_ser.py \ ...@@ -87,3 +91,27 @@ python3 kie/predict_kie_token_ser.py \
``` ```
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下存放可视化之后的图片,图片名和输入图片名一致。 运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下存放可视化之后的图片,图片名和输入图片名一致。
### 2.2 RE+SER
```bash
cd ppstructure
mkdir inference && cd inference
# 下载RE SER XFUND 模型并解压
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layoutxlm_xfund_infer.tar && tar -xf re_vi_layoutxlm_xfund_infer.tar
cd ..
python3 predict_system.py \
--kie_algorithm=LayoutXLM \
--re_model_dir=./inference/re_vi_layoutxlm_xfund_infer \
--ser_model_dir=./inference/ser_vi_layoutxlm_xfund_infer \
--image_dir=./docs/kie/input/zh_val_42.jpg \
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
--vis_font_path=../doc/fonts/simfang.ttf \
--ocr_order_method="tb-yx" \
--mode=kie
```
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下有一个同名目录,目录中存放可视化图片和预测结果。
# Python Inference # Python Inference
- [1. Layout Structured Analysis](#1) - [1. Layout Structured Analysis](#1-layout-structured-analysis)
- [1.1 layout analysis + table recognition](#1.1) - [1.1 layout analysis + table recognition](#11-layout-analysis--table-recognition)
- [1.2 layout analysis](#1.2) - [1.2 layout analysis](#12-layout-analysis)
- [1.3 table recognition](#1.3) - [1.3 table recognition](#13-table-recognition)
- [2. Key Information Extraction](#2) - [2. Key Information Extraction](#2-key-information-extraction)
- [2.1 SER](#21-ser)
- [2.2 RE+SER](#22-reser)
<a name="1"></a> <a name="1"></a>
## 1. Layout Structured Analysis ## 1. Layout Structured Analysis
...@@ -72,6 +74,7 @@ After the operation is completed, each image will have a directory with the same ...@@ -72,6 +74,7 @@ After the operation is completed, each image will have a directory with the same
<a name="2"></a> <a name="2"></a>
## 2. Key Information Extraction ## 2. Key Information Extraction
### 2.1 SER
```bash ```bash
cd ppstructure cd ppstructure
...@@ -89,3 +92,28 @@ python3 kie/predict_kie_token_ser.py \ ...@@ -89,3 +92,28 @@ python3 kie/predict_kie_token_ser.py \
``` ```
After the operation is completed, each image will store the visualized image in the `kie` directory under the directory specified by the `output` field, and the image name is the same as the input image name. After the operation is completed, each image will store the visualized image in the `kie` directory under the directory specified by the `output` field, and the image name is the same as the input image name.
### 2.2 RE+SER
```bash
cd ppstructure
mkdir inference && cd inference
# download the SER and RE XFUND models and extract them
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layoutxlm_xfund_infer.tar && tar -xf re_vi_layoutxlm_xfund_infer.tar
cd ..
python3 predict_system.py \
--kie_algorithm=LayoutXLM \
--re_model_dir=./inference/re_vi_layoutxlm_xfund_infer \
--ser_model_dir=./inference/ser_vi_layoutxlm_xfund_infer \
--image_dir=./docs/kie/input/zh_val_42.jpg \
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
--vis_font_path=../doc/fonts/simfang.ttf \
--ocr_order_method="tb-yx" \
--mode=kie
```
After the run completes, a directory named after each input image is created in the `kie` directory under the directory specified by the `output` field; the visualization images and prediction results are stored there.
...@@ -34,8 +34,6 @@ from ppocr.utils.utility import get_image_file_list, check_and_read ...@@ -34,8 +34,6 @@ from ppocr.utils.utility import get_image_file_list, check_and_read
from ppstructure.utility import parse_args from ppstructure.utility import parse_args
from ppstructure.kie.predict_kie_token_ser import SerPredictor from ppstructure.kie.predict_kie_token_ser import SerPredictor
from paddleocr import PaddleOCR
logger = get_logger() logger = get_logger()
......
...@@ -30,6 +30,7 @@ from copy import deepcopy ...@@ -30,6 +30,7 @@ from copy import deepcopy
from ppocr.utils.utility import get_image_file_list, check_and_read from ppocr.utils.utility import get_image_file_list, check_and_read
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
from ppocr.utils.visual import draw_re_results
from tools.infer.predict_system import TextSystem from tools.infer.predict_system import TextSystem
from ppstructure.layout.predict_layout import LayoutPredictor from ppstructure.layout.predict_layout import LayoutPredictor
from ppstructure.table.predict_table import TableSystem, to_excel from ppstructure.table.predict_table import TableSystem, to_excel
...@@ -75,7 +76,8 @@ class StructureSystem(object): ...@@ -75,7 +76,8 @@ class StructureSystem(object):
self.table_system = TableSystem(args) self.table_system = TableSystem(args)
elif self.mode == 'kie': elif self.mode == 'kie':
raise NotImplementedError from ppstructure.kie.predict_kie_token_ser_re import SerRePredictor
self.kie_predictor = SerRePredictor(args)
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0): def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
time_dict = { time_dict = {
...@@ -176,7 +178,9 @@ class StructureSystem(object): ...@@ -176,7 +178,9 @@ class StructureSystem(object):
time_dict['all'] = end - start time_dict['all'] = end - start
return res_list, time_dict return res_list, time_dict
elif self.mode == 'kie': elif self.mode == 'kie':
raise NotImplementedError re_res, elapse = self.kie_predictor(img)
time_dict['kie'] = elapse
return re_res[0], time_dict
return None, None return None, None
...@@ -235,15 +239,28 @@ def main(args): ...@@ -235,15 +239,28 @@ def main(args):
all_res = [] all_res = []
for index, img in enumerate(imgs): for index, img in enumerate(imgs):
res, time_dict = structure_sys(img, img_idx=index) res, time_dict = structure_sys(img, img_idx=index)
img_save_path = os.path.join(save_folder, img_name,
'show_{}.jpg'.format(index))
os.makedirs(os.path.join(save_folder, img_name), exist_ok=True)
if structure_sys.mode == 'structure' and res != []: if structure_sys.mode == 'structure' and res != []:
save_structure_res(res, save_folder, img_name, index)
draw_img = draw_structure_result(img, res, args.vis_font_path) draw_img = draw_structure_result(img, res, args.vis_font_path)
img_save_path = os.path.join(save_folder, img_name, save_structure_res(res, save_folder, img_name, index)
'show_{}.jpg'.format(index))
elif structure_sys.mode == 'kie': elif structure_sys.mode == 'kie':
raise NotImplementedError draw_img = draw_re_results(
# draw_img = draw_ser_results(img, res, args.vis_font_path) img, res, font_path=args.vis_font_path)
# img_save_path = os.path.join(save_folder, img_name + '.jpg')
with open(
os.path.join(save_folder, img_name,
'res_{}_kie.txt'.format(index)),
'w',
encoding='utf8') as f:
res_str = '{}\t{}\n'.format(
image_file,
json.dumps(
{
"ocr_info": res
}, ensure_ascii=False))
f.write(res_str)
if res != []: if res != []:
cv2.imwrite(img_save_path, draw_img) cv2.imwrite(img_save_path, draw_img)
logger.info('result save to {}'.format(img_save_path)) logger.info('result save to {}'.format(img_save_path))
......
...@@ -11,9 +11,9 @@ ...@@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import random
import ast import ast
from PIL import Image from PIL import Image, ImageDraw, ImageFont
import numpy as np import numpy as np
from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args
...@@ -64,6 +64,7 @@ def init_args(): ...@@ -64,6 +64,7 @@ def init_args():
parser.add_argument( parser.add_argument(
"--mode", "--mode",
type=str, type=str,
choices=['structure', 'kie'],
default='structure', default='structure',
help='structure and kie is supported') help='structure and kie is supported')
parser.add_argument( parser.add_argument(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册