提交 c647a6da 编写于 作者: 文幕地方's avatar 文幕地方

add re to ppstructure system

上级 12ddc34b
......@@ -567,6 +567,7 @@ class PPStructure(StructureSystem):
assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format(
SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version)
params.use_gpu = check_gpu(params.use_gpu)
params.mode = 'structure'
if not params.show_log:
logger.setLevel(logging.INFO)
......
# 基于Python预测引擎推理
- [1. 版面信息抽取](#1)
- [1.1 版面分析+表格识别](#1.1)
- [1.2 版面分析](#1.2)
- [1.3 表格识别](#1.3)
- [2. 关键信息抽取](#2)
- [1. 版面信息抽取](#1-版面信息抽取)
- [1.1 版面分析+表格识别](#11-版面分析表格识别)
- [1.2 版面分析](#12-版面分析)
- [1.3 表格识别](#13-表格识别)
- [2. 关键信息抽取](#2-关键信息抽取)
- [2.1 SER](#21-ser)
- [2.2 RE+SER](#22-reser)
<a name="1"></a>
## 1. 版面信息抽取
......@@ -70,6 +72,8 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
<a name="2"></a>
## 2. 关键信息抽取
### 2.1 SER
```bash
cd ppstructure
......@@ -87,3 +91,27 @@ python3 kie/predict_kie_token_ser.py \
```
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下存放可视化之后的图片,图片名和输入图片名一致。
### 2.2 RE+SER
```bash
cd ppstructure
mkdir inference && cd inference
# 下载RE SER XFUND 模型并解压
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layoutxlm_xfund_infer.tar && tar -xf re_vi_layoutxlm_xfund_infer.tar
cd ..
python3 predict_system.py \
--kie_algorithm=LayoutXLM \
--re_model_dir=./inference/re_vi_layoutxlm_xfund_infer \
--ser_model_dir=./inference/ser_vi_layoutxlm_xfund_infer \
--image_dir=./docs/kie/input/zh_val_42.jpg \
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
--vis_font_path=../doc/fonts/simfang.ttf \
--ocr_order_method="tb-yx" \
--mode=kie
```
运行完成后,每张图片会在`output`字段指定的目录下的`kie`目录下有一个同名目录,目录中存放可视化图片和预测结果。
# Python Inference
- [1. Layout Structured Analysis](#1)
- [1.1 layout analysis + table recognition](#1.1)
- [1.2 layout analysis](#1.2)
- [1.3 table recognition](#1.3)
- [2. Key Information Extraction](#2)
- [1. Layout Structured Analysis](#1-layout-structured-analysis)
- [1.1 layout analysis + table recognition](#11-layout-analysis--table-recognition)
- [1.2 layout analysis](#12-layout-analysis)
- [1.3 table recognition](#13-table-recognition)
- [2. Key Information Extraction](#2-key-information-extraction)
- [2.1 SER](#21-ser)
- [2.2 RE+SER](#22-reser)
<a name="1"></a>
## 1. Layout Structured Analysis
......@@ -72,6 +74,7 @@ After the operation is completed, each image will have a directory with the same
<a name="2"></a>
## 2. Key Information Extraction
### 2.1 SER
```bash
cd ppstructure
......@@ -89,3 +92,28 @@ python3 kie/predict_kie_token_ser.py \
```
After the run completes, the visualized result for each image is saved in the `kie` directory under the directory specified by the `output` field, using the same file name as the input image.
### 2.2 RE+SER
```bash
cd ppstructure
mkdir inference && cd inference
# download the RE and SER XFUND models and extract them
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layoutxlm_xfund_infer.tar && tar -xf re_vi_layoutxlm_xfund_infer.tar
cd ..
python3 predict_system.py \
--kie_algorithm=LayoutXLM \
--re_model_dir=./inference/re_vi_layoutxlm_xfund_infer \
--ser_model_dir=./inference/ser_vi_layoutxlm_xfund_infer \
--image_dir=./docs/kie/input/zh_val_42.jpg \
--ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
--vis_font_path=../doc/fonts/simfang.ttf \
--ocr_order_method="tb-yx" \
--mode=kie
```
After the run completes, a directory named after each input image is created in the `kie` directory under the directory specified by the `output` field; the visualized images and prediction results are stored there.
......@@ -34,8 +34,6 @@ from ppocr.utils.utility import get_image_file_list, check_and_read
from ppstructure.utility import parse_args
from ppstructure.kie.predict_kie_token_ser import SerPredictor
from paddleocr import PaddleOCR
logger = get_logger()
......
......@@ -30,6 +30,7 @@ from copy import deepcopy
from ppocr.utils.utility import get_image_file_list, check_and_read
from ppocr.utils.logging import get_logger
from ppocr.utils.visual import draw_re_results
from tools.infer.predict_system import TextSystem
from ppstructure.layout.predict_layout import LayoutPredictor
from ppstructure.table.predict_table import TableSystem, to_excel
......@@ -75,7 +76,8 @@ class StructureSystem(object):
self.table_system = TableSystem(args)
elif self.mode == 'kie':
raise NotImplementedError
from ppstructure.kie.predict_kie_token_ser_re import SerRePredictor
self.kie_predictor = SerRePredictor(args)
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
time_dict = {
......@@ -176,7 +178,9 @@ class StructureSystem(object):
time_dict['all'] = end - start
return res_list, time_dict
elif self.mode == 'kie':
raise NotImplementedError
re_res, elapse = self.kie_predictor(img)
time_dict['kie'] = elapse
return re_res[0], time_dict
return None, None
......@@ -235,15 +239,28 @@ def main(args):
all_res = []
for index, img in enumerate(imgs):
res, time_dict = structure_sys(img, img_idx=index)
img_save_path = os.path.join(save_folder, img_name,
'show_{}.jpg'.format(index))
os.makedirs(os.path.join(save_folder, img_name), exist_ok=True)
if structure_sys.mode == 'structure' and res != []:
save_structure_res(res, save_folder, img_name, index)
draw_img = draw_structure_result(img, res, args.vis_font_path)
img_save_path = os.path.join(save_folder, img_name,
'show_{}.jpg'.format(index))
save_structure_res(res, save_folder, img_name, index)
elif structure_sys.mode == 'kie':
raise NotImplementedError
# draw_img = draw_ser_results(img, res, args.vis_font_path)
# img_save_path = os.path.join(save_folder, img_name + '.jpg')
draw_img = draw_re_results(
img, res, font_path=args.vis_font_path)
with open(
os.path.join(save_folder, img_name,
'res_{}_kie.txt'.format(index)),
'w',
encoding='utf8') as f:
res_str = '{}\t{}\n'.format(
image_file,
json.dumps(
{
"ocr_info": res
}, ensure_ascii=False))
f.write(res_str)
if res != []:
cv2.imwrite(img_save_path, draw_img)
logger.info('result save to {}'.format(img_save_path))
......
......@@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import ast
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from tools.infer.utility import draw_ocr_box_txt, str2bool, init_args as infer_args
......@@ -64,6 +64,7 @@ def init_args():
parser.add_argument(
"--mode",
type=str,
choices=['structure', 'kie'],
default='structure',
help='structure and kie is supported')
parser.add_argument(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册