提交 6c19d15a 编写于 作者: W WenmuZhou

save figure

上级 6a121aa7
......@@ -369,6 +369,8 @@ def main():
elif args.type == 'structure':
result = engine(img_path)
save_structure_res(result, args.output, img_name)
for item in result:
save_structure_res(result, args.output, img_name)
......@@ -38,6 +38,7 @@ result = table_engine(img)
save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
for line in result:
from PIL import Image
......@@ -80,7 +81,7 @@ The description of each field in dict is as follows
Most of the parameters are consistent with the paddleocr whl package, see [doc of whl](../doc/doc_en/whl_en.md)
After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an Excel file, and the Excel file name will be the coordinates of the table in the image.
After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an Excel file, and each figure area will be cropped and saved as an image. The Excel and image file names are the coordinates of the corresponding table or figure region in the image.
## 2. PPStructure Pipeline
......@@ -39,6 +39,7 @@ result = table_engine(img)
save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
for line in result:
from PIL import Image
......@@ -82,7 +83,7 @@ dict 里各个字段说明如下
大部分参数和paddleocr whl包保持一致,见 [whl包文档](../doc/doc_ch/whl.md)
## 2. PPStructure Pipeline
......@@ -65,17 +65,17 @@ class OCRSystem(object):
filter_boxes = [x + [x1, y1] for x in filter_boxes]
filter_boxes = [x.reshape(-1).tolist() for x in filter_boxes]
# remove style char
style_token = ['<strike>','<strike>','<sup>','</sub>','<b>','</b>','<sub>','</sup>',
style_token = ['<strike>', '<strike>', '<sup>', '</sub>', '<b>', '</b>', '<sub>', '</sup>',
'<overline>', '</overline>', '<underline>', '</underline>', '<i>', '</i>']
filter_rec_res_tmp = []
for rec_res in filter_rec_res:
rec_str, rec_conf = rec_res
for token in style_token:
if token in rec_str:
rec_str = rec_str.replace(token, '')
filter_rec_res_tmp.append((rec_str, rec_conf))
res = (filter_boxes, filter_rec_res_tmp)
res_list.append({'type': region.type, 'bbox': [x1, y1, x2, y2], 'res': res})
res_list.append({'type': region.type, 'bbox': [x1, y1, x2, y2], 'img': roi_img, 'res': res})
return res_list
......@@ -88,6 +88,10 @@ def save_structure_res(res, save_folder, img_name):
if region['type'] == 'Table':
excel_path = os.path.join(excel_save_folder, '{}.xlsx'.format(region['bbox']))
to_excel(region['res'], excel_path)
if region['type'] == 'Figure':
roi_img = region['img']
img_path = os.path.join(excel_save_folder, '{}.jpg'.format(region['bbox']))
cv2.imwrite(img_path, roi_img)
for box, rec_res in zip(region['res'][0], region['res'][1]):
f.write('{}\t{}\n'.format(np.array(box).reshape(-1).tolist(), rec_res))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册