Commit 27215c65, authored by an1018

update doc

@@ -47,7 +47,7 @@ str_to_cpu_mode(const std::string &cpu_mode) {
   std::string upper_key;
   std::transform(cpu_mode.cbegin(), cpu_mode.cend(), upper_key.begin(),
                  ::toupper);
-  auto index = cpu_mode_map.find(upper_key);
+  auto index = cpu_mode_map.find(upper_key.c_str());
   if (index == cpu_mode_map.end()) {
     LOGE("cpu_mode not found %s", upper_key.c_str());
     return paddle::lite_api::LITE_POWER_HIGH;
@@ -116,4 +116,4 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
   ppredictor::OCR_PPredictor *ppredictor =
       (ppredictor::OCR_PPredictor *)java_pointer;
   delete ppredictor;
-}
\ No newline at end of file
+}
@@ -54,7 +54,7 @@ public class OCRPredictorNative {
     }

     public void destory() {
-        if (nativePointer > 0) {
+        if (nativePointer != 0) {
             release(nativePointer);
             nativePointer = 0;
         }
......
@@ -109,8 +109,10 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT

 Before running, copy the following files into the `build/Release/` folder:

 1. `paddle_inference/paddle/lib/paddle_inference.dll`
-2. `opencv/build/x64/vc15/bin/opencv_world455.dll`
-3. If you use the OpenBLAS build of the inference library, also copy `paddle_inference/third_party/install/openblas/lib/openblas.dll`
+2. `paddle_inference/third_party/install/onnxruntime/lib/onnxruntime.dll`
+3. `paddle_inference/third_party/install/paddle2onnx/lib/paddle2onnx.dll`
+4. `opencv/build/x64/vc15/bin/opencv_world455.dll`
+5. If you use the OpenBLAS build of the inference library, also copy `paddle_inference/third_party/install/openblas/lib/openblas.dll`

 ### Step4: Inference
......
@@ -73,4 +73,4 @@ python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_

 The data type of the quantized model parameters exported in the above steps is still FP32, but the numerical range of the parameters is int8.
 The exported model can be converted through the `opt` tool of PaddleLite.

-For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme_en.md)
+For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme.md)
@@ -636,4 +636,6 @@ def main():
         for item in result:
             item.pop('img')
+            item.pop('res')
             logger.info(item)
+    logger.info('result save to {}'.format(args.output))
@@ -35,10 +35,12 @@ class CopyPaste(object):
         point_num = data['polys'].shape[1]
         src_img = data['image']
         src_polys = data['polys'].tolist()
+        src_texts = data['texts']
         src_ignores = data['ignore_tags'].tolist()
         ext_data = data['ext_data'][0]
         ext_image = ext_data['image']
         ext_polys = ext_data['polys']
+        ext_texts = ext_data['texts']
         ext_ignores = ext_data['ignore_tags']
         indexs = [i for i in range(len(ext_ignores)) if not ext_ignores[i]]
@@ -53,7 +55,7 @@ class CopyPaste(object):
         src_img = cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB)
         ext_image = cv2.cvtColor(ext_image, cv2.COLOR_BGR2RGB)
         src_img = Image.fromarray(src_img).convert('RGBA')
-        for poly, tag in zip(select_polys, select_ignores):
+        for idx, poly, tag in zip(select_idxs, select_polys, select_ignores):
             box_img = get_rotate_crop_image(ext_image, poly)

             src_img, box = self.paste_img(src_img, box_img, src_polys)
@@ -62,6 +64,7 @@ class CopyPaste(object):
             for _ in range(len(box), point_num):
                 box.append(box[-1])
             src_polys.append(box)
+            src_texts.append(ext_texts[idx])
             src_ignores.append(tag)
         src_img = cv2.cvtColor(np.array(src_img), cv2.COLOR_RGB2BGR)
         h, w = src_img.shape[:2]
@@ -70,6 +73,7 @@ class CopyPaste(object):
         src_polys[:, :, 1] = np.clip(src_polys[:, :, 1], 0, h)
         data['image'] = src_img
         data['polys'] = src_polys
+        data['texts'] = src_texts
         data['ignore_tags'] = np.array(src_ignores)
         return data
......
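This hunk threads the text transcriptions through the augmentation so that `polys`, `texts`, and `ignore_tags` stay index-aligned after boxes are pasted in. A minimal sanity check for that invariant (a sketch; `check_alignment` is a hypothetical helper, not part of the diff):

```python
def check_alignment(data):
    # Every pasted polygon must carry a matching transcription and ignore
    # flag, or downstream end-to-end training would mis-pair labels.
    assert len(data['polys']) == len(data['texts']) == len(data['ignore_tags'])
    return data
```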
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import Levenshtein
+from rapidfuzz.distance import Levenshtein
 import string
@@ -46,8 +46,7 @@ class RecMetric(object):
             if self.is_filter:
                 pred = self._normalize_text(pred)
                 target = self._normalize_text(target)
-            norm_edit_dis += Levenshtein.distance(pred, target) / max(
-                len(pred), len(target), 1)
+            norm_edit_dis += Levenshtein.normalized_distance(pred, target)
             if pred == target:
                 correct_num += 1
             all_num += 1
......
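The two formulations agree: rapidfuzz normalizes the raw edit distance by the maximum possible distance, which for plain Levenshtein is the length of the longer string, and 0/0 is defined as 0 for two empty strings. A quick sketch of the equivalence, assuming `rapidfuzz` is installed:

```python
from rapidfuzz.distance import Levenshtein

def norm_edit_dis(pred: str, target: str) -> float:
    # Same value as Levenshtein.distance(pred, target) / max(len(pred), len(target), 1)
    return Levenshtein.normalized_distance(pred, target)

assert norm_edit_dis("hello", "hello") == 0.0
assert norm_edit_dis("abc", "") == 1.0  # 3 edits / max length 3
assert norm_edit_dis("", "") == 0.0     # both empty: distance defined as 0
```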
@@ -194,6 +194,9 @@ def save_model(model,
     _mkdir_if_not_exist(model_path, logger)
     model_prefix = os.path.join(model_path, prefix)
     paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')
+
+    is_nlp_model = config['Architecture']["model_type"] == 'kie' and config[
+        "Architecture"]["algorithm"] not in ["SDMGR"]
     if is_nlp_model is not True:
         paddle.save(model.state_dict(), model_prefix + '.pdparams')
         metric_prefix = model_prefix
......
@@ -106,9 +106,9 @@ PP-Structure Series Model List (Updating)

 |model name|description|model size|download|
 | --- | --- | --- | --- |
-|ch_PP-OCRv3_det_slim|[New] Slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
-|ch_PP-OCRv3_rec_slim|[New] Slim quantization with distillation lightweight model, supporting Chinese, English text recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_PP-OCRv3_det|[New] Lightweight model, supporting Chinese, English, multilingual text detection| 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
+|ch_PP-OCRv3_rec|[New] Lightweight model, supporting Chinese, English, multilingual text recognition| 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |

 ### 7.3 KIE model
......
@@ -120,9 +120,9 @@ PP-Structure系列模型列表(更新中)

 |model name|description|model size|download|
 | --- | --- | --- | --- |
-|ch_PP-OCRv3_det_slim|[New] Slim-quantized + distilled ultra-lightweight model, supporting Chinese, English and multilingual text detection| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar)|
-|ch_PP-OCRv3_rec_slim|[New] Slim-quantized ultra-lightweight model, supporting Chinese, English and digit recognition| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on the PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_PP-OCRv3_det|[New] Ultra-lightweight model, supporting Chinese, English and multilingual text detection| 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
+|ch_PP-OCRv3_rec|[New] Ultra-lightweight model, supporting Chinese, English and digit recognition|12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |

 <a name="73"></a>
......
@@ -4,7 +4,7 @@
   - [1.1 Layout analysis + table recognition](#1.1)
   - [1.2 Layout analysis](#1.2)
   - [1.3 Table recognition](#1.3)
-- [2. DocVQA](#2)
+- [2. Key information extraction](#2)

 <a name="1"></a>
 ## 1. Structure
@@ -16,23 +16,26 @@ cd ppstructure

 Download the models:
 ```bash
 mkdir inference && cd inference
-# Download the PP-OCRv2 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-# Download the PP-OCRv2 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
-# Download the ultra-lightweight English table structure model and unzip it
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+# Download the PP-Structurev2 layout analysis model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
+# Download the PP-OCRv3 text detection model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
+# Download the PP-OCRv3 text recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
+# Download the PP-Structurev2 table recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
 cd ..
 ```

 <a name="1.1"></a>
 ### 1.1 Layout analysis + table recognition
 ```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
-                          --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
-                          --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+                          --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
+                          --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
+                          --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
                           --image_dir=./docs/table/1.png \
                           --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
-                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
                           --output=../output \
                          --vis_font_path=../doc/fonts/simfang.ttf
 ```
@@ -41,19 +44,23 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \

 <a name="1.2"></a>
 ### 1.2 Layout analysis
 ```bash
-python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
+python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
+                          --image_dir=./docs/table/1.png \
+                          --output=../output \
+                          --table=false \
+                          --ocr=false
 ```
 After the run completes, each image gets a same-named directory under the `structure` directory inside the directory specified by `output`. Every picture region is cropped and saved, with each file named by the region's coordinates in the image. The layout analysis results are stored in the `res.txt` file.

 <a name="1.3"></a>
 ### 1.3 Table recognition
 ```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
-                          --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
-                          --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+                          --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
+                          --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
                           --image_dir=./docs/table/table.jpg \
                           --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
-                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
                           --output=../output \
                           --vis_font_path=../doc/fonts/simfang.ttf \
                           --layout=false
@@ -61,20 +68,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \

 After the run completes, each image gets a same-named directory under the `structure` directory inside the directory specified by `output`; each table is stored as an Excel file whose name is `[0,0,img_h,img_w]`.

 <a name="2"></a>
-## 2. DocVQA
+## 2. Key information extraction

 ```bash
 cd ppstructure
-# Download the models
 mkdir inference && cd inference
-# Download the SER xfun model and unzip it
-wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
+# Download the SER XFUND model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
 cd ..
-python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
-                          --mode=kie \
-                          --image_dir=kie/images/input/zh_val_0.jpg \
-                          --vis_font_path=../doc/fonts/simfang.ttf
+python3 kie/predict_kie_token_ser.py \
+  --kie_algorithm=LayoutXLM \
+  --ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
+  --image_dir=./docs/kie/input/zh_val_42.jpg \
+  --ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
+  --vis_font_path=../doc/fonts/simfang.ttf \
+  --ocr_order_method="tb-yx"
 ```
 After the run completes, the visualized image for each input is saved in the `kie` directory inside the directory specified by `output`, with the same name as the input image.
@@ -4,7 +4,7 @@
   - [1.1 layout analysis + table recognition](#1.1)
   - [1.2 layout analysis](#1.2)
   - [1.3 table recognition](#1.3)
-- [2. DocVQA](#2)
+- [2. KIE](#2)

 <a name="1"></a>
 ## 1. Structure
@@ -18,23 +18,26 @@ download model
 ```bash
 mkdir inference && cd inference
-# Download the PP-OCRv2 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-# Download the PP-OCRv2 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
-# Download the ultra-lightweight English table structure model and unzip it
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+# Download the PP-Structurev2 layout analysis model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_layout_infer.tar && tar xf picodet_lcnet_x1_0_layout_infer.tar
+# Download the PP-OCRv3 text detection model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
+# Download the PP-OCRv3 text recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
+# Download the PP-Structurev2 table recognition model and unzip it
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
 cd ..
 ```

 <a name="1.1"></a>
 ### 1.1 layout analysis + table recognition
 ```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
-                          --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
-                          --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+                          --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
+                          --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
+                          --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
                           --image_dir=./docs/table/1.png \
                           --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
-                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
                           --output=../output \
                           --vis_font_path=../doc/fonts/simfang.ttf
 ```
@@ -43,19 +46,23 @@ After the operation is completed, each image will have a directory with the same

 <a name="1.2"></a>
 ### 1.2 layout analysis
 ```bash
-python3 predict_system.py --image_dir=./docs/table/1.png --table=false --ocr=false --output=../output/
+python3 predict_system.py --layout_model_dir=inference/picodet_lcnet_x1_0_layout_infer \
+                          --image_dir=./docs/table/1.png \
+                          --output=../output \
+                          --table=false \
+                          --ocr=false
 ```
 After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each picture region in the image is cropped and saved, and the filename of each picture region is its coordinates in the image. The layout analysis results are stored in the `res.txt` file.

 <a name="1.3"></a>
 ### 1.3 table recognition
 ```bash
-python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv2_det_slim_quant_infer \
-                          --rec_model_dir=inference/ch_PP-OCRv2_rec_slim_quant_infer \
-                          --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer \
+python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+                          --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
+                          --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
                           --image_dir=./docs/table/table.jpg \
                           --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
-                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+                          --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
                           --output=../output \
                           --vis_font_path=../doc/fonts/simfang.ttf \
                           --layout=false
@@ -63,19 +70,22 @@ python3 predict_system.py --det_model_dir=inference/ch_PP-OCRv3_det_infer \

 After the operation is completed, each image will have a directory with the same name in the `structure` directory under the directory specified by the `output` field. Each table in the image is stored as an Excel file whose filename is its coordinates in the image.

 <a name="2"></a>
-## 2. DocVQA
+## 2. KIE

 ```bash
 cd ppstructure
-# download model
 mkdir inference && cd inference
-wget https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar && tar xf PP-Layout_v1.0_ser_pretrained.tar
+# download model
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/ser_vi_layoutxlm_xfund_infer.tar && tar -xf ser_vi_layoutxlm_xfund_infer.tar
 cd ..
-python3 predict_system.py --model_name_or_path=kie/PP-Layout_v1.0_ser_pretrained/ \
-                          --mode=kie \
-                          --image_dir=kie/images/input/zh_val_0.jpg \
-                          --vis_font_path=../doc/fonts/simfang.ttf
+python3 kie/predict_kie_token_ser.py \
+  --kie_algorithm=LayoutXLM \
+  --ser_model_dir=../inference/ser_vi_layoutxlm_xfund_infer \
+  --image_dir=./docs/kie/input/zh_val_42.jpg \
+  --ser_dict_path=../ppocr/utils/dict/kie_dict/xfund_class_list.txt \
+  --vis_font_path=../doc/fonts/simfang.ttf \
+  --ocr_order_method="tb-yx"
 ```
 After the operation is completed, the visualized image for each input is saved in the `kie` directory under the directory specified by the `output` field, with the same name as the input image.
- [Quick installation](#快速安装)
  - [1. PaddlePaddle and PaddleOCR](#1-paddlepaddle-和-paddleocr)
  - [2. Install other dependencies](#2-安装其他依赖)
-    - [2.1 Dependencies for VQA](#21--kie所需依赖)
+    - [2.1 Dependencies for KIE](#21-kie所需依赖)

# Quick installation

@@ -11,16 +11,11 @@

 ## 2. Install other dependencies

-### 2.1 Dependencies for VQA
+### 2.1 Dependencies for KIE

 * paddleocr

 ```bash
-pip3 install paddleocr
-```
-
-* PaddleNLP
-```bash
-git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
-cd PaddleNLP
-pip3 install -e .
+pip install paddleocr -U
+pip install -r ./kie/requirements.txt
 ```
@@ -2,7 +2,7 @@

- [1. PaddlePaddle and PaddleOCR](#1)
- [2. Install other dependencies](#2)
-  - [2.1 VQA](#21)
+  - [2.1 KIE](#21)

 <a name="1"></a>
@@ -14,17 +14,11 @@ Please refer to [PaddleOCR installation documentation](../../doc/doc_en/installa

 ## 2. Install other dependencies

 <a name="21"></a>
-### 2.1 VQA
+### 2.1 KIE

 * paddleocr

 ```bash
-pip3 install paddleocr
-```
-
-* PaddleNLP
-```bash
-git clone https://github.com/PaddlePaddle/PaddleNLP -b develop
-cd PaddleNLP
-pip3 install -e .
+pip install paddleocr -U
+pip install -r ./kie/requirements.txt
 ```
@@ -28,8 +28,8 @@

 |model name|description|inference model size|download|
 | --- | --- | --- | --- |
-|en_ppocr_mobile_v2.0_table_det|Text detection for English table scenes, trained on the PubLayNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
-|en_ppocr_mobile_v2.0_table_rec|Text recognition for English table scenes, trained on the PubLayNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |
+|en_ppocr_mobile_v2.0_table_det|Text detection for English table scenes, trained on the PubTabNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_det_train.tar) |
+|en_ppocr_mobile_v2.0_table_rec|Text recognition for English table scenes, trained on the PubTabNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_rec_train.tar) |

 To use other OCR models, download them from the [PP-OCR model_list](../../doc/doc_ch/models_list.md), or point the `det_model_dir` and `rec_model_dir` fields at your own trained models.

@@ -40,7 +40,7 @@

 | --- | --- | --- | --- |
 |en_ppocr_mobile_v2.0_table_structure|English table recognition model trained on the PubTabNet dataset based on TableRec-RARE|6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
 |en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on the PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on the PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |

 <a name="3"></a>
......
@@ -39,7 +39,7 @@ If you need to use other OCR models, you can download the model in [PP-OCR model

 | --- |-----------------------------------------------------------------------------| --- | --- |
 |en_ppocr_mobile_v2.0_table_structure| English table recognition model trained on PubTabNet dataset based on TableRec-RARE |6.8M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar) |
 |en_ppstructure_mobile_v2.0_SLANet|English table recognition model trained on PubTabNet dataset based on SLANet|9.2M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar) |
-|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model trained on PubTabNet dataset based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |
+|ch_ppstructure_mobile_v2.0_SLANet|Chinese table recognition model based on SLANet|9.3M|[inference model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_train.tar) |

 <a name="3"></a>
 ## 3. KIE
......
@@ -7,18 +7,22 @@
   - [2.1.2 Layout analysis + table recognition](#212-版面分析表格识别)
   - [2.1.3 Layout analysis](#213-版面分析)
   - [2.1.4 Table recognition](#214-表格识别)
-  - [2.1.5 DocVQA](#215-dockie)
+  - [2.1.5 Key information extraction](#215-关键信息抽取)
   - [2.1.6 Layout recovery](#216-版面恢复)
 - [2.2 Use by code](#22-代码使用)
-  - [2.2.1 Image orientation classification, layout analysis and table recognition](#221-图像方向分类版面分析表格识别)
+  - [2.2.1 Image orientation classification + layout analysis + table recognition](#221-图像方向分类版面分析表格识别)
   - [2.2.2 Layout analysis + table recognition](#222-版面分析表格识别)
   - [2.2.3 Layout analysis](#223-版面分析)
   - [2.2.4 Table recognition](#224-表格识别)
-  - [2.2.5 DocVQA](#225-dockie)
+  - [2.2.5 Key information extraction](#225-关键信息抽取)
   - [2.2.6 Layout recovery](#226-版面恢复)
 - [2.3 Result description](#23-返回结果说明)
-  - [2.3.1 Layout analysi + table recognition](#231-版面分析表格识别)
-  - [2.3.2 DocVQA](#232-dockie)
+  - [2.3.1 Layout analysis + table recognition](#231-版面分析表格识别)
+  - [2.3.2 Key information extraction](#232-关键信息抽取)
 - [2.4 Parameter description](#24-参数说明)

@@ -28,8 +32,8 @@
 ```bash
 # Install paddleocr; version 2.6 or later is recommended
 pip3 install "paddleocr>=2.6"
-# Install the DocVQA dependency package paddlenlp (skip if you do not need the DocVQA function)
-pip3 install paddlenlp
+# Install the key information extraction dependencies (skip if you do not need the KIE function)
+pip install -r kie/requirements.txt
 # Install the image orientation classification dependency package paddleclas (skip if you do not need image orientation classification)
 pip3 install paddleclas
 ```

@@ -66,9 +70,8 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur

 <a name="215"></a>
-#### 2.1.5 DocVQA
+#### 2.1.5 Key information extraction

-Please refer to: [Document visual question answering](../kie/README.md)
+Please refer to: [Key information extraction tutorial](../kie/README_ch.md)

 <a name="216"></a>
@@ -184,9 +187,9 @@ for line in result:
 ```

 <a name="225"></a>
-#### 2.2.5 DocVQA
+#### 2.2.5 Key information extraction

-Please refer to: [Document visual question answering](../kie/README.md)
+Please refer to: [Key information extraction tutorial](../kie/README_ch.md)

 <a name="226"></a>
@@ -249,9 +252,9 @@ The fields of the dict are described as follows
 ```

 <a name="232"></a>
-#### 2.3.2 DocVQA
+#### 2.3.2 Key information extraction

-Please refer to: [Document visual question answering](../kie/README.md)
+Please refer to: [Key information extraction tutorial](../kie/README_ch.md)

 <a name="24"></a>
 ### 2.4 Parameter description
......
@@ -7,7 +7,7 @@
   - [2.1.2 layout analysis + table recognition](#212-layout-analysis--table-recognition)
   - [2.1.3 layout analysis](#213-layout-analysis)
   - [2.1.4 table recognition](#214-table-recognition)
-  - [2.1.5 DocVQA](#215-dockie)
+  - [2.1.5 Key Information Extraction](#215-Key-Information-Extraction)
   - [2.1.6 layout recovery](#216-layout-recovery)
 - [2.2 Use by code](#22-use-by-code)
   - [2.2.1 image orientation + layout analysis + table recognition](#221-image-orientation--layout-analysis--table-recognition)
@@ -15,10 +15,11 @@
   - [2.2.3 layout analysis](#223-layout-analysis)
   - [2.2.4 table recognition](#224-table-recognition)
-  - [2.2.5 DocVQA](#225-dockie)
+  - [2.2.5 Key Information Extraction](#225-Key-Information-Extraction)
   - [2.2.6 layout recovery](#226-layout-recovery)
 - [2.3 Result description](#23-result-description)
   - [2.3.1 layout analysis + table recognition](#231-layout-analysis--table-recognition)
-  - [2.3.2 DocVQA](#232-dockie)
+  - [2.3.2 Key Information Extraction](#232-Key-Information-Extraction)
 - [2.4 Parameter Description](#24-parameter-description)

@@ -28,14 +29,14 @@
 ```bash
 # Install paddleocr, version 2.6 is recommended
 pip3 install "paddleocr>=2.6"
-# Install the DocVQA dependency package paddlenlp (skip if you do not use DocVQA)
-pip3 install paddlenlp
+# Install the KIE dependency packages (skip if you do not use KIE)
+pip install -r kie/requirements.txt
 # Install the image direction classification dependency package paddleclas (skip if you do not use image direction classification)
 pip3 install paddleclas
 ```

 <a name="2"></a>
 ## 2. Use

 <a name="21"></a>
@@ -66,9 +67,9 @@ paddleocr --image_dir=PaddleOCR/ppstructure/docs/table/table.jpg --type=structur
 ```

 <a name="215"></a>
-#### 2.1.5 DocVQA
+#### 2.1.5 Key Information Extraction

-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .

 <a name="216"></a>
 #### 2.1.6 layout recovery
@@ -130,7 +131,7 @@ for line in result:

 from PIL import Image

-font_path = 'PaddleOCR/doc/fonts/simfang.ttf'  # PaddleOCR下提供字体包
+font_path = 'PaddleOCR/doc/fonts/simfang.ttf'  # font provided in PaddleOCR
 image = Image.open(img_path).convert('RGB')
 im_show = draw_structure_result(image, result, font_path=font_path)
 im_show = Image.fromarray(im_show)
@@ -180,9 +181,9 @@ for line in result:
 ```

 <a name="225"></a>
-#### 2.2.5 DocVQA
+#### 2.2.5 Key Information Extraction

-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .

 <a name="226"></a>
 #### 2.2.6 layout recovery
@@ -244,9 +245,9 @@ After the recognition is completed, each image will have a directory with the sa
 ```

 <a name="232"></a>
-#### 2.3.2 DocVQA
+#### 2.3.2 Key Information Extraction

-Please refer to: [Documentation Visual Q&A](../kie/README.md) .
+Please refer to: [Key Information Extraction](../kie/README.md) .

 <a name="24"></a>
 ### 2.4 Parameter Description
......
@@ -246,7 +246,7 @@ For training, evaluation and inference tutorial for text recognition models, ple

 If you want to finish the KIE tasks in your own scenario and don't know what to prepare, please refer to [End-to-end doc](../../doc/doc_en/recognition.md).

-For how to complete the key information extraction task in your own scenario, please refer to: [Guide to End-to-end KIE](./how_to_do_kie_en.md)
+To complete the key information extraction task in your own scenario, from data preparation to model selection, please refer to: [Guide to End-to-end KIE](./how_to_do_kie_en.md)

 ## 5. Reference
......
@@ -20,7 +20,7 @@ from shapely.geometry import Polygon
 import numpy as np
 from collections import defaultdict
 import operator
-import Levenshtein
+from rapidfuzz.distance import Levenshtein
 import argparse
 import json
 import copy
......
@@ -59,16 +59,16 @@ cd PaddleOCR/ppstructure

 # download model
 mkdir inference && cd inference
 # Download the PP-OCRv3 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
 # Download the PP-OCRv3 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
 # Download the PP-Structurev2 table recognition model and unzip it
 wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
 cd ..
 # run
 python3.7 table/predict_table.py \
-    --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
-    --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+    --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+    --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
     --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
     --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
     --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
......
@@ -64,16 +64,16 @@ cd PaddleOCR/ppstructure

 # Download models
 mkdir inference && cd inference
 # Download the PP-OCRv3 text detection model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_det_infer.tar
 # Download the PP-OCRv3 text recognition model and unzip it
-wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar
 # Download the PP-Structurev2 table recognition model and unzip it
 wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar && tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
 cd ..
 # Run table recognition
 python table/predict_table.py \
-    --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
-    --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
+    --det_model_dir=inference/ch_PP-OCRv3_det_infer \
+    --rec_model_dir=inference/ch_PP-OCRv3_rec_infer \
     --table_model_dir=inference/ch_ppstructure_mobile_v2.0_SLANet_infer \
     --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
     --table_char_dict_path=../ppocr/utils/dict/table_structure_dict_ch.txt \
......
@@ -9,7 +9,7 @@
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # Apache 2.0 License for more details.

-import distance
+from rapidfuzz.distance import Levenshtein
 from apted import APTED, Config
 from apted.helpers import Tree
 from lxml import etree, html
@@ -39,17 +39,6 @@ class TableTree(Tree):

 class CustomConfig(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         #print(node1.tag)
@@ -58,23 +47,12 @@ class CustomConfig(Config):
         if node1.tag == 'td':
             if node1.content or node2.content:
                 #print(node1.content, )
-                return self.normalized_distance(node1.content, node2.content)
+                return Levenshtein.normalized_distance(node1.content, node2.content)
         return 0.

 class CustomConfig_del_short(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -90,21 +68,10 @@ class CustomConfig_del_short(Config):
             node1_content = ['####']
         if len(node2_content) < 3:
             node2_content = ['####']
-        return self.normalized_distance(node1_content, node2_content)
+        return Levenshtein.normalized_distance(node1_content, node2_content)
         return 0.

 class CustomConfig_del_block(Config):
-    @staticmethod
-    def maximum(*sequences):
-        """Get maximum possible value
-        """
-        return max(map(len, sequences))
-
-    def normalized_distance(self, *sequences):
-        """Get distance from 0 to 1
-        """
-        return float(distance.levenshtein(*sequences)) / self.maximum(*sequences)
-
     def rename(self, node1, node2):
         """Compares attributes of trees"""
         if (node1.tag != node2.tag) or (node1.colspan != node2.colspan) or (node1.rowspan != node2.rowspan):
@@ -120,7 +87,7 @@ class CustomConfig_del_block(Config):
         while ' ' in node2_content:
             print(node2_content.index(' '))
             node2_content.pop(node2_content.index(' '))
-        return self.normalized_distance(node1_content, node2_content)
+        return Levenshtein.normalized_distance(node1_content, node2_content)
         return 0.

 class TEDS(object):
......
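The three deleted `maximum`/`normalized_distance` helper pairs can be dropped because rapidfuzz's `Levenshtein.normalized_distance` accepts arbitrary sequences, not just strings, so it applies directly to the token lists that `rename` compares. A small sketch of that behaviour, assuming rapidfuzz is installed:

```python
from rapidfuzz.distance import Levenshtein

# Token lists behave just like strings: the result equals
# float(distance.levenshtein(a, b)) / max(len(a), len(b)) from the removed helpers.
a = ['a', 'b', 'c']
b = ['a', 'x']
print(Levenshtein.normalized_distance(a, b))  # 2 edits / max length 3 ~= 0.667
```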
@@ -38,7 +38,7 @@ def init_args():
     parser.add_argument(
         "--layout_dict_path",
         type=str,
-        default="../ppocr/utils/dict/layout_dict/layout_pubalynet_dict.txt")
+        default="../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt")
     parser.add_argument(
         "--layout_score_threshold",
         type=float,
......
@@ -120,11 +120,14 @@ def sorted_boxes(dt_boxes):
     _boxes = list(sorted_boxes)

     for i in range(num_boxes - 1):
-        if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
-                (_boxes[i + 1][0][0] < _boxes[i][0][0]):
-            tmp = _boxes[i]
-            _boxes[i] = _boxes[i + 1]
-            _boxes[i + 1] = tmp
+        for j in range(i, 0, -1):
+            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
+                    (_boxes[j + 1][0][0] < _boxes[j][0][0]):
+                tmp = _boxes[j]
+                _boxes[j] = _boxes[j + 1]
+                _boxes[j + 1] = tmp
+            else:
+                break
     return _boxes
......
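The old single adjacent-swap pass could fix at most one inversion per line; the new inner loop bubbles each box left past every same-line neighbour that starts further right. For reference, the whole function as it reads after the change (a sketch: the pre-sort line is reconstructed from the surrounding upstream helper, and `dt_boxes` is assumed to be an (N, 4, 2) array of quadrilateral boxes):

```python
def sorted_boxes(dt_boxes):
    """Sort text boxes top-to-bottom, then left-to-right within each text line.

    Boxes whose top-left y coordinates differ by less than 10 px are treated
    as lying on the same line and are ordered by their x coordinate.
    """
    num_boxes = dt_boxes.shape[0]
    _boxes = list(sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])))
    for i in range(num_boxes - 1):
        # Bubble box i+1 leftwards while it shares a line with its left
        # neighbour but starts at a smaller x coordinate.
        for j in range(i, 0, -1):
            if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \
                    _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes
```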
@@ -225,23 +225,24 @@ def create_predictor(args, mode, logger):
                 min_subgraph_size,  # skip the minimum trt subgraph
                 use_calib_mode=False)
             # collect shape
             if args.shape_info_filename is not None:
                 if not os.path.exists(args.shape_info_filename):
-                    config.collect_shape_range_info(args.shape_info_filename)
+                    config.collect_shape_range_info(
+                        args.shape_info_filename)
                     logger.info(
                         f"collect dynamic shape info into : {args.shape_info_filename}"
                     )
                 else:
                     logger.info(
                         f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
                     )
                 config.enable_tuned_tensorrt_dynamic_shape(
                     args.shape_info_filename, True)
             else:
                 logger.info(
                     f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dynamic shape tuning"
                 )
         elif args.use_xpu:
             config.enable_xpu(10 * 1024 * 1024)
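The flow this hunk reorganizes is two-phase: a first run with `--shape_info_filename` profiles real inputs and records the min/max/opt tensor shapes, and later runs load that file to build the TensorRT engine with tuned dynamic shapes. A minimal sketch of the same pattern against the Paddle Inference API (model paths are placeholders):

```python
import os
from paddle.inference import Config

config = Config('inference/model.pdmodel', 'inference/model.pdiparams')  # placeholder paths

shape_file = 'shape.txt'
if not os.path.exists(shape_file):
    # First run: profile real inputs and record their shape ranges.
    config.collect_shape_range_info(shape_file)
else:
    # Later runs: reuse the recorded ranges for TensorRT dynamic shape.
    config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
```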
@@ -549,7 +550,7 @@ def text_visual(texts,
 def base64_to_cv2(b64str):
     import base64
     data = base64.b64decode(b64str.encode('utf8'))
-    data = np.fromstring(data, np.uint8)
+    data = np.frombuffer(data, np.uint8)
     data = cv2.imdecode(data, cv2.IMREAD_COLOR)
     return data
......
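`np.fromstring` has been deprecated for binary input since NumPy 1.14; `np.frombuffer` reads the same bytes without the deprecation warning and without copying. A sketch of the round trip (the payload here is an arbitrary stand-in for a real base64-encoded image):

```python
import base64
import numpy as np

b64str = base64.b64encode(b"\x89PNG...").decode('utf8')  # stand-in payload
raw = base64.b64decode(b64str.encode('utf8'))
buf = np.frombuffer(raw, dtype=np.uint8)  # zero-copy, unlike the deprecated np.fromstring
# buf is then handed to cv2.imdecode(buf, cv2.IMREAD_COLOR) as in the diff
print(buf[:4])
```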
@@ -88,6 +88,29 @@ def draw_kie_result(batch, node, idx_to_cls, count):
     cv2.imwrite(save_path, vis_img)
     logger.info("The Kie Image saved in {}".format(save_path))

+def write_kie_result(fout, node, data):
+    """
+    Write the inference result to the output file, sorted by the predicted
+    label of each line. The format keeps the same as the input, with an
+    additional score attribute.
+    """
+    import json
+    label = data['label']
+    annotations = json.loads(label)
+    max_value, max_idx = paddle.max(node, -1), paddle.argmax(node, -1)
+    node_pred_label = max_idx.numpy().tolist()
+    node_pred_score = max_value.numpy().tolist()
+    res = []
+    for i, label in enumerate(node_pred_label):
+        pred_score = '{:.2f}'.format(node_pred_score[i])
+        pred_res = {
+            'label': label,
+            'transcription': annotations[i]['transcription'],
+            'score': pred_score,
+            'points': annotations[i]['points'],
+        }
+        res.append(pred_res)
+    res.sort(key=lambda x: x['label'])
+    fout.writelines([json.dumps(res, ensure_ascii=False) + '\n'])
+
 def main():
     global_config = config['Global']
@@ -114,7 +137,7 @@ def main():
     warmup_times = 0
     count_t = []
-    with open(save_res_path, "wb") as fout:
+    with open(save_res_path, "w") as fout:
         with open(config['Global']['infer_img'], "rb") as f:
             lines = f.readlines()
             for index, data_line in enumerate(lines):
@@ -139,6 +162,8 @@ def main():
                 node = F.softmax(node, -1)
                 count_t.append(time.time() - st)
                 draw_kie_result(batch, node, idx_to_cls, index)
+                write_kie_result(fout, node, data)
+    fout.close()
     logger.info("success!")
     logger.info("It took {} s for predict {} images.".format(
         np.sum(count_t), len(count_t)))
......
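Each processed image appends one JSON line to the result file: a list of regions sorted by predicted label, each carrying `label`, `transcription`, `score`, and `points`. Reading it back is straightforward (the path is whatever `save_res_path` resolves to; the name here is hypothetical):

```python
import json

with open('output/kie_results.txt') as f:  # hypothetical save_res_path
    for img_idx, line in enumerate(f):
        regions = json.loads(line)  # one list of region dicts per input image
        for region in regions:
            print(img_idx, region['label'], region['transcription'], region['score'])
```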
@@ -39,7 +39,7 @@ from ppocr.utils.visual import draw_re_results
 from ppocr.utils.logging import get_logger
 from ppocr.utils.utility import get_image_file_list, load_vqa_bio_label_maps, print_dict
 from tools.program import ArgsParser, load_config, merge_config
-from tools.infer_vqa_token_ser import SerPredictor
+from tools.infer_kie_token_ser import SerPredictor

 class ReArgsParser(ArgsParser):
......