Commit 01ebe4a5 authored by qq_25193841

Merge remote-tracking branch 'origin/release/2.5' into release2.5

@@ -1149,6 +1149,9 @@ class MainWindow(QMainWindow):
             for box in self.result_dic:
                 trans_dic = {"label": box[1][0], "points": box[0], "difficult": False}
                 if self.kie_mode:
-                    trans_dic.update({"key_cls": "None"})
+                    if len(box) == 3:
+                        trans_dic.update({"key_cls": box[2]})
+                    else:
+                        trans_dic.update({"key_cls": "None"})
                 if trans_dic["label"] == "" and mode == 'Auto':
                     continue
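To make the exported shape concrete, here is a minimal sketch of the annotation entry this produces in KIE mode. The `box` value is illustrative; the field names come from the diff above:

```python
# Illustrative 3-element box as PPOCRLabel stores it in KIE mode:
# [points, (text, score), key_cls]
box = [[[10, 20], [200, 20], [200, 60], [10, 60]], ("Invoice No. 123", 0.98), "HEADER"]

trans_dic = {"label": box[1][0], "points": box[0], "difficult": False}
if len(box) == 3:
    trans_dic.update({"key_cls": box[2]})  # key class captured during annotation
else:
    trans_dic.update({"key_cls": "None"})  # legacy 2-element results fall back to "None"

print(trans_dic["label"], trans_dic["key_cls"])  # Invoice No. 123 HEADER
```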
@@ -2047,6 +2050,7 @@ class MainWindow(QMainWindow):
         rec_flag = 0
         for shape in self.canvas.shapes:
             box = [[int(p.x()), int(p.y())] for p in shape.points]
+            kie_cls = shape.key_cls
             if len(box) > 4:
                 box = self.gen_quad_from_poly(np.array(box))
@@ -2062,15 +2066,25 @@ class MainWindow(QMainWindow):
                 if shape.line_color == DEFAULT_LOCK_COLOR:
                     shape.label = result[0][0]
                     result.insert(0, box)
+                    if self.kie_mode:
+                        result.append(kie_cls)
                     self.result_dic_locked.append(result)
                 else:
                     result.insert(0, box)
+                    if self.kie_mode:
+                        result.append(kie_cls)
                     self.result_dic.append(result)
             else:
                 print('Can not recognise the box')
                 if shape.line_color == DEFAULT_LOCK_COLOR:
                     shape.label = result[0][0]
-                    self.result_dic_locked.append([box, (self.noLabelText, 0)])
+                    if self.kie_mode:
+                        self.result_dic_locked.append([box, (self.noLabelText, 0), kie_cls])
+                    else:
+                        self.result_dic_locked.append([box, (self.noLabelText, 0)])
                 else:
-                    self.result_dic.append([box, (self.noLabelText, 0)])
+                    if self.kie_mode:
+                        self.result_dic.append([box, (self.noLabelText, 0), kie_cls])
+                    else:
+                        self.result_dic.append([box, (self.noLabelText, 0)])
             try:
...
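Together with the previous hunk, the net effect is that every entry appended to `result_dic` (or `result_dic_locked`) carries the shape's key class as a trailing element when KIE mode is on. A hedged sketch of the two entry shapes, with illustrative values:

```python
# Without KIE mode: [box, (text, score)]
entry = [[[14, 21], [230, 21], [230, 58], [14, 58]], ("some text", 0.97)]

# With KIE mode the key class is appended, giving the 3-element form
# that the export code above detects via len(box) == 3:
entry_kie = entry + ["QUESTION"]
assert len(entry_kie) == 3
```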
@@ -15,8 +15,7 @@ PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field,
 - 2022.05: Add table annotations, follow `2.2 Table Annotations` for more information (by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
 - 2022.02: (by [PeterH0323](https://github.com/peterh0323))
     - Add KIE Mode by using `--kie`, for [detection + identification + keyword extraction] labeling.
-    - Improve user experience: prompt for the number of files and labels, optimize interaction, and fix bugs such as only use CPU when inference
-    - New functions: Support using `C` or `X` to rotate box.
+    - Improve user experience: support using `C` or `X` to rotate box, prompt for the number of files and labels, optimize interaction.
 - 2021.11.17:
     - Support install and start PPOCRLabel through the whl package (by [d2623587501](https://github.com/d2623587501))
     - Dataset segmentation: Divide the annotation file into training, verification and testing parts (refer to section 3.5 below, by [MrCuiHao](https://github.com/MrCuiHao))
...
@@ -107,6 +107,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
     - [Serving](./deploy/pdserving/README.md)
     - [Mobile](./deploy/lite/readme.md)
     - [Paddle2ONNX](./deploy/paddle2onnx/readme.md)
+    - [PaddleCloud](./deploy/paddlecloud/README.md)
     - [Benchmark](./doc/doc_en/benchmark_en.md)
 - [PP-Structure 🔥](./ppstructure/README.md)
     - [Quick Start](./ppstructure/docs/quickstart_en.md)
...
@@ -122,6 +122,7 @@ PaddleOCR aims to build a rich, leading, and practical OCR toolkit that helps
     - [Serving](./deploy/pdserving/README_CN.md)
     - [Mobile](./deploy/lite/readme.md)
     - [Paddle2ONNX model conversion and prediction](./deploy/paddle2onnx/readme.md)
+    - [PaddleCloud deployment tool](./deploy/paddlecloud/README.md)
     - [Benchmark](./doc/doc_ch/benchmark.md)
 - [PP-Structure document analysis 🔥](./ppstructure/README_ch.md)
     - [Quick Start](./ppstructure/docs/quickstart.md)
...
@@ -32,8 +32,8 @@ def read_params():
     # DB params
     cfg.det_db_thresh = 0.3
-    cfg.det_db_box_thresh = 0.5
+    cfg.det_db_box_thresh = 0.6
-    cfg.det_db_unclip_ratio = 1.6
+    cfg.det_db_unclip_ratio = 1.5
     cfg.use_dilation = False
     cfg.det_db_score_mode = "fast"
...
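For context, `det_db_box_thresh` filters out low-confidence DB boxes and `det_db_unclip_ratio` controls how far each detected region is expanded. A minimal sketch of mirroring the new serving defaults when calling PaddleOCR directly from Python, assuming the `paddleocr` package is installed and accepts these keyword overrides (the image path is hypothetical):

```python
from paddleocr import PaddleOCR  # assumes `pip install paddleocr`

# Mirror the updated hub-serving defaults in a direct call.
ocr = PaddleOCR(det_db_thresh=0.3,
                det_db_box_thresh=0.6,    # stricter box-confidence filter (was 0.5)
                det_db_unclip_ratio=1.5)  # slightly tighter box expansion (was 1.6)
result = ocr.ocr("doc/imgs/1.jpg")
```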
# PaddleCloud Deployment Tool

[PaddleCloud](https://github.com/PaddlePaddle/PaddleCloud) is a deployment tool for the PaddlePaddle framework and its model suites. It offers two deployment modes, Dockerized model-suite deployment and Kubernetes cluster deployment, to cover different scenarios and environments. In this chapter we use the OCR standard image and the cloud-native components provided by PaddleCloud to train and deploy the PP-OCRv3 recognition model.

## Advantages of PaddleCloud

<div align="center">
<img src="./images/paddlecloud.png" title="architecture" width="80%" height="80%" alt="">
</div>

- **A complete set of Dockerized model-suite images.** PaddleCloud provides Docker images for the PaddlePaddle model suites. The images bundle all dependencies needed to run the suite examples, are continuously updated, support heterogeneous hardware environments and common CUDA versions, and work out of the box.
- **Rich cloud-native components.** PaddleCloud ships with cloud-native components such as a sample-data caching component, a distributed-training component, and an inference-serving component, with which users can quickly train and deploy on Kubernetes clusters.
- **Powerful self-operation capabilities.** Built on the Kubernetes Operator mechanism, the components provide strong self-operation features: the training component supports multiple architecture modes with distributed fault tolerance and elastic training, and the inference-serving component supports autoscaling and blue-green releases.
- **Optimizations tailored to the PaddlePaddle framework.** Beyond convenient deployment and self-operation, PaddleCloud is specifically optimized for PaddlePaddle, for example by caching sample data to accelerate distributed training jobs and by co-designing the framework and the scheduler to improve cluster GPU utilization.
## 1. Dockerized Deployment of PP-OCRv3

Based on [Tekton](https://github.com/tektoncd/pipeline), PaddleCloud provides continuous image building for the OCR model suite, with CPU and GPU images covering common CUDA versions. See the [PaddleOCR image repository](https://hub.docker.com/repository/docker/paddlecloud/paddleocr) for the full list of images. We have also published the PP-OCRv3 training-and-inference case study on the AI Studio platform; you can try it out via [PP-OCRv3 recognition training and inference in practice](https://aistudio.baidu.com/aistudio/projectdetail/3916206?channelType=0&channel=0).

> **Applicable scenarios**: local test and development environments, single-machine deployments.

### 1.1 Install Docker

If Docker is not yet installed on your machine, follow the [official Docker documentation](https://docs.docker.com/get-docker/) to install it. If you want to use the GPU images, also install the NVIDIA driver and [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker).

**Note**: on Windows you must first enable [WSL2 (Windows Subsystem for Linux)](https://docs.microsoft.com/en-us/windows/wsl/install).

### 1.2 Start the Container

**Using the CPU image**

```bash
# --shm-size=32g guards against running out of shared memory inside the container
docker run --name ppocr -v $PWD:/mnt -p 8888:8888 -it --shm-size=32g paddlecloud/paddleocr:2.5-cpu-efbb0a /bin/bash
```

**Using the GPU image**

```bash
docker run --name ppocr --runtime=nvidia -v $PWD:/mnt -p 8888:8888 -it --shm-size=32g paddlecloud/paddleocr:2.5-gpu-cuda10.2-cudnn7-efbb0a /bin/bash
```

Once inside the container, you can train and deploy the PP-OCRv3 model.
### 1.3 Prepare the Training Data

This tutorial uses the [HierText](https://github.com/google-research-datasets/hiertext) dataset as an example. HierText is the first dataset with hierarchical annotations of text in natural scenes and documents. It contains 11,639 images selected from the Open Images dataset, with high-quality word-level (~1.2M words), line-level, and paragraph-level annotations. Note that its annotation format differs from the PPOCRLabel format, so the labels must be converted accordingly; a [format-converted HierText dataset](https://aistudio.baidu.com/aistudio/datasetdetail/143700) can be downloaded directly from AI Studio.

We have also uploaded the dataset to Baidu Object Storage (BOS). Run the following commands to download and extract it:

```bash
# Download the dataset
$ wget -P /mnt https://paddleflow-public.hkg.bcebos.com/ppocr/hiertext1.tar
# Extract the dataset
$ tar xf /mnt/hiertext1.tar -C /mnt && mv /mnt/hiertext1 /mnt/hiertext
```

After running the commands above, the `/mnt` directory contains the following files:

```
/mnt/hiertext
  └─ train/                     HierText training images
  └─ validation/                HierText validation images
  └─ label_hiertext_train.txt   line-level labels of the training set
  └─ label_hiertext_val.txt     line-level labels of the validation set
```
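The converted labels follow the standard PaddleOCR detection format: each line holds an image path and a JSON list of box annotations, separated by a tab. A small sketch for sanity-checking the first entry (the field names `transcription` and `points` are assumed from that format):

```python
import json

# Inspect the first line of the converted HierText training labels.
with open("/mnt/hiertext/label_hiertext_train.txt", encoding="utf-8") as f:
    image_path, json_str = f.readline().rstrip("\n").split("\t")

annotations = json.loads(json_str)
print(image_path, len(annotations))     # image and number of text lines
print(annotations[0]["transcription"])  # text content of the first line
print(annotations[0]["points"])         # its quadrilateral coordinates
```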
### 1.4 Modify the Configuration File

The PP-OCRv3 model configuration file is located at `/home/PaddleOCR/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml`. The following settings need to be changed:

- Change the training data configuration from:

```yaml
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
```

to:

```yaml
Train:
  dataset:
    name: SimpleDataSet
    data_dir: /mnt/
    label_file_list:
      - /mnt/hiertext/label_hiertext_train.txt
```

- Change the evaluation data configuration from:

```yaml
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
```

to:

```yaml
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /mnt/
    label_file_list:
      - /mnt/hiertext/label_hiertext_val.txt
```
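If you would rather script these edits than modify the file by hand, the following is one possible sketch using PyYAML (assumes `pyyaml` is available in the image; note that a YAML round-trip like this discards comments):

```python
import yaml

cfg_path = "/home/PaddleOCR/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml"
with open(cfg_path) as f:
    cfg = yaml.safe_load(f)

# Point both splits at the mounted HierText data.
cfg["Train"]["dataset"]["data_dir"] = "/mnt/"
cfg["Train"]["dataset"]["label_file_list"] = ["/mnt/hiertext/label_hiertext_train.txt"]
cfg["Eval"]["dataset"]["data_dir"] = "/mnt/"
cfg["Eval"]["dataset"]["label_file_list"] = ["/mnt/hiertext/label_hiertext_val.txt"]

with open(cfg_path, "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)
```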
### 1.5 Start Training

Download PP-OCRv3's distillation pre-trained model and start training as follows:

```bash
# Download the pre-trained model into /home/PaddleOCR/pre_train
$ mkdir /home/PaddleOCR/pre_train
$ wget -P /home/PaddleOCR/pre_train https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
$ tar xf /home/PaddleOCR/pre_train/ch_PP-OCRv3_det_distill_train.tar -C /home/PaddleOCR/pre_train/
```

Start training. The trained model is saved to the `output` directory by default, and the PP-OCRv3 detection pre-trained model is loaded:

```bash
# GPU training is shown here; to train on CPU, add the parameter Global.use_gpu=false
python3 tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.save_model_dir=./output/ Global.pretrained_model=./pre_train/ch_PP-OCRv3_det_distill_train/best_accuracy
```

To run multi-GPU distributed training, use the following command:

```bash
# The trained model is saved to the output directory by default; --gpus '0,1,2,3' trains on GPUs 0, 1, 2, and 3
python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3' tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.save_model_dir=./output/ Global.pretrained_model=./pre_train/ch_PP-OCRv3_det_distill_train/best_accuracy
```

### 1.6 Evaluate the Model

The models saved during training live in the `output` directory and include the following files:

```
best_accuracy.states
best_accuracy.pdparams  # model parameters with the best accuracy
best_accuracy.pdopt     # optimizer state for the best-accuracy model
latest.states
latest.pdparams         # most recently saved model parameters
latest.pdopt            # optimizer state for the latest model
```

`best_accuracy` is the best saved model; it can be evaluated directly:

```bash
# Evaluate the model
cd /home/PaddleOCR/
python3 tools/eval.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=./output/best_accuracy
```
## 2. Cloud Deployment of PP-OCRv3

Based on the Kubernetes Operator mechanism, PaddleCloud provides several powerful cloud-native components, such as a sample-data caching component, a distributed-training component, and an inference-serving component. With them you can quickly run distributed training and deploy model services in the cloud. For more details on the components, see the [PaddleCloud architecture overview](https://github.com/PaddlePaddle/PaddleCloud/blob/main/docs/zh_CN/paddlecloud-overview.md).

> **Applicable scenarios**: Kubernetes-based multi-node deployments.

### 2.1 Install the PaddleCloud Components

**Requirements**

- [Kubernetes v1.16+](https://kubernetes.io/zh/)
- [kubectl](https://kubernetes.io/docs/tasks/tools/)
- [Helm](https://helm.sh/zh/docs/intro/install/)

If you do not have a Kubernetes environment, you can set one up locally with MicroK8s; see the [MicroK8s documentation](https://microk8s.io/docs/getting-started) for details.

Install all components and their dependencies with a single Helm command:

```bash
# Add the PaddleCloud chart repository
$ helm repo add paddlecloud https://paddleflow-public.hkg.bcebos.com/charts
$ helm repo update
# Install the PaddleCloud components
$ helm install pdc paddlecloud/paddlecloud --set tags.all-dep=true --namespace paddlecloud --create-namespace
# Verify that all components started; the installation succeeded once every Pod in the namespace is Running.
$ kubectl get pods -n paddlecloud
NAME                                         READY   STATUS    RESTARTS   AGE
pdc-hostpath-5b6bd6787d-bxvxg                1/1     Running   0          10h
juicefs-csi-node-pkldt                       3/3     Running   0          10h
juicefs-csi-controller-0                     3/3     Running   0          10h
pdc-paddlecloud-sampleset-767bdf6947-pb6zm   1/1     Running   0          10h
pdc-paddlecloud-paddlejob-7cc8b7bfc6-7gqnh   1/1     Running   0          10h
pdc-minio-7cc967669d-824q5                   1/1     Running   0          10h
pdc-redis-master-0                           1/1     Running   0          10h
```

See the [PaddleCloud installation guide](https://github.com/PaddlePaddle/PaddleCloud/blob/main/docs/zh_CN/installation.md) for more installation options.

### 2.2 Cloud-Native Components

<div align="center">
<img src="./images/architecture.jpeg" title="architecture" width="60%" height="60%" alt="">
</div>

- **Data caching component.** Uses JuiceFS as the cache engine to cache remote sample data onto the training cluster's local storage, which greatly accelerates distributed training jobs.
- **Distributed training component.** Supports both the parameter-server (PS) and collective-communication (Collective) architectures, making it easy to run PaddlePaddle distributed training jobs in the cloud.

In the following sections we use these two components to run a PP-OCRv3 training job on a Kubernetes cluster.
### 2.3 Prepare the HierText Dataset

Use the data caching component to prepare the dataset by writing a SampleSet YAML file as follows:

```yaml
# hiertext.yaml
apiVersion: batch.paddlepaddle.org/v1alpha1
kind: SampleSet
metadata:
  name: hiertext
  namespace: paddlecloud
spec:
  partitions: 1
  source:
    uri: bos://paddleflow-public.hkg.bcebos.com/ppocr/hiertext
    secretRef:
      name: none
  secretRef:
    name: data-center
```

Then run the following kubectl commands:

```bash
# Create the hiertext SampleSet
$ kubectl apply -f hiertext.yaml
sampleset.batch.paddlepaddle.org/hiertext created
# Check the dataset status
$ kubectl get sampleset hiertext -n paddlecloud
NAME       TOTAL SIZE   CACHED SIZE   AVAIL SPACE   RUNTIME   PHASE   AGE
hiertext   3.3 GiB      3.2 GiB       12 GiB        1/1       Ready   11m
```
### 2.4 Train the PP-OCRv3 Model

Use the training component to train PP-OCRv3 on the Kubernetes cluster by writing a PaddleJob YAML file as follows:

```yaml
# ppocrv3.yaml
apiVersion: batch.paddlepaddle.org/v1
kind: PaddleJob
metadata:
  name: ppocrv3
  namespace: paddlecloud
spec:
  cleanPodPolicy: OnCompletion
  sampleSetRef:
    name: hiertext
    namespace: paddlecloud
    mountPath: /mnt/hiertext
  worker:
    replicas: 1
    template:
      spec:
        containers:
          - name: ppocrv3
            image: paddlecloud/paddleocr:2.5-gpu-cuda10.2-cudnn7-efbb0a
            command:
              - /bin/bash
            args:
              - "-c"
              - >
                mkdir /home/PaddleOCR/pre_train &&
                wget -P ./pre_train https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar &&
                tar xf ./pre_train/ch_PP-OCRv3_det_distill_train.tar -C ./pre_train/ &&
                python tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o
                Train.dataset.data_dir=/mnt/
                Train.dataset.label_file_list=[\"/mnt/hiertext/label_hiertext_train.txt\"]
                Eval.dataset.data_dir=/mnt/
                Eval.dataset.label_file_list=[\"/mnt/hiertext/label_hiertext_val.txt\"]
                Global.save_model_dir=./output/
                Global.pretrained_model=./pre_train/ch_PP-OCRv3_det_distill_train/best_accuracy
            resources:
              limits:
                nvidia.com/gpu: 1
            volumeMounts:  # mount shared memory to avoid cache errors
              - mountPath: /dev/shm
                name: dshm
        volumes:
          - name: dshm
            emptyDir:
              medium: Memory
```

This example trains on GPU. If you only have CPU machines, switch the image to the CPU version `paddlecloud/paddleocr:2.5-cpu-efbb0a` and add `Global.use_gpu=false` to the args.

```bash
# Create the PaddleJob to train the model
$ kubectl apply -f ppocrv3.yaml
paddlejob.batch.paddlepaddle.org/ppocrv3 created
# Check the PaddleJob status
$ kubectl get pods -n paddlecloud -l paddle-res-name=ppocrv3-worker-0
NAME               READY   STATUS    RESTARTS   AGE
ppocrv3-worker-0   1/1     Running   0          4s
# Follow the training logs
$ kubectl logs -f ppocrv3-worker-0 -n paddlecloud
```
## More Resources

Check out [PaddleCloud](https://github.com/PaddlePaddle/PaddleCloud), which provides standard images for the PaddlePaddle model suites and a full stack of cloud-native deployment components. If you have any questions about deploying PaddlePaddle model suites, please contact us.

If you find a problem in PaddleCloud or have a suggestion, feel free to report it via [GitHub Issues](https://github.com/PaddlePaddle/PaddleCloud/issues).
@@ -163,43 +163,41 @@ The recognition model is the same.

The predicted performance data will be automatically written into the `PipelineServingLogs/pipeline.tracer` file.

-Tested on 200 real pictures, and limited the detection long side to 960. The average QPS on T4 GPU can reach around 23:
+Tested on 200 real pictures, and limited the detection long side to 960. The average QPS on T4 GPU can reach around 62.0:

```
2022-05-12 03:56:46,461 ==================== TRACER ======================
2022-05-12 03:56:46,860 Op(det):
2022-05-12 03:56:46,860         in[80.32286641221374 ms]
2022-05-12 03:56:46,860         prep[74.27364885496183 ms]
2022-05-12 03:56:46,860         midp[33.41587786259542 ms]
2022-05-12 03:56:46,860         postp[20.935980916030534 ms]
2022-05-12 03:56:46,860         out[1.551145038167939 ms]
2022-05-12 03:56:46,860         idle[0.3889510617728378]
2022-05-12 03:56:46,860 Op(rec):
2022-05-12 03:56:46,860         in[15.46498846153846 ms]
2022-05-12 03:56:46,861         prep[22.565715384615384 ms]
2022-05-12 03:56:46,861         midp[91.42518076923076 ms]
2022-05-12 03:56:46,861         postp[11.678453846153847 ms]
2022-05-12 03:56:46,861         out[1.1200576923076924 ms]
2022-05-12 03:56:46,861         idle[0.11658723106110291]
2022-05-12 03:56:46,862 DAGExecutor:
2022-05-12 03:56:46,862         Query count[620]
2022-05-12 03:56:46,862         QPS[62.0 q/s]
2022-05-12 03:56:46,862         Succ[0.4193548387096774]
2022-05-12 03:56:46,862         Latency:
2022-05-12 03:56:46,863                 ave[165.54603709677417 ms]
2022-05-12 03:56:46,863                 .50[77.863 ms]
2022-05-12 03:56:46,863                 .60[158.414 ms]
2022-05-12 03:56:46,863                 .70[237.28 ms]
2022-05-12 03:56:46,863                 .80[316.022 ms]
2022-05-12 03:56:46,863                 .90[424.416 ms]
2022-05-12 03:56:46,863                 .95[515.566 ms]
2022-05-12 03:56:46,863                 .99[762.256 ms]
2022-05-12 03:56:46,863 Channel (server worker num[10]):
2022-05-12 03:56:46,864         chl0(In: ['@DAGExecutor'], Out: ['det']) size[0/0]
2022-05-12 03:56:46,864         chl1(In: ['det'], Out: ['rec']) size[2/0]
2022-05-12 03:56:46,865         chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
```

<a name="C++"></a>
...
@@ -162,42 +162,41 @@ python3 -m paddle_serving_client.convert --dirname ./ch_PP-OCRv3_rec_infer/ \

The prediction performance data is automatically written into the `PipelineServingLogs/pipeline.tracer` file.

-Tested on 200 real pictures with the detection long side limited to 960, the average QPS on a T4 GPU reaches around 23:
+Tested on 200 real pictures with the detection long side limited to 960, the average QPS on a T4 GPU reaches around 62:

```
2022-05-12 03:56:46,461 ==================== TRACER ======================
2022-05-12 03:56:46,860 Op(det):
2022-05-12 03:56:46,860         in[80.32286641221374 ms]
2022-05-12 03:56:46,860         prep[74.27364885496183 ms]
2022-05-12 03:56:46,860         midp[33.41587786259542 ms]
2022-05-12 03:56:46,860         postp[20.935980916030534 ms]
2022-05-12 03:56:46,860         out[1.551145038167939 ms]
2022-05-12 03:56:46,860         idle[0.3889510617728378]
2022-05-12 03:56:46,860 Op(rec):
2022-05-12 03:56:46,860         in[15.46498846153846 ms]
2022-05-12 03:56:46,861         prep[22.565715384615384 ms]
2022-05-12 03:56:46,861         midp[91.42518076923076 ms]
2022-05-12 03:56:46,861         postp[11.678453846153847 ms]
2022-05-12 03:56:46,861         out[1.1200576923076924 ms]
2022-05-12 03:56:46,861         idle[0.11658723106110291]
2022-05-12 03:56:46,862 DAGExecutor:
2022-05-12 03:56:46,862         Query count[620]
2022-05-12 03:56:46,862         QPS[62.0 q/s]
2022-05-12 03:56:46,862         Succ[0.4193548387096774]
2022-05-12 03:56:46,862         Latency:
2022-05-12 03:56:46,863                 ave[165.54603709677417 ms]
2022-05-12 03:56:46,863                 .50[77.863 ms]
2022-05-12 03:56:46,863                 .60[158.414 ms]
2022-05-12 03:56:46,863                 .70[237.28 ms]
2022-05-12 03:56:46,863                 .80[316.022 ms]
2022-05-12 03:56:46,863                 .90[424.416 ms]
2022-05-12 03:56:46,863                 .95[515.566 ms]
2022-05-12 03:56:46,863                 .99[762.256 ms]
2022-05-12 03:56:46,863 Channel (server worker num[10]):
2022-05-12 03:56:46,864         chl0(In: ['@DAGExecutor'], Out: ['det']) size[0/0]
2022-05-12 03:56:46,864         chl1(In: ['det'], Out: ['rec']) size[2/0]
2022-05-12 03:56:46,865         chl2(In: ['rec'], Out: ['@DAGExecutor']) size[0/0]
```

<a name="C++"></a>
...
@@ -47,9 +47,9 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3

```
# Download the detection pre-trained model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
-tar xf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
+tar xf ch_PP-OCRv3_det_distill_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3_det/ch_PP-OCRv3_det_cml.yml -o Global.pretrained_model='./ch_PP-OCRv3_det_distill_train/best_accuracy' Global.save_model_dir=./output/quant_model_distill/
+python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.pretrained_model='./ch_PP-OCRv3_det_distill_train/best_accuracy' Global.save_model_dir=./output/quant_model_distill/
```

To quantize the recognition model, simply change the configuration file and the loaded model parameters.
...
@@ -54,9 +54,9 @@ Model distillation and model quantization can be used at the same time, taking t

```
# download provided model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
-tar xf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
+tar xf ch_PP-OCRv3_det_distill_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3_det/ch_PP-OCRv3_det_cml.yml -o Global.pretrained_model='./ch_PP-OCRv3_det_distill_train/best_accuracy' Global.save_model_dir=./output/quant_model_distill/
+python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.pretrained_model='./ch_PP-OCRv3_det_distill_train/best_accuracy' Global.save_model_dir=./output/quant_model_distill/
```

If you want to quantize the text recognition model, you can modify the configuration file and loaded model parameters.
...
@@ -59,7 +59,7 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di

### 2.1 Ultra-Lightweight Chinese Recognition Model Inference

-**Note**: The `PP-OCRv3` recognition model uses an input shape of `3,48,320`, so the parameter `--rec_image_shape=3,48,320` must be added; if you are not using the `PP-OCRv3` recognition model, this parameter is not needed.
+**Note**: The `PP-OCRv3` recognition model uses an input shape of `3,48,320`. If you use another recognition model, set `--rec_image_shape` according to that model. In addition, the `PP-OCRv3` recognition model defaults to the `rec_algorithm` `SVTR_LCNet`; note the difference from the original `SVTR`.

For ultra-lightweight Chinese recognition model inference, run the following commands:

@@ -67,7 +67,7 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di
# Download the ultra-lightweight Chinese recognition model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar xf ch_PP-OCRv3_rec_infer.tar
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_image_shape=3,48,320
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/"
```

![](../imgs_words/ch/word_4.jpg)
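As a hedged illustration of the note above, calling a non-PP-OCRv3 recognition model through the `paddleocr` Python package would look roughly like this; the model directory is a hypothetical example, and the shape and algorithm must match whatever model you actually load:

```python
from paddleocr import PaddleOCR

# PP-OCRv3 defaults to rec_image_shape="3, 48, 320" and rec_algorithm="SVTR_LCNet".
# An older CRNN-style model needs its own shape instead:
ocr = PaddleOCR(rec_model_dir="./ch_ppocr_mobile_v2.0_rec_infer/",  # hypothetical path
                rec_image_shape="3, 32, 320",
                rec_algorithm="CRNN")
print(ocr.ocr("./doc/imgs_words/ch/word_4.jpg", det=False))
```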
@@ -121,17 +121,17 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]

## 4. Concatenated Inference of Text Detection, Angle Classification and Text Recognition

-**Note**: The `PP-OCRv3` recognition model uses an input shape of `3,48,320`, so the parameter `--rec_image_shape=3,48,320` must be added; if you are not using the `PP-OCRv3` recognition model, this parameter is not needed.
+**Note**: The `PP-OCRv3` recognition model uses an input shape of `3,48,320`. If you use another recognition model, set `--rec_image_shape` according to that model. In addition, the `PP-OCRv3` recognition model defaults to the `rec_algorithm` `SVTR_LCNet`; note the difference from the original `SVTR`.

Taking ultra-lightweight Chinese OCR model inference as an example: when running prediction, specify the path of a single image or an image folder with the parameter `image_dir`, and specify the detection, angle classification and recognition inference model paths with `det_model_dir`, `cls_model_dir` and `rec_model_dir` respectively. The parameter `use_angle_cls` controls whether the angle classification model is enabled, `use_mp` controls whether multi-process inference is used, and `total_process_num` sets the number of processes. The visualized recognition results are saved to the `./inference_results` folder by default.

```shell
# With the direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true
# Without the direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false
# With multi-process
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
```

After running the command, the visualized recognition result is as follows:
...
@@ -20,8 +20,7 @@

## Resources

-- [Notebook tutorials](https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/notebook/notebook_ch)
-- [Teaching videos](https://aistudio.baidu.com/aistudio/education/group/info/25207)
+- [Teaching videos and notebook tutorials](https://aistudio.baidu.com/aistudio/education/group/info/25207)
- To download the Chinese e-book, scan the QR code below, join the group, and get it there

<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
...
@@ -56,7 +56,7 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_di

<a name="LIGHTWEIGHT_RECOGNITION"></a>
### 1. Lightweight Chinese Recognition Model Inference

-**Note**: The input shape used by the recognition model of `PP-OCRv3` is `3,48,320`, and the parameter `--rec_image_shape=3,48,320` needs to be added. If the recognition model of `PP-OCRv3` is not used, this parameter does not need to be set.
+**Note**: The input shape used by the recognition model of `PP-OCRv3` is `3, 48, 320`. If you use other recognition models, you need to set the parameter `--rec_image_shape` according to the model. In addition, the `rec_algorithm` used by the recognition model of `PP-OCRv3` is `SVTR_LCNet` by default. Note the difference from the original `SVTR`.

For lightweight Chinese recognition model inference, you can execute the following commands:
@@ -120,19 +120,18 @@ After executing the command, the prediction results (classification angle and sc

<a name="CONCATENATION"></a>
## Text Detection, Angle Classification and Recognition Inference Concatenation

-**Note**: The input shape used by the recognition model of `PP-OCRv3` is `3,48,320`, and the parameter `--rec_image_shape=3,48,320` needs to be added. If the recognition model of `PP-OCRv3` is not used, this parameter does not need to be set.
+**Note**: The input shape used by the recognition model of `PP-OCRv3` is `3, 48, 320`. If you use other recognition models, you need to set the parameter `--rec_image_shape` according to the model. In addition, the `rec_algorithm` used by the recognition model of `PP-OCRv3` is `SVTR_LCNet` by default. Note the difference from the original `SVTR`.

When performing prediction, you need to specify the path of a single image or a folder of images through the parameter `image_dir`, the detection inference model path through `det_model_dir`, the angle classification inference model path through `cls_model_dir`, and the recognition inference model path through `rec_model_dir`. The parameter `use_angle_cls` controls whether the angle classification model is enabled, `use_mp` specifies whether to use multi-process inference, and `total_process_num` specifies the number of processes when multi-process is used. The visualized recognition results are saved to the `./inference_results` folder by default.

```shell
# use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=true --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --cls_model_dir="./cls/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=true
# not use direction classifier
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false
# use multi-process
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv2_det_infer/" --rec_model_dir="./ch_PP-OCRv2_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6 --rec_image_shape=3,48,320
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --use_angle_cls=false --use_mp=True --total_process_num=6
```
...
@@ -17,5 +17,4 @@

## Address

- [E-book: *Dive Into OCR* (link generating)]()
-- [Jupyter notebook](https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/notebook/notebook_en)
-- [videos (Chinese only)](https://aistudio.baidu.com/aistudio/education/group/info/25207)
+- [Videos and Jupyter notebook (Chinese only)](https://aistudio.baidu.com/aistudio/education/group/info/25207)
@@ -28,13 +28,13 @@

- If you have CUDA 9 or CUDA 10 installed on your machine, please run the following command to install

  ```bash
-  python3 -m pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
+  python3 -m pip install paddlepaddle-gpu
  ```

- If you have no available GPU on your machine, please run the following command to install the CPU version

  ```bash
-  python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
+  python3 -m pip install paddlepaddle
  ```

For more software version requirements, please refer to the instructions in the [Installation Document](https://www.paddlepaddle.org.cn/install/quick).
...
@@ -22,6 +22,9 @@ from numpy.fft import fft
 from numpy.linalg import norm
 import sys

+def vector_slope(vec):
+    assert len(vec) == 2
+    return abs(vec[1] / (vec[0] + 1e-8))

 class FCENetTargets:
     """Generate the ground truth targets of FCENet: Fourier Contour Embedding
@@ -233,9 +236,9 @@ class FCENetTargets:
             head_inds = [head_start, head_end]
             tail_inds = [tail_start, tail_end]
         else:
-            if self.vector_slope(points[1] - points[0]) + self.vector_slope(
-                    points[3] - points[2]) < self.vector_slope(points[
-                        2] - points[1]) + self.vector_slope(points[0] - points[
-                            3]):
+            if vector_slope(points[1] - points[0]) + vector_slope(
+                    points[3] - points[2]) < vector_slope(points[
+                        2] - points[1]) + vector_slope(points[0] - points[
+                            3]):
                 horizontal_edge_inds = [[0, 1], [2, 3]]
                 vertical_edge_inds = [[3, 0], [1, 2]]
...
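To make the orientation test concrete, here is a hedged sketch of what the now module-level `vector_slope` computes for a wide quadrilateral (the points are illustrative):

```python
import numpy as np

def vector_slope(vec):
    assert len(vec) == 2
    return abs(vec[1] / (vec[0] + 1e-8))

# A wide, nearly axis-aligned quad: top-left, top-right, bottom-right, bottom-left.
points = np.array([[0, 0], [100, 2], [101, 30], [1, 28]], dtype=np.float32)

top_bottom = vector_slope(points[1] - points[0]) + vector_slope(points[3] - points[2])
left_right = vector_slope(points[2] - points[1]) + vector_slope(points[0] - points[3])

# The flatter edge pair (smaller summed slope) is treated as horizontal,
# which is exactly the comparison in the hunk above.
print(top_bottom < left_right)  # True for this wide quad
```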
@@ -438,7 +438,12 @@ class KieLabelEncode(object):
             texts.append(ann['transcription'])
             text_ind = [self.dict[c] for c in text if c in self.dict]
             text_inds.append(text_ind)
-            labels.append(ann['label'])
+            if 'label' in ann.keys():
+                labels.append(ann['label'])
+            elif 'key_cls' in ann.keys():
+                labels.append(ann['key_cls'])
+            else:
+                raise ValueError("Cannot found 'key_cls' in ann.keys(), please check your training annotation.")
             edges.append(ann.get('edge', 0))
         ann_infos = dict(
             image=data['image'],
...
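This fallback matters because PPOCRLabel (see the first hunk of this commit) writes the key class under `key_cls`, while older KIE annotations used `label`. A hedged sketch of the two annotation shapes now accepted, with illustrative values:

```python
# Legacy annotation: key class stored under 'label'.
ann_old = {"transcription": "Total: $12.00",
           "points": [[5, 5], [90, 5], [90, 20], [5, 20]],
           "label": "ANSWER"}
# Annotation written by PPOCRLabel's KIE mode: key class stored under 'key_cls'.
ann_new = {"transcription": "Total: $12.00",
           "points": [[5, 5], [90, 5], [90, 20], [5, 20]],
           "key_cls": "ANSWER"}

def key_class(ann):
    # Same precedence as the diff: 'label' first, then 'key_cls'.
    if 'label' in ann:
        return ann['label']
    if 'key_cls' in ann:
        return ann['key_cls']
    raise ValueError("annotation lacks both 'label' and 'key_cls'")

assert key_class(ann_old) == key_class(ann_new) == "ANSWER"
```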
@@ -18,7 +18,7 @@ The table recognition mainly contains three models

The table recognition flow chart is as follows

-![tableocr_pipeline](../../doc/table/tableocr_pipeline_en.jpg)
+![tableocr_pipeline](../docs/table/tableocr_pipeline_en.jpg)

1. The coordinates of single-line text are detected by the DB model and then sent to the recognition model to get the recognition result.
2. The table structure and cell coordinates are predicted by the RARE model.
...