From b20e2dd53c4b9e17ec6a4510c63ed058444f056e Mon Sep 17 00:00:00 2001
From: zhoujun <zjwenmu@gmail.com>
Date: Mon, 16 Jan 2023 19:44:44 +0800
Subject: [PATCH] add Data annotation link (#8862)

* fixed opencv version

* update inference

* add Data annotation link

* update opencv version
---
 doc/doc_ch/table_recognition.md    | 17 +++++++++++------
 doc/doc_en/table_recognition_en.md |  5 +++++
 requirements.txt                   |  4 ++--
 3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/doc/doc_ch/table_recognition.md b/doc/doc_ch/table_recognition.md
index f09dedd0..8b8dad66 100644
--- a/doc/doc_ch/table_recognition.md
+++ b/doc/doc_ch/table_recognition.md
@@ -6,6 +6,7 @@
   - [1.1. 数据集格式](#11-数据集格式)
   - [1.2. 数据下载](#12-数据下载)
   - [1.3. 数据集生成](#13-数据集生成)
+  - [1.4. 数据标注](#14-数据标注)
 - [2. 开始训练](#2-开始训练)
   - [2.1. 启动训练](#21-启动训练)
   - [2.2. 断点训练](#22-断点训练)
@@ -39,15 +40,15 @@ img_label
 每一行的json格式为:
 ```txt
 {
-   'filename': PMC5755158_010_01.png,							# 图像名
-   'split': ’train‘, 									# 图像属于训练集还是验证集
-   'imgid': 0,								 		# 图像的index
+   'filename': PMC5755158_010_01.png,                            # 图像名
+   'split': 'train',                                     # 图像属于训练集还是验证集
+   'imgid': 0,                                         # 图像的index
    'html': {
-     'structure': {'tokens': ['<thead>', '<tr>', '<td>', ...]}, 			# 表格的HTML字符串
+     'structure': {'tokens': ['<thead>', '<tr>', '<td>', ...]},             # 表格的HTML字符串
      'cells': [
        {
-         'tokens': ['P', 'a', 'd', 'd', 'l', 'e', 'P', 'a', 'd', 'd', 'l', 'e'], 	# 表格中的单个文本
-         'bbox': [x0, y0, x1, y1]  							# 表格中的单个文本的坐标
+         'tokens': ['P', 'a', 'd', 'd', 'l', 'e', 'P', 'a', 'd', 'd', 'l', 'e'],     # 表格中的单个文本
+         'bbox': [x0, y0, x1, y1]                              # 表格中的单个文本的坐标
        }
      ]
    }
@@ -78,6 +79,10 @@ TableGeneration是一个开源表格数据集生成工具，其通过浏览器
 |简单表格|![](https://raw.githubusercontent.com/WenmuZhou/TableGeneration/main/imgs/simple.jpg)|
 |彩色表格|![](https://raw.githubusercontent.com/WenmuZhou/TableGeneration/main/imgs/color.jpg)|
 
+## 1.4. 数据标注
+
+数据标注可参考[PPOCRLabel](../../PPOCRLabel/README_ch.md)
+
 # 2. 开始训练
 
 PaddleOCR提供了训练脚本、评估脚本和预测脚本，本节将以 [SLANet](../../configs/table/SLANet.yml) 模型训练PubTabNet英文数据集为例：
diff --git a/doc/doc_en/table_recognition_en.md b/doc/doc_en/table_recognition_en.md
index d79d9893..c0a1aa9d 100644
--- a/doc/doc_en/table_recognition_en.md
+++ b/doc/doc_en/table_recognition_en.md
@@ -6,6 +6,7 @@ This article provides a full-process guide for the PaddleOCR table recognition m
   - [1.1. DataSet Format](#11-dataset-format)
   - [1.2. Data Download](#12-data-download)
   - [1.3. Dataset Generation](#13-dataset-generation)
+  - [1.4. Data Annotation](#14-data-annotation)
 - [2. Training](#2-training)
   - [2.1. Start Training](#21-start-training)
   - [2.2. Resume Training](#22-resume-training)
@@ -80,6 +81,10 @@ Some samples are as follows:
 |Simple Table|![](https://raw.githubusercontent.com/WenmuZhou/TableGeneration/main/imgs/simple.jpg)|
 |Simple Color Table|![](https://raw.githubusercontent.com/WenmuZhou/TableGeneration/main/imgs/color.jpg)|
 
+## 1.4. Data Annotation
+
+For data annotation, please refer to [PPOCRLabel](../../PPOCRLabel/README.md).
+
 # 2. Training
 
 PaddleOCR provides training scripts, evaluation scripts, and prediction scripts. In this section, the [SLANet](../../configs/table/SLANet.yml) model will be used as an example:
diff --git a/requirements.txt b/requirements.txt
index d3484033..21a5c463 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,8 +7,8 @@ tqdm
 numpy
 visualdl
 rapidfuzz
-opencv-python==4.6.0.66
-opencv-contrib-python==4.6.0.66
+opencv-python<=4.6.0.66
+opencv-contrib-python<=4.6.0.66
 cython
 lxml
 premailer
-- 
GitLab