From 5b4675e06dcd01eb44200594452d0464833da43d Mon Sep 17 00:00:00 2001
From: LDOUBLEV <liuvv0203@outlook.com>
Date: Tue, 12 May 2020 19:40:57 +0800
Subject: [PATCH] fix problems refer comments

---
 README.md                           | 56 ++++++++++++++++-------------
 doc/detection.md                    |  9 ++---
 doc/installation.md                 |  7 ----
 ppocr/data/det/dataset_traversal.py | 14 +++-----
 tools/infer/utility.py              | 25 +++++++------
 tools/infer_det.py                  | 48 +------------------------
 6 files changed, 57 insertions(+), 102 deletions(-)

diff --git a/README.md b/README.md
index e08448f6..2b889c28 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,49 @@
 
 # 简介
-PaddleOCR旨在打造一套丰富、领先、且实用的文字检测、识别模型/工具库，助力使用者训练出更好的模型，并应用落地。
-
-
-## 文档教程
-- [快速安装](./doc/installation.md)
-- [文本识别模型训练/评估/预测](./doc/detection.md)
-- [文本预测模型训练/评估/预测](./doc/recognition.md)
+PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力使用者训练出更好的模型，并应用落地。
 
 ## 特性：
 - 超轻量级模型
     - (检测模型4.1M + 识别模型4.5M = 8.6M)
-- 支持竖排文字
+- 支持竖排文字识别
     - (单模型同时支持横排和竖排文字识别)
 - 支持长文本识别
 - 支持中英文数字组合识别
 - 提供训练代码
 - 支持模型部署
 
+
+## 文档教程
+- [快速安装](./doc/installation.md)
+- [快速开始]()
+- [文本检测模型训练/评估/预测](./doc/detection.md)
+- [文本识别模型训练/评估/预测](./doc/recognition.md)
+- [基于inference model预测](./doc/)
+
+
 ## 文本检测算法:
 
 PaddleOCR开源的文本检测算法列表：
-- [EAST](https://arxiv.org/abs/1704.03155)
-- [DB](https://arxiv.org/abs/1911.08947)
-- [SAST](https://arxiv.org/abs/1908.05498)
+- [x] [EAST](https://arxiv.org/abs/1704.03155)
+- [x] [DB](https://arxiv.org/abs/1911.08947)
+- [x] [SAST](https://arxiv.org/abs/1908.05498)
+- []
+
 
 算法效果：
-|模型|骨干网络|数据集|Hmean|
-|-|-|-|-|
-|EAST|ResNet50_vd|ICDAR2015|85.85%|
-|EAST|MobileNetV3|ICDAR2015|79.08%|
-|DB|ResNet50_vd|ICDAR2015|83.30%|
-|DB|MobileNetV3|ICDAR2015|73.00%|
+|模型|骨干网络|Hmean|
+|-|-|-|
+|EAST<sup>[1]</sup>|ResNet50_vd|85.85%|
+|EAST<sup>[1]</sup>|MobileNetV3|79.08%|
+|DB<sup>[2]</sup>|ResNet50_vd|83.30%|
+|DB<sup>[2]</sup>|MobileNetV3|73.00%|
 
 PaddleOCR文本检测算法的训练与使用请参考[文档](./doc/detection.md)。
 
 ## 文本识别算法:
 
 PaddleOCR开源的文本识别算法列表：
-- (CRNN)[https://arxiv.org/abs/1507.05717]
+- [CRNN](https://arxiv.org/abs/1507.05717)
 - [Rosetta](https://arxiv.org/abs/1910.05085)
 - [STAR-Net](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)
 - [RARE](https://arxiv.org/abs/1603.03915v1)
@@ -59,7 +64,8 @@ PaddleOCR开源的文本识别算法列表：
 
 PaddleOCR文本识别算法的训练与使用请参考[文档](./doc/recognition.md)。
 
-## 端到端算法
+## TODO
+**端到端OCR算法**
 PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.org/abs/1909.07808)，敬请关注。
 - End2End-PSL (comming soon)
 
@@ -67,7 +73,7 @@ PaddleOCR即将开源百度自研端对端OCR模型[End2End-PSL](https://arxiv.o
 
 # 参考文献
 ```
-EAST:
+1. EAST:
 @inproceedings{zhou2017east,
   title={EAST: an efficient and accurate scene text detector},
   author={Zhou, Xinyu and Yao, Cong and Wen, He and Wang, Yuzhi and Zhou, Shuchang and He, Weiran and Liang, Jiajun},
@@ -76,7 +82,7 @@ EAST:
   year={2017}
 }
 
-DB:
+2. DB:
 @article{liao2019real,
   title={Real-time Scene Text Detection with Differentiable Binarization},
   author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
@@ -84,7 +90,7 @@ DB:
   year={2019}
 }
 
-DTRB:
+3. DTRB:
 @inproceedings{baek2019wrong,
   title={What is wrong with scene text recognition model comparisons? dataset and model analysis},
   author={Baek, Jeonghun and Kim, Geewook and Lee, Junyeop and Park, Sungrae and Han, Dongyoon and Yun, Sangdoo and Oh, Seong Joon and Lee, Hwalsuk},
@@ -93,7 +99,7 @@ DTRB:
   year={2019}
 }
 
-SAST:
+4. SAST:
 @inproceedings{wang2019single,
   title={A Single-Shot Arbitrarily-Shaped Text Detector based on Context Attended Multi-Task Learning},
   author={Wang, Pengfei and Zhang, Chengquan and Qi, Fei and Huang, Zuming and En, Mengyi and Han, Junyu and Liu, Jingtuo and Ding, Errui and Shi, Guangming},
@@ -102,7 +108,7 @@ SAST:
   year={2019}
 }
 
-SRN:
+5. SRN:
 @article{yu2020towards,
   title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks},
   author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui},
@@ -110,7 +116,7 @@ SRN:
   year={2020}
 }
 
-end2end-psl:
+6. end2end-psl:
 @inproceedings{sun2019chinese,
   title={Chinese Street View Text: Large-scale Chinese Text Reading with Partially Supervised Learning},
   author={Sun, Yipeng and Liu, Jiaming and Liu, Wei and Han, Junyu and Ding, Errui and Liu, Jingtuo},
diff --git a/doc/detection.md b/doc/detection.md
index 5d54d780..5e550110 100644
--- a/doc/detection.md
+++ b/doc/detection.md
@@ -1,6 +1,6 @@
 # 文字检测
 
-本节以icdar15数据集为例，介绍PaddleOCR中检测模型的使用方式。
+本节以icdar15数据集为例，介绍PaddleOCR中检测模型的训练、评估与测试。
 
 ## 数据准备
 icdar2015数据集可以从[官网](https://rrc.cvc.uab.es/?ch=4&com=downloads)下载到，首次下载需注册。
@@ -26,8 +26,9 @@ wget -P /PaddleOCR/train_data/  测试标注文件链接
 " 图像文件名                    json.dumps编码的图像标注信息"
 ch4_test_images/img_61.jpg    [{"transcription": "MASA", "points": [[310, 104], [416, 141], [418, 216], [312, 179]], ...}]
 ```
-json.dumps编码前的图像标注信息是包含多个字典的list，字典中的points表示文本框的位置，如果您想在其他数据集上训练PaddleOCR,
-可以按照上述形式构建标注文件。
+json.dumps编码前的图像标注信息是包含多个字典的list，字典中的`points`表示文本框的四个点的坐标(x, y)，从左上角的点开始顺时针排列。
+`transcription`表示当前文本框的文字，在文本检测任务中并不需要这个信息。
+如果您想在其他数据集上训练PaddleOCR，可以按照上述形式构建标注文件。
 
 
 ## 快速启动训练
@@ -62,7 +63,7 @@ PaddleOCR计算三个OCR检测相关的指标，分别是：Precision、Recall
 运行如下代码，根据配置文件det_db_mv3.yml中save_res_path指定的测试集检测结果文件，计算评估指标。
 
 ```
-python3 tools/eval.py -c configs/det/det_db_mv3.yml  -o checkpoints="./output/best_accuracy"
+python3 tools/eval.py -c configs/det/det_db_mv3.yml  -o Global.checkpoints="./output/best_accuracy"
 ```
 
 ## 测试检测效果
diff --git a/doc/installation.md b/doc/installation.md
index 5fca344e..25e0d0d5 100644
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -25,10 +25,3 @@ cd PaddleOCR
 pip3 install --upgrade pip
 pip3 install -r requirements.txt
 ```
-
-## 快速运行
-
-```
-python3 tools/infer/predict_eval.py --image_file="./"
-```
-【可视化运行结果】
diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py
index 5ba01ee7..2e68d91d 100755
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -22,6 +22,7 @@ import string
 from ppocr.utils.utility import initial_logger
 logger = initial_logger()
 from ppocr.utils.utility import create_module
+from tools.infer.utility import get_image_file_list
 import time
 
 
@@ -72,16 +73,8 @@ class EvalTestReader(object):
             self.params)
         batch_size = self.params['test_batch_size_per_card']
 
-        flag_test_single_img = False
-        if mode == "test":
-            single_img_path = self.params['single_img_path']
-            if single_img_path is not None:
-                flag_test_single_img = True
-
         img_list = []
-        if flag_test_single_img:
-            img_list.append([single_img_path, single_img_path])
-        else:
+        if mode != "test":
             img_set_dir = self.params['img_set_dir']
             img_name_list_path = self.params['label_file_path']
             with open(img_name_list_path, "rb") as fin:
@@ -90,6 +83,9 @@ class EvalTestReader(object):
                     img_name = line.decode().strip("\n").split("\t")[0]
                     img_path = img_set_dir + "/" + img_name
                     img_list.append([img_path, img_name])
+        else:
+            img_path = self.params['single_img_path']
+            img_list = get_image_file_list(img_path)
 
         def batch_iter_reader():
             batch_outs = []
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index f1f7a8a0..01477a5c 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -61,16 +61,21 @@ def parse_args():
     return parser.parse_args()
 
 
-def get_image_file_list(image_dir):
-    image_file_list = []
-    if image_dir is None:
-        return image_file_list
-    if os.path.isfile(image_dir):
-        image_file_list = [image_dir]
-    elif os.path.isdir(image_dir):
-        for single_file in os.listdir(image_dir):
-            image_file_list.append(os.path.join(image_dir, single_file))
-    return image_file_list
+def get_image_file_list(img_file):
+    imgs_lists = []
+    if img_file is None or not os.path.exists(img_file):
+        raise Exception("not found any img file in {}".format(img_file))
+
+    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
+    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
+        imgs_lists.append(img_file)
+    elif os.path.isdir(img_file):
+        for single_file in os.listdir(img_file):
+            if single_file.split('.')[-1] in img_end:
+                imgs_lists.append(os.path.join(img_file, single_file))
+    if len(imgs_lists) == 0:
+        raise Exception("not found any img file in {}".format(img_file))
+    return imgs_lists
 
 
 def create_predictor(args, mode):
diff --git a/tools/infer_det.py b/tools/infer_det.py
index 7998cdb6..8d591a65 100755
--- a/tools/infer_det.py
+++ b/tools/infer_det.py
@@ -68,50 +68,6 @@ def draw_det_res(dt_boxes, config, img_name, ino):
         logger.info("The detected Image saved in {}".format(save_path))
 
 
-def simple_reader(img_file, config):
-    imgs_lists = []
-    if img_file is None or not os.path.exists(img_file):
-        raise Exception("not found any img file in {}".format(img_file))
-
-    img_end = ['jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp']
-    if os.path.isfile(img_file) and img_file.split('.')[-1] in img_end:
-        imgs_lists.append(img_file)
-    elif os.path.isdir(img_file):
-        for single_file in os.listdir(img_file):
-            if single_file.split('.')[-1] in img_end:
-                imgs_lists.append(os.path.join(img_file, single_file))
-    if len(imgs_lists) == 0:
-        raise Exception("not found any img file in {}".format(img_file))
-
-    batch_size = config['Global']['test_batch_size_per_card']
-    global_params = config['Global']
-    params = deepcopy(config['TestReader'])
-    params.update(global_params)
-    reader_function = params['process_function']
-    process_function = create_module(reader_function)(params)
-
-    def batch_iter_reader():
-        batch_outs = []
-        for img_path in imgs_lists:
-            img = cv2.imread(img_path)
-            if img.shape[-1] == 1 or len(list(img.shape)) == 2:
-                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-            if img is None:
-                logger.info("load image error:" + img_path)
-                continue
-            outs = process_function(img)
-            outs.append(os.path.basename(img_path))
-            print(outs[0].shape, outs[2])
-            batch_outs.append(outs)
-            if len(batch_outs) == batch_size:
-                yield batch_outs
-                batch_outs = []
-        if len(batch_outs) != 0:
-            yield batch_outs
-
-    return batch_iter_reader
-
-
 def main():
     config = program.load_config(FLAGS.config)
     program.merge_config(FLAGS.opt)
@@ -148,9 +104,7 @@ def main():
 
     save_res_path = config['Global']['save_res_path']
     with open(save_res_path, "wb") as fout:
-        # test_reader = reader_main(config=config, mode='test')
-        single_img_path = config['TestReader']['single_img_path']
-        test_reader = simple_reader(img_file=single_img_path, config=config)
+        test_reader = reader_main(config=config, mode='test')
         tackling_num = 0
         for data in test_reader():
             img_num = len(data)
-- 
GitLab