From 0f2690661fd79eace28ffb3bd5620d18978746e0 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Mon, 11 Oct 2021 14:23:14 +0800
Subject: [PATCH] Update ocr module version (#1637)

---
 .../chinese_ocr_db_crnn_mobile/module.py | 13 +---
 .../chinese_ocr_db_crnn_server/module.py | 13 +---
 .../chinese_ocr_db_crnn_server/utils.py  |  2 +-
 .../module.py                            | 64 +++++++++----------
 .../processor.py                         |  5 +-
 .../module.py                            |  8 ---
 6 files changed, 38 insertions(+), 67 deletions(-)

diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
index 892c863f..371e8f97 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
@@ -21,7 +21,7 @@ from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_
 
 @moduleinfo(
     name="chinese_ocr_db_crnn_mobile",
-    version="1.1.1",
+    version="1.1.2",
     summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \
         based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. ",
     author="paddle-dev",
@@ -490,14 +490,3 @@ class ChineseOCRDBCRNN(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    ocr = ChineseOCRDBCRNN()
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = ocr.recognize_text(paths=image_path, visualization=True)
-    ocr.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
index 91750059..a96673f3 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
@@ -25,7 +25,7 @@ from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_
 
 @moduleinfo(
     name="chinese_ocr_db_crnn_server",
-    version="1.1.1",
+    version="1.1.2",
     summary=
     "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ",
     author="paddle-dev",
@@ -494,14 +494,3 @@ class ChineseOCRDBCRNNServer(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    ocr = ChineseOCRDBCRNNServer(enable_mkldnn=False)
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = ocr.recognize_text(paths=image_path, visualization=True)
-    ocr.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
index c5bf34d0..5a90b27d 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
@@ -172,6 +172,6 @@ def sorted_boxes(dt_boxes):
 
 def base64_to_cv2(b64str):
     data = base64.b64decode(b64str.encode('utf8'))
-    data = np.frombuffer(data, np.uint8)
+    data = np.fromstring(data, np.uint8)
     data = cv2.imdecode(data, cv2.IMREAD_COLOR)
     return data
diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
index 14fd6137..aaae4aea 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
@@ -29,7 +29,7 @@ def base64_to_cv2(b64str):
 
 @moduleinfo(
     name="chinese_text_detection_db_mobile",
-    version="1.0.3",
+    version="1.0.4",
     summary=
     "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.",
     author="paddle-dev",
@@ -103,26 +103,6 @@ class ChineseTextDetectionDB(hub.Module):
             images.append(img)
         return images
 
-    def clip_det_res(self, points, img_height, img_width):
-        for pno in range(points.shape[0]):
-            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
-            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
-        return points
-
-    def filter_tag_det_res(self, dt_boxes, image_shape):
-        img_height, img_width = image_shape[0:2]
-        dt_boxes_new = []
-        for box in dt_boxes:
-            box = self.order_points_clockwise(box)
-            box = self.clip_det_res(box, img_height, img_width)
-            rect_width = int(np.linalg.norm(box[0] - box[1]))
-            rect_height = int(np.linalg.norm(box[0] - box[3]))
-            if rect_width <= 10 or rect_height <= 10:
-                continue
-            dt_boxes_new.append(box)
-        dt_boxes = np.array(dt_boxes_new)
-        return dt_boxes
-
     def order_points_clockwise(self, pts):
         """
         reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
@@ -147,6 +127,35 @@ class ChineseTextDetectionDB(hub.Module):
         rect = np.array([tl, tr, br, bl], dtype="float32")
         return rect
 
+    def clip_det_res(self, points, img_height, img_width):
+        for pno in range(points.shape[0]):
+            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
+            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
+        return points
+
+    def filter_tag_det_res(self, dt_boxes, image_shape):
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.order_points_clockwise(box)
+            box = self.clip_det_res(box, img_height, img_width)
+            rect_width = int(np.linalg.norm(box[0] - box[1]))
+            rect_height = int(np.linalg.norm(box[0] - box[3]))
+            if rect_width <= 3 or rect_height <= 3:
+                continue
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+
+    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.clip_det_res(box, img_height, img_width)
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+
     def detect_text(self,
                     images=[],
                     paths=[],
@@ -193,7 +202,7 @@ class ChineseTextDetectionDB(hub.Module):
             'thresh': 0.3,
             'box_thresh': 0.5,
             'max_candidates': 1000,
-            'unclip_ratio': 2.0
+            'unclip_ratio': 1.6
         })
 
         all_imgs = []
@@ -314,14 +323,3 @@ class ChineseTextDetectionDB(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    db = ChineseTextDetectionDB()
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = db.detect_text(paths=image_path, visualization=True)
-    db.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
index 5c6df83d..b5e76cbe 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
@@ -120,6 +120,7 @@ class DBPostProcess(object):
         self.max_candidates = params['max_candidates']
         self.unclip_ratio = params['unclip_ratio']
         self.min_size = 3
+        self.dilation_kernel = np.array([[1, 1], [1, 1]])
 
     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
@@ -218,7 +219,9 @@ class DBPostProcess(object):
         boxes_batch = []
         for batch_index in range(pred.shape[0]):
             height, width = pred.shape[-2:]
-            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height)
+
+            mask = cv2.dilate(np.array(segmentation[batch_index]).astype(np.uint8), self.dilation_kernel)
+            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height)
 
             boxes = []
             for k in range(len(tmp_boxes)):
diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py
index 91ac7f32..52295bef 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_server/module.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py
@@ -297,11 +297,3 @@ class ChineseTextDetectionDBServer(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    db = ChineseTextDetectionDBServer()
-    image_path = ['/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg']
-    res = db.detect_text(paths=image_path, visualization=True)
-    db.save_inference_model('save')
-    print(res)
--
GitLab
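
Note: this patch removes the ad-hoc `if __name__ == '__main__'` smoke tests from the four modules. The snippet below is not part of the patch; it is a minimal usage sketch that assumes the module has been installed through PaddleHub's standard loader and reuses the `recognize_text` call that the deleted test blocks exercised. The image path is a placeholder.

    # Minimal sketch, assuming `hub install chinese_ocr_db_crnn_mobile` has been run
    # and that hub.Module resolves to the 1.1.2 release once it is published.
    import paddlehub as hub

    ocr = hub.Module(name="chinese_ocr_db_crnn_mobile")
    # Placeholder path; point this at a real image on disk.
    results = ocr.recognize_text(paths=['./test_image.jpg'], visualization=True)
    print(results)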