From 0f2690661fd79eace28ffb3bd5620d18978746e0 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Mon, 11 Oct 2021 14:23:14 +0800
Subject: [PATCH] Update ocr module version (#1637)

---
 .../chinese_ocr_db_crnn_mobile/module.py | 13 +---
 .../chinese_ocr_db_crnn_server/module.py | 13 +---
 .../chinese_ocr_db_crnn_server/utils.py  |  2 +-
 .../module.py                            | 64 +++++++++----------
 .../processor.py                         |  5 +-
 .../module.py                            |  8 ---
 6 files changed, 38 insertions(+), 67 deletions(-)

diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
index 892c863f..371e8f97 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py
@@ -21,7 +21,7 @@ from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_
 
 @moduleinfo(
     name="chinese_ocr_db_crnn_mobile",
-    version="1.1.1",
+    version="1.1.2",
     summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \
         based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. ",
     author="paddle-dev",
@@ -490,14 +490,3 @@ class ChineseOCRDBCRNN(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    ocr = ChineseOCRDBCRNN()
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = ocr.recognize_text(paths=image_path, visualization=True)
-    ocr.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
index 91750059..a96673f3 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py
@@ -25,7 +25,7 @@ from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_
 
 @moduleinfo(
     name="chinese_ocr_db_crnn_server",
-    version="1.1.1",
+    version="1.1.2",
     summary=
     "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ",
     author="paddle-dev",
@@ -494,14 +494,3 @@ class ChineseOCRDBCRNNServer(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    ocr = ChineseOCRDBCRNNServer(enable_mkldnn=False)
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = ocr.recognize_text(paths=image_path, visualization=True)
-    ocr.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
index c5bf34d0..5a90b27d 100644
--- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
+++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py
@@ -172,6 +172,6 @@ def sorted_boxes(dt_boxes):
 
 def base64_to_cv2(b64str):
     data = base64.b64decode(b64str.encode('utf8'))
-    data = np.frombuffer(data, np.uint8)
+    data = np.fromstring(data, np.uint8)
     data = cv2.imdecode(data, cv2.IMREAD_COLOR)
     return data
diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
index 14fd6137..aaae4aea 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py
@@ -29,7 +29,7 @@ def base64_to_cv2(b64str):
 
 @moduleinfo(
     name="chinese_text_detection_db_mobile",
-    version="1.0.3",
+    version="1.0.4",
     summary=
     "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.",
     author="paddle-dev",
@@ -103,26 +103,6 @@ class ChineseTextDetectionDB(hub.Module):
             images.append(img)
         return images
 
-    def clip_det_res(self, points, img_height, img_width):
-        for pno in range(points.shape[0]):
-            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
-            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
-        return points
-
-    def filter_tag_det_res(self, dt_boxes, image_shape):
-        img_height, img_width = image_shape[0:2]
-        dt_boxes_new = []
-        for box in dt_boxes:
-            box = self.order_points_clockwise(box)
-            box = self.clip_det_res(box, img_height, img_width)
-            rect_width = int(np.linalg.norm(box[0] - box[1]))
-            rect_height = int(np.linalg.norm(box[0] - box[3]))
-            if rect_width <= 10 or rect_height <= 10:
-                continue
-            dt_boxes_new.append(box)
-        dt_boxes = np.array(dt_boxes_new)
-        return dt_boxes
-
     def order_points_clockwise(self, pts):
         """
         reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
@@ -147,6 +127,35 @@ class ChineseTextDetectionDB(hub.Module):
         rect = np.array([tl, tr, br, bl], dtype="float32")
         return rect
 
+    def clip_det_res(self, points, img_height, img_width):
+        for pno in range(points.shape[0]):
+            points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1))
+            points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1))
+        return points
+
+    def filter_tag_det_res(self, dt_boxes, image_shape):
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.order_points_clockwise(box)
+            box = self.clip_det_res(box, img_height, img_width)
+            rect_width = int(np.linalg.norm(box[0] - box[1]))
+            rect_height = int(np.linalg.norm(box[0] - box[3]))
+            if rect_width <= 3 or rect_height <= 3:
+                continue
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+
+    def filter_tag_det_res_only_clip(self, dt_boxes, image_shape):
+        img_height, img_width = image_shape[0:2]
+        dt_boxes_new = []
+        for box in dt_boxes:
+            box = self.clip_det_res(box, img_height, img_width)
+            dt_boxes_new.append(box)
+        dt_boxes = np.array(dt_boxes_new)
+        return dt_boxes
+
     def detect_text(self,
                     images=[],
                     paths=[],
@@ -193,7 +202,7 @@ class ChineseTextDetectionDB(hub.Module):
             'thresh': 0.3,
             'box_thresh': 0.5,
             'max_candidates': 1000,
-            'unclip_ratio': 2.0
+            'unclip_ratio': 1.6
         })
 
         all_imgs = []
@@ -314,14 +323,3 @@ class ChineseTextDetectionDB(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    db = ChineseTextDetectionDB()
-    image_path = [
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg',
-        '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg'
-    ]
-    res = db.detect_text(paths=image_path, visualization=True)
-    db.save_inference_model('save')
-    print(res)
diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
index 5c6df83d..b5e76cbe 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py
@@ -120,6 +120,7 @@ class DBPostProcess(object):
         self.max_candidates = params['max_candidates']
         self.unclip_ratio = params['unclip_ratio']
         self.min_size = 3
+        self.dilation_kernel = np.array([[1, 1], [1, 1]])
 
     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
@@ -218,7 +219,9 @@ class DBPostProcess(object):
         boxes_batch = []
         for batch_index in range(pred.shape[0]):
             height, width = pred.shape[-2:]
-            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height)
+
+            mask = cv2.dilate(np.array(segmentation[batch_index]).astype(np.uint8), self.dilation_kernel)
+            tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height)
 
             boxes = []
             for k in range(len(tmp_boxes)):
diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py
index 91ac7f32..52295bef 100644
--- a/modules/image/text_recognition/chinese_text_detection_db_server/module.py
+++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py
@@ -297,11 +297,3 @@ class ChineseTextDetectionDBServer(hub.Module):
         Add the command input options
         """
         self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image")
-
-
-if __name__ == '__main__':
-    db = ChineseTextDetectionDBServer()
-    image_path = ['/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg']
-    res = db.detect_text(paths=image_path, visualization=True)
-    db.save_inference_model('save')
-    print(res)
--
GitLab
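
Note: this patch removes the ad-hoc `if __name__ == '__main__'` smoke tests from the four modules. The snippet below is not part of the patch; it is a minimal usage sketch that assumes the module has been installed through PaddleHub's standard loader and reuses the `recognize_text` call that the deleted test blocks exercised. The image path is a placeholder.

    # Minimal sketch, assuming `hub install chinese_ocr_db_crnn_mobile` has been run
    # and that hub.Module resolves to the 1.1.2 release once it is published.
    import paddlehub as hub

    ocr = hub.Module(name="chinese_ocr_db_crnn_mobile")
    # Placeholder path; point this at a real image on disk.
    results = ocr.recognize_text(paths=['./test_image.jpg'], visualization=True)
    print(results)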