From dec76eb75da6fd5ad799039c8b200ab853f52b54 Mon Sep 17 00:00:00 2001
From: WenmuZhou <zjwenmu@gmail.com>
Date: Tue, 8 Jun 2021 10:52:47 +0800
Subject: [PATCH] add pad for small image in det

---
 ppocr/data/imaug/operators.py       | 12 ++++++++++--
 ppocr/postprocess/db_postprocess.py | 11 +++++------
 ppstructure/predict_system.py       |  5 +++--
 tools/infer/predict_det.py          |  4 +++-
 tools/infer/utility.py              |  2 ++
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py
index 9c48b096..ed81d41a 100644
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -81,7 +81,7 @@ class NormalizeImage(object):
         assert isinstance(img,
                           np.ndarray), "invalid input 'img' in NormalizeImage"
         data['image'] = (
-            img.astype('float32') * self.scale - self.mean) / self.std
+                                img.astype('float32') * self.scale - self.mean) / self.std
         return data
 
 
@@ -122,6 +122,8 @@ class DetResizeForTest(object):
         elif 'limit_side_len' in kwargs:
             self.limit_side_len = kwargs['limit_side_len']
             self.limit_type = kwargs.get('limit_type', 'min')
+            self.pad = kwargs.get('pad', False)
+            self.pad_size = kwargs.get('pad_size', 480)
         elif 'resize_long' in kwargs:
             self.resize_type = 2
             self.resize_long = kwargs.get('resize_long', 960)
@@ -163,7 +165,7 @@ class DetResizeForTest(object):
             img, (ratio_h, ratio_w)
         """
         limit_side_len = self.limit_side_len
-        h, w, _ = img.shape
+        h, w, c = img.shape
 
         # limit the max side
         if self.limit_type == 'max':
@@ -172,6 +174,8 @@ class DetResizeForTest(object):
                     ratio = float(limit_side_len) / h
                 else:
                     ratio = float(limit_side_len) / w
+            elif self.pad:
+                ratio = float(self.pad_size) / max(h, w)
             else:
                 ratio = 1.
         else:
@@ -197,6 +201,10 @@ class DetResizeForTest(object):
             sys.exit(0)
         ratio_h = resize_h / float(h)
         ratio_w = resize_w / float(w)
+        if self.limit_type == 'max' and self.pad:
+            padding_im = np.zeros((self.pad_size, self.pad_size, c), dtype=np.float32)
+            padding_im[:resize_h, :resize_w, :] = img
+            img = padding_im
         return img, [ratio_h, ratio_w]
 
     def resize_image_type2(self, img):
diff --git a/ppocr/postprocess/db_postprocess.py b/ppocr/postprocess/db_postprocess.py
index 769ddbe2..0c149610 100755
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -49,12 +49,12 @@ class DBPostProcess(object):
         self.dilation_kernel = None if not use_dilation else np.array(
             [[1, 1], [1, 1]])
 
-    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
+    def boxes_from_bitmap(self, pred, _bitmap, shape):
         '''
         _bitmap: single map with shape (1, H, W),
                 whose values are binarized as {0, 1}
         '''
-
+        dest_height, dest_width, ratio_h, ratio_w = shape
         bitmap = _bitmap
         height, width = bitmap.shape
 
@@ -89,9 +89,9 @@ class DBPostProcess(object):
             box = np.array(box)
 
             box[:, 0] = np.clip(
-                np.round(box[:, 0] / width * dest_width), 0, dest_width)
+                np.round(box[:, 0] / ratio_w), 0, dest_width)
             box[:, 1] = np.clip(
-                np.round(box[:, 1] / height * dest_height), 0, dest_height)
+                np.round(box[:, 1] / ratio_h), 0, dest_height)
             boxes.append(box.astype(np.int16))
             scores.append(score)
         return np.array(boxes, dtype=np.int16), scores
@@ -175,7 +175,6 @@ class DBPostProcess(object):
 
         boxes_batch = []
         for batch_index in range(pred.shape[0]):
-            src_h, src_w, ratio_h, ratio_w = shape_list[batch_index]
             if self.dilation_kernel is not None:
                 mask = cv2.dilate(
                     np.array(segmentation[batch_index]).astype(np.uint8),
@@ -183,7 +182,7 @@ class DBPostProcess(object):
             else:
                 mask = segmentation[batch_index]
             boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask,
-                                                   src_w, src_h)
+                                                   shape_list[batch_index])
 
             boxes_batch.append({'points': boxes})
         return boxes_batch
diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py
index ede85018..87306eae 100644
--- a/ppstructure/predict_system.py
+++ b/ppstructure/predict_system.py
@@ -38,11 +38,13 @@ logger = get_logger()
 
 class OCRSystem(object):
     def __init__(self, args):
+        args.det_pad = True
+        args.det_pad_size = 640
         self.text_system = TextSystem(args)
         self.table_system = TableSystem(args, self.text_system.text_detector, self.text_system.text_recognizer)
         self.table_layout = lp.PaddleDetectionLayoutModel("lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
                                                           threshold=0.5, enable_mkldnn=args.enable_mkldnn,
-                                                          enforce_cpu=not args.use_gpu,thread_num=args.cpu_threads)
+                                                          enforce_cpu=not args.use_gpu, thread_num=args.cpu_threads)
         self.use_angle_cls = args.use_angle_cls
         self.drop_score = args.drop_score
 
@@ -67,7 +69,6 @@ class OCRSystem(object):
             res_list.append({'type': region.type, 'bbox': [x1, y1, x2, y2], 'res': res})
         return res_list
 
-
 def save_res(res, save_folder, img_name):
     excel_save_folder = os.path.join(save_folder, img_name)
     os.makedirs(excel_save_folder, exist_ok=True)
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index 59bb49f9..b21db4c7 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -41,7 +41,9 @@ class TextDetector(object):
         pre_process_list = [{
             'DetResizeForTest': {
                 'limit_side_len': args.det_limit_side_len,
-                'limit_type': args.det_limit_type
+                'limit_type': args.det_limit_type,
+                'pad':args.det_pad,
+                'pad_size':args.det_pad_size
             }
         }, {
             'NormalizeImage': {
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index a558f490..9fb2e8e5 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -46,6 +46,8 @@ def init_args():
     parser.add_argument("--det_model_dir", type=str)
     parser.add_argument("--det_limit_side_len", type=float, default=960)
     parser.add_argument("--det_limit_type", type=str, default='max')
+    parser.add_argument("--det_pad", type=str2bool, default=False)
+    parser.add_argument("--det_pad_size", type=int, default=640)
 
     # DB parmas
     parser.add_argument("--det_db_thresh", type=float, default=0.3)
-- 
GitLab