Merge pull request #7 from LDOUBLEV/fixocr

add visualize code

Merge pull request #7 from LDOUBLEV/fixocr
add visualize code
8d602649 · dyning · GitHub · de8b5b25 · 759e7385 · 8d602649
7 changed files
--- a/doc/simfang.ttf
+++ b/doc/simfang.ttf
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@@ -109,6 +109,8 @@ class RecModel(object):
                decoded_out, 'label':label}
            return loader, outputs
        elif mode == "export":
-            return [image, {'decoded_out': decoded_out}]
+            predict = predicts['predict']
+            predict = fluid.layers.softmax(predict)
+            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
        else:
            return loader, {'decoded_out': decoded_out}
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -81,7 +81,7 @@ class DBPostProcess(object):
            scores[index] = score
        return boxes, scores
-    def unclip(self, box, unclip_ratio=1.5):
+    def unclip(self, box, unclip_ratio=2.0):
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()

--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -52,7 +52,8 @@ def main():
    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
-    program.check_gpu(True)
+    # program.check_gpu(True)
+    use_gpu = False
    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -116,10 +116,10 @@ class TextDetector(object):
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 10 or rect_height <= 10:
                continue
-            if diffh <= 10 and diffw <= 10:
+            # if diffh <= 10 and diffw <= 10:
-                box = self.expand_det_res(
+            #     box = self.expand_det_res(
-                    copy.deepcopy(box), bbox_height, bbox_width, img_height,
+            #         copy.deepcopy(box), bbox_height, bbox_width, img_height,
-                    img_width)
+            #         img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

--- a/tools/infer/predict_eval.py
+++ b/tools/infer/predict_eval.py
@@ -22,28 +22,27 @@ import numpy as np
 import math
 import time
 import json
+import os
+from PIL import Image, ImageDraw, ImageFont
+from tools.infer.utility import draw_ocr
+from ppocr.utils.utility import get_image_file_list
 if __name__ == "__main__":
    args = utility.parse_args()
    text_sys = predict_system.TextSystem(args)
-    image_file_list = []
+    if not os.path.exists(args.image_dir):
-    label_file_path = "./eval_perform/gt_res/test_chinese_ic15_500_4pts.txt"
+        raise Exception("{} not exists !!".format(args.image_dir))
-    img_set_path = "./eval_perform/"
+    image_file_list = get_image_file_list(args.image_dir)
-    with open(label_file_path, "rb") as fin:
-        lines = fin.readlines()
-        for line in lines:
-            substr = line.decode('utf-8').strip("\n").split("\t")
-            if "lsvt" in substr[0]:
-                continue
-            image_file_list.append(substr[0])
    total_time_all = 0
    count = 0
-    save_path = "./output/predict.txt"
+    save_path = "./inference_output/predict.txt"
+    if not os.path.exists(os.path.dirname(save_path)):
+        os.makedirs(os.path.dirname(save_path))
    fout = open(save_path, "wb")
    for image_name in image_file_list:
-        image_file = img_set_path + image_name
+        image_file = image_name
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
@@ -68,6 +67,23 @@ if __name__ == "__main__":
                "points": points,
                "scores": score * 1.0
            })
+        # draw the predicted boxes and text on the image and save the
+        # result under <dirname(save_path)>/inference_draw/
+        image = Image.open(image_file)
+        boxes, txts, scores = [], [], []
+        for dic in bbox_list:
+            boxes.append(dic['points'])
+            txts.append(dic['transcription'])
+            scores.append(round(dic['scores'], 3))
+        new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True)
+        draw_img_save = os.path.join(
+            os.path.dirname(save_path), "inference_draw",
+            os.path.basename(image_file))
+        if not os.path.exists(os.path.dirname(draw_img_save)):
+            os.makedirs(os.path.dirname(draw_img_save))
+        cv2.imwrite(draw_img_save, new_img[:, :, ::-1])
+        print("drawed img saved in {}".format(draw_img_save))
+        # save predicted results in txt file
        otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
        fout.write(otstr.encode('utf-8'))
    avg_time = total_time_all / count

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -21,6 +21,8 @@ from paddle.fluid.core import AnalysisConfig
 from paddle.fluid.core import create_paddle_predictor
 import cv2
 import numpy as np
+import json
+from PIL import Image, ImageDraw, ImageFont
 def parse_args():
@@ -108,3 +110,59 @@ def draw_text_det_res(dt_boxes, img_path):
        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
    img_name_pure = img_path.split("/")[-1]
    cv2.imwrite("./output/%s" % img_name_pure, src_im)
+def draw_ocr(image, boxes, txts, scores, draw_txt):
+    """Outline OCR boxes on an image and optionally append a text panel.
+
+    Args:
+        image: PIL image to annotate. It is copied, never modified.
+        boxes: sequence of quadrilaterals, each indexable as four points
+            whose first two items are x and y.
+        txts: recognized strings; boxes are paired with them via ``zip``,
+            so only ``min(len(boxes), len(txts))`` boxes are outlined.
+        scores: values indexed in parallel with ``txts`` and printed
+            after each string.
+        draw_txt: when true, an 800-pixel-wide white panel listing
+            ``index:  text    score`` is concatenated on the right.
+
+    Returns:
+        A numpy array in RGB channel order when ``draw_txt`` is true,
+        otherwise the annotated PIL image.
+    """
+    from PIL import Image, ImageDraw, ImageFont
+    w, h = image.size
+    img = image.copy()
+    draw = ImageDraw.Draw(img)
+    for box, _ in zip(boxes, txts):
+        # Connect corner k to corner k + 1, wrapping around to close the quad.
+        for k in range(4):
+            start = (box[k][0], box[k][1])
+            end = (box[(k + 1) % 4][0], box[(k + 1) % 4][1])
+            draw.line([start, end], fill='red')
+    if draw_txt:
+        txt_color = (0, 0, 0)
+        font_size = 30
+        # Build the white panel directly as an RGB image. The previous
+        # ``np.ones(dtype=np.int8) * 255`` overflowed int8.
+        panel = Image.new("RGB", (800, h), (255, 255, 255))
+        if txts:  # nothing recognized: keep the panel blank, avoid // 0
+            panel_draw = ImageDraw.Draw(panel)
+            # Load the font once. The file shipped in the repo is
+            # doc/simfang.ttf (lower-case extension), which matters on
+            # case-sensitive filesystems.
+            font = ImageFont.truetype(
+                "./doc/simfang.ttf", font_size, encoding="utf-8")
+            per_line = h // len(txts)
+            # Use a 40 px line pitch when it fits, else squeeze the lines.
+            gap = 40 if per_line >= font_size else per_line
+            for i, txt in enumerate(txts):
+                new_txt = str(i) + ':  ' + txt + '    ' + str(scores[i])
+                panel_draw.text(
+                    (20, gap * (i + 1)), new_txt, txt_color, font=font)
+        # Force three channels so grayscale/RGBA inputs can be concatenated
+        # with the RGB panel.
+        img = np.concatenate(
+            [np.array(img.convert("RGB")), np.array(panel)], axis=1)
+    return img
+if __name__ == '__main__':
+    # Manual smoke test: read the first record of a prediction file
+    # ("<image path>\t<json list of boxes>"), redraw it with draw_ocr and
+    # write the visualization into the current directory.
+    import os  # local import so this block does not rely on unseen imports
+    test_img = "./doc/test_v2"
+    predict_txt = "./doc/predict.txt"
+    # Only the first line is needed; the context manager closes the file.
+    with open(predict_txt, 'r') as f:
+        first_line = f.readline()
+    img_path, anno = first_line.strip().split('\t')
+    img_name = os.path.basename(img_path)
+    img_path = os.path.join(test_img, img_name)
+    image = Image.open(img_path)
+    records = json.loads(anno)
+    boxes, txts, scores = [], [], []
+    for dic in records:
+        boxes.append(dic['points'])
+        txts.append(dic['transcription'])
+        scores.append(round(dic['scores'], 3))
+    new_img = draw_ocr(image, boxes, txts, scores, draw_txt=True)
+    # draw_ocr returns RGB; cv2.imwrite expects BGR, so flip the channels.
+    cv2.imwrite(img_name, new_img[:, :, ::-1])