# predict_det.py (6.6 KB)
# Last committed by LDOUBLEV
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
from ppocr.data.det.east_process import EASTProcessTest
from ppocr.data.det.db_process import DBProcessTest
from ppocr.postprocess.db_postprocess import DBPostProcess
from ppocr.postprocess.east_postprocess import EASTPostPocess
import copy
import numpy as np
import math
import time


class TextDetector(object):
    """Text detector combining preprocessing, inference and postprocessing.

    The detection algorithm ("DB" or "EAST") is selected by
    ``args.det_algorithm``; it decides which preprocessing and postprocessing
    operators are built. Calling an instance on an image returns the detected
    boxes and the elapsed time.
    """

    def __init__(self, args):
        """Build the pre/post-processing operators and the predictor.

        Args:
            args: Options object. Reads ``det_max_side_len`` and
                ``det_algorithm``, plus ``det_db_thresh`` /
                ``det_db_box_thresh`` for "DB" or ``det_east_score_thresh`` /
                ``det_east_cover_thresh`` / ``det_east_nms_thresh`` for
                "EAST". It is also forwarded to ``utility.create_predictor``.

        Raises:
            SystemExit: If ``args.det_algorithm`` is neither "DB" nor "EAST".
        """
        max_side_len = args.det_max_side_len
        self.det_algorithm = args.det_algorithm
        preprocess_params = {'max_side_len': max_side_len}
        postprocess_params = {}
        if self.det_algorithm == "DB":
            self.preprocess_op = DBProcessTest(preprocess_params)
            postprocess_params["thresh"] = args.det_db_thresh
            postprocess_params["box_thresh"] = args.det_db_box_thresh
            postprocess_params["max_candidates"] = 1000
            self.postprocess_op = DBPostProcess(postprocess_params)
        elif self.det_algorithm == "EAST":
            self.preprocess_op = EASTProcessTest(preprocess_params)
            postprocess_params["score_thresh"] = args.det_east_score_thresh
            postprocess_params["cover_thresh"] = args.det_east_cover_thresh
            postprocess_params["nms_thresh"] = args.det_east_nms_thresh
            self.postprocess_op = EASTPostPocess(postprocess_params)
        else:
            logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
            # Imported locally so this branch does not depend on a
            # module-level `sys` import; without it the exit call would fail
            # with NameError instead of terminating cleanly.
            import sys
            # Non-zero status: an unsupported algorithm is an error.
            sys.exit(1)

        self.predictor, self.input_tensor, self.output_tensors =\
            utility.create_predictor(args, mode="det")

    def order_points_clockwise(self, pts):
        """Order four points as top-left, top-right, bottom-right, bottom-left.

        Adapted from
        https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py

        Args:
            pts: Array of exactly four points; column 0 is treated as x and
                column 1 as y.

        Returns:
            A new float32 array ``[tl, tr, br, bl]``.
        """
        # sort the points based on their x-coordinates
        xSorted = pts[np.argsort(pts[:, 0]), :]

        # grab the left-most and right-most points from the sorted
        # x-coordinate points
        leftMost = xSorted[:2, :]
        rightMost = xSorted[2:, :]

        # now, sort the left-most coordinates according to their
        # y-coordinates so we can grab the top-left and bottom-left
        # points, respectively
        leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
        (tl, bl) = leftMost

        # same for the right-most pair: smaller y is top-right, larger y is
        # bottom-right
        rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
        (tr, br) = rightMost

        rect = np.array([tl, tr, br, bl], dtype="float32")
        return rect

    def expand_det_res(self, points, bbox_height, bbox_width, img_height,
                       img_width):
        """Grow a box outwards and clamp it to the image bounds.

        The margin is 20% of the shorter side for elongated boxes
        (height/width >= 2 or width/height >= 3) and 10% of the height
        otherwise. The same margin is applied horizontally and vertically.

        Args:
            points: Box corners ordered ``[tl, tr, br, bl]``, indexed as
                ``points[i, 0]`` (x) and ``points[i, 1]`` (y). Modified in
                place.
            bbox_height: Height of the box's bounding rectangle.
            bbox_width: Width of the box's bounding rectangle.
            img_height: Upper clamp for y coordinates.
            img_width: Upper clamp for x coordinates.

        Returns:
            The same ``points`` object with updated, integer-truncated
            coordinates.
        """
        if bbox_height * 1.0 / bbox_width >= 2.0:
            expand_w = bbox_width * 0.20
            expand_h = bbox_width * 0.20
        elif bbox_width * 1.0 / bbox_height >= 3.0:
            expand_w = bbox_height * 0.20
            expand_h = bbox_height * 0.20
        else:
            expand_w = bbox_height * 0.1
            expand_h = bbox_height * 0.1

        # Left corners (tl, bl) move left, right corners (tr, br) move right.
        points[0, 0] = int(max((points[0, 0] - expand_w), 0))
        points[1, 0] = int(min((points[1, 0] + expand_w), img_width))
        points[3, 0] = int(max((points[3, 0] - expand_w), 0))
        points[2, 0] = int(min((points[2, 0] + expand_w), img_width))

        # Top corners (tl, tr) move up, bottom corners (bl, br) move down.
        points[0, 1] = int(max((points[0, 1] - expand_h), 0))
        points[1, 1] = int(max((points[1, 1] - expand_h), 0))
        points[3, 1] = int(min((points[3, 1] + expand_h), img_height))
        points[2, 1] = int(min((points[2, 1] + expand_h), img_height))
        return points

    def filter_tag_det_res(self, dt_boxes, image_shape):
        """Reorder, filter and optionally expand detected boxes.

        Each box is put into clockwise order. Boxes whose top edge or left
        edge is at most 10 pixels long are dropped. Boxes that are nearly
        axis-aligned (top edge y-difference and left edge x-difference both
        at most 10) are expanded with ``expand_det_res``.

        Args:
            dt_boxes: Iterable of four-point boxes.
            image_shape: Image shape; the first two entries are read as
                height and width.

        Returns:
            ``np.array`` built from the list of kept boxes.
        """
        img_height, img_width = image_shape[0:2]
        dt_boxes_new = []
        for box in dt_boxes:
            box = self.order_points_clockwise(box)
            left = int(np.min(box[:, 0]))
            right = int(np.max(box[:, 0]))
            top = int(np.min(box[:, 1]))
            bottom = int(np.max(box[:, 1]))
            bbox_height = bottom - top
            bbox_width = right - left
            # Skew measures: vertical offset along the top edge and
            # horizontal offset along the left edge.
            diffh = math.fabs(box[0, 1] - box[1, 1])
            diffw = math.fabs(box[0, 0] - box[3, 0])
            # Edge lengths of the quadrilateral (top edge and left edge).
            rect_width = int(np.linalg.norm(box[0] - box[1]))
            rect_height = int(np.linalg.norm(box[0] - box[3]))
            if rect_width <= 10 or rect_height <= 10:
                continue
            if diffh <= 10 and diffw <= 10:
                # Expand a copy so the ordered box itself is left untouched.
                box = self.expand_det_res(
                    copy.deepcopy(box), bbox_height, bbox_width, img_height,
                    img_width)
            dt_boxes_new.append(box)
        dt_boxes = np.array(dt_boxes_new)
        return dt_boxes

    def __call__(self, img):
        """Detect text boxes in an image.

        Args:
            img: Input image array; its ``shape`` supplies the height and
                width used to clamp expanded boxes.

        Returns:
            ``(dt_boxes, elapse)`` where ``dt_boxes`` is the filtered box
            array and ``elapse`` is the time in seconds spent on inference
            and postprocessing, or ``(None, 0)`` if preprocessing yields no
            image.
        """
        # Only the original shape is needed later, so capture it up front
        # instead of copying the whole image.
        ori_shape = img.shape
        im, ratio_list = self.preprocess_op(img)
        if im is None:
            return None, 0
        im = im.copy()
        starttime = time.time()
        self.input_tensor.copy_from_cpu(im)
        self.predictor.zero_copy_run()
        outputs = []
        for output_tensor in self.output_tensors:
            output = output_tensor.copy_to_cpu()
            outputs.append(output)
        outs_dict = {}
        if self.det_algorithm == "EAST":
            outs_dict['f_score'] = outputs[0]
            outs_dict['f_geo'] = outputs[1]
        else:
            outs_dict['maps'] = [outputs[0]]
        # The postprocess op is called with a one-element list of ratios and
        # its first result is used.
        dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])
        dt_boxes = dt_boxes_list[0]
        dt_boxes = self.filter_tag_det_res(dt_boxes, ori_shape)
        elapse = time.time() - starttime
        return dt_boxes, elapse


if __name__ == "__main__":
    # Script entry point: run the detector over every image found under
    # args.image_dir, print per-image timings, draw the detection results
    # and report the average prediction time.
    args = utility.parse_args()
    image_file_list = utility.get_image_file_list(args.image_dir)
    text_detector = TextDetector(args)
    count = 0
    total_time = 0
    for image_file in image_file_list:
        img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        dt_boxes, elapse = text_detector(img)
        if dt_boxes is None:
            # The detector returns (None, 0) when preprocessing yields no
            # image; there is nothing to time or draw for this file.
            logger.info("error in preprocessing image:{}".format(image_file))
            continue
        # The first processed image is excluded from the running total.
        if count > 0:
            total_time += elapse
        count += 1
        print("Predict time of %s:" % image_file, elapse)
        utility.draw_text_det_res(dt_boxes, image_file)
    # The average is taken over count - 1 images (all but the first), so it
    # is only defined when at least two images were processed.
    if count > 1:
        print("Avg Time:", total_time / (count - 1))
    else:
        logger.info("not enough images to compute average time")