predict_system.py 9.4 KB
Newer Older
L
LDOUBLEV 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 15
import os
import sys
W
WenmuZhou 已提交
16

17
__dir__ = os.path.dirname(os.path.abspath(__file__))
18
sys.path.append(__dir__)
19
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
L
LDOUBLEV 已提交
20

L
LDOUBLEV 已提交
21 22
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

L
LDOUBLEV 已提交
23 24 25 26
import cv2
import copy
import numpy as np
import time
W
WenmuZhou 已提交
27
import logging
L
LDOUBLEV 已提交
28
from PIL import Image
W
WenmuZhou 已提交
29 30 31
import tools.infer.utility as utility
import tools.infer.predict_rec as predict_rec
import tools.infer.predict_det as predict_det
W
WenmuZhou 已提交
32
import tools.infer.predict_cls as predict_cls
W
WenmuZhou 已提交
33 34
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
L
LDOUBLEV 已提交
35 36
from tools.infer.utility import draw_ocr_box_txt, get_current_memory_mb
import tools.infer.benchmark_utils as benchmark_utils
W
WenmuZhou 已提交
37 38
logger = get_logger()

L
LDOUBLEV 已提交
39 40 41

class TextSystem(object):
    """Text detection -> optional angle classification -> text recognition.

    Calling an instance on an image returns the detected boxes together with
    the recognition results whose score reaches ``drop_score``.
    """

    def __init__(self, args):
        """Create the sub-predictors from the parsed arguments.

        Args:
            args: argument namespace. ``show_log``, ``use_angle_cls`` and
                ``drop_score`` are read here; the whole namespace is also
                forwarded to the detector, recognizer and classifier.
        """
        if not args.show_log:
            logger.setLevel(logging.INFO)

        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
        self.drop_score = args.drop_score
        # The classifier is only built when angle classification is enabled.
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

    def get_rotate_crop_image(self, img, points):
        """Warp the quadrilateral ``points`` of ``img`` into an upright crop.

        Args:
            img: source image array.
            points: four corner points ordered so that points[0]-points[1]
                and points[3]-points[2] are the horizontal edges (matching
                the target corners top-left, top-right, bottom-right,
                bottom-left). NOTE(review): expected to be float32, like
                ``pts_std`` below, for cv2.getPerspectiveTransform - confirm.

        Returns:
            The warped crop. It is passed through ``np.rot90`` when its
            height is at least 1.5 times its width.
        """
        # Target width/height: the longer of each pair of opposite edges.
        img_crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3])))
        img_crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2])))
        pts_std = np.float32([[0, 0], [img_crop_width, 0],
                              [img_crop_width, img_crop_height],
                              [0, img_crop_height]])
        M = cv2.getPerspectiveTransform(points, pts_std)
        dst_img = cv2.warpPerspective(
            img,
            M, (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE,
            flags=cv2.INTER_CUBIC)
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        # Tall, narrow crops are rotated before being returned.
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        """Debug helper: save every crop to ./output and log its result.

        Args:
            img_crop_list: list of cropped images.
            rec_res: recognition results, indexed in parallel with
                ``img_crop_list``.
        """
        # Make sure the target directory exists before writing crops into it.
        os.makedirs("./output", exist_ok=True)
        for bno, img_crop in enumerate(img_crop_list):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop)
            # The first argument of logger.info must be the format string.
            logger.info("%s, %s", bno, rec_res[bno])

    def __call__(self, img, cls=True):
        """Run the full OCR pipeline on one image.

        Args:
            img: input image array.
            cls: when True (and ``use_angle_cls`` is enabled) the crops are
                passed through the angle classifier before recognition.

        Returns:
            ``(filter_boxes, filter_rec_res)``: the boxes and the recognition
            results with score >= ``drop_score``; ``(None, None)`` when the
            detector returns no boxes object.
        """
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)

        # Check for None before anything touches len(dt_boxes).
        if dt_boxes is None:
            return None, None
        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))

        dt_boxes = sorted_boxes(dt_boxes)

        # Each crop works on a private copy of its box.
        img_crop_list = [
            self.get_rotate_crop_image(ori_im, copy.deepcopy(box))
            for box in dt_boxes
        ]
        if self.use_angle_cls and cls:
            img_crop_list, _, elapse = self.text_classifier(img_crop_list)
            logger.debug("cls num  : {}, elapse : {}".format(
                len(img_crop_list), elapse))

        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))

        # Keep only the results whose score reaches the threshold.
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            _text, score = rec_result
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_result)
        return filter_boxes, filter_rec_res
L
LDOUBLEV 已提交
125 126


127 128 129 130
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right.

    Boxes are first ordered by the (y, x) of their first point. Boxes whose
    first-point y values differ by less than 10 are then treated as lying on
    the same text line and ordered by x.

    args:
        dt_boxes(array): detected text boxes; each box is indexed as
            box[point][coord] with coord 0 = x and coord 1 = y
            (presumably shape [N, 4, 2]).
    return:
        list of the boxes in reading order
    """
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    for i in range(len(_boxes) - 1):
        # Bubble box i + 1 backwards until it is in place; a single
        # adjacent swap is not enough when 3+ boxes share a text line.
        for j in range(i, -1, -1):
            same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes


148
def main(args):
    """Run the OCR system over every image in ``args.image_dir``.

    For each readable image the recognized texts are logged and a
    visualization is written to ./inference_results/. Afterwards the
    detector and recognizer timing reports are handed to
    ``benchmark_utils.PaddleInferBenchmark``.

    Args:
        args: argument namespace; reads ``image_dir``, ``vis_font_path``,
            ``drop_score``, ``benchmark``, ``det_model_dir``,
            ``rec_model_dir``, ``rec_batch_num``, ``precision`` and
            ``save_log_path`` (and is forwarded to ``TextSystem``).
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
    draw_img_save = "./inference_results/"
    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    for idx, image_file in enumerate(image_file_list):
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time += elapse
        # Sample memory / GPU usage on every 20th image when benchmarking.
        if args.benchmark and idx % 20 == 0:
            cm, gm, gu = get_current_memory_mb(0)
            cpu_mem += cm
            gpu_mem += gm
            gpu_util += gu
            count += 1

        logger.info(
            str(idx) + "  Predict time of %s: %.3fs" % (image_file, elapse))

        # TextSystem returns (None, None) when the detector yields nothing;
        # there is nothing to log or draw for such an image.
        if dt_boxes is None or rec_res is None:
            logger.info("no text detected in image:{}".format(image_file))
            continue

        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr_box_txt(
                image,
                dt_boxes,
                txts,
                scores,
                drop_score=drop_score,
                font_path=font_path)
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            save_name = os.path.basename(image_file)
            if flag:
                # Images read through the gif path are saved as png.
                save_name = os.path.splitext(save_name)[0] + ".png"
            save_path = os.path.join(draw_img_save, save_name)
            # draw_img is RGB; reverse the channel axis for cv2.imwrite.
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            logger.info("The visualized image saved in {}".format(save_path))

    logger.info("The predict total time is {}".format(time.time() - _st))
    logger.info("\nThe predict total time is {}".format(total_time))

    # Average the sampled resource usage; with no samples there is nothing
    # to average (and dividing by count would raise ZeroDivisionError).
    if args.benchmark and count > 0:
        mems = {
            'cpu_rss_mb': cpu_mem / count,
            'gpu_rss_mb': gpu_mem / count,
            'gpu_util': gpu_util * 100 / count
        }
    else:
        mems = None
    det_time_dict = text_sys.text_detector.det_times.report(average=True)
    rec_time_dict = text_sys.text_recognizer.rec_times.report(average=True)

    # One benchmark log per stage: (tag, predictor, model dir, batch size,
    # timing report).
    stages = (
        ("Det", text_sys.text_detector, args.det_model_dir, 1,
         det_time_dict),
        ("Rec", text_sys.text_recognizer, args.rec_model_dir,
         args.rec_batch_num, rec_time_dict),
    )
    for tag, predictor, model_dir, batch_size, time_dict in stages:
        model_info = {
            # normpath drops a trailing separator so the name is never ''.
            'model_name': os.path.basename(os.path.normpath(model_dir)),
            'precision': args.precision
        }
        data_info = {
            'batch_size': batch_size,
            'shape': 'dynamic_shape',
            'data_num': time_dict['img_num']
        }
        perf_info = {
            'preprocess_time_s': time_dict['preprocess_time'],
            'inference_time_s': time_dict['inference_time'],
            'postprocess_time_s': time_dict['postprocess_time'],
            'total_time_s': time_dict['total_time']
        }
        benchmark_log = benchmark_utils.PaddleInferBenchmark(
            predictor.config, model_info, data_info, perf_info, mems,
            args.save_log_path)
        benchmark_log(tag)


if __name__ == "__main__":
    # Script entry point: parse the command-line options, then run the system.
    cli_args = utility.parse_args()
    main(cli_args)