predict_system.py 8.1 KB
Newer Older
L
LDOUBLEV 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 15
import os
import sys
W
WenmuZhou 已提交
16

17
__dir__ = os.path.dirname(os.path.abspath(__file__))
18
sys.path.append(__dir__)
19
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
L
LDOUBLEV 已提交
20

L
LDOUBLEV 已提交
21 22
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'

L
LDOUBLEV 已提交
23 24 25 26
import cv2
import copy
import numpy as np
import time
L
LDOUBLEV 已提交
27
from PIL import Image
W
WenmuZhou 已提交
28 29 30
import tools.infer.utility as utility
import tools.infer.predict_rec as predict_rec
import tools.infer.predict_det as predict_det
W
WenmuZhou 已提交
31
import tools.infer.predict_cls as predict_cls
W
WenmuZhou 已提交
32 33
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
W
WenmuZhou 已提交
34
from tools.infer.utility import draw_ocr_box_txt, get_current_memory_mb, get_rotate_crop_image
L
LDOUBLEV 已提交
35
import tools.infer.benchmark_utils as benchmark_utils
W
WenmuZhou 已提交
36 37
logger = get_logger()

L
LDOUBLEV 已提交
38 39 40 41 42

class TextSystem(object):
    """Chain text detection, optional angle classification and recognition.

    The sub-models are built from the same ``args`` object:
    ``predict_det.TextDetector``, ``predict_rec.TextRecognizer`` and, only
    when ``args.use_angle_cls`` is truthy, ``predict_cls.TextClassifier``.
    """

    def __init__(self, args):
        """Build the sub-models and remember the filtering options.

        args:
            args: parsed options; this class reads ``use_angle_cls`` and
                ``drop_score`` and forwards the whole object to each model.
        """
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
        self.drop_score = args.drop_score
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
        """Debug helper: dump every crop to ./output and log its result.

        args:
            img_crop_list: cropped text images, one per detected box.
            rec_res: recognition results, indexed in step with the crops.
        """
        for bno, img_crop in enumerate(img_crop_list):
            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop)
            # The message must be a string; passing the index as the message
            # and the result as %-args makes logging fail to format it.
            logger.info("{}, {}".format(bno, rec_res[bno]))

    def __call__(self, img, cls=True):
        """Run the full pipeline on one image.

        args:
            img: input image; must support ``.copy()``.
            cls(bool): when False the angle classifier is skipped even if it
                was enabled at construction time.
        return:
            ``(boxes, rec_res)`` holding only the entries whose score is at
            least ``self.drop_score``, or ``(None, None)`` when the detector
            returns no boxes object.
        """
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)

        # Check for a missing result before len() is taken for the log line,
        # otherwise this guard can never be reached.
        if dt_boxes is None:
            return None, None
        logger.info("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))

        dt_boxes = sorted_boxes(dt_boxes)

        # Each box is deep-copied before cropping so the helper cannot
        # modify the box that is returned to the caller.
        img_crop_list = [
            get_rotate_crop_image(ori_im, copy.deepcopy(box))
            for box in dt_boxes
        ]
        if self.use_angle_cls and cls:
            img_crop_list, angle_list, elapse = self.text_classifier(
                img_crop_list)
            logger.info("cls num  : {}, elapse : {}".format(
                len(img_crop_list), elapse))

        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.info("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))

        # Keep only the boxes whose recognition score passes the threshold.
        filter_boxes, filter_rec_res = [], []
        for box, rec_result in zip(dt_boxes, rec_res):
            text, score = rec_result
            if score >= self.drop_score:
                filter_boxes.append(box)
                filter_rec_res.append(rec_result)
        return filter_boxes, filter_rec_res
L
LDOUBLEV 已提交
87 88


89 90 91 92
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in order from top to bottom, left to right
    args:
        dt_boxes(array): detected text boxes; only the first point of each
            box is read, as ``box[0][0]`` (x) and ``box[0][1]`` (y).
            Presumably shape [N, 4, 2] - confirm against the detector.
    return:
        list of the same boxes in reading order
    """
    # Primary ordering: y of the first point, then x of the first point.
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    # Boxes whose first points differ by less than 10 in y are treated as
    # being on the same text line and must be ordered by x. A single pass of
    # adjacent swaps is not enough when three or more boxes share a line, so
    # each box is bubbled backwards until it is in place.
    for i in range(len(_boxes) - 1):
        for j in range(i, -1, -1):
            same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return _boxes


110
def main(args):
    """Run OCR on every image under ``args.image_dir`` and log the results.

    For each readable image the function runs ``TextSystem``, logs the
    recognized text with its score, and writes a visualization into
    ``./inference_results/``. After the loop it emits one benchmark log for
    the detector and one for the recognizer.

    args:
        args: parsed options. Read here: ``image_dir``, ``vis_font_path``,
            ``drop_score``, ``benchmark``, ``det_model_dir``,
            ``rec_model_dir``, ``precision``, ``rec_batch_num`` and
            ``save_log_path``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    text_sys = TextSystem(args)
    is_visualize = True
    font_path = args.vis_font_path
    drop_score = args.drop_score
    draw_img_save = "./inference_results/"
    total_time = 0
    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
    _st = time.time()
    count = 0
    for idx, image_file in enumerate(image_file_list):
        # flag is truthy when the helper decoded the file itself; otherwise
        # fall back to OpenCV.
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        elapse = time.time() - starttime
        total_time += elapse
        # Sample resource usage on every 20th image to keep overhead low.
        if args.benchmark and idx % 20 == 0:
            cm, gm, gu = get_current_memory_mb(0)
            cpu_mem += cm
            gpu_mem += gm
            gpu_util += gu
            count += 1

        logger.info(
            str(idx) + "  Predict time of %s: %.3fs" % (image_file, elapse))

        # TextSystem returns (None, None) when the detector yields nothing;
        # there is then nothing to log or draw for this image.
        if dt_boxes is None or rec_res is None:
            logger.info("no text detected in image:{}".format(image_file))
            continue

        for text, score in rec_res:
            logger.info("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            boxes = dt_boxes
            txts = [res[0] for res in rec_res]
            scores = [res[1] for res in rec_res]

            draw_img = draw_ocr_box_txt(
                image,
                boxes,
                txts,
                scores,
                drop_score=drop_score,
                font_path=font_path)
            if not os.path.exists(draw_img_save):
                os.makedirs(draw_img_save)
            if flag:
                # Files decoded by check_and_read_gif are saved as PNG.
                image_file = image_file[:-3] + "png"
            save_path = os.path.join(draw_img_save,
                                     os.path.basename(image_file))
            # draw_img is RGB; reverse the channel axis for cv2.imwrite.
            cv2.imwrite(save_path, draw_img[:, :, ::-1])
            logger.info("The visualized image saved in {}".format(save_path))

    logger.info("The predict total time is {}".format(time.time() - _st))
    logger.info("\nThe predict total time is {}".format(total_time))

    # Average the sampled resource usage. count stays 0 when no sample was
    # taken (empty directory, or the sampled images failed to load), so
    # guard the division.
    if args.benchmark and count > 0:
        mems = {
            'cpu_rss_mb': cpu_mem / count,
            'gpu_rss_mb': gpu_mem / count,
            'gpu_util': gpu_util * 100 / count
        }
    else:
        mems = None
    det_time_dict = text_sys.text_detector.det_times.report(average=True)
    rec_time_dict = text_sys.text_recognizer.rec_times.report(average=True)

    # construct det log information
    model_info = {
        'model_name': args.det_model_dir.split('/')[-1],
        'precision': args.precision
    }
    data_info = {
        'batch_size': 1,
        'shape': 'dynamic_shape',
        'data_num': det_time_dict['img_num']
    }
    perf_info = {
        'preprocess_time_s': det_time_dict['preprocess_time'],
        'inference_time_s': det_time_dict['inference_time'],
        'postprocess_time_s': det_time_dict['postprocess_time'],
        'total_time_s': det_time_dict['total_time']
    }

    benchmark_log = benchmark_utils.PaddleInferBenchmark(
        text_sys.text_detector.config, model_info, data_info, perf_info, mems,
        args.save_log_path)
    benchmark_log("Det")

    # construct rec log information
    model_info = {
        'model_name': args.rec_model_dir.split('/')[-1],
        'precision': args.precision
    }
    data_info = {
        'batch_size': args.rec_batch_num,
        'shape': 'dynamic_shape',
        'data_num': rec_time_dict['img_num']
    }
    perf_info = {
        'preprocess_time_s': rec_time_dict['preprocess_time'],
        'inference_time_s': rec_time_dict['inference_time'],
        'postprocess_time_s': rec_time_dict['postprocess_time'],
        'total_time_s': rec_time_dict['total_time']
    }
    benchmark_log = benchmark_utils.PaddleInferBenchmark(
        text_sys.text_recognizer.config, model_info, data_info, perf_info, mems,
        args.save_log_path)
    benchmark_log("Rec")


if __name__ == "__main__":
    # Script entry point: parse the command-line options, then run the
    # pipeline with them.
    cli_args = utility.parse_args()
    main(cli_args)