db_process.py 7.1 KB
Newer Older
L
LDOUBLEV 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

import math
import cv2
import numpy as np
import json
import sys
L
LDOUBLEV 已提交
20
from ppocr.utils.utility import initial_logger, check_and_read_gif
T
tink2123 已提交
21
logger = initial_logger()
L
LDOUBLEV 已提交
22

23 24 25 26
from .data_augment import AugmentData
from .random_crop_data import RandomCropData
from .make_shrink_map import MakeShrinkMap
from .make_border_map import MakeBorderMap
L
LDOUBLEV 已提交
27 28 29


class DBProcessTrain(object):
    """
    DB pre-process for Train mode

    Converts one raw label line (image path + JSON annotation) into the
    arrays returned by ``__call__``: the normalized image plus the shrink
    and threshold maps/masks produced by the imported augmentation stages.
    """

    def __init__(self, params):
        """
        args:
            params(dict): must contain 'img_set_dir' (string prepended
                verbatim to every image path) and 'image_shape'
                (everything after its first entry is passed to
                RandomCropData).
        """
        self.img_set_dir = params['img_set_dir']
        self.image_shape = params['image_shape']

    def order_points_clockwise(self, pts):
        """
        Pick four corner points out of ``pts`` and return them in a fixed
        order.

        args:
            pts(array): array with shape [n, 2]
        return(array):
            float32 array with shape [4, 2] holding, in order, the point
            with the smallest column sum, the smallest (col1 - col0), the
            largest column sum and the largest (col1 - col0). For (x, y)
            image coordinates that is presumably top-left, top-right,
            bottom-right, bottom-left.
        """
        rect = np.zeros((4, 2), dtype="float32")
        coord_sum = pts.sum(axis=1)
        rect[0] = pts[np.argmin(coord_sum)]
        rect[2] = pts[np.argmax(coord_sum)]
        # np.diff along axis 1 yields (second column - first column).
        coord_diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(coord_diff)]
        rect[3] = pts[np.argmax(coord_diff)]
        return rect

    def make_data_dict(self, imgvalue, entry):
        """
        Build the sample dict consumed by the augmentation stages.

        args:
            imgvalue(array): image array; only its first two shape entries
                are recorded under 'shape'
            entry(list): annotation dicts, each with 'points' and
                'transcription'
        return(dict):
            keys 'image', 'shape', 'polys', 'texts', 'ignore_tags'.
            Annotations whose box has zero area, or whose points cannot be
            parsed, are skipped.
        """
        boxes = []
        texts = []
        ignores = []
        for rect in entry:
            points = rect['points']
            transcription = rect['transcription']
            try:
                box = self.order_points_clockwise(
                    np.array(points).reshape(-1, 2))
                if cv2.contourArea(box) > 0:
                    boxes.append(box)
                    texts.append(transcription)
                    # '*' and '###' mark regions flagged in 'ignore_tags'.
                    ignores.append(transcription in ['*', '###'])
            except Exception:
                # Best effort: a malformed annotation is skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                print('load label failed!')
        return {
            'image': imgvalue,
            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
            'polys': np.array(boxes),
            'texts': texts,
            'ignore_tags': ignores,
        }

    def NormalizeImage(self, data):
        """
        Scale data['image'] to [0, 1], apply a fixed per-channel mean/std
        normalization and reorder it from [h, w, c] to [c, h, w].

        The result is stored back under data['image']; the same dict is
        returned.
        """
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = data['image'].astype(np.float32, copy=False)
        # The division allocates a new array, so the in-place steps below
        # never touch the caller's original image buffer.
        im = im / 255
        im -= img_mean
        im /= img_std
        data['image'] = im.transpose((2, 0, 1))
        return data

    def FilterKeys(self, data):
        """
        Remove the annotation-only keys ('polys', 'texts', 'ignore_tags',
        'shape') from ``data`` in place and return it.
        """
        for key in ('polys', 'texts', 'ignore_tags', 'shape'):
            data.pop(key, None)
        return data

    def convert_label_infor(self, label_infor):
        """
        Parse one label line.

        args:
            label_infor(bytes): "<relative image path>\\t<json label>",
                optionally starting with a UTF-8 BOM
        return(tuple):
            img_path (img_set_dir + relative path), label (decoded JSON)
        """
        # 'utf-8-sig' decodes UTF-8 and strips a leading BOM in one step.
        line = label_infor.decode('utf-8-sig')
        substr = line.strip("\n").split("\t")
        img_path = self.img_set_dir + substr[0]
        label = json.loads(substr[1])
        return img_path, label

    def __call__(self, label_infor):
        """
        Load the image named by ``label_infor`` and run the training
        pre-processing pipeline on it.

        return:
            None when the image cannot be read; otherwise the tuple
            (image, shrink_map, shrink_mask, threshold_map,
            threshold_mask).
        """
        img_path, gt_label = self.convert_label_infor(label_infor)
        # check_and_read_gif returns (image, flag); a false flag means the
        # path was not handled there, so fall back to cv2.imread.
        imgvalue, flag = check_and_read_gif(img_path)
        if not flag:
            imgvalue = cv2.imread(img_path)
        if imgvalue is None:
            logger.info("{} does not exist!".format(img_path))
            return None
        # Expand single-channel input to three channels.
        if imgvalue.ndim == 2 or imgvalue.shape[2] == 1:
            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)
        data = RandomCropData(data, self.image_shape[1:])
        data = MakeShrinkMap(data)
        data = MakeBorderMap(data)
        data = self.NormalizeImage(data)
        data = self.FilterKeys(data)
        return data['image'], data['shrink_map'], data['shrink_mask'], data[
            'threshold_map'], data['threshold_mask']


class DBProcessTest(object):
    """
    DB pre-process for Test mode

    Resizes an image (either to a size derived from ``max_side_len`` or to
    a fixed ``test_image_shape``), normalizes it and adds a leading batch
    axis.
    """

    def __init__(self, params):
        """
        args:
            params(dict): optional keys
                'test_image_shape': (h, w) target size; when present the
                    image is always resized to exactly this size
                'max_side_len': upper bound for the longer image side
                    when no fixed shape is given (default 2400)
        """
        super(DBProcessTest, self).__init__()
        self.resize_type = 0
        if 'test_image_shape' in params:
            self.image_shape = params['test_image_shape']
            self.resize_type = 1
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

    @staticmethod
    def _snap_to_32(side):
        """Map a side length onto the multiple of 32 used by the network."""
        if side % 32 == 0:
            return side
        if side // 32 <= 1:
            return 32
        # NOTE(review): this drops one multiple of 32 more than plain
        # rounding down would (e.g. 100 -> 64, not 96). Kept unchanged
        # because it determines the network input size.
        return (side // 32 - 1) * 32

    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, (ratio_h, ratio_w)
            or None, (None, None) when a target side would be <= 0
        """
        h, w, _ = im.shape

        # limit the max side
        longest = max(h, w)
        if longest > self.max_side_len:
            ratio = float(self.max_side_len) / longest
        else:
            ratio = 1.
        resize_h = self._snap_to_32(int(h * ratio))
        resize_w = self._snap_to_32(int(w * ratio))

        if resize_w <= 0 or resize_h <= 0:
            return None, (None, None)
        try:
            im = cv2.resize(im, (resize_w, resize_h))
        except Exception:
            # Report the offending sizes, then propagate the real error
            # instead of exiting the interpreter with status 0.
            print(im.shape, resize_w, resize_h)
            raise
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def resize_image_type1(self, im):
        """
        resize image to the fixed (h, w) stored in self.image_shape
        args:
            img(array): array whose first two dims are (h, w)
        return(tuple):
            img, (ratio_h, ratio_w)
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = im.shape[:2]  # (h, w, c)
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        return im, (ratio_h, ratio_w)

    def normalize(self, im):
        """
        Scale ``im`` to [0, 1], apply a fixed per-channel mean/std
        normalization and reorder it from [h, w, c] to [c, h, w].

        args:
            im(array): array with shape [h, w, 3]
        return(array):
            float32 array with shape [3, h, w]
        """
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        # The division allocates a new array, so the caller's image is
        # never modified by the in-place steps below.
        im = im / 255
        for channel, (mean, std) in enumerate(zip(img_mean, img_std)):
            im[:, :, channel] -= mean
            im[:, :, channel] /= std
        return im.transpose((2, 0, 1))

    def __call__(self, im):
        """
        args:
            im(array): array with shape [h, w, c]
        return(list):
            [img, (ratio_h, ratio_w)] where img has shape [1, c, h', w'];
            [None, (None, None)] when the image could not be resized
        """
        if self.resize_type == 0:
            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
        else:
            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
        if im is None:
            # resize_image_type0 signals a degenerate image with None;
            # pass that on instead of failing inside normalize().
            return [None, (ratio_h, ratio_w)]
        im = self.normalize(im)
        im = im[np.newaxis, :]
        return [im, (ratio_h, ratio_w)]