# det_model.py
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid

from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from copy import deepcopy


class DetModel(object):
    """Text-detection network assembled from a backbone, a head and a loss.

    The three sub-modules are created through ``create_module`` from the
    ``function`` entry of the corresponding config section.
    """

    # Algorithms for which create_feed() knows how to declare training labels.
    _TRAIN_ALGORITHMS = ("EAST", "DB", "SAST")

    def __init__(self, params):
        """
        Detection module for OCR text detection.
        args:
            params (dict): the super parameters for detection module. Must
                contain the sections 'Global', 'Backbone', 'Head' and 'Loss';
                'Global' must provide 'algorithm' and 'image_shape', and each
                of the other sections must provide 'function'.
        """
        global_params = params['Global']
        self.algorithm = global_params['algorithm']

        self.backbone = self._build_module(params["Backbone"], global_params)
        self.head = self._build_module(params["Head"], global_params)
        self.loss = self._build_module(params["Loss"], global_params)

        self.image_shape = global_params['image_shape']

    @staticmethod
    def _build_module(section_params, global_params):
        """Instantiate the module named by ``section_params['function']``.

        The section is copied first so the caller's config is not mutated,
        then the Global entries are merged in (a Global key overrides a
        same-named section key).
        """
        merged_params = deepcopy(section_params)
        merged_params.update(global_params)
        return create_module(merged_params['function'])(params=merged_params)

    @staticmethod
    def _east_labels(image_shape):
        """Declare the EAST label inputs; returns (feed variables, labels)."""
        # EAST label maps are declared at 1/4 of the input height and width.
        h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
        score = fluid.layers.data(
            name='score', shape=[1, h, w], dtype='float32')
        geo = fluid.layers.data(name='geo', shape=[9, h, w], dtype='float32')
        mask = fluid.layers.data(
            name='mask', shape=[1, h, w], dtype='float32')
        labels = {'score': score, 'geo': geo, 'mask': mask}
        return [score, geo, mask], labels

    @staticmethod
    def _db_labels(image_shape):
        """Declare the DB label inputs; returns (feed variables, labels)."""
        # Every DB label map has the input's height and width (no channel
        # axis).
        map_shape = image_shape[1:]
        shrink_map = fluid.layers.data(
            name='shrink_map', shape=map_shape, dtype='float32')
        shrink_mask = fluid.layers.data(
            name='shrink_mask', shape=map_shape, dtype='float32')
        threshold_map = fluid.layers.data(
            name='threshold_map', shape=map_shape, dtype='float32')
        threshold_mask = fluid.layers.data(
            name='threshold_mask', shape=map_shape, dtype='float32')
        labels = {
            'shrink_map': shrink_map,
            'shrink_mask': shrink_mask,
            'threshold_map': threshold_map,
            'threshold_mask': threshold_mask,
        }
        return [shrink_map, shrink_mask, threshold_map,
                threshold_mask], labels

    @staticmethod
    def _sast_labels():
        """Declare the SAST label inputs; returns (feed variables, labels)."""
        # NOTE(review): the 128x128 label size is hard-coded and does not
        # follow Global.image_shape -- presumably 1/4 of a 512x512 training
        # input; confirm against the SAST data pipeline.
        input_score = fluid.layers.data(
            name='score', shape=[1, 128, 128], dtype='float32')
        input_border = fluid.layers.data(
            name='border', shape=[5, 128, 128], dtype='float32')
        input_mask = fluid.layers.data(
            name='mask', shape=[1, 128, 128], dtype='float32')
        input_tvo = fluid.layers.data(
            name='tvo', shape=[9, 128, 128], dtype='float32')
        input_tco = fluid.layers.data(
            name='tco', shape=[3, 128, 128], dtype='float32')
        labels = {
            'input_score': input_score,
            'input_border': input_border,
            'input_mask': input_mask,
            'input_tvo': input_tvo,
            'input_tco': input_tco,
        }
        return [input_score, input_border, input_mask, input_tvo,
                input_tco], labels

    def create_feed(self, mode):
        """
        create Dataloader feeds
        args:
            mode (str): 'train' for training  or else for evaluation
        return: (image, corresponding label, dataloader); label and
            dataloader are None unless mode is 'train'
        raises:
            ValueError: if the height or width in Global.image_shape is not
                divisible by 4, or if mode is 'train' and the configured
                algorithm has no training label definition.
        """
        image_shape = deepcopy(self.image_shape)
        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
            raise ValueError("The size of the image must be divisible by 4, "
                             "received image shape is {}, please reset the "
                             "Global.image_shape in the yml file".format(
                                 image_shape))

        is_train = mode == "train"
        # Reject unknown algorithms before any graph variable is created;
        # otherwise the failure would surface later as an unbound feed_list.
        if is_train and self.algorithm not in self._TRAIN_ALGORITHMS:
            raise ValueError(
                "Unsupported detection algorithm for training: {}, "
                "expected one of {}".format(self.algorithm,
                                            list(self._TRAIN_ALGORITHMS)))

        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if not is_train:
            return image, None, None

        if self.algorithm == "EAST":
            label_vars, labels = self._east_labels(image_shape)
        elif self.algorithm == "DB":
            label_vars, labels = self._db_labels(image_shape)
        else:
            label_vars, labels = self._sast_labels()

        loader = fluid.io.DataLoader.from_generator(
            feed_list=[image] + label_vars,
            capacity=64,
            use_double_buffer=True,
            iterable=False)
        return image, labels, loader

    def __call__(self, mode):
        """
        run forward of defined module
        args:
            mode (str): 'train' for training; 'export'  for inference,
                others for evaluation
        return:
            'train': (dataloader, losses)
            'export': [image, predicts]
            others: (dataloader, predicts), where dataloader is None
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
        # Only the DB head is called with the mode argument.
        if self.algorithm == "DB":
            predicts = self.head(conv_feas, mode)
        else:
            predicts = self.head(conv_feas)

        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
        if mode == "export":
            return [image, predicts]
        return loader, predicts