add ocr postprocess

ccf6d601 · felixhjh · ecd70aef · ccf6d601
隐藏空白更改
内联并排

Showing with 106 addition and 0 deletion

python/paddle_serving_app/reader/ocr_reader.py python/paddle_serving_app/reader/ocr_reader.py +106 -0

未找到文件。
--- a/python/paddle_serving_app/reader/ocr_reader.py
+++ b/python/paddle_serving_app/reader/ocr_reader.py
@@ -118,6 +118,111 @@ class CharacterOps(object):
                % (self.loss_type)
            assert False, err

+class BaseRecLabelDecode(object):
+    """ Convert between text-label and text-index """
+
+    def __init__(self, config):
+        support_character_type = [
+            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
+            'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
+            'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
+            'ne', 'EN'
+        ]
+        character_type = config['character_type']
+        character_dict_path = config['character_dict_path']
+        use_space_char = True
+        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
+            support_character_type, character_type)
+
+        self.beg_str = "sos"
+        self.end_str = "eos"
+
+        if character_type == "en":
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        elif character_type == "EN_symbol":
+            # same with ASTER setting (use 94 char).
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        elif character_type in support_character_type:
+            self.character_str = ""
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
+                character_type)
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str += line
+            if use_space_char:
+                self.character_str += " "
+            dict_character = list(self.character_str)
+
+        else:
+            raise NotImplementedError
+        self.character_type = character_type
+        dict_character = self.add_special_char(dict_character)
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+
+    def add_special_char(self, dict_character):
+        return dict_character
+
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """ convert text-index into text-label. """
+        result_list = []
+        ignored_tokens = self.get_ignored_tokens()
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            conf_list = []
+            for idx in range(len(text_index[batch_idx])):
+                if text_index[batch_idx][idx] in ignored_tokens:
+                    continue
+                if is_remove_duplicate:
+                    # only for predict
+                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
+                            batch_idx][idx]:
+                        continue
+                char_list.append(self.character[int(text_index[batch_idx][
+                    idx])])
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+            text = ''.join(char_list)
+            result_list.append((text, np.mean(conf_list)))
+        return result_list
+
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+
+
+class CTCLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+
+    def __init__(
+            self,
+            config,
+            #character_dict_path=None,
+            #character_type='ch',
+            #use_space_char=False,
+            **kwargs):
+        super(CTCLabelDecode, self).__init__(config)
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        preds_idx = preds.argmax(axis=2)
+        preds_prob = preds.max(axis=2)
+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
+        if label is None:
+            return text
+        label = self.decode(label)
+        return text, label
+
+    def add_special_char(self, dict_character):
+        dict_character = ['blank'] + dict_character
+        return dict_character

 class OCRReader(object):
    def __init__(self,
@@ -134,6 +239,7 @@ class OCRReader(object):
        char_ops_params["character_dict_path"] = char_dict_path
        char_ops_params['loss_type'] = 'ctc'
        self.char_ops = CharacterOps(char_ops_params)
+        self.label_ops = CTCLabelDecode(char_ops_params)

    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape