From 04b0318b56ebf161f00b985c985b9f6ac776cf53 Mon Sep 17 00:00:00 2001 From: WenmuZhou Date: Wed, 9 Dec 2020 17:27:50 +0800 Subject: [PATCH] Delete unused files --- ppocr/utils/character.py | 214 --------------------------------------- ppocr/utils/check.py | 31 ------ 2 files changed, 245 deletions(-) delete mode 100755 ppocr/utils/character.py delete mode 100755 ppocr/utils/check.py diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py deleted file mode 100755 index b4b2021e..00000000 --- a/ppocr/utils/character.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import string -import re -from .check import check_config_params -import sys - - -class CharacterOps(object): - """ Convert between text-label and text-index """ - - def __init__(self, config): - self.character_type = config['character_type'] - self.loss_type = config['loss_type'] - self.max_text_len = config['max_text_length'] - if self.character_type == "en": - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - elif self.character_type == "ch": - character_dict_path = config['character_dict_path'] - add_space = False - if 'use_space_char' in config: - add_space = config['use_space_char'] - self.character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str += line - if add_space: - self.character_str += " " - dict_character = list(self.character_str) - elif self.character_type == "en_sensitive": - # same with ASTER setting (use 94 char). - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - else: - self.character_str = None - assert self.character_str is not None, \ - "Nonsupport type of the character: {}".format(self.character_str) - self.beg_str = "sos" - self.end_str = "eos" - if self.loss_type == "attention": - dict_character = [self.beg_str, self.end_str] + dict_character - elif self.loss_type == "srn": - dict_character = dict_character + [self.beg_str, self.end_str] - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if self.character_type == "en": - text = text.lower() - - text_list = [] - for char in text: - if char not in self.dict: - continue - text_list.append(self.dict[char]) - text = np.array(text_list) - return text - - def decode(self, text_index, is_remove_duplicate=False): - """ convert text-index into text-label. """ - char_list = [] - char_num = self.get_char_num() - - if self.loss_type == "attention": - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - ignored_tokens = [beg_idx, end_idx] - else: - ignored_tokens = [char_num] - - for idx in range(len(text_index)): - if text_index[idx] in ignored_tokens: - continue - if is_remove_duplicate: - if idx > 0 and text_index[idx - 1] == text_index[idx]: - continue - char_list.append(self.character[int(text_index[idx])]) - text = ''.join(char_list) - return text - - def get_char_num(self): - return len(self.character) - - def get_beg_end_flag_idx(self, beg_or_end): - if self.loss_type == "attention": - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx"\ - % beg_or_end - return idx - else: - err = "error in get_beg_end_flag_idx when using the loss %s"\ - % (self.loss_type) - assert False, err - - -def cal_predicts_accuracy(char_ops, - preds, - preds_lod, - labels, - labels_lod, - is_remove_duplicate=False): - acc_num = 0 - img_num = 0 - for ino in range(len(labels_lod) - 1): - beg_no = preds_lod[ino] - end_no = preds_lod[ino + 1] - preds_text = preds[beg_no:end_no].reshape(-1) - preds_text = char_ops.decode(preds_text, is_remove_duplicate) - - beg_no = labels_lod[ino] - end_no = labels_lod[ino + 1] - labels_text = labels[beg_no:end_no].reshape(-1) - labels_text = char_ops.decode(labels_text, is_remove_duplicate) - img_num += 1 - - if preds_text == labels_text: - acc_num += 1 - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def cal_predicts_accuracy_srn(char_ops, - preds, - labels, - max_text_len, - is_debug=False): - acc_num = 0 - img_num = 0 - - char_num = char_ops.get_char_num() - - total_len = preds.shape[0] - img_num = int(total_len / max_text_len) - for i in range(img_num): - cur_label = [] - cur_pred = [] - for j in range(max_text_len): - if labels[j + i * max_text_len] != int(char_num-1): #0 - cur_label.append(labels[j + i * max_text_len][0]) - else: - break - - for j in range(max_text_len + 1): - if j < len(cur_label) and preds[j + i * max_text_len][ - 0] != cur_label[j]: - break - elif j == len(cur_label) and j == max_text_len: - acc_num += 1 - break - elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num-1): - acc_num += 1 - break - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def convert_rec_attention_infer_res(preds): - img_num = preds.shape[0] - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - end_pos = np.where(preds[ino, :] == 1)[0] - if len(end_pos) <= 1: - text_list = preds[ino, 1:] - else: - text_list = preds[ino, 1:end_pos[1]] - target_lod.append(target_lod[ino] + len(text_list)) - convert_ids = convert_ids + list(text_list) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod - - -def convert_rec_label_to_lod(ori_labels): - img_num = len(ori_labels) - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - target_lod.append(target_lod[ino] + len(ori_labels[ino])) - convert_ids = convert_ids + list(ori_labels[ino]) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod diff --git a/ppocr/utils/check.py b/ppocr/utils/check.py deleted file mode 100755 index 3a0b1406..00000000 --- a/ppocr/utils/check.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys - -import logging -logger = logging.getLogger(__name__) - - -def check_config_params(config, config_name, params): - for param in params: - if param not in config: - err = "param %s didn't find in %s!" % (param, config_name) - assert False, err - return -- GitLab