diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index c924af2dfd1183ebbacfaf2f783878e3c69317e2..f3c90050c92803789252304bb4e3a9a4bf04c70b 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -123,7 +123,7 @@ class BaseRecLabelEncode(object): [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] length: length of each text. [batch_size] """ - if len(text) > self.max_text_len: + if len(text) == 0 or len(text) > self.max_text_len: return None if self.character_type == "en": text = text.lower() @@ -138,9 +138,6 @@ class BaseRecLabelEncode(object): return None return text_list - def get_ignored_tokens(self): - return [0] # for ctc blank - class CTCLabelEncode(BaseRecLabelEncode): """ Convert between text-label and text-index """ @@ -193,11 +190,6 @@ class AttnLabelEncode(BaseRecLabelEncode): text = self.encode(text) return text - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - def get_beg_end_flag_idx(self, beg_or_end): if beg_or_end == "beg": idx = np.array(self.dict[self.beg_str])