From 09ff249363fd58a16fe0b6370bae181ce6a550c8 Mon Sep 17 00:00:00 2001 From: Topdu <784990967@qq.com> Date: Mon, 15 Aug 2022 10:42:02 +0000 Subject: [PATCH] update arabic rec model & add pred reverse function --- .../multi_language/arabic_PP-OCRv3_rec.yml | 2 +- ppocr/postprocess/rec_postprocess.py | 25 +++++++++++++++++++ ppocr/utils/dict/arabic_dict.txt | 1 - 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml b/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml index 0ad1ab0a..8c650bd8 100644 --- a/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml +++ b/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml @@ -12,7 +12,7 @@ Global: checkpoints: save_inference_dir: use_visualdl: false - infer_img: doc/imgs_words/ch/word_1.jpg + infer_img: ./doc/imgs_words/arabic/ar_2.jpg character_dict_path: ppocr/utils/dict/arabic_dict.txt max_text_length: &max_text_length 25 infer_mode: false diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 7b994f81..e8a1cd42 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -45,6 +45,27 @@ class BaseRecLabelDecode(object): self.dict[char] = i self.character = dict_character + if 'arabic' in character_dict_path: + self.reverse = True + else: + self.reverse = False + + def pred_reverse(self, pred): + pred_re = [] + c_current = '' + for c in pred: + if c in self.character[79:]: + if c_current != '': + pred_re.append(c_current) + pred_re.append(c) + c_current = '' + else: + c_current += c + if c_current != '': + pred_re.append(c_current) + + return ''.join(pred_re[::-1]) + def add_special_char(self, dict_character): return dict_character @@ -73,6 +94,10 @@ class BaseRecLabelDecode(object): conf_list = [0] text = ''.join(char_list) + + if self.reverse: # for arabic rec + text = self.pred_reverse(text) + result_list.append((text, np.mean(conf_list).tolist())) return result_list diff --git a/ppocr/utils/dict/arabic_dict.txt b/ppocr/utils/dict/arabic_dict.txt index e97abf39..916d421c 100644 --- a/ppocr/utils/dict/arabic_dict.txt +++ b/ppocr/utils/dict/arabic_dict.txt @@ -1,4 +1,3 @@ - ! # $ -- GitLab