diff --git a/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml b/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
index 0ad1ab0adc189102ff07094fcda92d4f9ea9c662..8c650bd826d127f25c907f97d20d1a52f67f9203 100644
--- a/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml
@@ -12,7 +12,7 @@ Global:
   checkpoints:
   save_inference_dir:
   use_visualdl: false
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: ./doc/imgs_words/arabic/ar_2.jpg
   character_dict_path: ppocr/utils/dict/arabic_dict.txt
   max_text_length: &max_text_length 25
   infer_mode: false
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index fc9fccfb143bf31ec66989e279d0bcc1c9baa5cc..f77631700648e84f28223cb14738e7b4ab679012 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -45,6 +45,28 @@ class BaseRecLabelDecode(object):
             self.dict[char] = i
         self.character = dict_character
 
+        # `'arabic' in None` raises TypeError, so guard against a missing
+        # dictionary path before the substring test.
+        self.reverse = (bool(character_dict_path) and
+                        'arabic' in character_dict_path)
+
+    def pred_reverse(self, pred):
+        """Reverse the token order of `pred`.
+
+        A token is either a maximal run of characters matching
+        ``[a-zA-Z0-9 :*./%+-]`` or any other single character. Tokens are
+        emitted in reverse order; characters inside a run keep their order.
+
+        Args:
+            pred (str): decoded text.
+
+        Returns:
+            str: `pred` rebuilt from its tokens in reverse order.
+        """
+        # One regex pass replaces a re.search call per character.
+        tokens = re.findall(r'[a-zA-Z0-9 :*./%+-]+|.', pred, flags=re.DOTALL)
+        return ''.join(tokens[::-1])
+
     def add_special_char(self, dict_character):
         return dict_character
 
@@ -73,6 +95,10 @@ class BaseRecLabelDecode(object):
                 conf_list = [0]
 
             text = ''.join(char_list)
+
+            if self.reverse:  # for arabic rec
+                text = self.pred_reverse(text)
+
             result_list.append((text, np.mean(conf_list).tolist()))
         return result_list
 
diff --git a/ppocr/utils/dict/arabic_dict.txt b/ppocr/utils/dict/arabic_dict.txt
index e97abf39274df77fbad066ee4635aebc6743140c..916d421c53bad563dfd980c1b64dcce07a3c9d24 100644
--- a/ppocr/utils/dict/arabic_dict.txt
+++ 
b/ppocr/utils/dict/arabic_dict.txt @@ -1,4 +1,3 @@ - ! # $