diff --git a/configs/rec/rec_r31_sar.yml b/configs/rec/rec_r31_sar.yml index c19bcdee099339adfedb76b047d691758f4264cb..73d56f375e81e70166fcf499f372afb2dd2fb5c2 100644 --- a/configs/rec/rec_r31_sar.yml +++ b/configs/rec/rec_r31_sar.yml @@ -15,7 +15,7 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict90.txt - character_type: ch + character_type: EN_symbol max_text_length: 30 infer_mode: False use_space_char: False @@ -47,6 +47,7 @@ Loss: PostProcess: name: SARLabelDecode + rm_symbol: True Metric: name: RecMetric @@ -56,8 +57,8 @@ Train: dataset: name: SimpleDataSet delimiter: ' ' - label_file_list: ['/paddle/data/concat_data/train_list.txt'] - data_dir: /paddle/data/concat_data/ + label_file_list: ['./train_data/train_list.txt'] + data_dir: ./train_data/ ratio_list: 1.0 transforms: - DecodeImage: # load image @@ -79,7 +80,7 @@ Train: Eval: dataset: name: LMDBDataSet - data_dir: /paddle/data/ocr_data/evaluation/ + data_dir: ./eval_data/evaluation/ transforms: - DecodeImage: # load image img_mode: BGR diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 83d7b21540187b3f86739661a11f3baece9a9a13..2043928142b6f59d60d5256bb890a88e062f8fd6 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -526,6 +526,7 @@ class SARLabelDecode(BaseRecLabelDecode): character_dict_path=None, character_type='ch', use_space_char=False, + rm_symbol=True, **kwargs): super(SARLabelDecode, self).__init__(character_dict_path, character_type, use_space_char) @@ -572,9 +573,10 @@ class SARLabelDecode(BaseRecLabelDecode): else: conf_list.append(1) text = ''.join(char_list) - comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') - text = text.lower() - text = comp.sub('', text) + if self.rm_symbol: + comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') + text = text.lower() + text = comp.sub('', text) result_list.append((text, np.mean(conf_list))) return result_list diff --git a/ppocr/utils/dict90.txt b/ppocr/utils/dict90.txt deleted file mode 100644 index a945ae9c526e4faa68852eb3fb47d078a2f3f6ce..0000000000000000000000000000000000000000 --- a/ppocr/utils/dict90.txt +++ /dev/null @@ -1,90 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -: -; -< -= -> -? -@ -[ -\ -] -_ -` -~ \ No newline at end of file