delete dict90, update sar postprocess, update sar.yml

37cd4ed3 · andyjpaddle · ea019601 · 37cd4ed3 · 37cd4ed3 · ea019601
Showing 3 changed files with 10 additions and 97 deletions

configs/rec/rec_r31_sar.yml +5 -4

ppocr/postprocess/rec_postprocess.py +5 -3

ppocr/utils/dict90.txt +0 -90

File not found.
--- a/configs/rec/rec_r31_sar.yml
+++ b/configs/rec/rec_r31_sar.yml
@@ -15,7 +15,7 @@ Global:
  infer_img: 
  # for data or label process
  character_dict_path: ppocr/utils/dict90.txt
-  character_type: ch
+  character_type: EN_symbol
  max_text_length: 30
  infer_mode: False
  use_space_char: False
@@ -47,6 +47,7 @@ Loss:

 PostProcess:
  name: SARLabelDecode
+  rm_symbol: True

 Metric:
  name: RecMetric
@@ -56,8 +57,8 @@ Train:
  dataset:
    name: SimpleDataSet
    delimiter: ' '
-    label_file_list: ['/paddle/data/concat_data/train_list.txt']
-    data_dir: /paddle/data/concat_data/
+    label_file_list: ['./train_data/train_list.txt']
+    data_dir: ./train_data/
    ratio_list: 1.0
    transforms:
      - DecodeImage: # load image
@@ -79,7 +80,7 @@ Train:
 Eval:
  dataset:
    name: LMDBDataSet
-    data_dir: /paddle/data/ocr_data/evaluation/
+    data_dir: ./eval_data/evaluation/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR

--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -526,6 +526,7 @@ class SARLabelDecode(BaseRecLabelDecode):
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False,
+                 rm_symbol=True,
                 **kwargs):
        super(SARLabelDecode, self).__init__(character_dict_path,
                                              character_type, use_space_char)
@@ -572,9 +573,10 @@ class SARLabelDecode(BaseRecLabelDecode):
                else:
                    conf_list.append(1)
            text = ''.join(char_list)
-            comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
-            text = text.lower()
-            text = comp.sub('', text)
+            if self.rm_symbol:
+                comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]')
+                text = text.lower()
+                text = comp.sub('', text)
            result_list.append((text, np.mean(conf_list)))
        return result_list


--- a/ppocr/utils/dict90.txt
+++ b/ppocr/utils/dict90.txt
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-!
-"
-#
-$
-%
-&
-'
-(
-)
-*
-+
-,
-
-.
-/
-:
-;
-<
-=
->
-?
-@
-[
-\
-]
-_
-`
-~
\ No newline at end of file