From edeb12b1e00e090102116cb0737b5db194c219d6 Mon Sep 17 00:00:00 2001
From: tink2123 <y_tink@163.com>
Date: Tue, 26 Jan 2021 15:53:49 +0800
Subject: [PATCH] rename character_type en_sensitive to EN_symbol and En to EN

---
 .../multi_language/rec_en_number_lite_train.yml  | 16 ++++++++--------
 doc/doc_ch/recognition.md                        |  2 +-
 doc/doc_en/recognition_en.md                     |  2 +-
 ppocr/data/imaug/label_ops.py                    |  8 ++++----
 ppocr/postprocess/rec_postprocess.py             |  8 ++++----
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml
index 8217082d..c2295af9 100644
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@@ -1,5 +1,5 @@
 Global:
-  use_gpu: True
+  use_gpu: False
   epoch_num: 500
   log_smooth_window: 20
   print_batch_step: 10
@@ -16,7 +16,7 @@ Global:
   infer_img:
   # for data or label process
   character_dict_path: ppocr/utils/dict/en_dict.txt
-  character_type: En
+  character_type: EN
   max_text_length: 25
   infer_mode: False
   use_space_char: False
@@ -63,8 +63,8 @@ Metric:
 Train:
   dataset:
     name: SimpleDataSet
-    data_dir: ./train_data/
-    label_file_list: ["./train_data/train_list.txt"]
+    data_dir: ./train_data/ic15_data/
+    label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
@@ -77,15 +77,15 @@ Train:
           keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
   loader:
     shuffle: True
-    batch_size_per_card: 256
+    batch_size_per_card: 1
     drop_last: True
-    num_workers: 8
+    num_workers: 1
 
 Eval:
   dataset:
     name: SimpleDataSet
-    data_dir: ./train_data/
-    label_file_list: ["./train_data/eval_list.txt"]
+    data_dir: ./train_data/ic15_data/
+    label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md
index 76b631fc..c5f459bd 100644
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -348,7 +348,7 @@ PaddleOCR目前已支持26种（除中文外）语种识别，`configs/rec/multi
 | 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |  language | character_type |
 | :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  | :-----:  |
 | rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 中文繁体  | chinese_cht|
-| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语   | En |
+| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语（区分大小写）   | EN |
 | rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 法语 |  french |
 | rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 德语   | german |
 | rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 日语  | japan |
diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md
index 9e697819..22f89cde 100644
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
 | Configuration file |  Algorithm name |   backbone |   trans   |   seq      |     pred     |  language | character_type |
 | :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  | :-----:  |
 | rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | chinese traditional  | chinese_cht|
-| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | English   | En |
+| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | English (case-sensitive)   | EN |
 | rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | French |  french |
 | rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | German   | german |
 | rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | Japanese | japan |
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index f7e21786..14c1cc9c 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -18,6 +18,7 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 import numpy as np
+import string
 
 
 class ClsLabelEncode(object):
@@ -92,8 +93,8 @@ class BaseRecLabelEncode(object):
                  character_type='ch',
                  use_space_char=False):
         support_character_type = [
-            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
-            'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
+            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
+            'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
             'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
             'mr', 'ne'
         ]
@@ -104,9 +105,8 @@ class BaseRecLabelEncode(object):
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type == "en_sensitive":
+        elif character_type == "EN_symbol":
             # same with ASTER setting (use 94 char).
-            import string
             self.character_str = string.printable[:-6]
             dict_character = list(self.character_str)
         elif character_type in support_character_type:
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index 47bdd03f..65ed4671 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import numpy as np
+import string
 import paddle
 from paddle.nn import functional as F
 
@@ -24,10 +25,10 @@ class BaseRecLabelDecode(object):
                  character_type='ch',
                  use_space_char=False):
         support_character_type = [
-            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean',
+            'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
             'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
             'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
-            'ne', 'En'
+            'ne', 'EN'
         ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, character_type)
@@ -35,9 +36,8 @@ class BaseRecLabelDecode(object):
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type == "en_sensitive":
+        elif character_type == "EN_symbol":
             # same with ASTER setting (use 94 char).
-            import string
             self.character_str = string.printable[:-6]
             dict_character = list(self.character_str)
         elif character_type in support_character_type:
-- 
GitLab