From bab3f7591a72145f6592206236d0136f156a925e Mon Sep 17 00:00:00 2001
From: tink2123 <y_tink@163.com>
Date: Wed, 13 May 2020 23:16:25 +0800
Subject: [PATCH] Add Chinese lite rec configs; update ICDAR15 config, dict order and label lower-casing

---
 configs/rec/rec_chinese_lite_train.yml | 42 ++++++++++++++++++++++++++
 configs/rec/rec_chinese_reader.yml     | 14 +++++++++
 configs/rec/rec_icdar15_train.yml      |  9 +++---
 ppocr/utils/character.py               |  2 +-
 ppocr/utils/ic15_dict.txt              | 20 ++++++------
 5 files changed, 72 insertions(+), 15 deletions(-)
 create mode 100755 configs/rec/rec_chinese_lite_train.yml
 create mode 100755 configs/rec/rec_chinese_reader.yml

diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml
new file mode 100755
index 00000000..279aa699
--- /dev/null
+++ b/configs/rec/rec_chinese_lite_train.yml
@@ -0,0 +1,42 @@
+Global:
+  algorithm: CRNN
+  use_gpu: true
+  epoch_num: 3000
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_CRNN
+  save_epoch_step: 3
+  eval_batch_step: 2000
+  train_batch_size_per_card: 256
+  test_batch_size_per_card: 256
+  image_shape: [3, 32, 100]
+  max_text_length: 25
+  character_type: ch
+  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
+  loss_type: ctc
+  reader_yml: ./configs/rec/rec_chinese_reader.yml  # Train/Eval/Test reader settings
+  pretrain_weights: ./pretrain_models/CRNN/best_accuracy
+  checkpoints: null  # not set
+  save_inference_dir: null  # not set
+Architecture:
+  function: ppocr.modeling.architectures.rec_model,RecModel
+
+Backbone:
+  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  scale: 0.5
+  model_name: small
+
+Head:
+  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
+  encoder_type: rnn
+  SeqRNN:
+    hidden_size: 48
+
+Loss:
+  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
+
+Optimizer:
+  function: ppocr.optimizer,AdamDecay
+  base_lr: 0.0005
+  beta1: 0.9
+  beta2: 0.999
diff --git a/configs/rec/rec_chinese_reader.yml b/configs/rec/rec_chinese_reader.yml
new file mode 100755
index 00000000..f09a1ea7
--- /dev/null
+++ b/configs/rec/rec_chinese_reader.yml
@@ -0,0 +1,14 @@
+TrainReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  num_workers: 8
+  img_set_dir: ./train_data
+  label_file_path: ./train_data/rec_gt_train.txt
+
+EvalReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  img_set_dir: ./train_data  # same image root as TrainReader
+  label_file_path: ./train_data/rec_gt_test.txt
+
+TestReader:
+  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
+  infer_img: ./infer_img  # NOTE(review): presumably the inference input path - confirm
diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml
index 35aba668..e8710b47 100755
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -12,10 +12,11 @@ Global:
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: ch
-  character_dict_path: ./ppocr/utils/ic15_dict.txt
+  # Previously ./ppocr/utils/ic15_dict.txt; path is relative to the repo root.
+  character_dict_path: ./train_data/ic15_dict.txt
   loss_type: ctc
   reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/CRNN/best_accuracy 
+  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy  # repo-relative path
   checkpoints:
   save_inference_dir:
 Architecture:
@@ -24,13 +25,13 @@ Architecture:
 Backbone:
   function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
   scale: 0.5
-  model_name: small
+  model_name: large
 
 Head:
   function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
   encoder_type: rnn
   SeqRNN:
-    hidden_size: 48
+    hidden_size: 96
     
 Loss:
   function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py
index 7dfeeef4..b4075039 100755
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@@ -64,7 +64,7 @@ class CharacterOps(object):
                     [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
             length: length of each text. [batch_size]
         """
-        if self.character_type == "en" or text.encode( 'UTF-8' ).isalpha():
+        if self.character_type == "en":
             text = text.lower()
 
         text_list = []
diff --git a/ppocr/utils/ic15_dict.txt b/ppocr/utils/ic15_dict.txt
index c1f9993d..71043689 100644
--- a/ppocr/utils/ic15_dict.txt
+++ b/ppocr/utils/ic15_dict.txt
@@ -1,3 +1,13 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
 a
 b
 c
@@ -24,13 +34,3 @@ w
 x
 y
 z
-0
-1
-2
-3
-4
-5
-6
-7
-8
-9
-- 
GitLab