From b55b8eda220ae9cb83414fbdbc4819694df65fc6 Mon Sep 17 00:00:00 2001
From: tink2123
Date: Thu, 4 Jun 2020 19:41:42 +0800
Subject: [PATCH] add windows doc

---
 README.md                                  |  5 +++++
 configs/rec/rec_chinese_lite_train.yml     |  1 -
 configs/rec/rec_icdar15_train.yml          |  3 +--
 configs/rec/rec_mv3_none_bilstm_ctc.yml    |  1 -
 configs/rec/rec_mv3_none_none_ctc.yml      |  1 -
 configs/rec/rec_mv3_tps_bilstm_attn.yml    |  1 -
 configs/rec/rec_mv3_tps_bilstm_ctc.yml     |  1 -
 configs/rec/rec_r34_vd_none_bilstm_ctc.yml |  1 -
 configs/rec/rec_r34_vd_none_none_ctc.yml   |  1 -
 configs/rec/rec_r34_vd_tps_bilstm_attn.yml |  1 -
 configs/rec/rec_r34_vd_tps_bilstm_ctc.yml  |  1 -
 doc/inference.md                           |  4 ++++
 doc/installation.md                        |  6 ++++++
 doc/recognition.md                         |  7 +++++--
 ppocr/data/rec/dataset_traversal.py        | 19 ++++++++++---------
 ppocr/data/rec/img_tools.py                |  8 ++------
 tools/infer/predict_rec.py                 | 12 +++++++++---
 17 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 9b94466b..56e3f993 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,8 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 
 #### 2.inference模型下载
 
+*windows 环境下如果没有安装wget，下载模型时可将链接复制到浏览器中下载，并解压放置在相应目录下*
+
 #### (1)超轻量级中文OCR模型下载
 ```
 mkdir inference && cd inference
@@ -63,6 +65,9 @@ cd ..
 # 设置PYTHONPATH环境变量
 export PYTHONPATH=.
 
+# windows下设置环境变量
+SET PYTHONPATH=.
+
 # 预测image_dir指定的单张图像
 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_mv3_db/" --rec_model_dir="./inference/ch_rec_mv3_crnn/"
 
diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml
index 4517f3e2..b64313a1 100755
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 320]
   max_text_length: 25
   character_type: ch
diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml
index b783cc2f..934a9410 100755
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -9,13 +9,12 @@ Global:
   eval_batch_step: 500
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
   loss_type: ctc
   reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights:
+  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
   checkpoints:
   save_inference_dir:
   infer_img:
diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml
index 35b5206c..d2e096fb 100755
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml
index 72191349..ceec09ce 100755
--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_mv3_tps_bilstm_attn.yml b/configs/rec/rec_mv3_tps_bilstm_attn.yml
index c4f64f0a..7fc4f679 100755
--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
index 8236dd8a..4b9660bc 100755
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
index c234cbb7..b71e8fea 100755
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml
index 457c79ad..d9c9458d 100755
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
index 2f351ae4..405082bd 100755
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
index 93021709..517322c3 100755
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -9,7 +9,6 @@ Global:
   eval_batch_step: 2000
   train_batch_size_per_card: 256
   test_batch_size_per_card: 256
-  drop_last: false
   image_shape: [3, 32, 100]
   max_text_length: 25
   character_type: en
diff --git a/doc/inference.md b/doc/inference.md
index b16b89a9..0d5f45fd 100644
--- a/doc/inference.md
+++ b/doc/inference.md
@@ -166,6 +166,10 @@ STAR-Net文本识别模型推理，可以执行如下命令：
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
 ```
 
+### 3.基于Attention损失的识别模型推理
+
+基于Attention损失的识别模型与ctc不同，需要额外设置识别算法参数 --rec_algorithm="RARE"
+
 RARE 文本识别模型推理，可以执行如下命令：
 ```
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/sare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE"
diff --git a/doc/installation.md b/doc/installation.md
index 9bdd5379..f1edbf4a 100644
--- a/doc/installation.md
+++ b/doc/installation.md
@@ -8,6 +8,8 @@ PaddleOCR 工作环境
 
 建议使用我们提供的docker运行PaddleOCR，有关docker使用请参考[链接](https://docs.docker.com/get-started/)。
 
+*如您希望使用 mac 或 windows直接运行预测代码，可以从第2步开始执行。*
+
 1. (建议)准备docker环境。第一次使用这个镜像，会自动下载该镜像，请耐心等待。
 ```
 # 切换到工作目录下
@@ -54,6 +56,10 @@ python3 -m pip install paddlepaddle-gpu==1.7.2.post97 -i https://pypi.tuna.tsing
 如果您的机器安装的是CUDA10，请运行以下命令安装
 
 python3 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple
 
+如果您的机器是CPU，请运行以下命令安装
+
+python3 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
+
 更多的版本需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
 ```
diff --git a/doc/recognition.md b/doc/recognition.md
index ea38c0f3..7dd9ca7e 100644
--- a/doc/recognition.md
+++ b/doc/recognition.md
@@ -41,6 +41,8 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件，通
 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt
 # 测试集标签
 wget -P ./train_data/ic15_data https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
+
+
 ```
 
 最终训练集应有如下文件结构：
@@ -168,10 +170,11 @@ Global:
 
 评估数据集可以通过 `configs/rec/rec_icdar15_reader.yml` 修改EvalReader中的 `label_file_path` 设置。
 
+*注意* 评估时必须确保配置文件中 infer_img 字段为空
 ```
 export CUDA_VISIBLE_DEVICES=0
 # GPU 评估, Global.checkpoints 为待测权重
-python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
 ```
 
 ### 预测
@@ -184,7 +187,7 @@ python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkp
 
 ```
 # 预测英文结果
-python3 tools/infer_rec.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
 ```
 
 预测图片：
diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py
index 6c2b7a1a..71b488f4 100755
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -42,14 +42,15 @@ class LMDBReader(object):
         self.max_text_length = params['max_text_length']
         self.mode = params['mode']
         self.drop_last = False
-        self.tps = False
+        self.use_tps = False
         if "tps" in params:
-            self.tps = True
+            self.use_tps = True
         if params['mode'] == 'train':
             self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = params['drop_last']
+            self.drop_last = True
         else:
             self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
         self.infer_img = params['infer_img']
 
     def load_hierarchical_lmdb_dataset(self):
@@ -114,7 +115,7 @@ class LMDBReader(object):
                     img=img,
                     image_shape=self.image_shape,
                     char_ops=self.char_ops,
-                    tps=self.tps,
+                    tps=self.use_tps,
                     infer_mode=True)
                 yield norm_img
             else:
@@ -181,15 +182,15 @@ class SimpleReader(object):
         self.max_text_length = params['max_text_length']
         self.mode = params['mode']
         self.infer_img = params['infer_img']
-        self.tps = False
+        self.use_tps = False
         if "tps" in params:
-            self.tps = True
-        self.drop_last = False
+            self.use_tps = True
         if params['mode'] == 'train':
             self.batch_size = params['train_batch_size_per_card']
-            self.drop_last = params['drop_last']
+            self.drop_last = True
         else:
             self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
 
     def __call__(self, process_id):
         if self.mode != 'train':
@@ -206,7 +207,7 @@ class SimpleReader(object):
                     img=img,
                     image_shape=self.image_shape,
                     char_ops=self.char_ops,
-                    tps=self.tps,
+                    tps=self.use_tps,
                     infer_mode=True)
                 yield norm_img
             else:
diff --git a/ppocr/data/rec/img_tools.py b/ppocr/data/rec/img_tools.py
index 6d7b66e9..57543293 100755
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@@ -95,14 +95,10 @@ def process_image(img,
                   max_text_length=None,
                   tps=None,
                   infer_mode=False):
-    if not infer_mode or char_ops.character_type == "en":
+    if not infer_mode or char_ops.character_type == "en" or tps != None:
         norm_img = resize_norm_img(img, image_shape)
     else:
-        if tps != None and char_ops.character_type == "ch":
-            image_shape = [3, 32, 320]
-            norm_img = resize_norm_img(img, image_shape)
-        else:
-            norm_img = resize_norm_img_chinese(img, image_shape)
+        norm_img = resize_norm_img_chinese(img, image_shape)
     norm_img = norm_img[np.newaxis, :]
     if label is not None:
         char_num = char_ops.get_char_num()
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index e8b485fb..3417d500 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -38,8 +38,10 @@ class TextRecognizer(object):
             char_ops_params["character_dict_path"] = args.rec_char_dict_path
         if self.rec_algorithm != "RARE":
             char_ops_params['loss_type'] = 'ctc'
+            self.loss_type = 'ctc'
         else:
             char_ops_params['loss_type'] = 'attention'
+            self.loss_type = 'attention'
         self.char_ops = CharacterOps(char_ops_params)
 
     def resize_norm_img(self, img, max_wh_ratio):
@@ -85,7 +87,7 @@ class TextRecognizer(object):
             self.input_tensor.copy_from_cpu(norm_img_batch)
             self.predictor.zero_copy_run()
 
-            if self.rec_algorithm != "RARE":
+            if self.loss_type == "ctc":
                 rec_idx_batch = self.output_tensors[0].copy_to_cpu()
                 rec_idx_lod = self.output_tensors[0].lod()[0]
                 predict_batch = self.output_tensors[1].copy_to_cpu()
@@ -139,9 +141,13 @@ if __name__ == "__main__":
         img_list.append(img)
     try:
         rec_res, predict_time = text_recognizer(img_list)
-    except:
+    except Exception as e:
+        print(e)
         logger.info(
-            "ERROR!! \nInput image shape is not equal with config. TPS does not support variable shape.\n"
+            "ERROR!!!! \n"
+            "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
+            "If your model has tps module: "
+            "TPS does not support variable shape.\n"
             "Please set --rec_image_shape=input_shape and --rec_char_type='en' ")
         exit()
     for ino in range(len(img_list)):
--
GitLab
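
The img_tools.py hunk above collapses the TPS special case into a single condition: the fixed-shape resize (`resize_norm_img`) is now used for training, for English models, and whenever a TPS module is configured, while the variable-width resize (`resize_norm_img_chinese`) is kept only for non-TPS Chinese inference; this is also why the new error message in predict_rec.py asks users of TPS models to set `--rec_image_shape` to the model's input shape. A minimal sketch of that branch, assuming nothing beyond the condition shown in the hunk (`pick_resize_branch` is a hypothetical helper, not a PaddleOCR function):

```
def pick_resize_branch(infer_mode, character_type, tps):
    """Mirror of the branch process_image() takes after this patch:
    fixed-shape resize for training, English models, or any TPS model;
    variable-width resize only for non-TPS Chinese inference."""
    if not infer_mode or character_type == "en" or tps is not None:
        return "resize_norm_img"          # fixed image_shape, e.g. [3, 32, 100]
    return "resize_norm_img_chinese"      # width scales with the input image

# Representative cases under the patched condition:
assert pick_resize_branch(infer_mode=False, character_type="ch", tps=None) == "resize_norm_img"
assert pick_resize_branch(infer_mode=True, character_type="en", tps=None) == "resize_norm_img"
assert pick_resize_branch(infer_mode=True, character_type="ch", tps=True) == "resize_norm_img"
assert pick_resize_branch(infer_mode=True, character_type="ch", tps=None) == "resize_norm_img_chinese"
```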