提交 380dc6c2 编写于 作者: T tink2123

rm rec_char_type

上级 af0bac58
...@@ -14,7 +14,6 @@ Global: ...@@ -14,7 +14,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -14,7 +14,6 @@ Global: ...@@ -14,7 +14,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -14,7 +14,6 @@ Global: ...@@ -14,7 +14,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: True use_space_char: True
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/ppocr_keys_v1.txt character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: True use_space_char: True
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: null infer_img: null
character_dict_path: ppocr/utils/dict/arabic_dict.txt character_dict_path: ppocr/utils/dict/arabic_dict.txt
character_type: arabic
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: null infer_img: null
character_dict_path: ppocr/utils/dict/cyrillic_dict.txt character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
character_type: cyrillic
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: null infer_img: null
character_dict_path: ppocr/utils/dict/devanagari_dict.txt character_dict_path: ppocr/utils/dict/devanagari_dict.txt
character_type: devanagari
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -16,7 +16,6 @@ Global: ...@@ -16,7 +16,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/en_dict.txt character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: True use_space_char: True
......
...@@ -16,7 +16,6 @@ Global: ...@@ -16,7 +16,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/french_dict.txt character_dict_path: ppocr/utils/dict/french_dict.txt
character_type: french
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -16,7 +16,6 @@ Global: ...@@ -16,7 +16,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/german_dict.txt character_dict_path: ppocr/utils/dict/german_dict.txt
character_type: german
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -16,7 +16,6 @@ Global: ...@@ -16,7 +16,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/japan_dict.txt character_dict_path: ppocr/utils/dict/japan_dict.txt
character_type: japan
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -16,7 +16,6 @@ Global: ...@@ -16,7 +16,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/korean_dict.txt character_dict_path: ppocr/utils/dict/korean_dict.txt
character_type: korean
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
use_visualdl: false use_visualdl: false
infer_img: null infer_img: null
character_dict_path: ppocr/utils/dict/latin_dict.txt character_dict_path: ppocr/utils/dict/latin_dict.txt
character_type: latin
max_text_length: 25 max_text_length: 25
infer_mode: false infer_mode: false
use_space_char: true use_space_char: true
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: ppocr/utils/en_dict.txt character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path: ppocr/utils/EN_symbol_dict.txt
character_type: EN_symbol
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: True use_space_char: True
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict90.txt character_dict_path: ppocr/utils/dict90.txt
character_type: EN_symbol
max_text_length: 30 max_text_length: 30
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -15,7 +15,6 @@ Global: ...@@ -15,7 +15,6 @@ Global:
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process # for data or label process
character_dict_path: character_dict_path:
character_type: en
max_text_length: 25 max_text_length: 25
num_heads: 8 num_heads: 8
infer_mode: False infer_mode: False
......
...@@ -14,8 +14,7 @@ Global: ...@@ -14,8 +14,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: character_dict_path: ppocr/utils/EN_symbol_dict.txt
character_type: EN_symbol
max_text_length: 100 max_text_length: 100
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
......
...@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G ...@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G
CRNN 文本识别模型推理,可以执行如下命令: CRNN 文本识别模型推理,可以执行如下命令:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
![](../imgs_words_en/word_336.png) ![](../imgs_words_en/word_336.png)
...@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) ...@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
- 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。 - 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。
- 字符列表,DTRB论文中实验只是针对26个小写英文字母和10个数字进行实验,总共36个字符。所有大小写字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典。因此在推理时需要设置参数rec_char_type,指定为英文"en"。 - 字符列表,DTRB论文中实验只是针对26个小写英文字母和10个数字进行实验,总共36个字符。所有大小写字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典。因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。
``` ```
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
...@@ -303,15 +303,15 @@ dict_character = list(self.character_str) ...@@ -303,15 +303,15 @@ dict_character = list(self.character_str)
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \ --rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \ --rec_image_shape="1, 64, 256" \
--rec_char_type="en" \ --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN" --rec_algorithm="SRN"
``` ```
### 4. 自定义文本识别字典的推理 ### 4. 自定义文本识别字典的推理
如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch` 如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
``` ```
<a name="多语言模型的推理"></a> <a name="多语言模型的推理"></a>
...@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ...@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别: 需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
``` ```
![](../imgs_words/korean/1.jpg) ![](../imgs_words/korean/1.jpg)
...@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de ...@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de
下面给出基于EAST文本检测和STAR-Net文本识别执行命令: 下面给出基于EAST文本检测和STAR-Net文本识别执行命令:
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
执行命令后,识别结果图像如下: 执行命令后,识别结果图像如下:
......
...@@ -21,7 +21,7 @@ Next, we first introduce how to convert a trained model into an inference model, ...@@ -21,7 +21,7 @@ Next, we first introduce how to convert a trained model into an inference model,
- [2.2 DB Text Detection Model Inference](#DB_DETECTION) - [2.2 DB Text Detection Model Inference](#DB_DETECTION)
- [2.3 East Text Detection Model Inference](#EAST_DETECTION) - [2.3 East Text Detection Model Inference](#EAST_DETECTION)
- [2.4 Sast Text Detection Model Inference](#SAST_DETECTION) - [2.4 Sast Text Detection Model Inference](#SAST_DETECTION)
- [3. Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE) - [3. Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE)
- [3.1 Lightweight Chinese Text Recognition Model Reference](#LIGHTWEIGHT_RECOGNITION) - [3.1 Lightweight Chinese Text Recognition Model Reference](#LIGHTWEIGHT_RECOGNITION)
- [3.2 CTC-Based Text Recognition Model Inference](#CTC-BASED_RECOGNITION) - [3.2 CTC-Based Text Recognition Model Inference](#CTC-BASED_RECOGNITION)
...@@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G ...@@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G
For CRNN text recognition model inference, execute the following commands: For CRNN text recognition model inference, execute the following commands:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
![](../imgs_words_en/word_336.png) ![](../imgs_words_en/word_336.png)
...@@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256" ...@@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256"
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
--rec_model_dir="./inference/srn/" \ --rec_model_dir="./inference/srn/" \
--rec_image_shape="1, 64, 256" \ --rec_image_shape="1, 64, 256" \
--rec_char_type="en" \ --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
--rec_algorithm="SRN" --rec_algorithm="SRN"
``` ```
...@@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ...@@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch` If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path"
``` ```
<a name="MULTILINGUAL_MODEL_INFERENCE"></a> <a name="MULTILINGUAL_MODEL_INFERENCE"></a>
...@@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict ...@@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict
You need to specify the visualization font path through `--vis_font_path`. Fonts for several less common languages are provided by default under the `doc/fonts` path; for example, for Korean recognition: You need to specify the visualization font path through `--vis_font_path`. Fonts for several less common languages are provided by default under the `doc/fonts` path; for example, for Korean recognition:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
``` ```
![](../imgs_words/korean/1.jpg) ![](../imgs_words/korean/1.jpg)
...@@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please ...@@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please
The following command uses the combination of the EAST text detection and STAR-Net text recognition: The following command uses the combination of the EAST text detection and STAR-Net text recognition:
``` ```
python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
``` ```
After executing the command, the recognition result image is as follows: After executing the command, the recognition result image is as follows:
......
...@@ -21,6 +21,8 @@ import numpy as np ...@@ -21,6 +21,8 @@ import numpy as np
import string import string
import json import json
from ppocr.utils.logging import get_logger
class ClsLabelEncode(object): class ClsLabelEncode(object):
def __init__(self, label_list, **kwargs): def __init__(self, label_list, **kwargs):
...@@ -92,31 +94,22 @@ class BaseRecLabelEncode(object): ...@@ -92,31 +94,22 @@ class BaseRecLabelEncode(object):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
self.max_text_len = max_text_length self.max_text_len = max_text_length
self.beg_str = "sos" self.beg_str = "sos"
self.end_str = "eos" self.end_str = "eos"
if character_type == "en":
if character_dict_path is None:
logger = get_logger()
logger.warning(
"The character_dict_path is None, model can only recognize number and lower letters"
)
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "EN_symbol": self.lower = True
# same with ASTER setting (use 94 char). else:
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.character_str = "" self.character_str = ""
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
...@@ -125,7 +118,6 @@ class BaseRecLabelEncode(object): ...@@ -125,7 +118,6 @@ class BaseRecLabelEncode(object):
if use_space_char: if use_space_char:
self.character_str += " " self.character_str += " "
dict_character = list(self.character_str) dict_character = list(self.character_str)
self.character_type = character_type
dict_character = self.add_special_char(dict_character) dict_character = self.add_special_char(dict_character)
self.dict = {} self.dict = {}
for i, char in enumerate(dict_character): for i, char in enumerate(dict_character):
...@@ -147,7 +139,7 @@ class BaseRecLabelEncode(object): ...@@ -147,7 +139,7 @@ class BaseRecLabelEncode(object):
""" """
if len(text) == 0 or len(text) > self.max_text_len: if len(text) == 0 or len(text) > self.max_text_len:
return None return None
if self.character_type == "en": if self.lower:
text = text.lower() text = text.lower()
text_list = [] text_list = []
for char in text: for char in text:
...@@ -167,13 +159,11 @@ class NRTRLabelEncode(BaseRecLabelEncode): ...@@ -167,13 +159,11 @@ class NRTRLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='EN_symbol',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(NRTRLabelEncode, super(NRTRLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
text = data['label'] text = data['label']
...@@ -200,12 +190,10 @@ class CTCLabelEncode(BaseRecLabelEncode): ...@@ -200,12 +190,10 @@ class CTCLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(CTCLabelEncode, super(CTCLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
text = data['label'] text = data['label']
...@@ -231,12 +219,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode): ...@@ -231,12 +219,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='EN',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(E2ELabelEncodeTest, super(E2ELabelEncodeTest, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def __call__(self, data): def __call__(self, data):
import json import json
...@@ -305,12 +291,10 @@ class AttnLabelEncode(BaseRecLabelEncode): ...@@ -305,12 +291,10 @@ class AttnLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(AttnLabelEncode, super(AttnLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -353,12 +337,10 @@ class SEEDLabelEncode(BaseRecLabelEncode): ...@@ -353,12 +337,10 @@ class SEEDLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SEEDLabelEncode, super(SEEDLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.end_str = "eos" self.end_str = "eos"
...@@ -385,12 +367,10 @@ class SRNLabelEncode(BaseRecLabelEncode): ...@@ -385,12 +367,10 @@ class SRNLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length=25, max_text_length=25,
character_dict_path=None, character_dict_path=None,
character_type='en',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SRNLabelEncode, super(SRNLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
dict_character = dict_character + [self.beg_str, self.end_str] dict_character = dict_character + [self.beg_str, self.end_str]
...@@ -598,12 +578,10 @@ class SARLabelEncode(BaseRecLabelEncode): ...@@ -598,12 +578,10 @@ class SARLabelEncode(BaseRecLabelEncode):
def __init__(self, def __init__(self,
max_text_length, max_text_length,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
**kwargs): **kwargs):
super(SARLabelEncode, super(SARLabelEncode, self).__init__(
self).__init__(max_text_length, character_dict_path, max_text_length, character_dict_path, use_space_char)
character_type, use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
beg_end_str = "<BOS/EOS>" beg_end_str = "<BOS/EOS>"
......
...@@ -21,33 +21,16 @@ import re ...@@ -21,33 +21,16 @@ import re
class BaseRecLabelDecode(object): class BaseRecLabelDecode(object):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False):
character_dict_path=None,
character_type='ch',
use_space_char=False):
support_character_type = [
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
self.beg_str = "sos" self.beg_str = "sos"
self.end_str = "eos" self.end_str = "eos"
if character_type == "en": self.character_str = []
if character_dict_path is None:
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "EN_symbol": self.lower = True
# same with ASTER setting (use 94 char). else:
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
elif character_type in support_character_type:
self.character_str = []
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type)
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
...@@ -57,9 +40,6 @@ class BaseRecLabelDecode(object): ...@@ -57,9 +40,6 @@ class BaseRecLabelDecode(object):
self.character_str.append(" ") self.character_str.append(" ")
dict_character = list(self.character_str) dict_character = list(self.character_str)
else:
raise NotImplementedError
self.character_type = character_type
dict_character = self.add_special_char(dict_character) dict_character = self.add_special_char(dict_character)
self.dict = {} self.dict = {}
for i, char in enumerate(dict_character): for i, char in enumerate(dict_character):
...@@ -102,13 +82,10 @@ class BaseRecLabelDecode(object): ...@@ -102,13 +82,10 @@ class BaseRecLabelDecode(object):
class CTCLabelDecode(BaseRecLabelDecode): class CTCLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(CTCLabelDecode, self).__init__(character_dict_path, super(CTCLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
if isinstance(preds, tuple): if isinstance(preds, tuple):
...@@ -136,13 +113,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode): ...@@ -136,13 +113,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
def __init__(self, def __init__(self,
character_dict_path=None, character_dict_path=None,
character_type='ch',
use_space_char=False, use_space_char=False,
model_name=["student"], model_name=["student"],
key=None, key=None,
**kwargs): **kwargs):
super(DistillationCTCLabelDecode, self).__init__( super(DistillationCTCLabelDecode, self).__init__(character_dict_path,
character_dict_path, character_type, use_space_char) use_space_char)
if not isinstance(model_name, list): if not isinstance(model_name, list):
model_name = [model_name] model_name = [model_name]
self.model_name = model_name self.model_name = model_name
...@@ -162,13 +138,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode): ...@@ -162,13 +138,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
class NRTRLabelDecode(BaseRecLabelDecode): class NRTRLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=True, **kwargs):
character_dict_path=None,
character_type='EN_symbol',
use_space_char=True,
**kwargs):
super(NRTRLabelDecode, self).__init__(character_dict_path, super(NRTRLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
...@@ -230,13 +202,10 @@ class NRTRLabelDecode(BaseRecLabelDecode): ...@@ -230,13 +202,10 @@ class NRTRLabelDecode(BaseRecLabelDecode):
class AttnLabelDecode(BaseRecLabelDecode): class AttnLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(AttnLabelDecode, self).__init__(character_dict_path, super(AttnLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -313,13 +282,10 @@ class AttnLabelDecode(BaseRecLabelDecode): ...@@ -313,13 +282,10 @@ class AttnLabelDecode(BaseRecLabelDecode):
class SEEDLabelDecode(BaseRecLabelDecode): class SEEDLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(SEEDLabelDecode, self).__init__(character_dict_path, super(SEEDLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
def add_special_char(self, dict_character): def add_special_char(self, dict_character):
self.beg_str = "sos" self.beg_str = "sos"
...@@ -394,13 +360,10 @@ class SEEDLabelDecode(BaseRecLabelDecode): ...@@ -394,13 +360,10 @@ class SEEDLabelDecode(BaseRecLabelDecode):
class SRNLabelDecode(BaseRecLabelDecode): class SRNLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='en',
use_space_char=False,
**kwargs): **kwargs):
super(SRNLabelDecode, self).__init__(character_dict_path, super(SRNLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
self.max_text_length = kwargs.get('max_text_length', 25) self.max_text_length = kwargs.get('max_text_length', 25)
def __call__(self, preds, label=None, *args, **kwargs): def __call__(self, preds, label=None, *args, **kwargs):
...@@ -616,13 +579,10 @@ class TableLabelDecode(object): ...@@ -616,13 +579,10 @@ class TableLabelDecode(object):
class SARLabelDecode(BaseRecLabelDecode): class SARLabelDecode(BaseRecLabelDecode):
""" Convert between text-label and text-index """ """ Convert between text-label and text-index """
def __init__(self, def __init__(self, character_dict_path=None, use_space_char=False,
character_dict_path=None,
character_type='ch',
use_space_char=False,
**kwargs): **kwargs):
super(SARLabelDecode, self).__init__(character_dict_path, super(SARLabelDecode, self).__init__(character_dict_path,
character_type, use_space_char) use_space_char)
self.rm_symbol = kwargs.get('rm_symbol', False) self.rm_symbol = kwargs.get('rm_symbol', False)
......
0
1
2
3
4
5
6
7
8
9
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
:
;
<
=
>
?
@
[
\
]
^
_
`
{
|
}
~
\ No newline at end of file
...@@ -131,14 +131,9 @@ def main(args): ...@@ -131,14 +131,9 @@ def main(args):
img_list.append(img) img_list.append(img)
try: try:
img_list, cls_res, predict_time = text_classifier(img_list) img_list, cls_res, predict_time = text_classifier(img_list)
except: except Exception as E:
logger.info(traceback.format_exc()) logger.info(traceback.format_exc())
logger.info( logger.info(E)
"ERROR!!!! \n"
"Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n"
"If your model has tps module: "
"TPS does not support variable shape.\n"
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit() exit()
for ino in range(len(img_list)): for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
......
...@@ -38,40 +38,34 @@ logger = get_logger() ...@@ -38,40 +38,34 @@ logger = get_logger()
class TextRecognizer(object): class TextRecognizer(object):
def __init__(self, args): def __init__(self, args):
self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm self.rec_algorithm = args.rec_algorithm
postprocess_params = { postprocess_params = {
'name': 'CTCLabelDecode', 'name': 'CTCLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
if self.rec_algorithm == "SRN": if self.rec_algorithm == "SRN":
postprocess_params = { postprocess_params = {
'name': 'SRNLabelDecode', 'name': 'SRNLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == "RARE": elif self.rec_algorithm == "RARE":
postprocess_params = { postprocess_params = {
'name': 'AttnLabelDecode', 'name': 'AttnLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == 'NRTR': elif self.rec_algorithm == 'NRTR':
postprocess_params = { postprocess_params = {
'name': 'NRTRLabelDecode', 'name': 'NRTRLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
elif self.rec_algorithm == "SAR": elif self.rec_algorithm == "SAR":
postprocess_params = { postprocess_params = {
'name': 'SARLabelDecode', 'name': 'SARLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path, "character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char "use_space_char": args.use_space_char
} }
......
...@@ -74,7 +74,6 @@ def init_args(): ...@@ -74,7 +74,6 @@ def init_args():
parser.add_argument("--rec_algorithm", type=str, default='CRNN') parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str) parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25) parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument( parser.add_argument(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册