From 9b6abc12cd733648f62b95f329259a1ca3e6ef0e Mon Sep 17 00:00:00 2001 From: tink2123 Date: Tue, 13 Oct 2020 14:15:58 +0800 Subject: [PATCH] add dict and corpus folder --- .../rec/multi_languages/rec_french_lite_train.yml | 2 +- configs/rec/multi_languages/rec_ger_lite_train.yml | 2 +- configs/rec/multi_languages/rec_japan_lite_train.yml | 2 +- .../rec/multi_languages/rec_korean_lite_train.yml | 2 +- doc/doc_ch/inference.md | 2 +- doc/doc_ch/recognition.md | 12 ++++++------ doc/doc_en/inference_en.md | 2 +- doc/doc_en/recognition_en.md | 12 ++++++------ ppocr/utils/{ => dict}/french_dict.txt | 0 ppocr/utils/{ => dict}/german_dict.txt | 0 ppocr/utils/{ => dict}/japan_dict.txt | 0 ppocr/utils/{ => dict}/korean_dict.txt | 0 12 files changed, 18 insertions(+), 18 deletions(-) rename ppocr/utils/{ => dict}/french_dict.txt (100%) rename ppocr/utils/{ => dict}/german_dict.txt (100%) rename ppocr/utils/{ => dict}/japan_dict.txt (100%) rename ppocr/utils/{ => dict}/korean_dict.txt (100%) diff --git a/configs/rec/multi_languages/rec_french_lite_train.yml b/configs/rec/multi_languages/rec_french_lite_train.yml index 2cf54c42..b4347bcd 100755 --- a/configs/rec/multi_languages/rec_french_lite_train.yml +++ b/configs/rec/multi_languages/rec_french_lite_train.yml @@ -12,7 +12,7 @@ Global: image_shape: [3, 32, 320] max_text_length: 25 character_type: french - character_dict_path: ./ppocr/utils/french_dict.txt + character_dict_path: ./ppocr/utils/dict/french_dict.txt loss_type: ctc distort: true use_space_char: false diff --git a/configs/rec/multi_languages/rec_ger_lite_train.yml b/configs/rec/multi_languages/rec_ger_lite_train.yml index beb1755b..4e3b6c9e 100755 --- a/configs/rec/multi_languages/rec_ger_lite_train.yml +++ b/configs/rec/multi_languages/rec_ger_lite_train.yml @@ -12,7 +12,7 @@ Global: image_shape: [3, 32, 320] max_text_length: 25 character_type: german - character_dict_path: ./ppocr/utils/german_dict.txt + character_dict_path: ./ppocr/utils/dict/german_dict.txt 
loss_type: ctc distort: true use_space_char: false diff --git a/configs/rec/multi_languages/rec_japan_lite_train.yml b/configs/rec/multi_languages/rec_japan_lite_train.yml index fbbab33e..ddfb7e2f 100755 --- a/configs/rec/multi_languages/rec_japan_lite_train.yml +++ b/configs/rec/multi_languages/rec_japan_lite_train.yml @@ -12,7 +12,7 @@ Global: image_shape: [3, 32, 320] max_text_length: 25 character_type: japan - character_dict_path: ./ppocr/utils/japan_dict.txt + character_dict_path: ./ppocr/utils/dict/japan_dict.txt loss_type: ctc distort: true use_space_char: false diff --git a/configs/rec/multi_languages/rec_korean_lite_train.yml b/configs/rec/multi_languages/rec_korean_lite_train.yml index 29cc08aa..2dba0885 100755 --- a/configs/rec/multi_languages/rec_korean_lite_train.yml +++ b/configs/rec/multi_languages/rec_korean_lite_train.yml @@ -12,7 +12,7 @@ Global: image_shape: [3, 32, 320] max_text_length: 25 character_type: korean - character_dict_path: ./ppocr/utils/korean_dict.txt + character_dict_path: ./ppocr/utils/dict/korean_dict.txt loss_type: ctc distort: true use_space_char: false diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index 9e6143ef..0432695a 100644 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -325,7 +325,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png 需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/` 路径下有默认提供的小语种字体,例如韩文识别: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/korean_dict.txt" --vis_font_path="doc/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md 
index 918050cc..939fe6e7 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -120,19 +120,19 @@ word_dict.txt 每行有一个单字,将字符与数字索引映射在一起, `ppocr/utils/ic15_dict.txt` 是一个包含36个字符的英文字典, -`ppocr/utils/french_dict.txt` 是一个包含118个字符的法文字典 +`ppocr/utils/dict/french_dict.txt` 是一个包含118个字符的法文字典 -`ppocr/utils/japan_dict.txt` 是一个包含4399个字符的法文字典 +`ppocr/utils/dict/japan_dict.txt` 是一个包含4399个字符的日文字典 -`ppocr/utils/korean_dict.txt` 是一个包含3636个字符的法文字典 +`ppocr/utils/dict/korean_dict.txt` 是一个包含3636个字符的韩文字典 -`ppocr/utils/german_dict.txt` 是一个包含131个字符的法文字典 +`ppocr/utils/dict/german_dict.txt` 是一个包含131个字符的德文字典 您可以按需使用。 目前的多语言模型仍处在demo阶段,会持续优化模型并补充语种,**非常欢迎您为我们提供其他语言的字典和字体**, -如您愿意可将字典文件提交至 [utils](../../ppocr/utils) ,我们会在Repo中感谢您。 +如您愿意可将字典文件提交至 [dict](../../ppocr/utils/dict) ,将语料文件提交至 [corpus](../../ppocr/utils/corpus) ,我们会在Repo中感谢您。 - 自定义字典 @@ -269,7 +269,7 @@ PaddleOCR也提供了多语言的, `configs/rec/multi_languages` 路径下的 Global: ... # 添加自定义字典,如修改字典请将路径指向新字典 - character_dict_path: ./ppocr/utils/french_dict.txt + character_dict_path: ./ppocr/utils/dict/french_dict.txt # 训练时添加数据增强 distort: true # 识别空格 diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index 38cc5489..609b65fa 100644 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -330,7 +330,7 @@ If you need to predict other language models, when using inference model predict You need to specify the visual font path through `--vis_font_path`. 
There are small language fonts provided by default under the `doc/` path, such as Korean recognition: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/ utils/korean_dict.txt" --vis_font_path="doc/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 61d829b8..41b00c52 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -112,18 +112,18 @@ In `word_dict.txt`, there is a single word in each line, which maps characters a `ppocr/utils/ic15_dict.txt` is an English dictionary with 63 characters -`ppocr/utils/french_dict.txt` is a French dictionary with 118 characters +`ppocr/utils/dict/french_dict.txt` is a French dictionary with 118 characters -`ppocr/utils/japan_dict.txt` is a French dictionary with 4399 characters +`ppocr/utils/dict/japan_dict.txt` is a Japanese dictionary with 4399 characters -`ppocr/utils/korean_dict.txt` is a French dictionary with 3636 characters +`ppocr/utils/dict/korean_dict.txt` is a Korean dictionary with 3636 characters -`ppocr/utils/german_dict.txt` is a French dictionary with 131 characters +`ppocr/utils/dict/german_dict.txt` is a German dictionary with 131 characters You can use it on demand. The current multi-language model is still in the demo stage and will continue to optimize the model and add languages. **You are very welcome to provide us with dictionaries and fonts in other languages**, -If you like, you can submit the dictionary file to [utils](../../ppocr/utils) and we will thank you in the Repo. 
+If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) or corpus file to [corpus](../../ppocr/utils/corpus) and we will thank you in the Repo. To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` and set `character_type` to `ch`. @@ -259,7 +259,7 @@ Global: ... # Add a custom dictionary, if you modify the dictionary # please point the path to the new dictionary - character_dict_path: ./ppocr/utils/french_dict.txt + character_dict_path: ./ppocr/utils/dict/french_dict.txt # Add data augmentation during training distort: true # Identify spaces diff --git a/ppocr/utils/french_dict.txt b/ppocr/utils/dict/french_dict.txt similarity index 100% rename from ppocr/utils/french_dict.txt rename to ppocr/utils/dict/french_dict.txt diff --git a/ppocr/utils/german_dict.txt b/ppocr/utils/dict/german_dict.txt similarity index 100% rename from ppocr/utils/german_dict.txt rename to ppocr/utils/dict/german_dict.txt diff --git a/ppocr/utils/japan_dict.txt b/ppocr/utils/dict/japan_dict.txt similarity index 100% rename from ppocr/utils/japan_dict.txt rename to ppocr/utils/dict/japan_dict.txt diff --git a/ppocr/utils/korean_dict.txt b/ppocr/utils/dict/korean_dict.txt similarity index 100% rename from ppocr/utils/korean_dict.txt rename to ppocr/utils/dict/korean_dict.txt -- GitLab