diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml index 38f77f7372c4e422b5601deb5119c24fd1e3f787..e2aa50106ff60aa61858a22ba6fdd03b8cd04d85 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml @@ -14,7 +14,6 @@ Global: use_visualdl: false infer_img: doc/imgs_words/ch/word_1.jpg character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: ch max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml index d2308fd5747f3fadf3bb1c98c5602c67d5e63eca..ab48b99791d00785d143cd933ccc31b3f69d0f8f 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml @@ -14,7 +14,6 @@ Global: use_visualdl: false infer_img: doc/imgs_words/ch/word_1.jpg character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: ch max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml index 8b568637a189ac47438b84e89fc55ddc643ab297..7161203035b2324c7afc56b2b0c743428558a098 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml @@ -14,7 +14,6 @@ Global: use_visualdl: false infer_img: doc/imgs_words/ch/word_1.jpg character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: ch max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml index 717c16814bac2f6fca78aa63566df12bd8cbf67b..c76063d5cedc31985404ddfff5147e1e0c100d20 100644 --- a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml +++ 
b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: ch max_text_length: 25 infer_mode: False use_space_char: True diff --git a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml index 660465f301047110db7001db7a32e687f2917b61..563ce110b865adabf320616227bdf8d2eb465c11 100644 --- a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml +++ b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process character_dict_path: ppocr/utils/ppocr_keys_v1.txt - character_type: ch max_text_length: 25 infer_mode: False use_space_char: True diff --git a/configs/rec/multi_language/rec_arabic_lite_train.yml b/configs/rec/multi_language/rec_arabic_lite_train.yml index 6dcfd1b69988b09c7dfc05cdbacce9756ea1f7cb..a746260e0001e34b1f50fb066885091b3686cb4d 100644 --- a/configs/rec/multi_language/rec_arabic_lite_train.yml +++ b/configs/rec/multi_language/rec_arabic_lite_train.yml @@ -15,7 +15,6 @@ Global: use_visualdl: false infer_img: null character_dict_path: ppocr/utils/dict/arabic_dict.txt - character_type: arabic max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/multi_language/rec_cyrillic_lite_train.yml b/configs/rec/multi_language/rec_cyrillic_lite_train.yml index 52527c1dfb9a306429bbab9241c623581d546e45..98544f627111340b61abd210ea5b4d7979511a15 100644 --- a/configs/rec/multi_language/rec_cyrillic_lite_train.yml +++ b/configs/rec/multi_language/rec_cyrillic_lite_train.yml @@ -15,7 +15,6 @@ Global: use_visualdl: false infer_img: null character_dict_path: ppocr/utils/dict/cyrillic_dict.txt - character_type: cyrillic max_text_length: 25 infer_mode: false use_space_char: true diff --git 
a/configs/rec/multi_language/rec_devanagari_lite_train.yml b/configs/rec/multi_language/rec_devanagari_lite_train.yml index e1a7c829c3e6d3c3a57f1d501cdd80a560703ec7..518b9f19ccaccb6405f7e9cb4d783b441e8c7ae7 100644 --- a/configs/rec/multi_language/rec_devanagari_lite_train.yml +++ b/configs/rec/multi_language/rec_devanagari_lite_train.yml @@ -15,7 +15,6 @@ Global: use_visualdl: false infer_img: null character_dict_path: ppocr/utils/dict/devanagari_dict.txt - character_type: devanagari max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml index fff4dfcd905b406964bb07cf14017af22f40e91e..ff1fb8698163d00fae57e682059da47d2007505d 100644 --- a/configs/rec/multi_language/rec_en_number_lite_train.yml +++ b/configs/rec/multi_language/rec_en_number_lite_train.yml @@ -16,7 +16,6 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/en_dict.txt - character_type: EN max_text_length: 25 infer_mode: False use_space_char: True diff --git a/configs/rec/multi_language/rec_french_lite_train.yml b/configs/rec/multi_language/rec_french_lite_train.yml index 63378d38a0d31fc77c33173e0ed864f28c5c3a8b..217369d30bc3ac6e09c2a580facbd0395e0ce727 100644 --- a/configs/rec/multi_language/rec_french_lite_train.yml +++ b/configs/rec/multi_language/rec_french_lite_train.yml @@ -16,7 +16,6 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict/french_dict.txt - character_type: french max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/multi_language/rec_german_lite_train.yml b/configs/rec/multi_language/rec_german_lite_train.yml index 1651510c5e4597e82298135d2f6c64aa747cf961..67520f5fb668327fdbd0cddb68cb6a3d6d3d112e 100644 --- a/configs/rec/multi_language/rec_german_lite_train.yml +++ b/configs/rec/multi_language/rec_german_lite_train.yml @@ -16,7 +16,6 @@ Global: infer_img: # 
for data or label process character_dict_path: ppocr/utils/dict/german_dict.txt - character_type: german max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/multi_language/rec_japan_lite_train.yml b/configs/rec/multi_language/rec_japan_lite_train.yml index bb47584edbc70f68d8d2d89dced3ec9b12f0e1cb..448aff1ebd0b418191c622cee97346931a86929b 100644 --- a/configs/rec/multi_language/rec_japan_lite_train.yml +++ b/configs/rec/multi_language/rec_japan_lite_train.yml @@ -16,7 +16,6 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict/japan_dict.txt - character_type: japan max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/multi_language/rec_korean_lite_train.yml b/configs/rec/multi_language/rec_korean_lite_train.yml index 77f15524f78cd7f1c3dcf4988960e718422f5d89..8118119da8f15102ad4c8485b7e26b9436d65cda 100644 --- a/configs/rec/multi_language/rec_korean_lite_train.yml +++ b/configs/rec/multi_language/rec_korean_lite_train.yml @@ -16,7 +16,6 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict/korean_dict.txt - character_type: korean max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/multi_language/rec_latin_lite_train.yml b/configs/rec/multi_language/rec_latin_lite_train.yml index e71112b4b4f0afd3ceab9f10078bc5d518ee9e59..04fe6d1a49ea06341b2218123d2319a5962b934b 100644 --- a/configs/rec/multi_language/rec_latin_lite_train.yml +++ b/configs/rec/multi_language/rec_latin_lite_train.yml @@ -15,7 +15,6 @@ Global: use_visualdl: false infer_img: null character_dict_path: ppocr/utils/dict/latin_dict.txt - character_type: latin max_text_length: 25 infer_mode: false use_space_char: true diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml index 17a4d76483635d648ebb8cb897f621a186dcd516..893f7382f8b82f3c2d5f10cdf10735645fd3a5ee 100644 --- a/configs/rec/rec_icdar15_train.yml +++ 
b/configs/rec/rec_icdar15_train.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words_en/word_10.png # for data or label process character_dict_path: ppocr/utils/en_dict.txt - character_type: EN max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_mtb_nrtr.yml b/configs/rec/rec_mtb_nrtr.yml index 8639a28a931247ee34f2e3842407fd1d2e065950..392afc98d52194fdd144ccee626dbda4ddc547e5 100644 --- a/configs/rec/rec_mtb_nrtr.yml +++ b/configs/rec/rec_mtb_nrtr.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words_en/word_10.png # for data or label process - character_dict_path: - character_type: EN_symbol + character_dict_path: ppocr/utils/EN_symbol_dict.txt max_text_length: 25 infer_mode: False use_space_char: True diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml index 9e0bd23edba053b44fc7241c0a587ced5cd1ac76..9a950923b0cd4292f3f4d70ae51abc60c59dc615 100644 --- a/configs/rec/rec_mv3_none_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words_en/word_10.png # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml index 904afe1134b565d6459cdcda4cbfa43ae4925b92..28f0252adb4b74f88f8c6203521adb66c851e6b0 100644 --- a/configs/rec/rec_mv3_none_none_ctc.yml +++ b/configs/rec/rec_mv3_none_none_ctc.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words_en/word_10.png # for data or label process character_dict_path: - character_type: en max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_mv3_tps_bilstm_att.yml b/configs/rec/rec_mv3_tps_bilstm_att.yml index feaeb0545c687774938521e4c45c026207172f11..6c347e765fe04ca3e5330de6cabb9998855436c9 100644 --- 
a/configs/rec/rec_mv3_tps_bilstm_att.yml +++ b/configs/rec/rec_mv3_tps_bilstm_att.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml index 65ab23c42aff54ee548867e3482d7400603551ad..9d1ebbe4e2ce25d746ff9d6993bf820347a3558a 100644 --- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words_en/word_10.png # for data or label process character_dict_path: - character_type: en max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r31_sar.yml b/configs/rec/rec_r31_sar.yml index 41609fdf28e78f5340ab08878c2b8b23f46020d2..65e7877b28da80e0730f551b07d60b8a8c0ac48e 100644 --- a/configs/rec/rec_r31_sar.yml +++ b/configs/rec/rec_r31_sar.yml @@ -15,7 +15,6 @@ Global: infer_img: # for data or label process character_dict_path: ppocr/utils/dict90.txt - character_type: EN_symbol max_text_length: 30 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml index 331bb36ed84b83dc62a0f9b15524457238dedc13..9fdb5e99acec4ab5b2c3ff4b29158a41c766844b 100644 --- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words_en/word_10.png # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml index 695a46958f669e4cb9508646080b45ac0767b8c9..0af2b2ff21938ce9b1750bd0fd8e27dabfd39998 100644 --- 
a/configs/rec/rec_r34_vd_none_none_ctc.yml +++ b/configs/rec/rec_r34_vd_none_none_ctc.yml @@ -15,7 +15,6 @@ Global: infer_img: doc/imgs_words_en/word_10.png # for data or label process character_dict_path: - character_type: en max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r34_vd_tps_bilstm_att.yml b/configs/rec/rec_r34_vd_tps_bilstm_att.yml index fdd3588c844ffd7ed61de73077ae2994f0ad498d..8919aae75720d1e2f786957dd44e2d5d6dcbb5af 100644 --- a/configs/rec/rec_r34_vd_tps_bilstm_att.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_att.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml index 67108a6eaca2dd6f239261f5184341e5ade00dc0..c21fe61fbe62bab940bdb5ec1fef7833f402cb6c 100644 --- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words_en/word_10.png # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 infer_mode: False use_space_char: False diff --git a/configs/rec/rec_r50_fpn_srn.yml b/configs/rec/rec_r50_fpn_srn.yml index fa7b1ae4e5fed41d3aa3670d6672cca01b63c359..b685362dedbcd6022fa247fe1499017647fa1546 100644 --- a/configs/rec/rec_r50_fpn_srn.yml +++ b/configs/rec/rec_r50_fpn_srn.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process - character_dict_path: - character_type: en + character_dict_path: max_text_length: 25 num_heads: 8 infer_mode: False diff --git a/configs/rec/rec_resnet_stn_bilstm_att.yml b/configs/rec/rec_resnet_stn_bilstm_att.yml index 
1f6e534a6878a7ae84fc7fa7e1d975077f164d80..0f599258d46e2ce89a6b7deccf8287a2ec0f7e4e 100644 --- a/configs/rec/rec_resnet_stn_bilstm_att.yml +++ b/configs/rec/rec_resnet_stn_bilstm_att.yml @@ -14,8 +14,7 @@ Global: use_visualdl: False infer_img: doc/imgs_words_en/word_10.png # for data or label process - character_dict_path: - character_type: EN_symbol + character_dict_path: ppocr/utils/EN_symbol_dict.txt max_text_length: 100 infer_mode: False use_space_char: False diff --git a/doc/doc_ch/config.md b/doc/doc_ch/config.md index 600d5bdb120444ec89222360af02adb3f96a8640..dcd0318ed908375c896d7a6730cd72db4cc4b848 100644 --- a/doc/doc_ch/config.md +++ b/doc/doc_ch/config.md @@ -37,10 +37,9 @@ | checkpoints | 加载模型参数路径 | None | 用于中断后加载参数继续训练 | | use_visualdl | 设置是否启用visualdl进行可视化log展示 | False | [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) | | infer_img | 设置预测图像路径或文件夹路径 | ./infer_img | \| -| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | \ | +| character_dict_path | 设置字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | 如果为空,则默认使用小写字母+数字作为字典 | | max_text_length | 设置文本最大长度 | 25 | \ | -| character_type | 设置字符类型 | ch | en/ch, en时将使用默认dict,ch时使用自定义dict| -| use_space_char | 设置是否识别空格 | True | 仅在 character_type=ch 时支持空格 | +| use_space_char | 设置是否识别空格 | True | | | label_list | 设置方向分类器支持的角度 | ['0','180'] | 仅在方向分类器中生效 | | save_res_path | 设置检测模型的结果保存地址 | ./output/det_db/predicts_db.txt | 仅在检测模型中生效 | @@ -177,7 +176,7 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi --dict {path/of/dict} \ # 字典文件路径 -o Global.use_gpu=False # 是否使用gpu ... - + ``` 意大利文由拉丁字母组成,因此执行完命令后会得到名为 rec_latin_lite_train.yml 的配置文件。 @@ -191,38 +190,37 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi use_gpu: True epoch_num: 500 ... - character_type: it # 需要识别的语种 character_dict_path: {path/of/dict} # 字典文件所在路径 - + Train: dataset: name: SimpleDataSet data_dir: train_data/ # 数据存放根目录 label_file_list: ["./train_data/train_list.txt"] # 训练集label路径 ... 
- + Eval: dataset: name: SimpleDataSet data_dir: train_data/ # 数据存放根目录 label_file_list: ["./train_data/val_list.txt"] # 验证集label路径 ... - + ``` 目前PaddleOCR支持的多语言算法有: -| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic | -| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari | +| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 
0.5 | None | BiLSTM | ctc | 韩语 | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md index b9be1e4cb2d1b256a05b82ef5d6db49dfcb2f31f..4e0f1d131e2547f0d4a8bdf35c0f4a6f8bf2e7a3 100755 --- a/doc/doc_ch/inference.md +++ b/doc/doc_ch/inference.md @@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G CRNN 文本识别模型推理,可以执行如下命令: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" ``` ![](../imgs_words_en/word_336.png) @@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) - 训练时采用的图像分辨率不同,训练上述模型采用的图像分辨率是[3,32,100],而中文模型训练时,为了保证长文本的识别效果,训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的的形状参数是训练中文采用的图像分辨率,即[3, 32, 320]。因此,这里推理上述英文模型时,需要通过参数rec_image_shape设置识别图像的形状。 -- 字符列表,DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_type,指定为英文"en"。 +- 字符列表,DTRB论文中实验只是针对26个小写英文字母和10个数字进行实验,总共36个字符。所有大小字符都转成了小写字符,不在上面列表的字符都忽略,认为是空格。因此这里没有输入字符字典,而是通过如下命令生成字典。因此在推理时需要设置参数rec_char_dict_path,指定为英文字典"./ppocr/utils/ic15_dict.txt"。 ``` self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" @@ -303,15 +303,15 @@
dict_character = list(self.character_str) python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ --rec_model_dir="./inference/srn/" \ --rec_image_shape="1, 64, 256" \ - --rec_char_type="en" \ + --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \ --rec_algorithm="SRN" ``` ### 4. 自定义文本识别字典的推理 -如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch` +如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径 ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" ``` @@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png 需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) @@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de 下面给出基于EAST文本检测和STAR-Net文本识别执行命令: ``` -python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" +python3 tools/infer/predict_system.py 
--image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" ``` 执行命令后,识别结果图像如下: diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 52f978a734cbc750f4e2f36bb3ff28b2e67ab612..bb7d01712a85c92a02109e41814059e6c98c7cdc 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -159,7 +159,6 @@ PaddleOCR内置了一部分字典,可以按需使用。 - 自定义字典 如需自定义dic文件,请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。 -并将 `character_type` 设置为 `ch`。 ### 1.4 添加空格类别 @@ -246,8 +245,6 @@ Global: ... # 添加自定义字典,如修改字典请将路径指向新字典 character_dict_path: ppocr/utils/ppocr_keys_v1.txt - # 修改字符类型 - character_type: ch ... # 识别空格 use_space_char: True @@ -311,18 +308,18 @@ PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi 按语系划分,目前PaddleOCR支持的语种有: -| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic | -| 
rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari | +| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99) diff --git a/doc/doc_en/config_en.md b/doc/doc_en/config_en.md index aa78263e4b73a3ac35250e5483a394ab77450c90..ce76da9b2f39532b387e3e45ca2ff497b0408635 100644 --- a/doc/doc_en/config_en.md +++ b/doc/doc_en/config_en.md @@ -1,4 +1,4 @@ -# Configuration +# Configuration - [1. Optional Parameter List](#1-optional-parameter-list) - [2. 
Intorduction to Global Parameters of Configuration File](#2-intorduction-to-global-parameters-of-configuration-file) @@ -37,9 +37,8 @@ Take rec_chinese_lite_train_v2.0.yml as an example | checkpoints | set model parameter path | None | Used to load parameters after interruption to continue training| | use_visualdl | Set whether to enable visualdl for visual log display | False | [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) | | infer_img | Set inference image path or folder path | ./infer_img | \| -| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | \ | +| character_dict_path | Set dictionary path | ./ppocr/utils/ppocr_keys_v1.txt | If character_dict_path is None, the model can only recognize digits and lowercase letters | | max_text_length | Set the maximum length of text | 25 | \ | -| character_type | Set character type | ch | en/ch, the default dict will be used for en, and the custom dict will be used for ch | | use_space_char | Set whether to recognize spaces | True | Only support in character_type=ch mode | | label_list | Set the angle supported by the direction classifier | ['0','180'] | Only valid in angle classifier model | | save_res_path | Set the save address of the test model results | ./output/det_db/predicts_db.txt | Only valid in the text detection model | @@ -196,40 +195,39 @@ Italian is made up of Latin letters, so after executing the command, you will ge use_gpu: True epoch_num: 500 ... - character_type: it # language character_dict_path: {path/of/dict} # path of dict - + Train: dataset: name: SimpleDataSet data_dir: train_data/ # root directory of training data label_file_list: ["./train_data/train_list.txt"] # train label path ... - + Eval: dataset: name: SimpleDataSet data_dir: train_data/ # root directory of val data label_file_list: ["./train_data/val_list.txt"] # val label path ...
- + ``` Currently, the multi-language algorithms supported by PaddleOCR are: -| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic | -| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari | +| Configuration file | Algorithm name | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 
Japanese | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations) diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index b445232feeefadc355e0f38b329050e26ccc0368..019ac4d0ac15aceed89286048d2c4d88a259e501 100755 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -21,7 +21,7 @@ Next, we first introduce how to convert a trained model into an inference model, - [2.2 DB Text Detection Model Inference](#DB_DETECTION) - [2.3 East Text Detection Model Inference](#EAST_DETECTION) - [2.4 Sast Text Detection Model Inference](#SAST_DETECTION) - + - [3. 
Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE) - [3.1 Lightweight Chinese Text Recognition Model Reference](#LIGHTWEIGHT_RECOGNITION) - [3.2 CTC-Based Text Recognition Model Inference](#CTC-BASED_RECOGNITION) @@ -281,7 +281,7 @@ python3 tools/export_model.py -c configs/det/rec_r34_vd_none_bilstm_ctc.yml -o G For CRNN text recognition model inference, execute the following commands: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" ``` ![](../imgs_words_en/word_336.png) @@ -314,7 +314,7 @@ with the training, such as: --rec_image_shape="1, 64, 256" python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \ --rec_model_dir="./inference/srn/" \ --rec_image_shape="1, 64, 256" \ - --rec_char_type="en" \ + --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \ --rec_algorithm="SRN" ``` @@ -323,7 +323,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch` ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" ``` @@ -333,7 +333,7 @@ If you need to predict other language models, when using inference model predict You need to 
specify the visual font path through `--vis_font_path`. There are small language fonts provided by default under the `doc/fonts` path, such as Korean recognition: ``` -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) @@ -399,7 +399,7 @@ If you want to try other detection algorithms or recognition algorithms, please The following command uses the combination of the EAST text detection and STAR-Net text recognition: ``` -python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en" +python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" ``` After executing the command, the recognition result image is as follows: diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 84f5541562f9ce267da10abfad209ea1eb909a3e..51857ba16b7773ef38452fad6aa070f2117a9086 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -161,7 +161,7 @@ The current multi-language model is still in the demo stage and will continue to If you like, you can submit the dictionary file to [dict](../../ppocr/utils/dict) and we will thank you in the Repo. 
-To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml` and set `character_type` to `ch`. +To customize the dict file, please modify the `character_dict_path` field in `configs/rec/rec_icdar15_train.yml`. - Custom dictionary @@ -172,8 +172,6 @@ If you need to customize dic file, please add character_dict_path field in confi If you want to support the recognition of the `space` category, please set the `use_space_char` field in the yml file to `True`. -**Note: use_space_char only takes effect when character_type=ch** - ## 2.Training @@ -250,7 +248,6 @@ Global: # Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary character_dict_path: ppocr/utils/ppocr_keys_v1.txt # Modify character type - character_type: ch ... # Whether to recognize spaces use_space_char: True @@ -312,18 +309,18 @@ Eval: Currently, the multi-language algorithms supported by PaddleOCR are: -| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | -| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | -| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| -| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN | -| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | -| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | -| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | -| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean | -| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin | -| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc |
arabic | ar | -| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | cyrillic | -| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | devanagari | +| Configuration file | Algorithm name | backbone | trans | seq | pred | language | +| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | +| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | +| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | +| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | +| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | +| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | +| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | +| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | +| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | arabic | +| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | cyrillic | +| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | devanagari | For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations) @@ -471,6 +468,3 @@ inference/det_db/ ``` python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" ``` - - - diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index 
ebf52ec4e1d8713fd4da407318b14e682952606d..0a4fad621a9038e71a9d43eb4e12f78e7e92d73d 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -21,6 +21,8 @@ import numpy as np import string import json +from ppocr.utils.logging import get_logger + class ClsLabelEncode(object): def __init__(self, label_list, **kwargs): @@ -92,31 +94,23 @@ class BaseRecLabelEncode(object): def __init__(self, max_text_length, character_dict_path=None, - character_type='ch', use_space_char=False): - support_character_type = [ - 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', - 'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', - 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', - 'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari' - ] - assert character_type in support_character_type, "Only {} are supported now but get {}".format( - support_character_type, character_type) self.max_text_len = max_text_length self.beg_str = "sos" self.end_str = "eos" - if character_type == "en": + self.lower = False + + if character_dict_path is None: + logger = get_logger() + logger.warning( + "The character_dict_path is None, model can only recognize number and lower letters" + ) self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type == "EN_symbol": - # same with ASTER setting (use 94 char). 
- self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - elif character_type in support_character_type: + self.lower = True + else: self.character_str = "" - assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format( - character_type) with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -125,7 +119,6 @@ class BaseRecLabelEncode(object): if use_space_char: self.character_str += " " dict_character = list(self.character_str) - self.character_type = character_type dict_character = self.add_special_char(dict_character) self.dict = {} for i, char in enumerate(dict_character): @@ -147,7 +140,7 @@ class BaseRecLabelEncode(object): """ if len(text) == 0 or len(text) > self.max_text_len: return None - if self.character_type == "en": + if self.lower: text = text.lower() text_list = [] for char in text: @@ -167,13 +160,11 @@ class NRTRLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='EN_symbol', use_space_char=False, **kwargs): - super(NRTRLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(NRTRLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def __call__(self, data): text = data['label'] @@ -200,12 +191,10 @@ class CTCLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='ch', use_space_char=False, **kwargs): - super(CTCLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(CTCLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def __call__(self, data): text = data['label'] @@ -231,12 +220,10 @@ class E2ELabelEncodeTest(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='EN', use_space_char=False, 
**kwargs): - super(E2ELabelEncodeTest, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(E2ELabelEncodeTest, self).__init__( + max_text_length, character_dict_path, use_space_char) def __call__(self, data): import json @@ -305,12 +292,10 @@ class AttnLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='ch', use_space_char=False, **kwargs): - super(AttnLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(AttnLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def add_special_char(self, dict_character): self.beg_str = "sos" @@ -353,12 +338,10 @@ class SEEDLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='ch', use_space_char=False, **kwargs): - super(SEEDLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(SEEDLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def add_special_char(self, dict_character): self.end_str = "eos" @@ -385,12 +368,10 @@ class SRNLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length=25, character_dict_path=None, - character_type='en', use_space_char=False, **kwargs): - super(SRNLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + super(SRNLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def add_special_char(self, dict_character): dict_character = dict_character + [self.beg_str, self.end_str] @@ -598,12 +579,10 @@ class SARLabelEncode(BaseRecLabelEncode): def __init__(self, max_text_length, character_dict_path=None, - character_type='ch', use_space_char=False, **kwargs): - super(SARLabelEncode, - self).__init__(max_text_length, character_dict_path, - character_type, use_space_char) + 
super(SARLabelEncode, self).__init__( + max_text_length, character_dict_path, use_space_char) def add_special_char(self, dict_character): beg_end_str = "" diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index 71ed8976db7de24a489d1f75612a9a9a67995ba2..b4de6de95b09ced803375d9a3bb857194ef3e64b 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -87,17 +87,17 @@ class RecResizeImg(object): def __init__(self, image_shape, infer_mode=False, - character_type='ch', + character_dict_path='./ppocr/utils/ppocr_keys_v1.txt', padding=True, **kwargs): self.image_shape = image_shape self.infer_mode = infer_mode - self.character_type = character_type + self.character_dict_path = character_dict_path self.padding = padding def __call__(self, data): img = data['image'] - if self.infer_mode and self.character_type == "ch": + if self.infer_mode and self.character_dict_path is not None: norm_img = resize_norm_img_chinese(img, self.image_shape) else: norm_img = resize_norm_img(img, self.image_shape, self.padding) diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index c06159ca55600e7afe01a68ab43acd1919cf742c..ef1a43fd0ee65f3e55a8f72dfd2f96c478da1a9a 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -21,33 +21,15 @@ import re class BaseRecLabelDecode(object): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='ch', - use_space_char=False): - support_character_type = [ - 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean', - 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', - 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', - 'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari' - ] - assert character_type in support_character_type, "Only {} are supported now but get {}".format( - support_character_type, character_type) - + def 
__init__(self, character_dict_path=None, use_space_char=False): self.beg_str = "sos" self.end_str = "eos" - if character_type == "en": + self.character_str = [] + if character_dict_path is None: self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" dict_character = list(self.character_str) - elif character_type == "EN_symbol": - # same with ASTER setting (use 94 char). - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - elif character_type in support_character_type: - self.character_str = [] - assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format( - character_type) + else: with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: @@ -57,9 +39,6 @@ class BaseRecLabelDecode(object): self.character_str.append(" ") dict_character = list(self.character_str) - else: - raise NotImplementedError - self.character_type = character_type dict_character = self.add_special_char(dict_character) self.dict = {} for i, char in enumerate(dict_character): @@ -102,13 +81,10 @@ class BaseRecLabelDecode(object): class CTCLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='ch', - use_space_char=False, + def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): super(CTCLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) def __call__(self, preds, label=None, *args, **kwargs): if isinstance(preds, tuple): @@ -136,13 +112,12 @@ class DistillationCTCLabelDecode(CTCLabelDecode): def __init__(self, character_dict_path=None, - character_type='ch', use_space_char=False, model_name=["student"], key=None, **kwargs): - super(DistillationCTCLabelDecode, self).__init__( - character_dict_path, character_type, use_space_char) + super(DistillationCTCLabelDecode, self).__init__(character_dict_path, + 
use_space_char) if not isinstance(model_name, list): model_name = [model_name] self.model_name = model_name @@ -162,13 +137,9 @@ class DistillationCTCLabelDecode(CTCLabelDecode): class NRTRLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='EN_symbol', - use_space_char=True, - **kwargs): + def __init__(self, character_dict_path=None, use_space_char=True, **kwargs): super(NRTRLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) def __call__(self, preds, label=None, *args, **kwargs): @@ -230,13 +201,10 @@ class NRTRLabelDecode(BaseRecLabelDecode): class AttnLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='ch', - use_space_char=False, + def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): super(AttnLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) def add_special_char(self, dict_character): self.beg_str = "sos" @@ -313,13 +281,10 @@ class AttnLabelDecode(BaseRecLabelDecode): class SEEDLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='ch', - use_space_char=False, + def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): super(SEEDLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) def add_special_char(self, dict_character): self.beg_str = "sos" @@ -394,13 +359,10 @@ class SEEDLabelDecode(BaseRecLabelDecode): class SRNLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='en', - use_space_char=False, + def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): 
super(SRNLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) self.max_text_length = kwargs.get('max_text_length', 25) def __call__(self, preds, label=None, *args, **kwargs): @@ -616,13 +578,10 @@ class TableLabelDecode(object): class SARLabelDecode(BaseRecLabelDecode): """ Convert between text-label and text-index """ - def __init__(self, - character_dict_path=None, - character_type='ch', - use_space_char=False, + def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): super(SARLabelDecode, self).__init__(character_dict_path, - character_type, use_space_char) + use_space_char) self.rm_symbol = kwargs.get('rm_symbol', False) diff --git a/ppocr/utils/EN_symbol_dict.txt b/ppocr/utils/EN_symbol_dict.txt new file mode 100644 index 0000000000000000000000000000000000000000..1aef43d6b842731a54cbe682ccda5c2dbfa694d9 --- /dev/null +++ b/ppocr/utils/EN_symbol_dict.txt @@ -0,0 +1,94 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +: +; +< += +> +? +@ +[ +\ +] +^ +_ +` +{ +| +} +~ \ No newline at end of file diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py index 53e50bd6d1d1a2bd07b9f1204b9f56594c669d13..1c68494861e60b4aaef541a4e247071944cf420c 100755 --- a/tools/infer/predict_cls.py +++ b/tools/infer/predict_cls.py @@ -131,14 +131,9 @@ def main(args): img_list.append(img) try: img_list, cls_res, predict_time = text_classifier(img_list) - except: + except Exception as E: logger.info(traceback.format_exc()) - logger.info( - "ERROR!!!! 
\n" - "Please read the FAQ:https://github.com/PaddlePaddle/PaddleOCR#faq \n" - "If your model has tps module: " - "TPS does not support variable shape.\n" - "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") + logger.info(E) exit() for ino in range(len(img_list)): logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index dad70281ef7604f110d29963103068bba1c8fd9d..936994a215d10d543537b29cb41bfa42b42590c7 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -38,40 +38,34 @@ logger = get_logger() class TextRecognizer(object): def __init__(self, args): self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] - self.character_type = args.rec_char_type self.rec_batch_num = args.rec_batch_num self.rec_algorithm = args.rec_algorithm postprocess_params = { 'name': 'CTCLabelDecode', - "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } if self.rec_algorithm == "SRN": postprocess_params = { 'name': 'SRNLabelDecode', - "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } elif self.rec_algorithm == "RARE": postprocess_params = { 'name': 'AttnLabelDecode', - "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } elif self.rec_algorithm == 'NRTR': postprocess_params = { 'name': 'NRTRLabelDecode', - "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } elif self.rec_algorithm == "SAR": postprocess_params = { 'name': 'SARLabelDecode', - "character_type": args.rec_char_type, "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 
538f55c42b223f9741c5c7006dd7d1478ce1920b..8b899ca9fdeec21dac949f6916f87238ac851591 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -74,7 +74,6 @@ def init_args(): parser.add_argument("--rec_algorithm", type=str, default='CRNN') parser.add_argument("--rec_model_dir", type=str) parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") - parser.add_argument("--rec_char_type", type=str, default='ch') parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--max_text_length", type=int, default=25) parser.add_argument(