diff --git a/README.md b/README.md
index a5c52bdb356fa765931fa7046aca5e4ef72caa95..e0557b2749ae2b37b2710e3fa0066118d9c24f75 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,8 @@ PaddleOCR supports both dynamic graph and static graph programming paradigm
The above pictures are the visualizations of the general ppocr_server model. For more effect pictures, please see [More visualizations](./doc/doc_en/visualization_en.md).
diff --git a/configs/rec/multi_language/generate_multi_language_configs.py b/configs/rec/multi_language/generate_multi_language_configs.py
index c3e33c0eba72f21cd4022a492b5635ed258c8b39..6759ca2a4640e9b1eee75d366bbe203b6a4d1b87 100644
--- a/configs/rec/multi_language/generate_multi_language_configs.py
+++ b/configs/rec/multi_language/generate_multi_language_configs.py
@@ -118,7 +118,6 @@ class ArgsParser(ArgumentParser):
return config
def _set_language(self, type):
- print("type:", type)
lang = type[0]
assert (type), "please use -l or --language to choose language type"
assert(
diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md
index 4842a8c57ba7893b5753cde04ae54399a45d8a56..0d0e287d311e4e057c5be2fb4cbc242e60041111 100644
--- a/doc/doc_ch/models_list.md
+++ b/doc/doc_ch/models_list.md
@@ -113,7 +113,7 @@ python3 generate_multi_language_configs.py -l it \
| cyrillic_mobile_v2.0_rec | 斯拉夫字母 | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
| devanagari_mobile_v2.0_rec | 梵文字母 | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
-更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
+更多支持语种请参考: [多语言模型](./multi_languages.md)
diff --git a/doc/doc_ch/multi_languages.md b/doc/doc_ch/multi_languages.md
index 4e84c08582448722fa9e213ba64abfecd4f190eb..741602e3c26725304c8a5e8300969fbea6ece4d0 100644
--- a/doc/doc_ch/multi_languages.md
+++ b/doc/doc_ch/multi_languages.md
@@ -134,7 +134,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
### 2.2 python 脚本运行
-ppocr 也支持在python脚本中运行,便于嵌入到您自己的代码中:
+ppocr 也支持在python脚本中运行,便于嵌入到您自己的代码中:
* 整图预测(检测+识别)
@@ -155,7 +155,7 @@ image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
-im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/korean.ttf')
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/fonts/korean.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
```
@@ -240,7 +240,7 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别
|德文|german|german|
|日文|japan|japan|
|韩文|korean|korean|
-|中文繁体|chinese traditional |ch_tra|
+|中文繁体|chinese traditional |chinese_cht|
|意大利文| Italian |it|
|西班牙文|Spanish |es|
|葡萄牙文| Portuguese|pt|
@@ -259,10 +259,9 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别
|乌克兰文|Ukranian|uk|
|白俄罗斯文|Belarusian|be|
|泰卢固文|Telugu |te|
-|卡纳达文|Kannada |kn|
|泰米尔文|Tamil |ta|
|南非荷兰文 |Afrikaans |af|
-|阿塞拜疆文 |Azerbaijani |az|
+|阿塞拜疆文 |Azerbaijani |az|
|波斯尼亚文|Bosnian|bs|
|捷克文|Czech|cs|
|威尔士文 |Welsh |cy|
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index 7d5b0d086fc7e8b329e4fba475bb36445d05d018..e45effdaf1468081ccb41b64816183fce973c84a 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -111,7 +111,7 @@ python3 generate_multi_language_configs.py -l it \
| cyrillic_mobile_v2.0_rec | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
| devanagari_mobile_v2.0_rec | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
-For more supported languages, please refer to : [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
+For more supported languages, please refer to: [Multi-language model](./multi_languages_en.md)
diff --git a/doc/doc_en/multi_languages_en.md b/doc/doc_en/multi_languages_en.md
index 3a7dde9600fd4081b413c2c01747a8c1f610e5f6..f801db5067e70e174491f41bc6ac5f9764364a0f 100644
--- a/doc/doc_en/multi_languages_en.md
+++ b/doc/doc_en/multi_languages_en.md
@@ -153,7 +153,7 @@ image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
-im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/korean.ttf')
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/fonts/korean.ttf')
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
```
@@ -232,7 +232,7 @@ For functions such as data annotation, you can read the complete [Document Tutor
|german|german|
|japan|japan|
|korean|korean|
-|chinese traditional |ch_tra|
+|chinese traditional |chinese_cht|
| Italian |it|
|Spanish |es|
| Portuguese|pt|
@@ -251,7 +251,6 @@ For functions such as data annotation, you can read the complete [Document Tutor
|Ukranian|uk|
|Belarusian|be|
|Telugu |te|
-|Kannada |kn|
|Tamil |ta|
|Afrikaans |af|
|Azerbaijani |az|
diff --git a/doc/imgs_results/multi_lang/img_01.jpg b/doc/imgs_results/multi_lang/img_01.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ee6ca69207f85bc3dac01a3b845462163d3cfd8d
Binary files /dev/null and b/doc/imgs_results/multi_lang/img_01.jpg differ
diff --git a/doc/imgs_results/multi_lang/img_02.jpg b/doc/imgs_results/multi_lang/img_02.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e139c76bc101fc06a0eae85cc203f48c45c08e1
Binary files /dev/null and b/doc/imgs_results/multi_lang/img_02.jpg differ
diff --git a/paddleocr.py b/paddleocr.py
index d5b6a01ba1fa8b233652de9b972f88643e89c5d8..016b00c1b15ccfafd46a6bdb71878f961acc29c4 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -30,6 +30,7 @@ from ppocr.utils.logging import get_logger
logger = get_logger()
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
+from tools.infer.utility import draw_ocr
__all__ = ['PaddleOCR']
@@ -117,7 +118,7 @@ model_urls = {
}
SUPPORT_DET_MODEL = ['DB']
-VERSION = 2.1
+VERSION = '2.1'
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
@@ -315,14 +316,13 @@ class PaddleOCR(predict_system.TextSystem):
# init model dir
if postprocess_params.det_model_dir is None:
- postprocess_params.det_model_dir = os.path.join(
- BASE_DIR, '{}/det/{}'.format(VERSION, det_lang))
+ postprocess_params.det_model_dir = os.path.join(BASE_DIR, VERSION,
+ 'det', det_lang)
if postprocess_params.rec_model_dir is None:
- postprocess_params.rec_model_dir = os.path.join(
- BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
+ postprocess_params.rec_model_dir = os.path.join(BASE_DIR, VERSION,
+ 'rec', lang)
if postprocess_params.cls_model_dir is None:
- postprocess_params.cls_model_dir = os.path.join(
- BASE_DIR, '{}/cls'.format(VERSION))
+ postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
print(postprocess_params)
# download model
maybe_download(postprocess_params.det_model_dir,
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index cbb110090cfff3ebee4b30b009f88fc9aaba1617..ed1b35e8f60b19c551a919a4d68ddcc137bf3ff9 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -96,7 +96,7 @@ class BaseRecLabelEncode(object):
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
- 'mr', 'ne'
+ 'mr', 'ne', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index af243caa44e8390657b7a95e971aede0c0f90edd..e6bc07aa98ab2991a77811d7ea0f723ed26f75e2 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -28,7 +28,7 @@ class BaseRecLabelDecode(object):
'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
- 'ne', 'EN'
+ 'ne', 'EN', 'latin', 'arabic', 'cyrillic', 'devanagari'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type)