diff --git a/MANIFEST.in b/MANIFEST.in
index 4c16c09d588d39906e1fa581c901cd9fcd7778c8..e16f157d6e9dd249d6c6a14ae54313759a6752c4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,4 +4,5 @@ include README.md
recursive-include ppocr/utils *.txt utility.py logging.py
recursive-include ppocr/data/ *.py
recursive-include ppocr/postprocess *.py
-recursive-include tools/infer *.py
\ No newline at end of file
+recursive-include tools/infer *.py
+recursive-include ppocr/utils/e2e_utils/ *.py
\ No newline at end of file
diff --git a/configs/e2e/e2e_r50_vd_pg.yml b/configs/e2e/e2e_r50_vd_pg.yml
index 0a232f7a4f3b9ca214bbc6fd1840cec186c027e4..e4d868f98b5847fa064e14f87a69932806791320 100644
--- a/configs/e2e/e2e_r50_vd_pg.yml
+++ b/configs/e2e/e2e_r50_vd_pg.yml
@@ -59,8 +59,10 @@ Optimizer:
PostProcess:
name: PGPostProcess
score_thresh: 0.5
+ mode: fast # fast or slow two ways
Metric:
name: E2EMetric
+ gt_mat_dir: # the dir of gt_mat
character_dict_path: ppocr/utils/ic15_dict.txt
main_indicator: f_score_e2e
@@ -106,7 +108,7 @@ Eval:
order: 'hwc'
- ToCHWImage:
- KeepKeys:
- keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags' ]
+ keep_keys: [ 'image', 'shape', 'polys', 'strs', 'tags', 'img_id']
loader:
shuffle: False
drop_last: False
diff --git a/configs/rec/multi_language/generate_multi_language_configs.py b/configs/rec/multi_language/generate_multi_language_configs.py
index 027a65c0c73e4b39ebb3b407271450d23281aa80..c3e33c0eba72f21cd4022a492b5635ed258c8b39 100644
--- a/configs/rec/multi_language/generate_multi_language_configs.py
+++ b/configs/rec/multi_language/generate_multi_language_configs.py
@@ -19,21 +19,56 @@ import logging
logging.basicConfig(level=logging.INFO)
support_list = {
- 'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic',
- 'ta':'tamil', 'ug':'uyghur', 'fa':'persian', 'ur':'urdu', 'rs':'serbian latin',
- 'oc':'occitan', 'rsc':'serbian cyrillic', 'bg':'bulgarian', 'uk':'ukranian', 'be':'belarusian',
- 'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi',
- 'ne':'nepali',
+ 'it': 'italian',
+ 'xi': 'spanish',
+ 'pu': 'portuguese',
+ 'ru': 'russian',
+ 'ar': 'arabic',
+ 'ta': 'tamil',
+ 'ug': 'uyghur',
+ 'fa': 'persian',
+ 'ur': 'urdu',
+ 'rs': 'serbian latin',
+ 'oc': 'occitan',
+ 'rsc': 'serbian cyrillic',
+ 'bg': 'bulgarian',
+ 'uk': 'ukranian',
+ 'be': 'belarusian',
+ 'te': 'telugu',
+ 'ka': 'kannada',
+ 'chinese_cht': 'chinese tradition',
+ 'hi': 'hindi',
+ 'mr': 'marathi',
+ 'ne': 'nepali',
}
-assert(
- os.path.isfile("./rec_multi_language_lite_train.yml")
- ),"Loss basic configuration file rec_multi_language_lite_train.yml.\
+
+latin_lang = [
+ 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr',
+ 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
+ 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
+ 'sw', 'tl', 'tr', 'uz', 'vi', 'latin'
+]
+arabic_lang = ['ar', 'fa', 'ug', 'ur']
+cyrillic_lang = [
+ 'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',
+ 'dar', 'inh', 'che', 'lbe', 'lez', 'tab', 'cyrillic'
+]
+devanagari_lang = [
+ 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',
+ 'sa', 'bgc', 'devanagari'
+]
+multi_lang = latin_lang + arabic_lang + cyrillic_lang + devanagari_lang
+
+assert (os.path.isfile("./rec_multi_language_lite_train.yml")
+ ), "Loss basic configuration file rec_multi_language_lite_train.yml.\
You can download it from \
https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/"
-
-global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
+
+global_config = yaml.load(
+ open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader)
project_path = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
+
class ArgsParser(ArgumentParser):
def __init__(self):
super(ArgsParser, self).__init__(
@@ -41,15 +76,30 @@ class ArgsParser(ArgumentParser):
self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options")
self.add_argument(
- "-l", "--language", nargs='+', help="set language type, support {}".format(support_list))
+ "-l",
+ "--language",
+ nargs='+',
+ help="set language type, support {}".format(support_list))
self.add_argument(
- "--train",type=str,help="you can use this command to change the train dataset default path")
+ "--train",
+ type=str,
+ help="you can use this command to change the train dataset default path"
+ )
self.add_argument(
- "--val",type=str,help="you can use this command to change the eval dataset default path")
+ "--val",
+ type=str,
+ help="you can use this command to change the eval dataset default path"
+ )
self.add_argument(
- "--dict",type=str,help="you can use this command to change the dictionary default path")
+ "--dict",
+ type=str,
+ help="you can use this command to change the dictionary default path"
+ )
self.add_argument(
- "--data_dir",type=str,help="you can use this command to change the dataset default root path")
+ "--data_dir",
+ type=str,
+ help="you can use this command to change the dataset default root path"
+ )
def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv)
@@ -68,21 +118,38 @@ class ArgsParser(ArgumentParser):
return config
def _set_language(self, type):
- assert(type),"please use -l or --language to choose language type"
+ print("type:", type)
+ lang = type[0]
+ assert (type), "please use -l or --language to choose language type"
assert(
- type[0] in support_list.keys()
+ lang in support_list.keys() or lang in multi_lang
),"the sub_keys(-l or --language) can only be one of support list: \n{},\nbut get: {}, " \
- "please check your running command".format(support_list, type)
- global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0])
- global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0])
- global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])]
- global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])]
- global_config['Global']['character_type'] = type[0]
- assert(
- os.path.isfile(os.path.join(project_path,global_config['Global']['character_dict_path']))
- ),"Loss default dictionary file {}_dict.txt.You can download it from \
-https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(type[0])
- return type[0]
+ "please check your running command".format(multi_lang, type)
+ if lang in latin_lang:
+ lang = "latin"
+ elif lang in arabic_lang:
+ lang = "arabic"
+ elif lang in cyrillic_lang:
+ lang = "cyrillic"
+ elif lang in devanagari_lang:
+ lang = "devanagari"
+ global_config['Global'][
+ 'character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(lang)
+ global_config['Global'][
+ 'save_model_dir'] = './output/rec_{}_lite'.format(lang)
+ global_config['Train']['dataset'][
+ 'label_file_list'] = ["train_data/{}_train.txt".format(lang)]
+ global_config['Eval']['dataset'][
+ 'label_file_list'] = ["train_data/{}_val.txt".format(lang)]
+ global_config['Global']['character_type'] = lang
+ assert (
+ os.path.isfile(
+ os.path.join(project_path, global_config['Global'][
+ 'character_dict_path']))
+ ), "Loss default dictionary file {}_dict.txt.You can download it from \
+https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/ppocr/utils/dict/".format(
+ lang)
+ return lang
def merge_config(config):
@@ -110,43 +177,51 @@ def merge_config(config):
cur[sub_key] = value
else:
cur = cur[sub_key]
-
+
+
def loss_file(path):
- assert(
- os.path.exists(path)
- ),"There is no such file:{},Please do not forget to put in the specified file".format(path)
+ assert (
+ os.path.exists(path)
+ ), "There is no such file:{},Please do not forget to put in the specified file".format(
+ path)
+
-
if __name__ == '__main__':
FLAGS = ArgsParser().parse_args()
merge_config(FLAGS.opt)
save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language)
if os.path.isfile(save_file_path):
os.remove(save_file_path)
-
+
if FLAGS.train:
global_config['Train']['dataset']['label_file_list'] = [FLAGS.train]
- train_label_path = os.path.join(project_path,FLAGS.train)
+ train_label_path = os.path.join(project_path, FLAGS.train)
loss_file(train_label_path)
if FLAGS.val:
global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val]
- eval_label_path = os.path.join(project_path,FLAGS.val)
+ eval_label_path = os.path.join(project_path, FLAGS.val)
loss_file(eval_label_path)
if FLAGS.dict:
global_config['Global']['character_dict_path'] = FLAGS.dict
- dict_path = os.path.join(project_path,FLAGS.dict)
+ dict_path = os.path.join(project_path, FLAGS.dict)
loss_file(dict_path)
if FLAGS.data_dir:
global_config['Eval']['dataset']['data_dir'] = FLAGS.data_dir
global_config['Train']['dataset']['data_dir'] = FLAGS.data_dir
- data_dir = os.path.join(project_path,FLAGS.data_dir)
+ data_dir = os.path.join(project_path, FLAGS.data_dir)
loss_file(data_dir)
-
+
with open(save_file_path, 'w') as f:
- yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False)
+ yaml.dump(
+ dict(global_config), f, default_flow_style=False, sort_keys=False)
logging.info("Project path is :{}".format(project_path))
- logging.info("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0]))
- logging.info("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0]))
- logging.info("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir']))
- logging.info("Dict path set to :{}".format(global_config['Global']['character_dict_path']))
- logging.info("Config file set to :configs/rec/multi_language/{}".format(save_file_path))
+ logging.info("Train list path set to :{}".format(global_config['Train'][
+ 'dataset']['label_file_list'][0]))
+ logging.info("Eval list path set to :{}".format(global_config['Eval'][
+ 'dataset']['label_file_list'][0]))
+ logging.info("Dataset root path set to :{}".format(global_config['Eval'][
+ 'dataset']['data_dir']))
+ logging.info("Dict path set to :{}".format(global_config['Global'][
+ 'character_dict_path']))
+ logging.info("Config file set to :configs/rec/multi_language/{}".
+ format(save_file_path))
diff --git a/configs/rec/multi_language/rec_arabic_lite_train.yml b/configs/rec/multi_language/rec_arabic_lite_train.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6dcfd1b69988b09c7dfc05cdbacce9756ea1f7cb
--- /dev/null
+++ b/configs/rec/multi_language/rec_arabic_lite_train.yml
@@ -0,0 +1,111 @@
+Global:
+ use_gpu: true
+ epoch_num: 500
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec_arabic_lite
+ save_epoch_step: 3
+ eval_batch_step:
+ - 0
+ - 2000
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints: null
+ save_inference_dir: null
+ use_visualdl: false
+ infer_img: null
+ character_dict_path: ppocr/utils/dict/arabic_dict.txt
+ character_type: arabic
+ max_text_length: 25
+ infer_mode: false
+ use_space_char: true
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Cosine
+ learning_rate: 0.001
+ regularizer:
+ name: L2
+ factor: 1.0e-05
+Architecture:
+ model_type: rec
+ algorithm: CRNN
+ Transform: null
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride:
+ - 1
+ - 2
+ - 2
+ - 2
+ Neck:
+ name: SequenceEncoder
+ encoder_type: rnn
+ hidden_size: 48
+ Head:
+ name: CTCHead
+ fc_decay: 1.0e-05
+Loss:
+ name: CTCLoss
+PostProcess:
+ name: CTCLabelDecode
+Metric:
+ name: RecMetric
+ main_indicator: acc
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/arabic_train.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - RecAug: null
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: true
+ batch_size_per_card: 256
+ drop_last: true
+ num_workers: 8
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/arabic_val.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: false
+ drop_last: false
+ batch_size_per_card: 256
+ num_workers: 8
diff --git a/configs/rec/multi_language/rec_cyrillic_lite_train.yml b/configs/rec/multi_language/rec_cyrillic_lite_train.yml
new file mode 100644
index 0000000000000000000000000000000000000000..52527c1dfb9a306429bbab9241c623581d546e45
--- /dev/null
+++ b/configs/rec/multi_language/rec_cyrillic_lite_train.yml
@@ -0,0 +1,111 @@
+Global:
+ use_gpu: true
+ epoch_num: 500
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec_cyrillic_lite
+ save_epoch_step: 3
+ eval_batch_step:
+ - 0
+ - 2000
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints: null
+ save_inference_dir: null
+ use_visualdl: false
+ infer_img: null
+ character_dict_path: ppocr/utils/dict/cyrillic_dict.txt
+ character_type: cyrillic
+ max_text_length: 25
+ infer_mode: false
+ use_space_char: true
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Cosine
+ learning_rate: 0.001
+ regularizer:
+ name: L2
+ factor: 1.0e-05
+Architecture:
+ model_type: rec
+ algorithm: CRNN
+ Transform: null
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride:
+ - 1
+ - 2
+ - 2
+ - 2
+ Neck:
+ name: SequenceEncoder
+ encoder_type: rnn
+ hidden_size: 48
+ Head:
+ name: CTCHead
+ fc_decay: 1.0e-05
+Loss:
+ name: CTCLoss
+PostProcess:
+ name: CTCLabelDecode
+Metric:
+ name: RecMetric
+ main_indicator: acc
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/cyrillic_train.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - RecAug: null
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: true
+ batch_size_per_card: 256
+ drop_last: true
+ num_workers: 8
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/cyrillic_val.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: false
+ drop_last: false
+ batch_size_per_card: 256
+ num_workers: 8
diff --git a/configs/rec/multi_language/rec_devanagari_lite_train.yml b/configs/rec/multi_language/rec_devanagari_lite_train.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e1a7c829c3e6d3c3a57f1d501cdd80a560703ec7
--- /dev/null
+++ b/configs/rec/multi_language/rec_devanagari_lite_train.yml
@@ -0,0 +1,111 @@
+Global:
+ use_gpu: true
+ epoch_num: 500
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec_devanagari_lite
+ save_epoch_step: 3
+ eval_batch_step:
+ - 0
+ - 2000
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints: null
+ save_inference_dir: null
+ use_visualdl: false
+ infer_img: null
+ character_dict_path: ppocr/utils/dict/devanagari_dict.txt
+ character_type: devanagari
+ max_text_length: 25
+ infer_mode: false
+ use_space_char: true
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Cosine
+ learning_rate: 0.001
+ regularizer:
+ name: L2
+ factor: 1.0e-05
+Architecture:
+ model_type: rec
+ algorithm: CRNN
+ Transform: null
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride:
+ - 1
+ - 2
+ - 2
+ - 2
+ Neck:
+ name: SequenceEncoder
+ encoder_type: rnn
+ hidden_size: 48
+ Head:
+ name: CTCHead
+ fc_decay: 1.0e-05
+Loss:
+ name: CTCLoss
+PostProcess:
+ name: CTCLabelDecode
+Metric:
+ name: RecMetric
+ main_indicator: acc
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/devanagari_train.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - RecAug: null
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: true
+ batch_size_per_card: 256
+ drop_last: true
+ num_workers: 8
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/devanagari_val.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: false
+ drop_last: false
+ batch_size_per_card: 256
+ num_workers: 8
diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml
index 13eda8481cad8ca308cd0629214b52146c3ebf13..fff4dfcd905b406964bb07cf14017af22f40e91e 100644
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@@ -15,11 +15,11 @@ Global:
use_visualdl: False
infer_img:
# for data or label process
- character_dict_path: ppocr/utils/dict/en_dict.txt
+ character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25
infer_mode: False
- use_space_char: False
+ use_space_char: True
Optimizer:
diff --git a/configs/rec/multi_language/rec_latin_lite_train.yml b/configs/rec/multi_language/rec_latin_lite_train.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e71112b4b4f0afd3ceab9f10078bc5d518ee9e59
--- /dev/null
+++ b/configs/rec/multi_language/rec_latin_lite_train.yml
@@ -0,0 +1,111 @@
+Global:
+ use_gpu: true
+ epoch_num: 500
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec_latin_lite
+ save_epoch_step: 3
+ eval_batch_step:
+ - 0
+ - 2000
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints: null
+ save_inference_dir: null
+ use_visualdl: false
+ infer_img: null
+ character_dict_path: ppocr/utils/dict/latin_dict.txt
+ character_type: latin
+ max_text_length: 25
+ infer_mode: false
+ use_space_char: true
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Cosine
+ learning_rate: 0.001
+ regularizer:
+ name: L2
+ factor: 1.0e-05
+Architecture:
+ model_type: rec
+ algorithm: CRNN
+ Transform: null
+ Backbone:
+ name: MobileNetV3
+ scale: 0.5
+ model_name: small
+ small_stride:
+ - 1
+ - 2
+ - 2
+ - 2
+ Neck:
+ name: SequenceEncoder
+ encoder_type: rnn
+ hidden_size: 48
+ Head:
+ name: CTCHead
+ fc_decay: 1.0e-05
+Loss:
+ name: CTCLoss
+PostProcess:
+ name: CTCLabelDecode
+Metric:
+ name: RecMetric
+ main_indicator: acc
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/latin_train.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - RecAug: null
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: true
+ batch_size_per_card: 256
+ drop_last: true
+ num_workers: 8
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/
+ label_file_list:
+ - train_data/latin_val.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - CTCLabelEncode: null
+ - RecResizeImg:
+ image_shape:
+ - 3
+ - 32
+ - 320
+ - KeepKeys:
+ keep_keys:
+ - image
+ - label
+ - length
+ loader:
+ shuffle: false
+ drop_last: false
+ batch_size_per_card: 256
+ num_workers: 8
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
index 1288d90692e154220b8ceb22cd7b6d98f53d3efb..f0f7401538a9f8940f671fdcc170aca6c003040d 100755
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -13,7 +13,6 @@ inference 模型(`paddle.jit.save`保存的模型)
- [检测模型转inference模型](#检测模型转inference模型)
- [识别模型转inference模型](#识别模型转inference模型)
- [方向分类模型转inference模型](#方向分类模型转inference模型)
- - [端到端模型转inference模型](#端到端模型转inference模型)
- [二、文本检测模型推理](#文本检测模型推理)
- [1. 超轻量中文检测模型推理](#超轻量中文检测模型推理)
@@ -28,13 +27,10 @@ inference 模型(`paddle.jit.save`保存的模型)
- [4. 自定义文本识别字典的推理](#自定义文本识别字典的推理)
- [5. 多语言模型的推理](#多语言模型的推理)
-- [四、端到端模型推理](#端到端模型推理)
- - [1. PGNet端到端模型推理](#PGNet端到端模型推理)
-
-- [五、方向分类模型推理](#方向识别模型推理)
+- [四、方向分类模型推理](#方向识别模型推理)
- [1. 方向分类模型推理](#方向分类模型推理)
-- [六、文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理)
+- [五、文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理)
- [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理)
- [2. 其他模型推理](#其他模型推理)
@@ -122,32 +118,6 @@ python3 tools/export_model.py -c configs/cls/cls_mv3.yml -o Global.pretrained_mo
├── inference.pdiparams.info # 分类inference模型的参数信息,可忽略
└── inference.pdmodel # 分类inference模型的program文件
```
-
-### 端到端模型转inference模型
-
-下载端到端模型:
-```
-wget -P ./ch_lite/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar && tar xf ./ch_lite/ch_ppocr_mobile_v2.0_cls_train.tar -C ./ch_lite/
-```
-
-端到端模型转inference模型与检测的方式相同,如下:
-```
-# -c 后面设置训练算法的yml配置文件
-# -o 配置可选参数
-# Global.pretrained_model 参数设置待转换的训练模型地址,不用添加文件后缀 .pdmodel,.pdopt或.pdparams。
-# Global.load_static_weights 参数需要设置为 False。
-# Global.save_inference_dir参数设置转换的模型将保存的地址。
-
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./ch_lite/ch_ppocr_mobile_v2.0_cls_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e/
-```
-
-转换成功后,在目录下有三个文件:
-```
-/inference/e2e/
- ├── inference.pdiparams # 分类inference模型的参数文件
- ├── inference.pdiparams.info # 分类inference模型的参数信息,可忽略
- └── inference.pdmodel # 分类inference模型的program文件
-```
## 二、文本检测模型推理
@@ -362,38 +332,8 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" -
Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
```
-
-## 四、端到端模型推理
-
-端到端模型推理,默认使用PGNet模型的配置参数。当不使用PGNet模型时,在推理时,需要通过传入相应的参数进行算法适配,细节参考下文。
-
-### 1. PGNet端到端模型推理
-#### (1). 四边形文本检测模型(ICDAR2015)
-首先将PGNet端到端训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,在ICDAR2015英文数据集训练的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar)),可以使用如下命令进行转换:
-```
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
-```
-**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`**,可以执行如下命令:
-```
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=False
-```
-可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
-
-![](../imgs_results/e2e_res_img_10_pgnet.jpg)
-
-#### (2). 弯曲文本检测模型(Total-Text)
-和四边形文本检测模型共用一个推理模型
-**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_polygon=True`,**可以执行如下命令:
-```
-python3.7 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
-```
-可视化文本端到端结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
-
-![](../imgs_results/e2e_res_img623_pgnet.jpg)
-
-
-## 五、方向分类模型推理
+## 四、方向分类模型推理
下面将介绍方向分类模型推理。
@@ -418,7 +358,7 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:['0', 0.9999982]
```
-## 六、文本检测、方向分类和文字识别串联推理
+## 五、文本检测、方向分类和文字识别串联推理
### 1. 超轻量中文OCR模型推理
diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md
index d647d032674693466525d12c25b15c961e0eb86f..4842a8c57ba7893b5753cde04ae54399a45d8a56 100644
--- a/doc/doc_ch/models_list.md
+++ b/doc/doc_ch/models_list.md
@@ -104,27 +104,16 @@ python3 generate_multi_language_configs.py -l it \
| german_mobile_v2.0_rec |德文识别|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |韩文识别|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |日文识别|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
-| it_mobile_v2.0_rec |意大利文识别|rec_it_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
-| xi_mobile_v2.0_rec |西班牙文识别|rec_xi_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) |
-| pu_mobile_v2.0_rec |葡萄牙文识别|rec_pu_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) |
-| ru_mobile_v2.0_rec |俄罗斯文识别|rec_ru_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) |
-| ar_mobile_v2.0_rec |阿拉伯文识别|rec_ar_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) |
-| hi_mobile_v2.0_rec |印地文识别|rec_hi_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) |
| chinese_cht_mobile_v2.0_rec |中文繁体识别|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
-| ug_mobile_v2.0_rec |维吾尔文识别|rec_ug_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) |
-| fa_mobile_v2.0_rec |波斯文识别|rec_fa_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) |
-| ur_mobile_v2.0_rec |乌尔都文识别|rec_ur_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) |
-| rs_mobile_v2.0_rec |塞尔维亚文(latin)识别|rec_rs_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) |
-| oc_mobile_v2.0_rec |欧西坦文识别|rec_oc_lite_train.yml|2.53M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) |
-| mr_mobile_v2.0_rec |马拉地文识别|rec_mr_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) |
-| ne_mobile_v2.0_rec |尼泊尔文识别|rec_ne_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) |
-| rsc_mobile_v2.0_rec |塞尔维亚文(cyrillic)识别|rec_rsc_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) |
-| bg_mobile_v2.0_rec |保加利亚文识别|rec_bg_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) |
-| uk_mobile_v2.0_rec |乌克兰文识别|rec_uk_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
-| be_mobile_v2.0_rec |白俄罗斯文识别|rec_be_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |泰卢固文识别|rec_te_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |卡纳达文识别|rec_ka_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |泰米尔文识别|rec_ta_lite_train.yml|2.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
+| latin_mobile_v2.0_rec | 拉丁文识别 | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
+| arabic_mobile_v2.0_rec | 阿拉伯字母 | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
+| cyrillic_mobile_v2.0_rec | 斯拉夫字母 | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
+| devanagari_mobile_v2.0_rec | 梵文字母 | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
+
+更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
diff --git a/doc/doc_ch/multi_languages.md b/doc/doc_ch/multi_languages.md
index a8f7c2b77f64285e0edfbd22c248e84f0bb84d42..4e84c08582448722fa9e213ba64abfecd4f190eb 100644
--- a/doc/doc_ch/multi_languages.md
+++ b/doc/doc_ch/multi_languages.md
@@ -5,6 +5,25 @@
- 2021.4.9 支持**80种**语言的检测和识别
- 2021.4.9 支持**轻量高精度**英文模型检测识别
+PaddleOCR 旨在打造一套丰富、领先、且实用的OCR工具库,不仅提供了通用场景下的中英文模型,也提供了专门在英文场景下训练的模型,
+和覆盖[80个语言](#语种缩写)的小语种模型。
+
+其中英文模型支持大小写字母和常见标点的检测识别,并优化了空格字符的识别:
+
+
+
+
+
+小语种模型覆盖了拉丁语系、阿拉伯语系、中文繁体、韩语、日语等等:
+
+
+
+
+
+
+
+本文档将简要介绍小语种模型的使用方法。
+
- [1 安装](#安装)
- [1.1 paddle 安装](#paddle安装)
- [1.2 paddleocr package 安装](#paddleocr_package_安装)
@@ -40,7 +59,7 @@ pip instll paddlepaddle-gpu
pip 安装
```
-pip install "paddleocr>=2.0.4" # 推荐使用2.0.4版本
+pip install "paddleocr>=2.0.6" # 推荐使用2.0.6版本
```
本地构建并安装
```
@@ -68,7 +87,11 @@ Paddleocr目前支持80个语种,可以通过修改--lang参数进行切换,
paddleocr --image_dir doc/imgs/japan_2.jpg --lang=japan
```
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs/japan_2.jpg)
+
+
+
+
+
结果是一个list,每个item包含了文本框,文字和识别置信度
```text
@@ -138,8 +161,10 @@ im_show.save('result.jpg')
```
结果可视化:
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_results/korean.jpg)
+
+
+
* 识别预测
@@ -152,7 +177,8 @@ for line in result:
print(line)
```
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/german/1.jpg)
+
+![](../imgs_words/german/1.jpg)
结果是一个tuple,只包含识别结果和识别置信度
@@ -187,7 +213,10 @@ im_show.save('result.jpg')
```
结果可视化 :
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_results/whl/12_det.jpg)
+
+
+
+
ppocr 还支持方向分类, 更多使用方式请参考:[whl包使用说明](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md)。
@@ -233,7 +262,7 @@ ppocr 支持使用自己的数据进行自定义训练或finetune, 其中识别
|卡纳达文|Kannada |kn|
|泰米尔文|Tamil |ta|
|南非荷兰文 |Afrikaans |af|
-|阿塞拜疆文 |Azerbaijani |az|
+|阿塞拜疆文 |Azerbaijani |az|
|波斯尼亚文|Bosnian|bs|
|捷克文|Czech|cs|
|威尔士文 |Welsh |cy|
diff --git a/doc/doc_ch/pgnet.md b/doc/doc_ch/pgnet.md
index 4d3b8208777873dc7c0cdb87346eb950d3e3e2f4..265853860854317ab00f40b1f447edfad47dc557 100644
--- a/doc/doc_ch/pgnet.md
+++ b/doc/doc_ch/pgnet.md
@@ -2,7 +2,7 @@
- [一、简介](#简介)
- [二、环境配置](#环境配置)
- [三、快速使用](#快速使用)
-- [四、模型训练、评估、推理](#快速训练)
+- [四、模型训练、评估、推理](#模型训练、评估、推理)
## 一、简介
@@ -16,14 +16,31 @@ OCR算法可以分为两阶段算法和端对端的算法。二阶段OCR算法
- 提出基于图的修正模块(GRM)来进一步提高模型识别性能
- 精度更高,预测速度更快
-PGNet算法细节详见[论文](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf), 算法原理图如下所示:
+PGNet算法细节详见[论文](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf) ,算法原理图如下所示:
![](../pgnet_framework.png)
输入图像经过特征提取送入四个分支,分别是:文本边缘偏移量预测TBO模块,文本中心线预测TCL模块,文本方向偏移量预测TDO模块,以及文本字符分类图预测TCC模块。
其中TBO以及TCL的输出经过后处理后可以得到文本的检测结果,TCL、TDO、TCC负责文本识别。
+
其检测识别效果图如下:
+
![](../imgs_results/e2e_res_img293_pgnet.png)
![](../imgs_results/e2e_res_img295_pgnet.png)
+### 性能指标
+
+测试集: Total Text
+
+测试环境: NVIDIA Tesla V100-SXM2-16GB
+
+|PGNetA|det_precision|det_recall|det_f_score|e2e_precision|e2e_recall|e2e_f_score|FPS|下载|
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+|Paper|85.30|86.80|86.1|-|-|61.7|38.20 (size=640)|-|
+|Ours|87.03|82.48|84.69|61.71|58.43|60.03|48.73 (size=768)|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar)|
+
+*note:PaddleOCR里的PGNet实现针对预测速度做了优化,在精度下降可接受范围内,可以显著提升端对端预测速度*
+
+
+
## 二、环境配置
请先参考[快速安装](./installation.md)配置PaddleOCR运行环境。
@@ -49,24 +66,24 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/e2e_server_pgnetA_infer.
### 单张图像或者图像集合预测
```bash
# 预测image_dir指定的单张图像
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
# 预测image_dir指定的图像集合
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
# 如果想使用CPU进行预测,需设置use_gpu参数为False
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True --use_gpu=False
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True --use_gpu=False
```
### 可视化结果
可视化文本检测结果默认保存到./inference_results文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
![](../imgs_results/e2e_res_img623_pgnet.jpg)
-
+
## 四、模型训练、评估、推理
本节以totaltext数据集为例,介绍PaddleOCR中端到端模型的训练、评估与测试。
### 准备数据
-下载解压[totaltext](https://github.com/cs-chan/Total-Text-Dataset/blob/master/Dataset/README.md)数据集到PaddleOCR/train_data/目录,数据集组织结构:
+下载解压[totaltext](https://github.com/cs-chan/Total-Text-Dataset/blob/master/Dataset/README.md) 数据集到PaddleOCR/train_data/目录,数据集组织结构:
```
/PaddleOCR/train_data/total_text/train/
|- rgb/ # total_text数据集的训练数据
@@ -135,20 +152,20 @@ python3 tools/eval.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.checkpoints="{
### 模型预测
测试单张图像的端到端识别效果
```shell
-python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
+python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/e2e_pgnet/best_accuracy" Global.load_static_weights=false
```
测试文件夹下所有图像的端到端识别效果
```shell
-python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
+python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/e2e_pgnet/best_accuracy" Global.load_static_weights=false
```
### 预测推理
-#### (1).四边形文本检测模型(ICDAR2015)
+#### (1). 四边形文本检测模型(ICDAR2015)
首先将PGNet端到端训练过程中保存的模型,转换成inference model。以基于Resnet50_vd骨干网络,以英文数据集训练的模型为例[模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar) ,可以使用如下命令进行转换:
```
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
+python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
```
**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`**,可以执行如下命令:
```
@@ -158,7 +175,7 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
![](../imgs_results/e2e_res_img_10_pgnet.jpg)
-#### (2).弯曲文本检测模型(Total-Text)
+#### (2). 弯曲文本检测模型(Total-Text)
对于弯曲文本样例
**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_polygon=True`,**可以执行如下命令:
diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md
index 907cf24e1a31104096ab6c0cf0819457852d1490..8a7c341cf24738b8af8c974a6da41bcb1b51ce48 100644
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -138,7 +138,7 @@ PaddleOCR内置了一部分字典,可以按需使用。
`ppocr/utils/dict/german_dict.txt` 是一个包含131个字符的德文字典
-`ppocr/utils/dict/en_dict.txt` 是一个包含63个字符的英文字典
+`ppocr/utils/en_dict.txt` 是一个包含96个字符的英文字典
@@ -285,7 +285,7 @@ Eval:
#### 2.3 小语种
-PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi_languages` 路径下提供了一个多语言的配置文件模版: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。
+PaddleOCR目前已支持80种(除中文外)语种识别,`configs/rec/multi_languages` 路径下提供了一个多语言的配置文件模版: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。
您有两种方式创建所需的配置文件:
@@ -368,26 +368,12 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 韩语 | korean |
-| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 意大利语 | it |
-| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 西班牙语 | xi |
-| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 葡萄牙语 | pu |
-| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 俄罗斯语 | ru |
-| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯语 | ar |
-| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 印地语 | hi |
-| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 维吾尔语 | ug |
-| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 波斯语 | fa |
-| rec_ur_ite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 乌尔都语 | ur |
-| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(latin)语 | rs |
-| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 欧西坦语 | oc |
-| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 马拉地语 | mr |
-| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 尼泊尔语 | ne |
-| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 塞尔维亚(cyrillic)语 | rsc |
-| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 保加利亚语 | bg |
-| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 乌克兰语 | uk |
-| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 白俄罗斯语 | be |
-| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰卢固语 | te |
-| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 卡纳达语 | ka |
-| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 泰米尔语 | ta |
+| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 拉丁字母 | latin |
+| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 阿拉伯字母 | ar |
+| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 斯拉夫字母 | cyrillic |
+| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 梵文字母 | devanagari |
+
+更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
多语言模型训练方式与中文模型一致,训练数据集均为100w的合成数据,少量的字体可以在 [百度网盘](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA) 上下载,提取码:frgi。
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index e0163972b94ddfde8215cd323e2ba037752d3b4c..7d5b0d086fc7e8b329e4fba475bb36445d05d018 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -102,27 +102,16 @@ python3 generate_multi_language_configs.py -l it \
| german_mobile_v2.0_rec |Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) |
| korean_mobile_v2.0_rec |Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) |
| japan_mobile_v2.0_rec |Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) |
-| it_mobile_v2.0_rec |Lightweight model for Italian recognition|rec_it_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/it_mobile_v2.0_rec_train.tar) |
-| xi_mobile_v2.0_rec |Lightweight model for Spanish recognition|rec_xi_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/xi_mobile_v2.0_rec_train.tar) |
-| pu_mobile_v2.0_rec |Lightweight model for Portuguese recognition|rec_pu_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/pu_mobile_v2.0_rec_train.tar) |
-| ru_mobile_v2.0_rec |Lightweight model for Russia recognition|rec_ru_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ru_mobile_v2.0_rec_train.tar) |
-| ar_mobile_v2.0_rec |Lightweight model for Arabic recognition|rec_ar_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ar_mobile_v2.0_rec_train.tar) |
-| hi_mobile_v2.0_rec |Lightweight model for Hindi recognition|rec_hi_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/hi_mobile_v2.0_rec_train.tar) |
-| chinese_cht_mobile_v2.0_rec |Lightweight model for chinese traditional recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
-| ug_mobile_v2.0_rec |Lightweight model for Uyghur recognition|rec_ug_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ug_mobile_v2.0_rec_train.tar) |
-| fa_mobile_v2.0_rec |Lightweight model for Persian recognition|rec_fa_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/fa_mobile_v2.0_rec_train.tar) |
-| ur_mobile_v2.0_rec |Lightweight model for Urdu recognition|rec_ur_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ur_mobile_v2.0_rec_train.tar) |
-| rs_mobile_v2.0_rec |Lightweight model for Serbian(latin) recognition|rec_rs_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rs_mobile_v2.0_rec_train.tar) |
-| oc_mobile_v2.0_rec |Lightweight model for Occitan recognition|rec_oc_lite_train.yml|2.53M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/oc_mobile_v2.0_rec_train.tar) |
-| mr_mobile_v2.0_rec |Lightweight model for Marathi recognition|rec_mr_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/mr_mobile_v2.0_rec_train.tar) |
-| ne_mobile_v2.0_rec |Lightweight model for Nepali recognition|rec_ne_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ne_mobile_v2.0_rec_train.tar) |
-| rsc_mobile_v2.0_rec |Lightweight model for Serbian(cyrillic) recognition|rec_rsc_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/rsc_mobile_v2.0_rec_train.tar) |
-| bg_mobile_v2.0_rec |Lightweight model for Bulgarian recognition|rec_bg_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/bg_mobile_v2.0_rec_train.tar) |
-| uk_mobile_v2.0_rec |Lightweight model for Ukranian recognition|rec_uk_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/uk_mobile_v2.0_rec_train.tar) |
-| be_mobile_v2.0_rec |Lightweight model for Belarusian recognition|rec_be_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/be_mobile_v2.0_rec_train.tar) |
+| chinese_cht_mobile_v2.0_rec |Lightweight model for Traditional Chinese recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) |
| te_mobile_v2.0_rec |Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) |
| ka_mobile_v2.0_rec |Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) |
| ta_mobile_v2.0_rec |Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) |
+| latin_mobile_v2.0_rec | Lightweight model for Latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) |
+| arabic_mobile_v2.0_rec | Lightweight model for Arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) |
+| cyrillic_mobile_v2.0_rec | Lightweight model for Cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) |
+| devanagari_mobile_v2.0_rec | Lightweight model for Devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) |
+
+For more supported languages, please refer to: [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
diff --git a/doc/doc_en/multi_languages_en.md b/doc/doc_en/multi_languages_en.md
index d1c4583f66f611fbef7191d52d32e02187853c9b..3a7dde9600fd4081b413c2c01747a8c1f610e5f6 100644
--- a/doc/doc_en/multi_languages_en.md
+++ b/doc/doc_en/multi_languages_en.md
@@ -2,24 +2,44 @@
**Recent Update**
--2021.4.9 supports the detection and recognition of 80 languages
--2021.4.9 supports **lightweight high-precision** English model detection and recognition
-
--[1 Installation](#Install)
- -[1.1 paddle installation](#paddleinstallation)
- -[1.2 paddleocr package installation](#paddleocr_package_install)
-
--[2 Quick Use](#Quick_Use)
- -[2.1 Command line operation](#Command_line_operation)
- -[2.1.1 Prediction of the whole image](#bash_detection+recognition)
- -[2.1.2 Recognition](#bash_Recognition)
- -[2.1.3 Detection](#bash_detection)
- -[2.2 python script running](#python_Script_running)
- -[2.2.1 Whole image prediction](#python_detection+recognition)
- -[2.2.2 Recognition](#python_Recognition)
- -[2.2.3 Detection](#python_detection)
--[3 Custom Training](#Custom_Training)
--[4 Supported languages and abbreviations](#language_abbreviations)
+- 2021.4.9 supports the detection and recognition of 80 languages
+- 2021.4.9 supports **lightweight high-precision** English model detection and recognition
+
+PaddleOCR aims to create a rich, leading, and practical OCR tool library, which not only provides
+Chinese and English models in general scenarios, but also provides models specifically trained
+in English scenarios, as well as multilingual models covering [80 languages](#language_abbreviations).
+
+Among them, the English model supports the detection and recognition of uppercase and lowercase
+letters and common punctuation, and the recognition of space characters is optimized:
+
+
+
+
+
+The multilingual models cover Latin, Arabic, Traditional Chinese, Korean, Japanese, etc.:
+
+
+
+
+
+
+This document will briefly introduce how to use the multilingual model.
+
+- [1 Installation](#Install)
+ - [1.1 paddle installation](#paddleinstallation)
+ - [1.2 paddleocr package installation](#paddleocr_package_install)
+
+- [2 Quick Use](#Quick_Use)
+ - [2.1 Command line operation](#Command_line_operation)
+ - [2.1.1 Prediction of the whole image](#bash_detection+recognition)
+ - [2.1.2 Recognition](#bash_Recognition)
+ - [2.1.3 Detection](#bash_detection)
+ - [2.2 python script running](#python_Script_running)
+ - [2.2.1 Whole image prediction](#python_detection+recognition)
+ - [2.2.2 Recognition](#python_Recognition)
+ - [2.2.3 Detection](#python_detection)
+- [3 Custom Training](#Custom_Training)
+- [4 Supported languages and abbreviations](#language_abbreviations)
## 1 Installation
@@ -40,7 +60,7 @@ pip instll paddlepaddle-gpu
pip install
```
-pip install "paddleocr>=2.0.4" # 2.0.4 version is recommended
+pip install "paddleocr>=2.0.6" # 2.0.6 version is recommended
```
Build and install locally
```
@@ -69,7 +89,7 @@ The specific supported [language] (#language_abbreviations) can be viewed in the
paddleocr --image_dir doc/imgs/japan_2.jpg --lang=japan
```
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs/japan_2.jpg)
+![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs/japan_2.jpg)
The result is a list, each item contains a text box, text and recognition confidence
```text
@@ -86,7 +106,7 @@ The result is a list, each item contains a text box, text and recognition confid
paddleocr --image_dir doc/imgs_words/japan/1.jpg --det false --lang=japan
```
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/japan/1.jpg)
+![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words/japan/1.jpg)
The result is a tuple, which returns the recognition result and recognition confidence
@@ -139,7 +159,7 @@ im_show.save('result.jpg')
```
Visualization of results:
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_results/korean.jpg)
+![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/korean.jpg)
* Recognition
@@ -153,7 +173,7 @@ for line in result:
print(line)
```
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_words/german/1.jpg)
+![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_words/german/1.jpg)
The result is a tuple, which only contains the recognition result and recognition confidence
@@ -188,7 +208,7 @@ The result is a list, each item contains only text boxes
```
Visualization of results:
-![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.0/doc/imgs_results/whl/12_det.jpg)
+![](https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/release/2.1/doc/imgs_results/whl/12_det.jpg)
ppocr also supports direction classification. For more usage methods, please refer to: [whl package instructions](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.0/doc/doc_ch/whl.md).
diff --git a/doc/doc_en/pgnet_en.md b/doc/doc_en/pgnet_en.md
index 0f47f0e656f922e944710a746a6cd29ab6d46d8e..2ab1116ce7085e2e322b4be45ee5628c247040ea 100644
--- a/doc/doc_en/pgnet_en.md
+++ b/doc/doc_en/pgnet_en.md
@@ -15,7 +15,7 @@ In recent years, the end-to-end OCR algorithm has been well developed, including
- A graph based modification module (GRM) is proposed to further improve the performance of model recognition
- Higher accuracy and faster prediction speed
-For details of PGNet algorithm, please refer to [paper](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf), The schematic diagram of the algorithm is as follows:
+For details of the PGNet algorithm, please refer to the [paper](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf). The schematic diagram of the algorithm is as follows:
![](../pgnet_framework.png)
After feature extraction, the input image is sent to four branches: TBO module for text edge offset prediction, TCL module for text centerline prediction, TDO module for text direction offset prediction, and TCC module for text character classification graph prediction.
The output of TBO and TCL can get text detection results after post-processing, and TCL, TDO and TCC are responsible for text recognition.
@@ -23,6 +23,16 @@ The output of TBO and TCL can get text detection results after post-processing,
The results of detection and recognition are as follows:
![](../imgs_results/e2e_res_img293_pgnet.png)
![](../imgs_results/e2e_res_img295_pgnet.png)
+### Performance
+#### Test set: Total Text
+
+#### Test environment: NVIDIA Tesla V100-SXM2-16GB
+|PGNetA|det_precision|det_recall|det_f_score|e2e_precision|e2e_recall|e2e_f_score|FPS|download|
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+|Paper|85.30|86.80|86.1|-|-|61.7|38.20 (size=640)|-|
+|Ours|87.03|82.48|84.69|61.71|58.43|60.03|48.73 (size=768)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar)|
+
+*Note: The PGNet implementation in PaddleOCR is optimized for prediction speed; it significantly improves end-to-end prediction speed within an acceptable range of accuracy reduction.*
## 2. Environment Configuration
@@ -49,13 +59,13 @@ After decompression, there should be the following file structure:
### Single image or image set prediction
```bash
# Prediction single image specified by image_dir
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
# Prediction the collection of images specified by image_dir
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
# If you want to use CPU for prediction, you need to set use_gpu parameter is false
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True --use_gpu=False
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True --use_gpu=False
```
### Visualization results
The visualized end-to-end results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows:
@@ -141,12 +151,12 @@ python3 tools/eval.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.checkpoints="{
### Model Test
Test the end-to-end result on a single image:
```shell
-python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
+python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/e2e_pgnet/best_accuracy" Global.load_static_weights=false
```
Test the end-to-end result on all images in the folder:
```shell
-python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
+python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/e2e_pgnet/best_accuracy" Global.load_static_weights=false
```
### Model inference
@@ -154,7 +164,7 @@ python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img=
First, convert the model saved in the PGNet end-to-end training process into an inference model. In the first stage of training based on composite dataset, the model of English data set training is taken as an example[model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar), you can use the following command to convert:
```
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
-python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/iter_epoch_450 Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
+python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
```
**For PGNet quadrangle end-to-end model inference, you need to set the parameter `--e2e_algorithm="PGNet"`**, run the following command:
```
diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md
index aeb9aa0d43e400c4d6e733b2c9f4a74559dccecb..0b3db6a235bdbfeb930d6cf3f7d086829fd32c43 100644
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -131,7 +131,7 @@ PaddleOCR has built-in dictionaries, which can be used on demand.
`ppocr/utils/dict/german_dict.txt` is a German dictionary with 131 characters
-`ppocr/utils/dict/en_dict.txt` is a English dictionary with 63 characters
+`ppocr/utils/en_dict.txt` is an English dictionary with 96 characters
The current multi-language model is still in the demo stage and will continue to optimize the model and add languages. **You are very welcome to provide us with dictionaries and fonts in other languages**,
@@ -279,7 +279,7 @@ Eval:
#### 2.3 Multi-language
-PaddleOCR currently supports 26 (except Chinese) language recognition. A multi-language configuration file template is
+PaddleOCR currently supports recognition of 80 languages (in addition to Chinese). A multi-language configuration file template is
provided under the path `configs/rec/multi_languages`: [rec_multi_language_lite_train.yml](../../configs/rec/multi_language/rec_multi_language_lite_train.yml)。
There are two ways to create the required configuration file::
@@ -368,27 +368,12 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
| rec_korean_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Korean | korean |
-| rec_it_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Italian | it |
-| rec_xi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Spanish | xi |
-| rec_pu_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Portuguese | pu |
-| rec_ru_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Russia | ru |
-| rec_ar_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Arabic | ar |
-| rec_hi_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Hindi | hi |
-| rec_ug_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Uyghur | ug |
-| rec_fa_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Persian(Farsi) | fa |
-| rec_ur_ite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Urdu | ur |
-| rec_rs_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(latin) | rs |
-| rec_oc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Occitan | oc |
-| rec_mr_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Marathi | mr |
-| rec_ne_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Nepali | ne |
-| rec_rsc_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Serbian(cyrillic) | rsc |
-| rec_bg_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Bulgarian | bg |
-| rec_uk_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Ukranian | uk |
-| rec_be_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Belarusian | be |
-| rec_te_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Telugu | te |
-| rec_ka_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Kannada | ka |
-| rec_ta_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Tamil | ta |
+| rec_latin_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Latin | latin |
+| rec_arabic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Arabic | ar |
+| rec_cyrillic_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Cyrillic | cyrillic |
+| rec_devanagari_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Devanagari | devanagari |
+For more supported languages, please refer to: [Multi-language model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md#4-support-languages-and-abbreviations)
The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded on [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi.
diff --git a/doc/imgs_results/multi_lang/en_1.jpg b/doc/imgs_results/multi_lang/en_1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2dc84d3f04610effa7888e81848b015443e8091f
Binary files /dev/null and b/doc/imgs_results/multi_lang/en_1.jpg differ
diff --git a/doc/imgs_results/multi_lang/en_2.jpg b/doc/imgs_results/multi_lang/en_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..455ec98ed337c6af9c45f2861b8078b31abb4054
Binary files /dev/null and b/doc/imgs_results/multi_lang/en_2.jpg differ
diff --git a/doc/imgs_results/multi_lang/en_3.jpg b/doc/imgs_results/multi_lang/en_3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..36eb063d78466a9000306dab11507e0a43664568
Binary files /dev/null and b/doc/imgs_results/multi_lang/en_3.jpg differ
diff --git a/doc/imgs_results/multi_lang/french_0.jpg b/doc/imgs_results/multi_lang/french_0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c2abe6304b93e3025dd19b75980c548f70bd3c7
Binary files /dev/null and b/doc/imgs_results/multi_lang/french_0.jpg differ
diff --git a/doc/imgs_results/multi_lang/japan_2.jpg b/doc/imgs_results/multi_lang/japan_2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7038ba2effab289b2895ea8d0fddaa365149ff80
Binary files /dev/null and b/doc/imgs_results/multi_lang/japan_2.jpg differ
diff --git a/paddleocr.py b/paddleocr.py
index 47e1267ac40effbe8b4ab80723c66eb5378be179..d5b6a01ba1fa8b233652de9b972f88643e89c5d8 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -34,8 +34,12 @@ from ppocr.utils.utility import check_and_read_gif, get_image_file_list
__all__ = ['PaddleOCR']
model_urls = {
- 'det':
- 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
+ 'det': {
+ 'ch':
+ 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
+ 'en':
+ 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
+ },
'rec': {
'ch': {
'url':
@@ -45,7 +49,7 @@ model_urls = {
'en': {
'url':
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
- 'dict_path': './ppocr/utils/dict/en_dict.txt'
+ 'dict_path': './ppocr/utils/en_dict.txt'
},
'french': {
'url':
@@ -113,7 +117,7 @@ model_urls = {
}
SUPPORT_DET_MODEL = ['DB']
-VERSION = 2.0
+VERSION = 2.1
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
@@ -199,7 +203,7 @@ def parse_args(mMain=True, add_help=True):
parser.add_argument("--rec_model_dir", type=str, default=None)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
- parser.add_argument("--rec_batch_num", type=int, default=30)
+ parser.add_argument("--rec_batch_num", type=int, default=6)
parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument("--rec_char_dict_path", type=str, default=None)
parser.add_argument("--use_space_char", type=bool, default=True)
@@ -209,7 +213,7 @@ def parse_args(mMain=True, add_help=True):
parser.add_argument("--cls_model_dir", type=str, default=None)
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
parser.add_argument("--label_list", type=list, default=['0', '180'])
- parser.add_argument("--cls_batch_num", type=int, default=30)
+ parser.add_argument("--cls_batch_num", type=int, default=6)
parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=bool, default=False)
@@ -243,7 +247,7 @@ def parse_args(mMain=True, add_help=True):
rec_model_dir=None,
rec_image_shape="3, 32, 320",
rec_char_type='ch',
- rec_batch_num=30,
+ rec_batch_num=6,
max_text_length=25,
rec_char_dict_path=None,
use_space_char=True,
@@ -251,7 +255,7 @@ def parse_args(mMain=True, add_help=True):
cls_model_dir=None,
cls_image_shape="3, 48, 192",
label_list=['0', '180'],
- cls_batch_num=30,
+ cls_batch_num=6,
cls_thresh=0.9,
enable_mkldnn=False,
use_zero_copy_run=False,
@@ -274,10 +278,10 @@ class PaddleOCR(predict_system.TextSystem):
self.use_angle_cls = postprocess_params.use_angle_cls
lang = postprocess_params.lang
latin_lang = [
- 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'en', 'es', 'et', 'fr',
- 'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi',
- 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin',
- 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi'
+ 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
+ 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
+ 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk',
+ 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi'
]
arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [
@@ -299,6 +303,10 @@ class PaddleOCR(predict_system.TextSystem):
assert lang in model_urls[
'rec'], 'param lang must in {}, but got {}'.format(
model_urls['rec'].keys(), lang)
+ if lang == "ch":
+ det_lang = "ch"
+ else:
+ det_lang = "en"
use_inner_dict = False
if postprocess_params.rec_char_dict_path is None:
use_inner_dict = True
@@ -308,7 +316,7 @@ class PaddleOCR(predict_system.TextSystem):
# init model dir
if postprocess_params.det_model_dir is None:
postprocess_params.det_model_dir = os.path.join(
- BASE_DIR, '{}/det'.format(VERSION))
+ BASE_DIR, '{}/det/{}'.format(VERSION, det_lang))
if postprocess_params.rec_model_dir is None:
postprocess_params.rec_model_dir = os.path.join(
BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
@@ -317,7 +325,8 @@ class PaddleOCR(predict_system.TextSystem):
BASE_DIR, '{}/cls'.format(VERSION))
print(postprocess_params)
# download model
- maybe_download(postprocess_params.det_model_dir, model_urls['det'])
+ maybe_download(postprocess_params.det_model_dir,
+ model_urls['det'][det_lang])
maybe_download(postprocess_params.rec_model_dir,
model_urls['rec'][lang]['url'])
maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 47e0cbf07d8bd8b6ad838fa2d211345c65a6751a..cbb110090cfff3ebee4b30b009f88fc9aaba1617 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -200,18 +200,16 @@ class E2ELabelEncode(BaseRecLabelEncode):
self.pad_num = len(self.dict) # the length to pad
def __call__(self, data):
- text_label_index_list, temp_text = [], []
texts = data['strs']
+ temp_texts = []
for text in texts:
text = text.lower()
- temp_text = []
- for c_ in text:
- if c_ in self.dict:
- temp_text.append(self.dict[c_])
- temp_text = temp_text + [self.pad_num] * (self.max_text_len -
- len(temp_text))
- text_label_index_list.append(temp_text)
- data['strs'] = np.array(text_label_index_list)
+ text = self.encode(text)
+ if text is None:
+ return None
+ text = text + [self.pad_num] * (self.max_text_len - len(text))
+ temp_texts.append(text)
+ data['strs'] = np.array(temp_texts)
return data
diff --git a/ppocr/data/pgnet_dataset.py b/ppocr/data/pgnet_dataset.py
index ae0638350ad02f10202a67bc6cd531daf742f984..543dbe79ef6ff548c91b4e17bf6424797cdeeea7 100644
--- a/ppocr/data/pgnet_dataset.py
+++ b/ppocr/data/pgnet_dataset.py
@@ -64,9 +64,6 @@ class PGDataSet(Dataset):
for line in f.readlines():
poly_str, txt = line.strip().split('\t')
poly = list(map(float, poly_str.split(',')))
- if self.mode.lower() == "eval":
- while len(poly) < 100:
- poly.append(-1)
text_polys.append(
np.array(
poly, dtype=np.float32).reshape(-1, 2))
@@ -139,23 +136,21 @@ class PGDataSet(Dataset):
try:
if self.data_format == 'icdar':
im_path = os.path.join(data_path, 'rgb', data_line)
- if self.mode.lower() == "eval":
- poly_path = os.path.join(data_path, 'poly_gt',
- data_line.split('.')[0] + '.txt')
- else:
- poly_path = os.path.join(data_path, 'poly',
- data_line.split('.')[0] + '.txt')
+ poly_path = os.path.join(data_path, 'poly',
+ data_line.split('.')[0] + '.txt')
text_polys, text_tags, text_strs = self.extract_polys(poly_path)
else:
image_dir = os.path.join(os.path.dirname(data_path), 'image')
im_path, text_polys, text_tags, text_strs = self.extract_info_textnet(
data_line, image_dir)
+ img_id = int(data_line.split(".")[0][3:])
data = {
'img_path': im_path,
'polys': text_polys,
'tags': text_tags,
- 'strs': text_strs
+ 'strs': text_strs,
+ 'img_id': img_id
}
with open(data['img_path'], 'rb') as f:
img = f.read()
diff --git a/ppocr/metrics/e2e_metric.py b/ppocr/metrics/e2e_metric.py
index 684d77421c659d4150ea4a28a99b4ae43d678b69..8a604192fa455071202eec157e3832e2804bfdfd 100644
--- a/ppocr/metrics/e2e_metric.py
+++ b/ppocr/metrics/e2e_metric.py
@@ -19,58 +19,29 @@ from __future__ import print_function
__all__ = ['E2EMetric']
from ppocr.utils.e2e_metric.Deteval import get_socre, combine_results
-from ppocr.utils.e2e_utils.extract_textpoint import get_dict
+from ppocr.utils.e2e_utils.extract_textpoint_slow import get_dict
class E2EMetric(object):
def __init__(self,
+ gt_mat_dir,
character_dict_path,
main_indicator='f_score_e2e',
**kwargs):
+ self.gt_mat_dir = gt_mat_dir
self.label_list = get_dict(character_dict_path)
self.max_index = len(self.label_list)
self.main_indicator = main_indicator
self.reset()
def __call__(self, preds, batch, **kwargs):
- temp_gt_polyons_batch = batch[2]
- temp_gt_strs_batch = batch[3]
- ignore_tags_batch = batch[4]
- gt_polyons_batch = []
- gt_strs_batch = []
-
- temp_gt_polyons_batch = temp_gt_polyons_batch[0].tolist()
- for temp_list in temp_gt_polyons_batch:
- t = []
- for index in temp_list:
- if index[0] != -1 and index[1] != -1:
- t.append(index)
- gt_polyons_batch.append(t)
-
- temp_gt_strs_batch = temp_gt_strs_batch[0].tolist()
- for temp_list in temp_gt_strs_batch:
- t = ""
- for index in temp_list:
- if index < self.max_index:
- t += self.label_list[index]
- gt_strs_batch.append(t)
-
- for pred, gt_polyons, gt_strs, ignore_tags in zip(
- [preds], [gt_polyons_batch], [gt_strs_batch], ignore_tags_batch):
- # prepare gt
- gt_info_list = [{
- 'points': gt_polyon,
- 'text': gt_str,
- 'ignore': ignore_tag
- } for gt_polyon, gt_str, ignore_tag in
- zip(gt_polyons, gt_strs, ignore_tags)]
- # prepare det
- e2e_info_list = [{
- 'points': det_polyon,
- 'text': pred_str
- } for det_polyon, pred_str in zip(pred['points'], pred['strs'])]
- result = get_socre(gt_info_list, e2e_info_list)
- self.results.append(result)
+ img_id = batch[5][0]
+ e2e_info_list = [{
+ 'points': det_polyon,
+ 'text': pred_str
+ } for det_polyon, pred_str in zip(preds['points'], preds['strs'])]
+ result = get_socre(self.gt_mat_dir, img_id, e2e_info_list)
+ self.results.append(result)
def get_metric(self):
metircs = combine_results(self.results)
diff --git a/ppocr/postprocess/pg_postprocess.py b/ppocr/postprocess/pg_postprocess.py
index d9c0048f20ff46850ab8a26554af31532c73efd6..0b1455181fddb0adb5347406bb2eb3093ee6fb30 100644
--- a/ppocr/postprocess/pg_postprocess.py
+++ b/ppocr/postprocess/pg_postprocess.py
@@ -22,10 +22,7 @@ import sys
__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '..'))
-
-from ppocr.utils.e2e_utils.extract_textpoint import *
-from ppocr.utils.e2e_utils.visual import *
-import paddle
+from ppocr.utils.e2e_utils.pgnet_pp_utils import PGNet_PostProcess
class PGPostProcess(object):
@@ -33,10 +30,12 @@ class PGPostProcess(object):
The post process for PGNet.
"""
- def __init__(self, character_dict_path, valid_set, score_thresh, **kwargs):
- self.Lexicon_Table = get_dict(character_dict_path)
+ def __init__(self, character_dict_path, valid_set, score_thresh, mode,
+ **kwargs):
+ self.character_dict_path = character_dict_path
self.valid_set = valid_set
self.score_thresh = score_thresh
+ self.mode = mode
# c++ la-nms is faster, but only support python 3.5
self.is_python35 = False
@@ -44,112 +43,10 @@ class PGPostProcess(object):
self.is_python35 = True
def __call__(self, outs_dict, shape_list):
- p_score = outs_dict['f_score']
- p_border = outs_dict['f_border']
- p_char = outs_dict['f_char']
- p_direction = outs_dict['f_direction']
- if isinstance(p_score, paddle.Tensor):
- p_score = p_score[0].numpy()
- p_border = p_border[0].numpy()
- p_direction = p_direction[0].numpy()
- p_char = p_char[0].numpy()
+ post = PGNet_PostProcess(self.character_dict_path, self.valid_set,
+ self.score_thresh, outs_dict, shape_list)
+ if self.mode == 'fast':
+ data = post.pg_postprocess_fast()
else:
- p_score = p_score[0]
- p_border = p_border[0]
- p_direction = p_direction[0]
- p_char = p_char[0]
- src_h, src_w, ratio_h, ratio_w = shape_list[0]
- is_curved = self.valid_set == "totaltext"
- instance_yxs_list = generate_pivot_list(
- p_score,
- p_char,
- p_direction,
- score_thresh=self.score_thresh,
- is_backbone=True,
- is_curved=is_curved)
- p_char = paddle.to_tensor(np.expand_dims(p_char, axis=0))
- char_seq_idx_set = []
- for i in range(len(instance_yxs_list)):
- gather_info_lod = paddle.to_tensor(instance_yxs_list[i])
- f_char_map = paddle.transpose(p_char, [0, 2, 3, 1])
- feature_seq = paddle.gather_nd(f_char_map, gather_info_lod)
- feature_seq = np.expand_dims(feature_seq.numpy(), axis=0)
- feature_len = [len(feature_seq[0])]
- featyre_seq = paddle.to_tensor(feature_seq)
- feature_len = np.array([feature_len]).astype(np.int64)
- length = paddle.to_tensor(feature_len)
- seq_pred = paddle.fluid.layers.ctc_greedy_decoder(
- input=featyre_seq, blank=36, input_length=length)
- seq_pred_str = seq_pred[0].numpy().tolist()[0]
- seq_len = seq_pred[1].numpy()[0][0]
- temp_t = []
- for c in seq_pred_str[:seq_len]:
- temp_t.append(c)
- char_seq_idx_set.append(temp_t)
- seq_strs = []
- for char_idx_set in char_seq_idx_set:
- pr_str = ''.join([self.Lexicon_Table[pos] for pos in char_idx_set])
- seq_strs.append(pr_str)
- poly_list = []
- keep_str_list = []
- all_point_list = []
- all_point_pair_list = []
- for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs):
- if len(yx_center_line) == 1:
- yx_center_line.append(yx_center_line[-1])
-
- offset_expand = 1.0
- if self.valid_set == 'totaltext':
- offset_expand = 1.2
-
- point_pair_list = []
- for batch_id, y, x in yx_center_line:
- offset = p_border[:, y, x].reshape(2, 2)
- if offset_expand != 1.0:
- offset_length = np.linalg.norm(
- offset, axis=1, keepdims=True)
- expand_length = np.clip(
- offset_length * (offset_expand - 1),
- a_min=0.5,
- a_max=3.0)
- offset_detal = offset / offset_length * expand_length
- offset = offset + offset_detal
- ori_yx = np.array([y, x], dtype=np.float32)
- point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array(
- [ratio_w, ratio_h]).reshape(-1, 2)
- point_pair_list.append(point_pair)
-
- all_point_list.append([
- int(round(x * 4.0 / ratio_w)),
- int(round(y * 4.0 / ratio_h))
- ])
- all_point_pair_list.append(point_pair.round().astype(np.int32)
- .tolist())
-
- detected_poly, pair_length_info = point_pair2poly(point_pair_list)
- detected_poly = expand_poly_along_width(
- detected_poly, shrink_ratio_of_width=0.2)
- detected_poly[:, 0] = np.clip(
- detected_poly[:, 0], a_min=0, a_max=src_w)
- detected_poly[:, 1] = np.clip(
- detected_poly[:, 1], a_min=0, a_max=src_h)
-
- if len(keep_str) < 2:
- continue
-
- keep_str_list.append(keep_str)
- if self.valid_set == 'partvgg':
- middle_point = len(detected_poly) // 2
- detected_poly = detected_poly[
- [0, middle_point - 1, middle_point, -1], :]
- poly_list.append(detected_poly)
- elif self.valid_set == 'totaltext':
- poly_list.append(detected_poly)
- else:
- print('--> Not supported format.')
- exit(-1)
- data = {
- 'points': poly_list,
- 'strs': keep_str_list,
- }
+ data = post.pg_postprocess_slow()
return data
diff --git a/ppocr/utils/e2e_metric/Deteval.py b/ppocr/utils/e2e_metric/Deteval.py
index 8033a9ff9f1f55200d43472f405d5805e238085b..e30a498eaf2e24f7a337ee48536466e7c4f0d91c 100755
--- a/ppocr/utils/e2e_metric/Deteval.py
+++ b/ppocr/utils/e2e_metric/Deteval.py
@@ -13,10 +13,11 @@
# limitations under the License.
import numpy as np
+import scipy.io as io
from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area
-def get_socre(gt_dict, pred_dict):
+def get_socre(gt_dir, img_id, pred_dict):
allInputs = 1
def input_reading_mod(pred_dict):
@@ -30,31 +31,9 @@ def get_socre(gt_dict, pred_dict):
det.append([point, text])
return det
- def gt_reading_mod(gt_dict):
- """This helper reads groundtruths from mat files"""
- gt = []
- n = len(gt_dict)
- for i in range(n):
- points = gt_dict[i]['points']
- h = len(points)
- text = gt_dict[i]['text']
- xx = [
- np.array(
- ['x:'], dtype=' y, x
+ sorted_point, sorted_direction = sort_part_with_direction(pos_list,
+ point_direction)
+
+ point_num = len(sorted_point)
+ if point_num >= 16:
+ middle_num = point_num // 2
+ first_part_point = sorted_point[:middle_num]
+ first_point_direction = sorted_direction[:middle_num]
+ sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction(
+ first_part_point, first_point_direction)
+
+ last_part_point = sorted_point[middle_num:]
+ last_point_direction = sorted_direction[middle_num:]
+ sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction(
+ last_part_point, last_point_direction)
+ sorted_point = sorted_fist_part_point + sorted_last_part_point
+ sorted_direction = sorted_fist_part_direction + sorted_last_part_direction
+
+ return sorted_point, np.array(sorted_direction)
+
+
+def add_id(pos_list, image_id=0):
+ """
+ Add id for gather feature, for inference.
+ """
+ new_list = []
+ for item in pos_list:
+ new_list.append((image_id, item[0], item[1]))
+ return new_list
+
+
+def sort_and_expand_with_direction(pos_list, f_direction):
+ """
+ f_direction: h x w x 2
+ pos_list: [[y, x], [y, x], [y, x] ...]
+ """
+ h, w, _ = f_direction.shape
+ sorted_list, point_direction = sort_with_direction(pos_list, f_direction)
+
+ point_num = len(sorted_list)
+ sub_direction_len = max(point_num // 3, 2)
+ left_direction = point_direction[:sub_direction_len, :]
+ right_dirction = point_direction[point_num - sub_direction_len:, :]
+
+ left_average_direction = -np.mean(left_direction, axis=0, keepdims=True)
+ left_average_len = np.linalg.norm(left_average_direction)
+ left_start = np.array(sorted_list[0])
+ left_step = left_average_direction / (left_average_len + 1e-6)
+
+ right_average_direction = np.mean(right_dirction, axis=0, keepdims=True)
+ right_average_len = np.linalg.norm(right_average_direction)
+ right_step = right_average_direction / (right_average_len + 1e-6)
+ right_start = np.array(sorted_list[-1])
+
+ append_num = max(
+ int((left_average_len + right_average_len) / 2.0 * 0.15), 1)
+ left_list = []
+ right_list = []
+ for i in range(append_num):
+ ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype(
+ 'int32').tolist()
+ if ly < h and lx < w and (ly, lx) not in left_list:
+ left_list.append((ly, lx))
+ ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype(
+ 'int32').tolist()
+ if ry < h and rx < w and (ry, rx) not in right_list:
+ right_list.append((ry, rx))
+
+ all_list = left_list[::-1] + sorted_list + right_list
+ return all_list
+
+
+def sort_and_expand_with_direction_v2(pos_list, f_direction, binary_tcl_map):
+ """
+ f_direction: h x w x 2
+ pos_list: [[y, x], [y, x], [y, x] ...]
+ binary_tcl_map: h x w
+ """
+ h, w, _ = f_direction.shape
+ sorted_list, point_direction = sort_with_direction(pos_list, f_direction)
+
+ point_num = len(sorted_list)
+ sub_direction_len = max(point_num // 3, 2)
+ left_direction = point_direction[:sub_direction_len, :]
+ right_dirction = point_direction[point_num - sub_direction_len:, :]
+
+ left_average_direction = -np.mean(left_direction, axis=0, keepdims=True)
+ left_average_len = np.linalg.norm(left_average_direction)
+ left_start = np.array(sorted_list[0])
+ left_step = left_average_direction / (left_average_len + 1e-6)
+
+ right_average_direction = np.mean(right_dirction, axis=0, keepdims=True)
+ right_average_len = np.linalg.norm(right_average_direction)
+ right_step = right_average_direction / (right_average_len + 1e-6)
+ right_start = np.array(sorted_list[-1])
+
+ append_num = max(
+ int((left_average_len + right_average_len) / 2.0 * 0.15), 1)
+ max_append_num = 2 * append_num
+
+ left_list = []
+ right_list = []
+ for i in range(max_append_num):
+ ly, lx = np.round(left_start + left_step * (i + 1)).flatten().astype(
+ 'int32').tolist()
+ if ly < h and lx < w and (ly, lx) not in left_list:
+ if binary_tcl_map[ly, lx] > 0.5:
+ left_list.append((ly, lx))
+ else:
+ break
+
+ for i in range(max_append_num):
+ ry, rx = np.round(right_start + right_step * (i + 1)).flatten().astype(
+ 'int32').tolist()
+ if ry < h and rx < w and (ry, rx) not in right_list:
+ if binary_tcl_map[ry, rx] > 0.5:
+ right_list.append((ry, rx))
+ else:
+ break
+
+ all_list = left_list[::-1] + sorted_list + right_list
+ return all_list
+
+
+def point_pair2poly(point_pair_list):
+ """
+ Convert vertical point pairs into polygon points in clockwise order.
+ """
+ point_num = len(point_pair_list) * 2
+ point_list = [0] * point_num
+ for idx, point_pair in enumerate(point_pair_list):
+ point_list[idx] = point_pair[0]
+ point_list[point_num - 1 - idx] = point_pair[1]
+ return np.array(point_list).reshape(-1, 2)
+
+
+def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.):
+ ratio_pair = np.array(
+ [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+ p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
+ p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
+ return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])
+
+
+def expand_poly_along_width(poly, shrink_ratio_of_width=0.3):
+ """
+ expand poly along width.
+ """
+ point_num = poly.shape[0]
+ left_quad = np.array(
+ [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+ left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
+ (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
+ left_quad_expand = shrink_quad_along_width(left_quad, left_ratio, 1.0)
+ right_quad = np.array(
+ [
+ poly[point_num // 2 - 2], poly[point_num // 2 - 1],
+ poly[point_num // 2], poly[point_num // 2 + 1]
+ ],
+ dtype=np.float32)
+ right_ratio = 1.0 + shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
+ (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
+ right_quad_expand = shrink_quad_along_width(right_quad, 0.0, right_ratio)
+ poly[0] = left_quad_expand[0]
+ poly[-1] = left_quad_expand[-1]
+ poly[point_num // 2 - 1] = right_quad_expand[1]
+ poly[point_num // 2] = right_quad_expand[2]
+ return poly
+
+
+def restore_poly(instance_yxs_list, seq_strs, p_border, ratio_w, ratio_h, src_w,
+ src_h, valid_set):
+ poly_list = []
+ keep_str_list = []
+ for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs):
+ if len(keep_str) < 2:
+ print('--> too short, {}'.format(keep_str))
+ continue
+
+ offset_expand = 1.0
+ if valid_set == 'totaltext':
+ offset_expand = 1.2
+
+ point_pair_list = []
+ for y, x in yx_center_line:
+ offset = p_border[:, y, x].reshape(2, 2) * offset_expand
+ ori_yx = np.array([y, x], dtype=np.float32)
+ point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / np.array(
+ [ratio_w, ratio_h]).reshape(-1, 2)
+ point_pair_list.append(point_pair)
+
+ detected_poly = point_pair2poly(point_pair_list)
+ detected_poly = expand_poly_along_width(
+ detected_poly, shrink_ratio_of_width=0.2)
+ detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w)
+ detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h)
+
+ keep_str_list.append(keep_str)
+ if valid_set == 'partvgg':
+ middle_point = len(detected_poly) // 2
+ detected_poly = detected_poly[
+ [0, middle_point - 1, middle_point, -1], :]
+ poly_list.append(detected_poly)
+ elif valid_set == 'totaltext':
+ poly_list.append(detected_poly)
+ else:
+ print('--> Not supported format.')
+ exit(-1)
+ return poly_list, keep_str_list
+
+
+def generate_pivot_list_fast(p_score,
+ p_char_maps,
+ f_direction,
+ Lexicon_Table,
+ score_thresh=0.5):
+ """
+ return center point and end point of TCL instance; filter with the char maps;
+ """
+ p_score = p_score[0]
+ f_direction = f_direction.transpose(1, 2, 0)
+ p_tcl_map = (p_score > score_thresh) * 1.0
+ skeleton_map = thin(p_tcl_map.astype(np.uint8))
+ instance_count, instance_label_map = cv2.connectedComponents(
+ skeleton_map.astype(np.uint8), connectivity=8)
+
+ # get TCL Instance
+ all_pos_yxs = []
+ if instance_count > 0:
+ for instance_id in range(1, instance_count):
+ pos_list = []
+ ys, xs = np.where(instance_label_map == instance_id)
+ pos_list = list(zip(ys, xs))
+
+ if len(pos_list) < 3:
+ continue
+
+ pos_list_sorted = sort_and_expand_with_direction_v2(
+ pos_list, f_direction, p_tcl_map)
+ all_pos_yxs.append(pos_list_sorted)
+
+ p_char_maps = p_char_maps.transpose([1, 2, 0])
+ decoded_str, keep_yxs_list = ctc_decoder_for_image(
+ all_pos_yxs, logits_map=p_char_maps, Lexicon_Table=Lexicon_Table)
+ return keep_yxs_list, decoded_str
+
+
+def extract_main_direction(pos_list, f_direction):
+ """
+ f_direction: h x w x 2
+ pos_list: [[y, x], [y, x], [y, x] ...]
+ """
+ pos_list = np.array(pos_list)
+ point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]]
+ point_direction = point_direction[:, ::-1] # x, y -> y, x
+ average_direction = np.mean(point_direction, axis=0, keepdims=True)
+ average_direction = average_direction / (
+ np.linalg.norm(average_direction) + 1e-6)
+ return average_direction
+
+
+def sort_by_direction_with_image_id_deprecated(pos_list, f_direction):
+ """
+ f_direction: h x w x 2
+ pos_list: [[id, y, x], [id, y, x], [id, y, x] ...]
+ """
+ pos_list_full = np.array(pos_list).reshape(-1, 3)
+ pos_list = pos_list_full[:, 1:]
+ point_direction = f_direction[pos_list[:, 0], pos_list[:, 1]] # x, y
+ point_direction = point_direction[:, ::-1] # x, y -> y, x
+ average_direction = np.mean(point_direction, axis=0, keepdims=True)
+ pos_proj_leng = np.sum(pos_list * average_direction, axis=1)
+ sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist()
+ return sorted_list
+
+
+def sort_by_direction_with_image_id(pos_list, f_direction):
+ """
+ f_direction: h x w x 2
+ pos_list: [[id, y, x], [id, y, x], [id, y, x] ...]
+ """
+
+ def sort_part_with_direction(pos_list_full, point_direction):
+ pos_list_full = np.array(pos_list_full).reshape(-1, 3)
+ pos_list = pos_list_full[:, 1:]
+ point_direction = np.array(point_direction).reshape(-1, 2)
+ average_direction = np.mean(point_direction, axis=0, keepdims=True)
+ pos_proj_leng = np.sum(pos_list * average_direction, axis=1)
+ sorted_list = pos_list_full[np.argsort(pos_proj_leng)].tolist()
+ sorted_direction = point_direction[np.argsort(pos_proj_leng)].tolist()
+ return sorted_list, sorted_direction
+
+ pos_list = np.array(pos_list).reshape(-1, 3)
+ point_direction = f_direction[pos_list[:, 1], pos_list[:, 2]] # x, y
+ point_direction = point_direction[:, ::-1] # x, y -> y, x
+ sorted_point, sorted_direction = sort_part_with_direction(pos_list,
+ point_direction)
+
+ point_num = len(sorted_point)
+ if point_num >= 16:
+ middle_num = point_num // 2
+ first_part_point = sorted_point[:middle_num]
+ first_point_direction = sorted_direction[:middle_num]
+ sorted_fist_part_point, sorted_fist_part_direction = sort_part_with_direction(
+ first_part_point, first_point_direction)
+
+ last_part_point = sorted_point[middle_num:]
+ last_point_direction = sorted_direction[middle_num:]
+ sorted_last_part_point, sorted_last_part_direction = sort_part_with_direction(
+ last_part_point, last_point_direction)
+ sorted_point = sorted_fist_part_point + sorted_last_part_point
+ sorted_direction = sorted_fist_part_direction + sorted_last_part_direction
+
+ return sorted_point
diff --git a/ppocr/utils/e2e_utils/extract_textpoint.py b/ppocr/utils/e2e_utils/extract_textpoint_slow.py
similarity index 88%
rename from ppocr/utils/e2e_utils/extract_textpoint.py
rename to ppocr/utils/e2e_utils/extract_textpoint_slow.py
index 975ca16174f2ee1c7f985a5eb9ae1ec66aa7ca28..db0c30e67bea472da6c7ed5176b1c70f0ab1cbc6 100644
--- a/ppocr/utils/e2e_utils/extract_textpoint.py
+++ b/ppocr/utils/e2e_utils/extract_textpoint_slow.py
@@ -35,6 +35,64 @@ def get_dict(character_dict_path):
return dict_character
+def point_pair2poly(point_pair_list):
+    """
+    Convert a sequence of point pairs into a single polygon outline.
+
+    The first point of every pair is emitted in input order, followed by the
+    second point of every pair in reverse input order, so the outline runs
+    along one side of the pairs and comes back along the other.
+
+    Args:
+        point_pair_list: non-empty sequence of pairs. Each pair is indexed
+            with [0] and [1] and the two entries must support subtraction
+            (e.g. a 2 x 2 numpy array).
+
+    Returns:
+        (poly, pair_info): poly is the point array reshaped to (-1, 2);
+        pair_info is the (max, min, mean) of the pair lengths.
+    """
+    lengths = np.array(
+        [np.linalg.norm(pair[0] - pair[1]) for pair in point_pair_list])
+    pair_info = (lengths.max(), lengths.min(), lengths.mean())
+
+    # Walk forward over the first points, then backward over the second ones.
+    forward_side = [pair[0] for pair in point_pair_list]
+    backward_side = [pair[1] for pair in reversed(point_pair_list)]
+    return np.array(forward_side + backward_side).reshape(-1, 2), pair_info
+
+
+def shrink_quad_along_width(quad, begin_width_ratio=0., end_width_ratio=1.):
+    """
+    Cut a quad down to a sub-span of its width.
+
+    Both the quad[0] -> quad[1] edge and the quad[3] -> quad[2] edge are
+    sampled at begin_width_ratio and end_width_ratio (linear interpolation;
+    ratios outside [0, 1] extrapolate beyond the quad).
+
+    Args:
+        quad: four points indexable as quad[0] .. quad[3].
+        begin_width_ratio: position of the new start side along both edges.
+        end_width_ratio: position of the new end side along both edges.
+
+    Returns:
+        Array of four points in the same corner order as the input quad.
+    """
+    # Column vector so that each ratio scales a whole (x, y) edge vector.
+    ratios = np.array(
+        [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+    edge_01 = quad[0] + (quad[1] - quad[0]) * ratios
+    edge_32 = quad[3] + (quad[2] - quad[3]) * ratios
+    begin_on_01, end_on_01 = edge_01[0], edge_01[1]
+    begin_on_32, end_on_32 = edge_32[0], edge_32[1]
+    return np.array([begin_on_01, end_on_01, end_on_32, begin_on_32])
+
+
+def expand_poly_along_width(poly, shrink_ratio_of_width=0.3):
+    """
+    Stretch both ends of a polygon outward along its width, in place.
+
+    The end quad at each side of the polygon is extended by a fraction of
+    its own side length (shrink_ratio_of_width times the quad[0]-quad[3]
+    distance, expressed relative to the quad[0]-quad[1] distance), and the
+    four outermost polygon points are overwritten with the result.
+
+    Args:
+        poly: array of polygon points; it is modified in place.
+        shrink_ratio_of_width: expansion amount as a fraction of the end
+            quad's side length.
+
+    Returns:
+        The same poly object that was passed in.
+    """
+    half = poly.shape[0] // 2
+
+    # End quad built from the first two and the last two polygon points.
+    head_quad = np.array(
+        [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+    head_side = np.linalg.norm(head_quad[0] - head_quad[3])
+    head_span = np.linalg.norm(head_quad[0] - head_quad[1]) + 1e-6
+    # Negative begin ratio extrapolates past quad[0] / quad[3].
+    head_ratio = -shrink_ratio_of_width * head_side / head_span
+    head_expanded = shrink_quad_along_width(head_quad, head_ratio, 1.0)
+
+    # End quad built from the four points around the middle of the array.
+    tail_quad = np.array(
+        [poly[half - 2], poly[half - 1], poly[half], poly[half + 1]],
+        dtype=np.float32)
+    tail_side = np.linalg.norm(tail_quad[0] - tail_quad[3])
+    tail_span = np.linalg.norm(tail_quad[0] - tail_quad[1]) + 1e-6
+    # End ratio above 1 extrapolates past quad[1] / quad[2].
+    tail_ratio = 1.0 + shrink_ratio_of_width * tail_side / tail_span
+    tail_expanded = shrink_quad_along_width(tail_quad, 0.0, tail_ratio)
+
+    poly[0] = head_expanded[0]
+    poly[-1] = head_expanded[-1]
+    poly[half - 1] = tail_expanded[1]
+    poly[half] = tail_expanded[2]
+    return poly
+
+
def softmax(logits):
"""
logits: N x d
@@ -399,13 +457,13 @@ def generate_pivot_list_horizontal(p_score,
return center_pos_yxs, end_points_yxs
-def generate_pivot_list(p_score,
- p_char_maps,
- f_direction,
- score_thresh=0.5,
- is_backbone=False,
- is_curved=True,
- image_id=0):
+def generate_pivot_list_slow(p_score,
+ p_char_maps,
+ f_direction,
+ score_thresh=0.5,
+ is_backbone=False,
+ is_curved=True,
+ image_id=0):
"""
Warp all the function together.
"""
diff --git a/ppocr/utils/e2e_utils/pgnet_pp_utils.py b/ppocr/utils/e2e_utils/pgnet_pp_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..64bfd372cc75ab533ce3ef46216a33345dae40c4
--- /dev/null
+++ b/ppocr/utils/e2e_utils/pgnet_pp_utils.py
@@ -0,0 +1,181 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+import os
+import sys
+
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+from extract_textpoint_slow import *
+from extract_textpoint_fast import generate_pivot_list_fast, restore_poly
+
+
+class PGNet_PostProcess(object):
+    """
+    Turn raw PGNet output maps into text polygons and recognised strings.
+
+    Two decoding strategies are provided, pg_postprocess_fast and
+    pg_postprocess_slow. Both look at the first item of the batch only and
+    return a dict with the keys 'points' and 'strs'.
+    """
+
+    def __init__(self, character_dict_path, valid_set, score_thresh, outs_dict,
+                 shape_list):
+        """
+        Args:
+            character_dict_path: path passed to get_dict to load the
+                character table.
+            valid_set: dataset name; the slow path handles 'totaltext' and
+                'partvgg' and terminates the process for anything else.
+            score_thresh: threshold forwarded to the pivot-list generators.
+            outs_dict: mapping holding the 'f_score', 'f_border', 'f_char'
+                and 'f_direction' outputs (paddle.Tensor or indexable arrays).
+            shape_list: sequence whose first item unpacks to
+                (src_h, src_w, ratio_h, ratio_w).
+        """
+        self.Lexicon_Table = get_dict(character_dict_path)
+        self.valid_set = valid_set
+        self.score_thresh = score_thresh
+        self.outs_dict = outs_dict
+        self.shape_list = shape_list
+
+    def _load_first_sample(self):
+        """
+        Return the (score, border, char, direction) maps of batch item 0.
+
+        Tensors are converted to numpy arrays. As in the original code, the
+        type of 'f_score' alone decides whether the conversion is applied to
+        all four maps.
+        """
+        keys = ('f_score', 'f_border', 'f_char', 'f_direction')
+        maps = [self.outs_dict[key] for key in keys]
+        if isinstance(maps[0], paddle.Tensor):
+            return tuple(item[0].numpy() for item in maps)
+        return tuple(item[0] for item in maps)
+
+    def _decode_instance_strs(self, p_char, instance_yxs_list):
+        """
+        Greedy-CTC decode one string for every instance center line.
+
+        Args:
+            p_char: character map of one image as a numpy array; it is
+                transposed from axis order [0, 1, 2, 3] to [0, 2, 3, 1]
+                after a batch axis is prepended.
+            instance_yxs_list: per-instance index lists used with gather_nd.
+
+        Returns:
+            List of decoded strings, one per instance.
+        """
+        char_tensor = paddle.to_tensor(np.expand_dims(p_char, axis=0))
+        # The transposed map is the same for every instance: build it once
+        # instead of once per loop iteration.
+        f_char_map = paddle.transpose(char_tensor, [0, 2, 3, 1])
+
+        seq_strs = []
+        for instance_yxs in instance_yxs_list:
+            gather_info_lod = paddle.to_tensor(instance_yxs)
+            feature_seq = paddle.gather_nd(f_char_map, gather_info_lod)
+            feature_seq = np.expand_dims(feature_seq.numpy(), axis=0)
+            feature_len = np.array([[len(feature_seq[0])]]).astype(np.int64)
+            # NOTE(review): blank=36 is hard-coded; presumably it matches a
+            # 36-entry character dictionary -- confirm before using another
+            # dictionary with the slow path.
+            seq_pred = paddle.fluid.layers.ctc_greedy_decoder(
+                input=paddle.to_tensor(feature_seq),
+                blank=36,
+                input_length=paddle.to_tensor(feature_len))
+            seq_pred_str = seq_pred[0].numpy().tolist()[0]
+            seq_len = seq_pred[1].numpy()[0][0]
+            seq_strs.append(''.join(
+                [self.Lexicon_Table[pos] for pos in seq_pred_str[:seq_len]]))
+        return seq_strs
+
+    def pg_postprocess_fast(self):
+        """
+        Decode with generate_pivot_list_fast followed by restore_poly.
+
+        Returns:
+            dict with 'points' (polygon list) and 'strs' (string list), both
+            taken from restore_poly.
+        """
+        p_score, p_border, p_char, p_direction = self._load_first_sample()
+
+        src_h, src_w, ratio_h, ratio_w = self.shape_list[0]
+        instance_yxs_list, seq_strs = generate_pivot_list_fast(
+            p_score,
+            p_char,
+            p_direction,
+            self.Lexicon_Table,
+            score_thresh=self.score_thresh)
+        poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs,
+                                                p_border, ratio_w, ratio_h,
+                                                src_w, src_h, self.valid_set)
+        data = {
+            'points': poly_list,
+            'strs': keep_str_list,
+        }
+        return data
+
+    def pg_postprocess_slow(self):
+        """
+        Decode with generate_pivot_list_slow and paddle's CTC greedy decoder.
+
+        Instances whose decoded string has fewer than two characters are
+        dropped. For valid_set 'partvgg' each polygon is reduced to its four
+        corner points; for 'totaltext' the full polygon is kept. Any other
+        valid_set prints a message and exits the process with status -1 as
+        soon as an instance would be kept.
+
+        Returns:
+            dict with 'points' (list of int32 polygon arrays) and 'strs'
+            (list of the matching strings).
+        """
+        p_score, p_border, p_char, p_direction = self._load_first_sample()
+        src_h, src_w, ratio_h, ratio_w = self.shape_list[0]
+        is_curved = self.valid_set == "totaltext"
+        instance_yxs_list = generate_pivot_list_slow(
+            p_score,
+            p_char,
+            p_direction,
+            score_thresh=self.score_thresh,
+            is_backbone=True,
+            is_curved=is_curved)
+        seq_strs = self._decode_instance_strs(p_char, instance_yxs_list)
+
+        # Border offsets are enlarged only for the 'totaltext' set.
+        offset_expand = 1.2 if self.valid_set == 'totaltext' else 1.0
+        ratio_wh = np.array([ratio_w, ratio_h]).reshape(-1, 2)
+
+        poly_list = []
+        keep_str_list = []
+        for yx_center_line, keep_str in zip(instance_yxs_list, seq_strs):
+            # Too-short strings are discarded anyway, so skip them before
+            # spending time on the polygon geometry.
+            if len(keep_str) < 2:
+                continue
+
+            # A single center point is duplicated so that a polygon with
+            # two point pairs can still be built.
+            if len(yx_center_line) == 1:
+                yx_center_line.append(yx_center_line[-1])
+
+            point_pair_list = []
+            for _batch_id, y, x in yx_center_line:
+                offset = p_border[:, y, x].reshape(2, 2)
+                if offset_expand != 1.0:
+                    offset_length = np.linalg.norm(
+                        offset, axis=1, keepdims=True)
+                    expand_length = np.clip(
+                        offset_length * (offset_expand - 1),
+                        a_min=0.5,
+                        a_max=3.0)
+                    # A zero-length offset used to divide by zero here and
+                    # propagate NaN into the polygon; clamp the divisor.
+                    safe_length = np.maximum(offset_length, 1e-6)
+                    offset = offset + offset / safe_length * expand_length
+                ori_yx = np.array([y, x], dtype=np.float32)
+                # (y, x) -> (x, y), then rescale; the factor 4.0 presumably
+                # undoes the stride of the output maps -- TODO confirm.
+                point_pair = (ori_yx + offset)[:, ::-1] * 4.0 / ratio_wh
+                point_pair_list.append(point_pair)
+
+            detected_poly, _ = point_pair2poly(point_pair_list)
+            detected_poly = expand_poly_along_width(
+                detected_poly, shrink_ratio_of_width=0.2)
+            detected_poly[:, 0] = np.clip(
+                detected_poly[:, 0], a_min=0, a_max=src_w)
+            detected_poly[:, 1] = np.clip(
+                detected_poly[:, 1], a_min=0, a_max=src_h)
+            detected_poly = np.round(detected_poly).astype('int32')
+
+            if self.valid_set == 'partvgg':
+                # Keep only the four corner points of the polygon.
+                middle_point = len(detected_poly) // 2
+                detected_poly = detected_poly[
+                    [0, middle_point - 1, middle_point, -1], :]
+            elif self.valid_set != 'totaltext':
+                print('--> Not supported format.')
+                # sys.exit raises the same SystemExit as the site-provided
+                # exit() but is always available.
+                sys.exit(-1)
+
+            keep_str_list.append(keep_str)
+            poly_list.append(detected_poly)
+
+        data = {
+            'points': poly_list,
+            'strs': keep_str_list,
+        }
+        return data
diff --git a/ppocr/utils/en_dict.txt b/ppocr/utils/en_dict.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7677d31b9d3f08eef2823c2cf051beeab1f0470b
--- /dev/null
+++ b/ppocr/utils/en_dict.txt
@@ -0,0 +1,95 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+\
+]
+^
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+
diff --git a/setup.py b/setup.py
index d491adb17e6251355c0190d0ddecb9a82b09bc2e..a1ddbbb6d6d0c2657bb699a72bde75ef07ab3a94 100644
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''},
include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
- version='2.0.4',
+ version='2.0.6',
install_requires=requirements,
license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py
index a5c57914173b7d44c9479f7bb120e4ff409b91e3..8b94f24a9ffa16ca5683795afa6392fa23c24a94 100755
--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -66,6 +66,7 @@ class TextE2E(object):
postprocess_params["score_thresh"] = args.e2e_pgnet_score_thresh
postprocess_params["character_dict_path"] = args.e2e_char_dict_path
postprocess_params["valid_set"] = args.e2e_pgnet_valid_set
+ postprocess_params["mode"] = args.e2e_pgnet_mode
self.e2e_pgnet_polygon = args.e2e_pgnet_polygon
else:
logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index b273eaf3258421d5c5c30c132f99e78f9f0999ba..9fa51d80b41ae80abe206ee779accd44d49cebba 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -86,6 +86,7 @@ def parse_args():
"--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt")
parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext')
parser.add_argument("--e2e_pgnet_polygon", type=bool, default=True)
+ parser.add_argument("--e2e_pgnet_mode", type=str, default='fast')
# params for text classifier
parser.add_argument("--use_angle_cls", type=str2bool, default=False)