From dd0f8c1d89bae629a9877c2649a994642dd0794b Mon Sep 17 00:00:00 2001
From: tink2123
Date: Tue, 8 Dec 2020 19:07:39 +0800
Subject: [PATCH] update for multi-language

---
Review notes (text in this section is ignored when the patch is applied):
  * 'korean' replaces the duplicated 'french' entry in the two
    support_character_type lists and the two dictionary-branch lists, so
    that rec_korean_lite_train.yml (character_type: korean) passes the
    support check and loads its character dictionary.
  * The post-image blob ids on the "index" lines of label_ops.py and
    rec_postprocess.py were not recomputed after that change.
  * Not addressed here: the assertion message formats self.character_str,
    which appears to be assigned only after the assertion; confirm and
    consider formatting character_type instead.

 configs/rec/multi_language/rec_en_number_lite_train.yml | 2 +-
 configs/rec/multi_language/rec_french_lite_train.yml | 4 ++--
 configs/rec/multi_language/rec_german_lite_train.yml | 2 +-
 configs/rec/multi_language/rec_japan_lite_train.yml | 2 +-
 configs/rec/multi_language/rec_korean_lite_train.yml | 2 +-
 ppocr/data/imaug/label_ops.py | 6 ++++--
 ppocr/postprocess/rec_postprocess.py | 8 +++++---
 7 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/configs/rec/multi_language/rec_en_number_lite_train.yml b/configs/rec/multi_language/rec_en_number_lite_train.yml
index 9d0f1f00..70d825e6 100644
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@@ -15,7 +15,7 @@ Global:
   use_visualdl: False
   infer_img:
   # for data or label process
-  character_dict_path: ppocr/utils/ic15_dict.txt
+  character_dict_path: ppocr/utils/dict/ic15_dict.txt
   character_type: ch
   max_text_length: 25
   infer_mode: False
diff --git a/configs/rec/multi_language/rec_french_lite_train.yml b/configs/rec/multi_language/rec_french_lite_train.yml
index da3aad58..0b8659ab 100644
--- a/configs/rec/multi_language/rec_french_lite_train.yml
+++ b/configs/rec/multi_language/rec_french_lite_train.yml
@@ -15,7 +15,7 @@ Global:
   use_visualdl: False
   infer_img:
   # for data or label process
-  character_dict_path: ppocr/utils/french_dict.txt
+  character_dict_path: ppocr/utils/dict/french_dict.txt
   character_type: french
   max_text_length: 25
   infer_mode: False
@@ -85,7 +85,7 @@ Eval:
   dataset:
     name: SimpleDataSet
     data_dir: ./train_data/
-    label_file_list: ["./train_data/eval_list.txt"]
+    label_file_list: ["./train_data/train_list.txt"]
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
diff --git a/configs/rec/multi_language/rec_german_lite_train.yml b/configs/rec/multi_language/rec_german_lite_train.yml
index 403be669..9978a21e 100644
--- a/configs/rec/multi_language/rec_german_lite_train.yml
+++ b/configs/rec/multi_language/rec_german_lite_train.yml
@@ -15,7 +15,7 @@ Global:
   use_visualdl: False
   infer_img:
   # for data or label process
-  character_dict_path: ppocr/utils/german_dict.txt
+  character_dict_path: ppocr/utils/dict/german_dict.txt
   character_type: german
   max_text_length: 25
   infer_mode: False
diff --git a/configs/rec/multi_language/rec_japan_lite_train.yml b/configs/rec/multi_language/rec_japan_lite_train.yml
index 5ff61c01..938d377e 100644
--- a/configs/rec/multi_language/rec_japan_lite_train.yml
+++ b/configs/rec/multi_language/rec_japan_lite_train.yml
@@ -15,7 +15,7 @@ Global:
   use_visualdl: False
   infer_img:
   # for data or label process
-  character_dict_path: ppocr/utils/japan_dict.txt
+  character_dict_path: ppocr/utils/dict/japan_dict.txt
   character_type: japan
   max_text_length: 25
   infer_mode: False
diff --git a/configs/rec/multi_language/rec_korean_lite_train.yml b/configs/rec/multi_language/rec_korean_lite_train.yml
index 2b2211ef..7b070c44 100644
--- a/configs/rec/multi_language/rec_korean_lite_train.yml
+++ b/configs/rec/multi_language/rec_korean_lite_train.yml
@@ -15,7 +15,7 @@ Global:
   use_visualdl: False
   infer_img:
   # for data or label process
-  character_dict_path: ppocr/utils/korean_dict.txt
+  character_dict_path: ppocr/utils/dict/korean_dict.txt
   character_type: korean
   max_text_length: 25
   infer_mode: False
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index f3c90050..2932f2a1 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -79,7 +79,9 @@ class BaseRecLabelEncode(object):
                  character_dict_path=None,
                  character_type='ch',
                  use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, self.character_str)
 
@@ -87,7 +89,7 @@ class BaseRecLabelEncode(object):
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
             self.character_str = ""
             assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
             with open(character_dict_path, "rb") as fin:
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index eb9be686..8f6fca9e 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -23,14 +23,16 @@ class BaseRecLabelDecode(object):
                  character_dict_path=None,
                  character_type='ch',
                  use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, self.character_str)
 
         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
             self.character_str = ""
             assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
             with open(character_dict_path, "rb") as fin:
@@ -150,4 +152,4 @@ class AttnLabelDecode(BaseRecLabelDecode):
         else:
             assert False, "unsupport type %s in get_beg_end_flag_idx" \
                 % beg_or_end
-        return idx
\ No newline at end of file
+        return idx
-- 
GitLab