提交 dd0f8c1d 作者: tink2123

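Update for multi-language support: move character dictionaries under ppocr/utils/dict/ and accept additional character types (french, german, japan) in the recognition label encoder and decoder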

上级 8a5566c9
...@@ -15,7 +15,7 @@ Global: ...@@ -15,7 +15,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/ic15_dict.txt character_dict_path: ppocr/utils/dict/ic15_dict.txt
character_type: ch character_type: ch
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
......
...@@ -15,7 +15,7 @@ Global: ...@@ -15,7 +15,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/french_dict.txt character_dict_path: ppocr/utils/dict/french_dict.txt
character_type: french character_type: french
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
...@@ -85,7 +85,7 @@ Eval: ...@@ -85,7 +85,7 @@ Eval:
dataset: dataset:
name: SimpleDataSet name: SimpleDataSet
data_dir: ./train_data/ data_dir: ./train_data/
label_file_list: ["./train_data/eval_list.txt"] label_file_list: ["./train_data/train_list.txt"]
transforms: transforms:
- DecodeImage: # load image - DecodeImage: # load image
img_mode: BGR img_mode: BGR
......
...@@ -15,7 +15,7 @@ Global: ...@@ -15,7 +15,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/german_dict.txt character_dict_path: ppocr/utils/dict/german_dict.txt
character_type: german character_type: german
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
......
...@@ -15,7 +15,7 @@ Global: ...@@ -15,7 +15,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/japan_dict.txt character_dict_path: ppocr/utils/dict/japan_dict.txt
character_type: japan character_type: japan
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
......
...@@ -15,7 +15,7 @@ Global: ...@@ -15,7 +15,7 @@ Global:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/korean_dict.txt character_dict_path: ppocr/utils/dict/korean_dict.txt
character_type: korean character_type: korean
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
......
...@@ -79,7 +79,9 @@ class BaseRecLabelEncode(object): ...@@ -79,7 +79,9 @@ class BaseRecLabelEncode(object):
character_dict_path=None, character_dict_path=None,
character_type='ch', character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = ['ch', 'en', 'en_sensitive'] support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'french'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format( assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, self.character_str) support_character_type, self.character_str)
...@@ -87,7 +89,7 @@ class BaseRecLabelEncode(object): ...@@ -87,7 +89,7 @@ class BaseRecLabelEncode(object):
if character_type == "en": if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "ch": elif character_type in ["ch", "french", "german", "japan", "french"]:
self.character_str = "" self.character_str = ""
assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch" assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
......
...@@ -23,14 +23,16 @@ class BaseRecLabelDecode(object): ...@@ -23,14 +23,16 @@ class BaseRecLabelDecode(object):
character_dict_path=None, character_dict_path=None,
character_type='ch', character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = ['ch', 'en', 'en_sensitive'] support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'french'
]
assert character_type in support_character_type, "Only {} are supported now but get {}".format( assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, self.character_str) support_character_type, self.character_str)
if character_type == "en": if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "ch": elif character_type in ["ch", "french", "german", "japan", "french"]:
self.character_str = "" self.character_str = ""
assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch" assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
...@@ -150,4 +152,4 @@ class AttnLabelDecode(BaseRecLabelDecode): ...@@ -150,4 +152,4 @@ class AttnLabelDecode(BaseRecLabelDecode):
else: else:
assert False, "unsupport type %s in get_beg_end_flag_idx" \ assert False, "unsupport type %s in get_beg_end_flag_idx" \
% beg_or_end % beg_or_end
return idx return idx
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册