diff --git a/configs/rec/multi_language/generate_multi_language_configs.py b/configs/rec/multi_language/generate_multi_language_configs.py index 5c354c0080f3bc11b988fbc9bd591bb341d8cff8..79354bd61fa2c10ff96762390b68ee03749a9dc1 100644 --- a/configs/rec/multi_language/generate_multi_language_configs.py +++ b/configs/rec/multi_language/generate_multi_language_configs.py @@ -1,5 +1,6 @@ import yaml from argparse import ArgumentParser, RawDescriptionHelpFormatter +import os.path support_list = { 'it':'italian', 'xi':'spanish', 'pu':'portuguese', 'ru':'russian', 'ar':'arabic', @@ -8,6 +9,12 @@ support_list = { 'te':'telugu', 'ka':'kannada', 'chinese_cht':'chinese tradition','hi':'hindi','mr':'marathi', 'ne':'nepali', } +assert( + os.path.isfile("./rec_multi_language_lite_train.yml") + ),"Loss basic configuration file rec_multi_language_lite_train.yml.\ +You can download it from \ +https://github.com/PaddlePaddle/PaddleOCR/tree/dygraph/configs/rec/multi_language/" + global_config = yaml.load(open("./rec_multi_language_lite_train.yml", 'rb'), Loader=yaml.Loader) class ArgsParser(ArgumentParser): @@ -17,18 +24,20 @@ class ArgsParser(ArgumentParser): self.add_argument( "-o", "--opt", nargs='+', help="set configuration options") self.add_argument( - "-t", "--type", nargs='+', help="set language type, support {}".format(support_list)) + "-l", "--language", nargs='+', help="set language type, support {}".format(support_list)) self.add_argument( - "--train",type=str,help="you can use this command to change the default path") + "--train",type=str,help="you can use this command to change the train dataset default path") self.add_argument( - "--val",type=str,help="you can use this command to change the default path") + "--val",type=str,help="you can use this command to change the eval dataset default path") self.add_argument( - "--dict",type=str,help="you can use this command to change the default path") + "--dict",type=str,help="you can use this command to change the dictionary default path") + self.add_argument( + "--dataset_root_path",type=str,help="you can use this command to change the dataset default root path") def parse_args(self, argv=None): args = super(ArgsParser, self).parse_args(argv) args.opt = self._parse_opt(args.opt) - args.type,args.config = self._set_language(args.type) + args.language = self._set_language(args.language) return args def _parse_opt(self, opts): @@ -42,33 +51,26 @@ class ArgsParser(ArgumentParser): return config def _set_language(self, type): - config={ - 'Global':{'character_dict_path':None,'save_model_dir':None}, - 'Train':{'dataset':{'label_file_list':None}}, - 'Eval': {'dataset': {'label_file_list': None}}, - } - assert(type),"please use -t or --type to choose language type" assert( type[0] in support_list.keys() ),"the sub_keys(-t or --type) can only be one of support list: \n{},\nbut get: {}, " \ "please check your running command".format(support_list, type) - config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0]) - config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0]) - config['Train']['dataset']['label_file_list'] = ["./train_data/{}_train.txt".format(type[0])] - config['Eval']['dataset']['label_file_list'] = ["./train_data/{}_val.txt".format(type[0])] - return type[0],config + global_config['Global']['character_dict_path'] = 'ppocr/utils/dict/{}_dict.txt'.format(type[0]) + global_config['Global']['save_model_dir'] = './output/rec_{}_lite'.format(type[0]) + global_config['Train']['dataset']['label_file_list'] = ["train_data/{}_train.txt".format(type[0])] + global_config['Eval']['dataset']['label_file_list'] = ["train_data/{}_val.txt".format(type[0])] + return type[0] -def merge_config(global_config,new_config): +def merge_config(config): """ Merge config into global config. Args: - global_config: Source config - new_config (dict): Config to be merged. + config (dict): Config to be merged. Returns: global config """ - for key, value in new_config.items(): + for key, value in config.items(): if "." not in key: if isinstance(value, dict) and key in global_config: global_config[key].update(value) @@ -78,8 +80,7 @@ def merge_config(global_config,new_config): sub_keys = key.split('.') assert ( sub_keys[0] in global_config - ), "the sub_keys can only be one of global_config: {}, but get: {}, " \ - "please check your running command".format( + ), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format( global_config.keys(), sub_keys[0]) cur = global_config[sub_keys[0]] for idx, sub_key in enumerate(sub_keys[1:]): @@ -87,22 +88,27 @@ def merge_config(global_config,new_config): cur[sub_key] = value else: cur = cur[sub_key] - return global_config if __name__ == '__main__': FLAGS = ArgsParser().parse_args() - global_config = merge_config(global_config,FLAGS.opt) - global_config = merge_config(global_config, FLAGS.config) + merge_config(FLAGS.opt) if FLAGS.train: global_config['Train']['dataset']['label_file_list'] = [FLAGS.train] if FLAGS.val: global_config['Eval']['dataset']['label_file_list'] = [FLAGS.val] if FLAGS.dict: global_config['Global']['character_dict_path'] = FLAGS.dict + if FLAGS.dataset_root_path: + global_config['Eval']['dataset']['data_dir'] = FLAGS.dataset_root_path + global_config['Train']['dataset']['data_dir'] = FLAGS.dataset_root_path - print("train list path set to:{}".format(global_config['Train']['dataset']['label_file_list'][0])) - print("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0])) - print("dict path set to :{}".format(global_config['Global']['character_dict_path'])) - with open('rec_{}_lite_train.yml'.format(FLAGS.type), 'w') as f: + save_file_path = 'rec_{}_lite_train.yml'.format(FLAGS.language) + if os.path.isfile(save_file_path): + os.remove(save_file_path) + with open(save_file_path, 'w') as f: yaml.dump(dict(global_config), f, default_flow_style=False, sort_keys=False) - print("config file set to :configs/rec/multi_language/rec_{}_lite_train.yml".format(FLAGS.type)) + print("Train list path set to :{}".format(global_config['Train']['dataset']['label_file_list'][0])) + print("Eval list path set to :{}".format(global_config['Eval']['dataset']['label_file_list'][0])) + print("Dataset root path set to :{}".format(global_config['Eval']['dataset']['data_dir'])) + print("Dict path set to :{}".format(global_config['Global']['character_dict_path'])) + print("Config file set to :configs/rec/multi_language/{}".format(save_file_path)) diff --git a/ppocr/utils/dict/german_dict.txt b/ppocr/utils/dict/german_dict.txt index af0b01ebc9c5f588b621e318a9d85760cd8f42d9..6c4f7df8b199b950a48f554e189bc081e23c9578 100644 --- a/ppocr/utils/dict/german_dict.txt +++ b/ppocr/utils/dict/german_dict.txt @@ -1,5 +1,7 @@ + ! " +# $ % & @@ -72,7 +74,7 @@ l m n o -p +p q r s @@ -83,45 +85,59 @@ w x y z -¡ -¢ £ -¤ -¥ -¦ § -¨ -© -ª -« -¬ ­ -® -¯ ° -± -² -³ ´ µ -¶ · -¸ -¹ º -» -¼ -½ ¿ -Â -Ã +Á +Ä Å -Ê -Î -Ð +É +Ï +Ô +Ö +Ü +ß +à á â +ã +ä å æ +ç +è é - +ê +ë +í +ï +ñ +ò +ó +ô +ö +ø +ù +ú +û +ü +ō +Š +Ÿ +ʒ +β +δ +з +Ṡ +‘ +€ +店 +旺 +潮 +酒