diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py old mode 100755 new mode 100644 index 3051c60d370e532248dc45497792f26017f0f337..272d7317c9a98a695dad3451bfc108cc29e74bce --- a/ppocr/data/det/dataset_traversal.py +++ b/ppocr/data/det/dataset_traversal.py @@ -13,6 +13,7 @@ #limitations under the License. import os +import sys import math import random import functools @@ -42,6 +43,10 @@ class TrainReader(object): img_num = len(label_infor_list) img_id_list = list(range(img_num)) random.shuffle(img_id_list) + if sys.platform == "win32": + print("multiprocess is not fully compatible with Windows." + "num_workers will be 1.") + self.num_workers = 1 for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] outs = self.process(label_infor) diff --git a/ppocr/data/reader_main.py b/ppocr/data/reader_main.py index 55bd1e0842558635533b6bf2d746a3ad8a7c5b9d..b0df0d462b3dd851c8c5ffbe2aff988b8f6b69f6 100755 --- a/ppocr/data/reader_main.py +++ b/ppocr/data/reader_main.py @@ -66,6 +66,8 @@ def reader_main(config=None, mode=None): reader_function = params['reader_function'] function = create_module(reader_function)(params) if mode == "train": + if sys.platform == "win32": + return function(0) readers = [] num_workers = params['num_workers'] for process_id in range(num_workers): diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 357a89fbe53689177b34ef95f03fb532a9bae9d0..f60b9fe36dda95dd6ce58a0c0e6b57c843280b35 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -13,6 +13,7 @@ #limitations under the License. import os +import sys import math import random import numpy as np @@ -191,16 +192,21 @@ class SimpleReader(object): img_num = len(label_infor_list) img_id_list = list(range(img_num)) random.shuffle(img_id_list) + if sys.platform=="win32": + print("multiprocess is not fully compatible with Windows." + "num_workers will be 1.") + self.num_workers = 1 for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] substr = label_infor.decode('utf-8').strip("\n").split("\t") img_path = self.img_set_dir + "/" + substr[0] img = cv2.imread(img_path) - if img.shape[-1]==1 or len(list(img.shape))==2: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if img is None: logger.info("{} does not exist!".format(img_path)) continue + if img.shape[-1]==1 or len(list(img.shape))==2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + label = substr[1] outs = process_image(img, self.image_shape, label, self.char_ops, self.loss_type, diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py index b40750392291f271b26fef88e58844be9020d2ea..3cbc31a49b991cab7f2f8d8c56db4e0d611fbf55 100755 --- a/ppocr/utils/character.py +++ b/ppocr/utils/character.py @@ -34,7 +34,7 @@ class CharacterOps(object): with open(character_dict_path, "rb") as fin: lines = fin.readlines() for line in lines: - line = line.decode('utf-8').strip("\n") + line = line.decode('utf-8').strip("\n").strip("\r\n") self.character_str += line dict_character = list(self.character_str) elif self.character_type == "en_sensitive": diff --git a/tools/eval_utils/eval_rec_utils.py b/tools/eval_utils/eval_rec_utils.py index 2d7d7e1d4e200e12643f8cfcb812a3cba3229b8f..3ceaa159ce1a98940bbdf1127b96e82243e96658 100644 --- a/tools/eval_utils/eval_rec_utils.py +++ b/tools/eval_utils/eval_rec_utils.py @@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode): total_sample_num = 0 total_acc_num = 0 total_batch_num = 0 - if mode == "eval": + if mode == "test": is_remove_duplicate = False else: is_remove_duplicate = True @@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict): total_correct_number = 0 eval_data_acc_info = {} for eval_data in eval_data_list: - config['TestReader']['lmdb_sets_dir'] = \ + config['EvalReader']['lmdb_sets_dir'] = \ eval_data_dir + "/" + eval_data - eval_reader = reader_main(config=config, mode="test") + eval_reader = reader_main(config=config, mode="eval") eval_info_dict['reader'] = eval_reader - metrics = eval_rec_run(exe, config, eval_info_dict, "test") + metrics = eval_rec_run(exe, config, eval_info_dict, "eval") total_evaluation_data_number += metrics['total_sample_num'] total_correct_number += metrics['total_acc_num'] eval_data_acc_info[eval_data] = metrics