提交 afecc497 · 作者: LDOUBLEV

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into fixocr

......@@ -13,6 +13,7 @@
#limitations under the License.
import os
import sys
import math
import random
import functools
......@@ -42,6 +43,10 @@ class TrainReader(object):
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
random.shuffle(img_id_list)
if sys.platform == "win32":
print("multiprocess is not fully compatible with Windows."
"num_workers will be 1.")
self.num_workers = 1
for img_id in range(process_id, img_num, self.num_workers):
label_infor = label_infor_list[img_id_list[img_id]]
outs = self.process(label_infor)
......
......@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
reader_function = params['reader_function']
function = create_module(reader_function)(params)
if mode == "train":
if sys.platform == "win32":
return function(0)
readers = []
num_workers = params['num_workers']
for process_id in range(num_workers):
......
......@@ -13,6 +13,7 @@
#limitations under the License.
import os
import sys
import math
import random
import numpy as np
......@@ -191,16 +192,21 @@ class SimpleReader(object):
img_num = len(label_infor_list)
img_id_list = list(range(img_num))
random.shuffle(img_id_list)
if sys.platform=="win32":
print("multiprocess is not fully compatible with Windows."
"num_workers will be 1.")
self.num_workers = 1
for img_id in range(process_id, img_num, self.num_workers):
label_infor = label_infor_list[img_id_list[img_id]]
substr = label_infor.decode('utf-8').strip("\n").split("\t")
img_path = self.img_set_dir + "/" + substr[0]
img = cv2.imread(img_path)
if img.shape[-1]==1 or len(list(img.shape))==2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if img is None:
logger.info("{} does not exist!".format(img_path))
continue
if img.shape[-1]==1 or len(list(img.shape))==2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
label = substr[1]
outs = process_image(img, self.image_shape, label,
self.char_ops, self.loss_type,
......
......@@ -34,7 +34,7 @@ class CharacterOps(object):
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
line = line.decode('utf-8').strip("\n")
line = line.decode('utf-8').strip("\n").strip("\r\n")
self.character_str += line
dict_character = list(self.character_str)
elif self.character_type == "en_sensitive":
......
......@@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
total_sample_num = 0
total_acc_num = 0
total_batch_num = 0
if mode == "eval":
if mode == "test":
is_remove_duplicate = False
else:
is_remove_duplicate = True
......@@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict):
total_correct_number = 0
eval_data_acc_info = {}
for eval_data in eval_data_list:
config['TestReader']['lmdb_sets_dir'] = \
config['EvalReader']['lmdb_sets_dir'] = \
eval_data_dir + "/" + eval_data
eval_reader = reader_main(config=config, mode="test")
eval_reader = reader_main(config=config, mode="eval")
eval_info_dict['reader'] = eval_reader
metrics = eval_rec_run(exe, config, eval_info_dict, "test")
metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
total_evaluation_data_number += metrics['total_sample_num']
total_correct_number += metrics['total_acc_num']
eval_data_acc_info[eval_data] = metrics
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册