From d42eb4ffebcd405496dc6975bfe75afe62cfccf6 Mon Sep 17 00:00:00 2001 From: "shaohua.zhang" Date: Thu, 27 Aug 2020 17:14:10 +0800 Subject: [PATCH] fix some bugs --- ppocr/data/rec/dataset_traversal.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 67cbf9b5..79167a6e 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -13,20 +13,16 @@ #limitations under the License. import os -import sys -import math import random -import numpy as np -import cv2 +import sys -import string +import cv2 import lmdb -from ppocr.utils.utility import initial_logger from ppocr.utils.utility import get_image_file_list -logger = initial_logger() - +from ppocr.utils.utility import initial_logger from .img_tools import process_image, process_image_srn, get_img_data +logger = initial_logger() class LMDBReader(object): @@ -268,7 +264,7 @@ class SimpleReader(object): infer_mode=True) yield norm_img else: - with open(self.label_file_path, "rb") as fin: + with open(self.label_file_path, "r", encoding="utf-8") as fin: label_infor_list = fin.readlines() img_num = len(label_infor_list) img_id_list = list(range(img_num)) @@ -285,8 +281,8 @@ class SimpleReader(object): self.num_workers)) for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] - substr = label_infor.decode('utf-8').strip("\n").split("\t") - img_path = self.img_set_dir + "/" + substr[0] + substr = label_infor.strip("\n").strip().split() + img_path = os.path.join(self.img_set_dir, substr[0]) img = cv2.imread(img_path) if img is None: logger.info("{} does not exist!".format(img_path)) -- GitLab