diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py index 67cbf9b53ad7b877be8985d76627cdf97d49f423..79167a6e5f45f665b30aef7976faeae3067d462b 100755 --- a/ppocr/data/rec/dataset_traversal.py +++ b/ppocr/data/rec/dataset_traversal.py @@ -13,20 +13,16 @@ #limitations under the License. import os -import sys -import math import random -import numpy as np -import cv2 +import sys -import string +import cv2 import lmdb -from ppocr.utils.utility import initial_logger from ppocr.utils.utility import get_image_file_list -logger = initial_logger() - +from ppocr.utils.utility import initial_logger from .img_tools import process_image, process_image_srn, get_img_data +logger = initial_logger() class LMDBReader(object): @@ -268,7 +264,7 @@ class SimpleReader(object): infer_mode=True) yield norm_img else: - with open(self.label_file_path, "rb") as fin: + with open(self.label_file_path, "r", encoding="utf-8") as fin: label_infor_list = fin.readlines() img_num = len(label_infor_list) img_id_list = list(range(img_num)) @@ -285,8 +281,8 @@ class SimpleReader(object): self.num_workers)) for img_id in range(process_id, img_num, self.num_workers): label_infor = label_infor_list[img_id_list[img_id]] - substr = label_infor.decode('utf-8').strip("\n").split("\t") - img_path = self.img_set_dir + "/" + substr[0] + substr = label_infor.strip("\n").strip().split() + img_path = os.path.join(self.img_set_dir, substr[0]) img = cv2.imread(img_path) if img is None: logger.info("{} does not exist!".format(img_path))