未验证 提交 07026825 编写于 作者: Z zhoujun 提交者: GitHub

Merge pull request #3289 from WenmuZhou/fx_pse

fix a bug of train log not save and add filepath check to dataset
...@@ -46,6 +46,7 @@ class SimpleDataSet(Dataset): ...@@ -46,6 +46,7 @@ class SimpleDataSet(Dataset):
self.seed = seed self.seed = seed
logger.info("Initialize indexs of datasets:%s" % label_file_list) logger.info("Initialize indexs of datasets:%s" % label_file_list)
self.data_lines = self.get_image_info_list(label_file_list, ratio_list) self.data_lines = self.get_image_info_list(label_file_list, ratio_list)
self.check_data()
self.data_idx_order_list = list(range(len(self.data_lines))) self.data_idx_order_list = list(range(len(self.data_lines)))
if self.mode == "train" and self.do_shuffle: if self.mode == "train" and self.do_shuffle:
self.shuffle_data_random() self.shuffle_data_random()
...@@ -102,16 +103,8 @@ class SimpleDataSet(Dataset): ...@@ -102,16 +103,8 @@ class SimpleDataSet(Dataset):
def __getitem__(self, idx): def __getitem__(self, idx):
file_idx = self.data_idx_order_list[idx] file_idx = self.data_idx_order_list[idx]
data_line = self.data_lines[file_idx] data = self.data_lines[file_idx]
try: try:
data_line = data_line.decode('utf-8')
substr = data_line.strip("\n").strip("\r").split(self.delimiter)
file_name = substr[0]
label = substr[1]
img_path = os.path.join(self.data_dir, file_name)
data = {'img_path': img_path, 'label': label}
if not os.path.exists(img_path):
raise Exception("{} does not exist!".format(img_path))
with open(data['img_path'], 'rb') as f: with open(data['img_path'], 'rb') as f:
img = f.read() img = f.read()
data['image'] = img data['image'] = img
...@@ -120,8 +113,8 @@ class SimpleDataSet(Dataset): ...@@ -120,8 +113,8 @@ class SimpleDataSet(Dataset):
except: except:
error_meg = traceback.format_exc() error_meg = traceback.format_exc()
self.logger.error( self.logger.error(
"When parsing line {}, error happened with msg: {}".format( "When parsing file {} and label {}, error happened with msg: {}".format(
data_line, error_meg)) data['img_path'],data['label'], error_meg))
outs = None outs = None
if outs is None: if outs is None:
# during evaluation, we should fix the idx to get same results for many times of evaluation. # during evaluation, we should fix the idx to get same results for many times of evaluation.
...@@ -132,3 +125,17 @@ class SimpleDataSet(Dataset): ...@@ -132,3 +125,17 @@ class SimpleDataSet(Dataset):
def __len__(self): def __len__(self):
return len(self.data_idx_order_list) return len(self.data_idx_order_list)
def check_data(self):
new_data_lines = []
for data_line in self.data_lines:
data_line = data_line.decode('utf-8')
substr = data_line.strip("\n").strip("\r").split(self.delimiter)
file_name = substr[0]
label = substr[1]
img_path = os.path.join(self.data_dir, file_name)
if os.path.exists(img_path):
new_data_lines.append({'img_path': img_path, 'label': label})
else:
self.logger.info("{} does not exist!".format(img_path))
self.data_lines = new_data_lines
\ No newline at end of file
...@@ -24,9 +24,6 @@ from paddle import inference ...@@ -24,9 +24,6 @@ from paddle import inference
import time import time
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
logger = get_logger()
def str2bool(v): def str2bool(v):
return v.lower() in ("true", "t", "1") return v.lower() in ("true", "t", "1")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册