Commit 06ed9ffd authored by mindspore-ci-bot, committed by Gitee

!3183 Remove files on test fail for test_minddataset*.py

Merge pull request !3183 from tony_liu2/master
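Each hunk below applies the same fix: a test's final assertions are wrapped in try/except/else so the MindRecord files (and their .db index files) generated by the test are removed whether the assertions pass or fail. Previously, a failing assertion skipped the trailing os.remove() calls and left stale files behind that could break later runs. A minimal sketch of the pattern, with illustrative names:

    import os

    def check_and_clean(record_file, num_iter):
        # Minimal sketch of the cleanup pattern this PR applies throughout;
        # 'record_file' and 'num_iter' are stand-ins for the real test state.
        try:
            assert num_iter == 0
        except Exception as error:
            os.remove(record_file)  # clean up the generated file even on failure
            raise error             # re-raise so pytest still reports the failure
        else:
            os.remove(record_file)  # success path: same cleanup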
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert num_iter == 0
-    os.remove('dummy.mindrecord')
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove('dummy.mindrecord')
+        raise error
+    else:
+        os.remove('dummy.mindrecord')


 def test_minddataset_lack_db():
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert num_iter == 0
-    os.remove(CV_FILE_NAME)
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)


 def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_minddataset_invalidate_shard_id():
     create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
     num_iter = 0
     for _ in data_set.create_dict_iterator():
         num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error

     with pytest.raises(Exception) as error_info:
         data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))


 def test_cv_minddataset_partition_num_samples_equals_0():
     """tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
         num_iter += 1
     with pytest.raises(Exception) as error_info:
         partitions(5)
-    assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+
+
+if __name__ == '__main__':
+    test_cv_lack_json()
+    test_cv_lack_mindrecord()
+    test_invalid_mindrecord()
+    test_minddataset_lack_db()
+    test_cv_minddataset_pk_sample_error_class_column()
+    test_cv_minddataset_pk_sample_exclusive_shuffle()
+    test_cv_minddataset_reader_different_schema()
+    test_cv_minddataset_reader_different_page_size()
+    test_minddataset_invalidate_num_shards()
+    test_minddataset_invalidate_shard_id()
+    test_minddataset_shard_id_bigger_than_num_shard()
+    test_cv_minddataset_partition_num_samples_equals_0()
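Since the except and else branches run identical os.remove() calls, the same guarantee could be written more compactly with try/finally; the explicit except/raise form chosen in this PR spells out the failure path. A hypothetical try/finally equivalent, not the form the PR uses:

    import os

    def check_and_clean(record_file, num_iter):
        # Sketch only: try/finally runs the cleanup on success and failure
        # alike, and an assertion error still propagates to pytest.
        try:
            assert num_iter == 0
        finally:
            if os.path.exists(record_file):
                os.remove(record_file)

The next file, the multi-image and ndarray tutorial test, gets the same treatment: its whole body moves into a single try block, with the duplicated cleanup in the except and else branches.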
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"

 def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "image_0": {"type": "bytes"},
-                      "image_2": {"type": "bytes"},
-                      "image_3": {"type": "bytes"},
-                      "image_4": {"type": "bytes"},
-                      "input_mask": {"type": "int32", "shape": [-1]},
-                      "segments": {"type": "float32", "shape": [2, 3]}}
-    writer.add_schema(cv_schema_json, "two_images_schema")
-    with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
-        img_data = file_reader.read()
-    ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
-    ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
-    data = []
-    for i in range(5):
-        item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
-                "input_mask": ndarray_1, "segments": ndarray_2}
-        data.append(item)
-    writer.write_raw_data(data)
-    writer.commit()
-    assert os.path.exists(CV_FILE_NAME)
-    assert os.path.exists(CV_FILE_NAME + ".db")
-
-    # tutorial for minderdataset.
-    columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
-    num_readers = 1
-    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
-    assert data_set.get_dataset_size() == 5
-    num_iter = 0
-    for item in data_set.create_dict_iterator():
-        assert len(item) == 7
-        logger.info("item: {}".format(item))
-        assert item["image_0"].dtype == np.uint8
-        assert (item["image_0"] == item["image_2"]).all()
-        assert (item["image_3"] == item["image_4"]).all()
-        assert (item["image_0"] == item["image_4"]).all()
-        assert item["image_2"].dtype == np.uint8
-        assert item["image_3"].dtype == np.uint8
-        assert item["image_4"].dtype == np.uint8
-        assert item["id"].dtype == np.int32
-        assert item["input_mask"].shape == (5,)
-        assert item["input_mask"].dtype == np.int32
-        assert item["segments"].shape == (2, 3)
-        assert item["segments"].dtype == np.float32
-        num_iter += 1
-    assert num_iter == 5
-
-    if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
-        os.remove(CV_FILE_NAME + ".db")
-    if os.path.exists("{}".format(CV_FILE_NAME)):
-        os.remove(CV_FILE_NAME)
+    try:
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "image_0": {"type": "bytes"},
+                          "image_2": {"type": "bytes"},
+                          "image_3": {"type": "bytes"},
+                          "image_4": {"type": "bytes"},
+                          "input_mask": {"type": "int32", "shape": [-1]},
+                          "segments": {"type": "float32", "shape": [2, 3]}}
+        writer.add_schema(cv_schema_json, "two_images_schema")
+        with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
+            img_data = file_reader.read()
+        ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
+        ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
+        data = []
+        for i in range(5):
+            item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
+                    "input_mask": ndarray_1, "segments": ndarray_2}
+            data.append(item)
+        writer.write_raw_data(data)
+        writer.commit()
+        assert os.path.exists(CV_FILE_NAME)
+        assert os.path.exists(CV_FILE_NAME + ".db")
+
+        # tutorial for minderdataset.
+        columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
+        num_readers = 1
+        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
+        assert data_set.get_dataset_size() == 5
+        num_iter = 0
+        for item in data_set.create_dict_iterator():
+            assert len(item) == 7
+            logger.info("item: {}".format(item))
+            assert item["image_0"].dtype == np.uint8
+            assert (item["image_0"] == item["image_2"]).all()
+            assert (item["image_3"] == item["image_4"]).all()
+            assert (item["image_0"] == item["image_4"]).all()
+            assert item["image_2"].dtype == np.uint8
+            assert item["image_3"].dtype == np.uint8
+            assert item["image_4"].dtype == np.uint8
+            assert item["id"].dtype == np.int32
+            assert item["input_mask"].shape == (5,)
+            assert item["input_mask"].dtype == np.int32
+            assert item["segments"].shape == (2, 3)
+            assert item["segments"].dtype == np.float32
+            num_iter += 1
+        assert num_iter == 5
+    except Exception as error:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
+
+
+if __name__ == '__main__':
+    test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
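Note that this test's cleanup guards every os.remove() with os.path.exists(), so a failure before writer.commit() — when the record file or its .db index may not exist yet — cannot turn into a secondary FileNotFoundError during cleanup. A small helper expressing that idiom (hypothetical, not part of the PR):

    import os

    def remove_if_exists(path):
        # Avoid FileNotFoundError when cleanup runs before the file was created.
        if os.path.exists(path):
            os.remove(path)

The remaining hunks apply the same pattern to the pytest fixtures shared by the padded-sample and sampler tests.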
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
-        os.remove("{}.db".format(x)) if os.path.exists(
-            "{}.db".format(x)) else None
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
+            os.remove("{}.db".format(x)) if os.path.exists(
+                "{}.db".format(x)) else None
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))


 @pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
     """add/remove nlp file"""
     paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
-    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
-    nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
-                       "rating": {"type": "float32"},
-                       "input_ids": {"type": "int64",
-                                     "shape": [-1]},
-                       "input_mask": {"type": "int64",
-                                      "shape": [1, -1]},
-                       "segment_ids": {"type": "int64",
-                                       "shape": [2, -1]}
-                       }
-    writer.set_header_size(1 << 14)
-    writer.set_page_size(1 << 15)
-    writer.add_schema(nlp_schema_json, "nlp_schema")
-    writer.add_index(["id", "rating"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_nlp_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
+        nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
+                           "rating": {"type": "float32"},
+                           "input_ids": {"type": "int64",
+                                         "shape": [-1]},
+                           "input_mask": {"type": "int64",
+                                          "shape": [1, -1]},
+                           "segment_ids": {"type": "int64",
+                                           "shape": [2, -1]}
+                           }
+        writer.set_header_size(1 << 14)
+        writer.set_page_size(1 << 15)
+        writer.add_schema(nlp_schema_json, "nlp_schema")
+        writer.add_index(["id", "rating"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_nlp_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))


 def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
                                               encoding='utf8')
             assert item['label'] == padded_sample['label']
             assert (item['data'] == np.array(list(padded_sample['data']))).all()
-            num_iter += 1
+        num_iter += 1
     assert num_padded_iter == 5
     assert num_iter == 15
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
     mask = [1] * length + [0] * (maxlen - length)
     segment = [0] * maxlen
     return input_, mask, segment
+
+
+if __name__ == '__main__':
+    test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
+    test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
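The if __name__ == '__main__': blocks added at the end of each file allow the tests to be invoked as plain scripts. Note that add_and_remove_cv_file and add_and_remove_nlp_file are passed in there as raw function objects: outside of pytest the fixtures are never executed, so a direct run only satisfies the parameter and relies on the data files already being in place rather than on fixture setup and teardown.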
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME, True)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME, True)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))


 def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
         except FileNotFoundError:
             continue
     return data_list
+
+
+if __name__ == '__main__':
+    test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
+    test_cv_minddataset_split_basic(add_and_remove_cv_file)
+    test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
+    test_cv_minddataset_split_sharding(add_and_remove_cv_file)