Commit 06ed9ffd authored by mindspore-ci-bot, committed by Gitee

!3183 Remove files on test fail for test_minddataset*.py

Merge pull request !3183 from tony_liu2/master
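The change applies one pattern throughout: each test's final assertions are wrapped in try/except/else so that the MindRecord files the test generates are removed whether it passes or fails. A minimal sketch of the idiom (the helper name and file list are illustrative, not from the PR):

import os

def check_and_cleanup(check, files):
    try:
        check()              # the test's assertions
    except Exception as error:
        for f in files:      # test failed: remove generated files,
            os.remove(f)     # then re-raise so pytest still reports it
        raise error
    else:
        for f in files:      # test passed: same cleanup
            os.remove(f)

Where the two branches are identical, a single finally block would behave the same; except/else also lets the paths diverge, which the PR uses in test_minddataset_shard_id_bigger_than_num_shard below, where the first check cleans up only on failure because a second check still needs the files.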
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert num_iter == 0
-    os.remove('dummy.mindrecord')
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove('dummy.mindrecord')
+        raise error
+    else:
+        os.remove('dummy.mindrecord')
 def test_minddataset_lack_db():
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert num_iter == 0
-    os.remove(CV_FILE_NAME)
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
 def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_minddataset_invalidate_shard_id():
     create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
     with pytest.raises(Exception) as error_info:
         data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_cv_minddataset_partition_num_samples_equals_0():
     """tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
                 num_iter += 1
     with pytest.raises(Exception) as error_info:
         partitions(5)
-    assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+if __name__ == '__main__':
+    test_cv_lack_json()
+    test_cv_lack_mindrecord()
+    test_invalid_mindrecord()
+    test_minddataset_lack_db()
+    test_cv_minddataset_pk_sample_error_class_column()
+    test_cv_minddataset_pk_sample_exclusive_shuffle()
+    test_cv_minddataset_reader_different_schema()
+    test_cv_minddataset_reader_different_page_size()
+    test_minddataset_invalidate_num_shards()
+    test_minddataset_invalidate_shard_id()
+    test_minddataset_shard_id_bigger_than_num_shard()
+    test_cv_minddataset_partition_num_samples_equals_0()
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
 def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "image_0": {"type": "bytes"},
-                      "image_2": {"type": "bytes"},
-                      "image_3": {"type": "bytes"},
-                      "image_4": {"type": "bytes"},
-                      "input_mask": {"type": "int32", "shape": [-1]},
-                      "segments": {"type": "float32", "shape": [2, 3]}}
-    writer.add_schema(cv_schema_json, "two_images_schema")
-    with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
-        img_data = file_reader.read()
-    ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
-    ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
-    data = []
-    for i in range(5):
-        item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
-                "input_mask": ndarray_1, "segments": ndarray_2}
-        data.append(item)
-    writer.write_raw_data(data)
-    writer.commit()
-    assert os.path.exists(CV_FILE_NAME)
-    assert os.path.exists(CV_FILE_NAME + ".db")
-    # tutorial for minderdataset.
-    columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
-    num_readers = 1
-    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
-    assert data_set.get_dataset_size() == 5
-    num_iter = 0
-    for item in data_set.create_dict_iterator():
-        assert len(item) == 7
-        logger.info("item: {}".format(item))
-        assert item["image_0"].dtype == np.uint8
-        assert (item["image_0"] == item["image_2"]).all()
-        assert (item["image_3"] == item["image_4"]).all()
-        assert (item["image_0"] == item["image_4"]).all()
-        assert item["image_2"].dtype == np.uint8
-        assert item["image_3"].dtype == np.uint8
-        assert item["image_4"].dtype == np.uint8
-        assert item["id"].dtype == np.int32
-        assert item["input_mask"].shape == (5,)
-        assert item["input_mask"].dtype == np.int32
-        assert item["segments"].shape == (2, 3)
-        assert item["segments"].dtype == np.float32
-        num_iter += 1
-    assert num_iter == 5
-    if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
-        os.remove(CV_FILE_NAME + ".db")
-    if os.path.exists("{}".format(CV_FILE_NAME)):
-        os.remove(CV_FILE_NAME)
+    try:
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "image_0": {"type": "bytes"},
+                          "image_2": {"type": "bytes"},
+                          "image_3": {"type": "bytes"},
+                          "image_4": {"type": "bytes"},
+                          "input_mask": {"type": "int32", "shape": [-1]},
+                          "segments": {"type": "float32", "shape": [2, 3]}}
+        writer.add_schema(cv_schema_json, "two_images_schema")
+        with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
+            img_data = file_reader.read()
+        ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
+        ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
+        data = []
+        for i in range(5):
+            item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
+                    "input_mask": ndarray_1, "segments": ndarray_2}
+            data.append(item)
+        writer.write_raw_data(data)
+        writer.commit()
+        assert os.path.exists(CV_FILE_NAME)
+        assert os.path.exists(CV_FILE_NAME + ".db")
+        # tutorial for minderdataset.
+        columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
+        num_readers = 1
+        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
+        assert data_set.get_dataset_size() == 5
+        num_iter = 0
+        for item in data_set.create_dict_iterator():
+            assert len(item) == 7
+            logger.info("item: {}".format(item))
+            assert item["image_0"].dtype == np.uint8
+            assert (item["image_0"] == item["image_2"]).all()
+            assert (item["image_3"] == item["image_4"]).all()
+            assert (item["image_0"] == item["image_4"]).all()
+            assert item["image_2"].dtype == np.uint8
+            assert item["image_3"].dtype == np.uint8
+            assert item["image_4"].dtype == np.uint8
+            assert item["id"].dtype == np.int32
+            assert item["input_mask"].shape == (5,)
+            assert item["input_mask"].dtype == np.int32
+            assert item["segments"].shape == (2, 3)
+            assert item["segments"].dtype == np.float32
+            num_iter += 1
+        assert num_iter == 5
+    except Exception as error:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
 if __name__ == '__main__':
     test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
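The schema in the hunk above mixes raw "bytes" image columns with shaped ndarray columns: a shape of [-1] declares a variable-length one-dimensional field (the test later sees input_mask come back as shape (5,)), while a fixed shape such as [2, 3] is expected to match every written row. A minimal standalone sketch of the same FileWriter usage (assuming the standard mindspore.mindrecord API; "variable.mindrecord" is a hypothetical path):

import numpy as np
from mindspore.mindrecord import FileWriter

writer = FileWriter("variable.mindrecord", 1)  # one shard, hypothetical file name
schema = {"tokens": {"type": "int32", "shape": [-1]},     # variable-length 1-D field
          "matrix": {"type": "float32", "shape": [2, 3]}}  # fixed 2x3 per row
writer.add_schema(schema, "example_schema")
writer.write_raw_data([{"tokens": np.array([1, 2, 3], np.int32),
                        "matrix": np.zeros((2, 3), np.float32)}])
writer.commit()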
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
-        os.remove("{}.db".format(x)) if os.path.exists(
-            "{}.db".format(x)) else None
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
+            os.remove("{}.db".format(x)) if os.path.exists(
+                "{}.db".format(x)) else None
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
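The generator fixtures get the same treatment: if FileWriter raises before the yield, the except branch removes any partially written shards; otherwise the else branch removes them after the dependent tests finish. A hedged alternative sketch using pytest's addfinalizer, which registers cleanup that runs no matter how fixture setup or the test exits (fixture and path names here are hypothetical):

import os
import pytest

@pytest.fixture
def cv_files(request):
    paths = ["./imagenet.mindrecord{}".format(x) for x in range(4)]  # hypothetical shard names

    def cleanup():
        for p in paths:
            for f in (p, p + ".db"):
                if os.path.exists(f):
                    os.remove(f)

    request.addfinalizer(cleanup)  # registered first, so it runs even if setup below fails
    # ... write the shards with FileWriter here, as in the fixture above ...
    return paths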
@pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
     """add/remove nlp file"""
     paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
-    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
-    nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
-                       "rating": {"type": "float32"},
-                       "input_ids": {"type": "int64",
-                                     "shape": [-1]},
-                       "input_mask": {"type": "int64",
-                                      "shape": [1, -1]},
-                       "segment_ids": {"type": "int64",
-                                       "shape": [2, -1]}
-                       }
-    writer.set_header_size(1 << 14)
-    writer.set_page_size(1 << 15)
-    writer.add_schema(nlp_schema_json, "nlp_schema")
-    writer.add_index(["id", "rating"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_nlp_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
+        nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
+                           "rating": {"type": "float32"},
+                           "input_ids": {"type": "int64",
+                                         "shape": [-1]},
+                           "input_mask": {"type": "int64",
+                                          "shape": [1, -1]},
+                           "segment_ids": {"type": "int64",
+                                           "shape": [2, -1]}
+                           }
+        writer.set_header_size(1 << 14)
+        writer.set_page_size(1 << 15)
+        writer.add_schema(nlp_schema_json, "nlp_schema")
+        writer.add_index(["id", "rating"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_nlp_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
 def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
                                                 encoding='utf8')
             assert item['label'] == padded_sample['label']
             assert (item['data'] == np.array(list(padded_sample['data']))).all()
-            num_iter += 1
+        num_iter += 1
     assert num_padded_iter == 5
     assert num_iter == 15
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
     mask = [1] * length + [0] * (maxlen - length)
     segment = [0] * maxlen
     return input_, mask, segment
+
+if __name__ == '__main__':
+    test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
+    test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
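For context on the tail of inputs() visible in this hunk: it pads a token list to maxlen and builds a matching 0/1 mask, with segment ids all zero. A self-contained sketch (the first two lines of the body are assumptions inferred from the visible tail, since the function head is elided in the diff):

def inputs(vectors, maxlen=50):
    length = len(vectors)                 # assumed: elided in the diff
    if length > maxlen:                   # assumed truncation branch
        return vectors[:maxlen], [1] * maxlen, [0] * maxlen
    input_ = vectors + [0] * (maxlen - length)
    mask = [1] * length + [0] * (maxlen - length)
    segment = [0] * maxlen
    return input_, mask, segment

# e.g. inputs([7, 8, 9], maxlen=5) -> ([7, 8, 9, 0, 0], [1, 1, 1, 0, 0], [0, 0, 0, 0, 0])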
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME, True)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME, True)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
 def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
         except FileNotFoundError:
             continue
     return data_list
+
+if __name__ == '__main__':
+    test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
+    test_cv_minddataset_split_basic(add_and_remove_cv_file)
+    test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
+    test_cv_minddataset_split_sharding(add_and_remove_cv_file)