Commit 06ed9ffd authored by mindspore-ci-bot, committed by Gitee

!3183 Remove files on test fail for test_minddataset*.py

Merge pull request !3183 from tony_liu2/master
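The change applies one pattern throughout: each test's final assertions are wrapped in try/except/else so that the MindRecord files the test generates are removed whether it passes or fails. A minimal sketch of the idiom (the helper name and file list are illustrative, not from the PR):

import os

def check_and_cleanup(check, files):
    try:
        check()              # the test's assertions
    except Exception as error:
        for f in files:      # test failed: remove generated files,
            os.remove(f)     # then re-raise so pytest still reports it
        raise error
    else:
        for f in files:      # test passed: same cleanup
            os.remove(f)

Where the two branches are identical, a single finally block would behave the same; except/else also lets the paths diverge, which the PR uses in test_minddataset_shard_id_bigger_than_num_shard below, where the first check cleans up only on failure because a second check still needs the files.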
@@ -99,8 +99,13 @@ def test_invalid_mindrecord():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert num_iter == 0
-    os.remove('dummy.mindrecord')
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove('dummy.mindrecord')
+        raise error
+    else:
+        os.remove('dummy.mindrecord')
 def test_minddataset_lack_db():
@@ -113,8 +118,13 @@ def test_minddataset_lack_db():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert num_iter == 0
-    os.remove(CV_FILE_NAME)
+    try:
+        assert num_iter == 0
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
 def test_cv_minddataset_pk_sample_error_class_column():
@@ -189,10 +199,16 @@ def test_minddataset_invalidate_num_shards():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_minddataset_invalidate_shard_id():
     create_cv_mindrecord(1)
@@ -203,9 +219,15 @@ def test_minddataset_invalidate_shard_id():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 0).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_minddataset_shard_id_bigger_than_num_shard():
@@ -217,17 +239,28 @@ def test_minddataset_shard_id_bigger_than_num_shard():
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
     with pytest.raises(Exception) as error_info:
         data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, True, 2, 5)
         num_iter = 0
         for _ in data_set.create_dict_iterator():
             num_iter += 1
-    assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'Input shard_id is not within the required interval of (0 to 1).' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
 def test_cv_minddataset_partition_num_samples_equals_0():
     """tutorial for cv minddataset."""
@@ -245,7 +278,26 @@ def test_cv_minddataset_partition_num_samples_equals_0():
                 num_iter += 1
     with pytest.raises(Exception) as error_info:
         partitions(5)
-    assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
-    os.remove(CV_FILE_NAME)
-    os.remove("{}.db".format(CV_FILE_NAME))
+    try:
+        assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
+    except Exception as error:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+        raise error
+    else:
+        os.remove(CV_FILE_NAME)
+        os.remove("{}.db".format(CV_FILE_NAME))
+if __name__ == '__main__':
+    test_cv_lack_json()
+    test_cv_lack_mindrecord()
+    test_invalid_mindrecord()
+    test_minddataset_lack_db()
+    test_cv_minddataset_pk_sample_error_class_column()
+    test_cv_minddataset_pk_sample_exclusive_shuffle()
+    test_cv_minddataset_reader_different_schema()
+    test_cv_minddataset_reader_different_page_size()
+    test_minddataset_invalidate_num_shards()
+    test_minddataset_invalidate_shard_id()
+    test_minddataset_shard_id_bigger_than_num_shard()
+    test_cv_minddataset_partition_num_samples_equals_0()
@@ -27,54 +27,64 @@ CV_FILE_NAME = "./complex.mindrecord"
 def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial():
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "image_0": {"type": "bytes"},
-                      "image_2": {"type": "bytes"},
-                      "image_3": {"type": "bytes"},
-                      "image_4": {"type": "bytes"},
-                      "input_mask": {"type": "int32", "shape": [-1]},
-                      "segments": {"type": "float32", "shape": [2, 3]}}
-    writer.add_schema(cv_schema_json, "two_images_schema")
-    with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
-        img_data = file_reader.read()
-    ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
-    ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
-    data = []
-    for i in range(5):
-        item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
-                "input_mask": ndarray_1, "segments": ndarray_2}
-        data.append(item)
-    writer.write_raw_data(data)
-    writer.commit()
-    assert os.path.exists(CV_FILE_NAME)
-    assert os.path.exists(CV_FILE_NAME + ".db")
-    # tutorial for minderdataset.
-    columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
-    num_readers = 1
-    data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
-    assert data_set.get_dataset_size() == 5
-    num_iter = 0
-    for item in data_set.create_dict_iterator():
-        assert len(item) == 7
-        logger.info("item: {}".format(item))
-        assert item["image_0"].dtype == np.uint8
-        assert (item["image_0"] == item["image_2"]).all()
-        assert (item["image_3"] == item["image_4"]).all()
-        assert (item["image_0"] == item["image_4"]).all()
-        assert item["image_2"].dtype == np.uint8
-        assert item["image_3"].dtype == np.uint8
-        assert item["image_4"].dtype == np.uint8
-        assert item["id"].dtype == np.int32
-        assert item["input_mask"].shape == (5,)
-        assert item["input_mask"].dtype == np.int32
-        assert item["segments"].shape == (2, 3)
-        assert item["segments"].dtype == np.float32
-        num_iter += 1
-    assert num_iter == 5
-    if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
-        os.remove(CV_FILE_NAME + ".db")
-    if os.path.exists("{}".format(CV_FILE_NAME)):
-        os.remove(CV_FILE_NAME)
+    try:
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "image_0": {"type": "bytes"},
+                          "image_2": {"type": "bytes"},
+                          "image_3": {"type": "bytes"},
+                          "image_4": {"type": "bytes"},
+                          "input_mask": {"type": "int32", "shape": [-1]},
+                          "segments": {"type": "float32", "shape": [2, 3]}}
+        writer.add_schema(cv_schema_json, "two_images_schema")
+        with open("../data/mindrecord/testImageNetData/images/image_00010.jpg", "rb") as file_reader:
+            img_data = file_reader.read()
+        ndarray_1 = np.array([1, 2, 3, 4, 5], np.int32)
+        ndarray_2 = np.array(([2, 3, 1], [7, 9, 0]), np.float32)
+        data = []
+        for i in range(5):
+            item = {"id": i, "image_0": img_data, "image_2": img_data, "image_3": img_data, "image_4": img_data,
+                    "input_mask": ndarray_1, "segments": ndarray_2}
+            data.append(item)
+        writer.write_raw_data(data)
+        writer.commit()
+        assert os.path.exists(CV_FILE_NAME)
+        assert os.path.exists(CV_FILE_NAME + ".db")
+        # tutorial for minderdataset.
+        columns_list = ["id", "image_0", "image_2", "image_3", "image_4", "input_mask", "segments"]
+        num_readers = 1
+        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers)
+        assert data_set.get_dataset_size() == 5
+        num_iter = 0
+        for item in data_set.create_dict_iterator():
+            assert len(item) == 7
+            logger.info("item: {}".format(item))
+            assert item["image_0"].dtype == np.uint8
+            assert (item["image_0"] == item["image_2"]).all()
+            assert (item["image_3"] == item["image_4"]).all()
+            assert (item["image_0"] == item["image_4"]).all()
+            assert item["image_2"].dtype == np.uint8
+            assert item["image_3"].dtype == np.uint8
+            assert item["image_4"].dtype == np.uint8
+            assert item["id"].dtype == np.int32
+            assert item["input_mask"].shape == (5,)
+            assert item["input_mask"].dtype == np.int32
+            assert item["segments"].shape == (2, 3)
+            assert item["segments"].dtype == np.float32
+            num_iter += 1
+        assert num_iter == 5
+    except Exception as error:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
+        raise error
+    else:
+        if os.path.exists("{}".format(CV_FILE_NAME + ".db")):
+            os.remove(CV_FILE_NAME + ".db")
+        if os.path.exists("{}".format(CV_FILE_NAME)):
+            os.remove(CV_FILE_NAME)
 if __name__ == '__main__':
     test_cv_minddataset_reader_multi_image_and_ndarray_tutorial()
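The schema in the hunk above mixes raw "bytes" image columns with shaped ndarray columns: a shape of [-1] declares a variable-length one-dimensional field (the test later sees input_mask come back as shape (5,)), while a fixed shape such as [2, 3] is expected to match every written row. A minimal standalone sketch of the same FileWriter usage (assuming the standard mindspore.mindrecord API; "variable.mindrecord" is a hypothetical path):

import numpy as np
from mindspore.mindrecord import FileWriter

writer = FileWriter("variable.mindrecord", 1)  # one shard, hypothetical file name
schema = {"tokens": {"type": "int32", "shape": [-1]},     # variable-length 1-D field
          "matrix": {"type": "float32", "shape": [2, 3]}}  # fixed 2x3 per row
writer.add_schema(schema, "example_schema")
writer.write_raw_data([{"tokens": np.array([1, 2, 3], np.int32),
                        "matrix": np.zeros((2, 3), np.float32)}])
writer.commit()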
@@ -44,24 +44,31 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
-        os.remove("{}.db".format(x)) if os.path.exists(
-            "{}.db".format(x)) else None
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            os.remove("{}".format(x)) if os.path.exists("{}".format(x)) else None
+            os.remove("{}.db".format(x)) if os.path.exists(
+                "{}.db".format(x)) else None
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
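The generator fixtures get the same treatment: if FileWriter raises before the yield, the except branch removes any partially written shards; otherwise the else branch removes them after the dependent tests finish. A hedged alternative sketch using pytest's addfinalizer, which registers cleanup that runs no matter how fixture setup or the test exits (fixture and path names here are hypothetical):

import os
import pytest

@pytest.fixture
def cv_files(request):
    paths = ["./imagenet.mindrecord{}".format(x) for x in range(4)]  # hypothetical shard names

    def cleanup():
        for p in paths:
            for f in (p, p + ".db"):
                if os.path.exists(f):
                    os.remove(f)

    request.addfinalizer(cleanup)  # registered first, so it runs even if setup below fails
    # ... write the shards with FileWriter here, as in the fixture above ...
    return paths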
@pytest.fixture
@@ -69,32 +76,39 @@ def add_and_remove_nlp_file():
     """add/remove nlp file"""
     paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
-    data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
-    nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
-                       "rating": {"type": "float32"},
-                       "input_ids": {"type": "int64",
-                                     "shape": [-1]},
-                       "input_mask": {"type": "int64",
-                                      "shape": [1, -1]},
-                       "segment_ids": {"type": "int64",
-                                       "shape": [2, -1]}
-                       }
-    writer.set_header_size(1 << 14)
-    writer.set_page_size(1 << 15)
-    writer.add_schema(nlp_schema_json, "nlp_schema")
-    writer.add_index(["id", "rating"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_nlp_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
+        data = [x for x in get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10)]
+        nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "int32"},
+                           "rating": {"type": "float32"},
+                           "input_ids": {"type": "int64",
+                                         "shape": [-1]},
+                           "input_mask": {"type": "int64",
+                                          "shape": [1, -1]},
+                           "segment_ids": {"type": "int64",
+                                           "shape": [2, -1]}
+                           }
+        writer.set_header_size(1 << 14)
+        writer.set_page_size(1 << 15)
+        writer.add_schema(nlp_schema_json, "nlp_schema")
+        writer.add_index(["id", "rating"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_nlp_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
 def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -119,7 +133,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file):
                                                 encoding='utf8')
             assert item['label'] == padded_sample['label']
             assert (item['data'] == np.array(list(padded_sample['data']))).all()
-            num_iter += 1
+        num_iter += 1
     assert num_padded_iter == 5
     assert num_iter == 15
@@ -636,3 +650,17 @@ def inputs(vectors, maxlen=50):
     mask = [1] * length + [0] * (maxlen - length)
     segment = [0] * maxlen
     return input_, mask, segment
+
+if __name__ == '__main__':
+    test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_dataset_size_no_divisible(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv_file)
+    test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remove_cv_file)
+    test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_nlp_file)
+    test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_result_per_epoch(add_and_remove_nlp_file)
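For context on the tail of inputs() visible in this hunk: it pads a token list to maxlen and builds a matching 0/1 mask, with segment ids all zero. A self-contained sketch (the first two lines of the body are assumptions inferred from the visible tail, since the function head is elided in the diff):

def inputs(vectors, maxlen=50):
    length = len(vectors)                 # assumed: elided in the diff
    if length > maxlen:                   # assumed truncation branch
        return vectors[:maxlen], [1] * maxlen, [0] * maxlen
    input_ = vectors + [0] * (maxlen - length)
    mask = [1] * length + [0] * (maxlen - length)
    segment = [0] * maxlen
    return input_, mask, segment

# e.g. inputs([7, 8, 9], maxlen=5) -> ([7, 8, 9, 0, 0], [1, 1, 1, 0, 0], [0, 0, 0, 0, 0])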
@@ -34,26 +34,32 @@ def add_and_remove_cv_file():
     """add/remove cv file"""
     paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
              for x in range(FILES_NUM)]
-    for x in paths:
-        if os.path.exists("{}".format(x)):
-            os.remove("{}".format(x))
-        if os.path.exists("{}.db".format(x)):
-            os.remove("{}.db".format(x))
-    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
-    data = get_data(CV_DIR_NAME, True)
-    cv_schema_json = {"id": {"type": "int32"},
-                      "file_name": {"type": "string"},
-                      "label": {"type": "int32"},
-                      "data": {"type": "bytes"}}
-    writer.add_schema(cv_schema_json, "img_schema")
-    writer.add_index(["file_name", "label"])
-    writer.write_raw_data(data)
-    writer.commit()
-    yield "yield_cv_data"
-    for x in paths:
-        os.remove("{}".format(x))
-        os.remove("{}.db".format(x))
+    try:
+        for x in paths:
+            if os.path.exists("{}".format(x)):
+                os.remove("{}".format(x))
+            if os.path.exists("{}.db".format(x)):
+                os.remove("{}.db".format(x))
+        writer = FileWriter(CV_FILE_NAME, FILES_NUM)
+        data = get_data(CV_DIR_NAME, True)
+        cv_schema_json = {"id": {"type": "int32"},
+                          "file_name": {"type": "string"},
+                          "label": {"type": "int32"},
+                          "data": {"type": "bytes"}}
+        writer.add_schema(cv_schema_json, "img_schema")
+        writer.add_index(["file_name", "label"])
+        writer.write_raw_data(data)
+        writer.commit()
+        yield "yield_cv_data"
+    except Exception as error:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
+        raise error
+    else:
+        for x in paths:
+            os.remove("{}".format(x))
+            os.remove("{}.db".format(x))
 def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file):
     """tutorial for cv minderdataset."""
@@ -626,3 +632,24 @@ def get_data(dir_name, sampler=False):
         except FileNotFoundError:
             continue
     return data_list
+
+if __name__ == '__main__':
+    test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file)
+    test_cv_minddataset_pk_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file)
+    test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file)
+    test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file)
+    test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file)
+    test_cv_minddataset_split_basic(add_and_remove_cv_file)
+    test_cv_minddataset_split_exact_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file)
+    test_cv_minddataset_split_deterministic(add_and_remove_cv_file)
+    test_cv_minddataset_split_sharding(add_and_remove_cv_file)