import os

import numpy as np
import pytest

# NOTE(review): this section was recovered from a whitespace-mangled patch of
# tests/ut/python/mindrecord/test_mindrecord_exception.py.  `FileWriter` and
# `remove_one_file` are not defined in the visible portion and are expected to
# be provided by the rest of the test module.  The patch's context lines (the
# tail of test_mindpage_filename_not_exist, `from utils import get_data`,
# `from mindspore import log as logger`) belong to parts of the module that
# are not shown here and are therefore not reproduced.


def _build_valid_schema():
    """Return a fresh copy of the well-formed six-field schema used by these tests."""
    return {"file_name": {"type": "string"},
            "label": {"type": "int32"},
            "score": {"type": "float64"},
            "mask": {"type": "int64", "shape": [-1]},
            "segments": {"type": "float32", "shape": [2, 2]},
            "data": {"type": "bytes"}}


def _build_valid_records():
    """Return six freshly built records whose fields match _build_valid_schema()."""
    rows = (
        ("001.jpg", 43, 0.8, [3, 6, 9], [[5.0, 1.6], [65.2, 8.3]], "image bytes abc"),
        ("002.jpg", 91, 5.4, [1, 4, 7], [[5.1, 9.1], [2.0, 65.4]], "image bytes def"),
        ("003.jpg", 61, 6.4, [7, 6, 3], [[0.0, 5.6], [3.0, 16.3]], "image bytes ghi"),
        ("004.jpg", 29, 8.1, [2, 8, 0], [[5.9, 7.2], [4.0, 89.0]], "image bytes jkl"),
        ("005.jpg", 78, 7.7, [3, 1, 2], [[0.6, 8.1], [5.3, 49.3]], "image bytes mno"),
        ("006.jpg", 37, 9.4, [7, 6, 7], [[4.2, 6.3], [8.9, 81.8]], "image bytes pqr"),
    )
    return [{"file_name": name,
             "label": label,
             "score": score,
             "mask": np.array(mask, dtype=np.int64),
             "segments": np.array(segments, dtype=np.float32),
             "data": bytes(payload, encoding='UTF-8')}
            for name, label, score, mask, segments, payload in rows]


def _rename_field(records, old_name, new_name):
    """Return copies of `records` with key `old_name` renamed to `new_name` (key order kept)."""
    return [{(new_name if key == old_name else key): value for key, value in record.items()}
            for record in records]


def _replace_field(records, field, values):
    """Return copies of `records` where record[field] is set to the matching item of `values`.

    An existing key keeps its position; a new key is appended at the end.
    """
    replaced = []
    for record, value in zip(records, values):
        updated = dict(record)
        updated[field] = value
        replaced.append(updated)
    return replaced


def _drop_field(records, field):
    """Return copies of `records` without the key `field`."""
    return [{key: value for key, value in record.items() if key != field}
            for record in records]


def _remove_if_exists(path):
    """Delete `path` when it exists; do nothing otherwise."""
    if os.path.exists(path):
        os.remove(path)


def _write_dataset(file_name, records):
    """Clear leftovers of `file_name`, then write `records` with the valid schema and commit."""
    remove_one_file(file_name)
    remove_one_file(file_name + ".db")

    writer = FileWriter(file_name)
    writer.add_schema(_build_valid_schema(), "data is so cool")
    writer.write_raw_data(records)
    writer.commit()


def test_invalid_schema():
    """Check that each malformed schema is rejected.

    Every case starts from the valid schema and replaces exactly one field
    description.  The add_schema / write / commit sequence must raise an
    exception whose message matches "Schema format is error".
    """
    mindrecord_file_name = "test.mindrecord"
    writer = FileWriter(mindrecord_file_name)
    # The original referenced an undefined `data`; use well-formed records so
    # that an unexpectedly accepted schema surfaces as "DID NOT RAISE" instead
    # of a NameError.
    data = _build_valid_records()

    invalid_fields = [
        # string => str
        ("file_name", {"type": "str"}),
        # int32 => np.int32
        ("label", {"type": "np.int32"}),
        # float64 => np.float64
        ("score", {"type": "np.float64"}),
        # int64 => int8
        ("mask", {"type": "int8", "shape": [-1]}),
        # int64 => uint64
        ("mask", {"type": "uint64", "shape": [-1]}),
        # bytes => byte (file_name stays "string" so only this field is wrong;
        # the original also misspelled file_name's type as "strint")
        ("data", {"type": "byte"}),
        # float32 => float3
        ("segments", {"type": "float3", "shape": [2, 88]}),
        # string with shape
        ("file_name", {"type": "string", "shape": [-1]}),
        # bytes with shape
        ("data", {"type": "bytes", "shape": [100]}),
    ]

    for field, bad_spec in invalid_fields:
        schema = _build_valid_schema()
        schema[field] = bad_spec
        with pytest.raises(Exception, match="Schema format is error"):
            writer.add_schema(schema, "data is so cool")
            writer.write_raw_data(data)
            writer.commit()

        # A rejected schema may leave no files behind, so removal must tolerate
        # missing paths.
        _remove_if_exists("{}".format(mindrecord_file_name))
        _remove_if_exists("{}.db".format(mindrecord_file_name))


def test_write_with_invalid_data():
    """Check that records which do not match the schema fail to be written.

    Every invalid dataset differs from the valid records in one respect and
    must raise an exception matching "Failed to write dataset".  Records that
    carry an additional, undeclared field are expected to be written without
    an exception.
    """
    mindrecord_file_name = "test.mindrecord"
    fresh = _build_valid_records  # each case gets its own arrays

    invalid_datasets = [
        # field: file_name => filename
        _rename_field(fresh(), "file_name", "filename"),
        # field: mask => masks
        _rename_field(fresh(), "mask", "masks"),
        # field: data => image
        _rename_field(fresh(), "data", "image"),
        # field: label => lable
        _rename_field(fresh(), "label", "lable"),
        # field: score => scores
        _rename_field(fresh(), "score", "scores"),
        # string type with int value
        _replace_field(fresh(), "file_name", range(1, 7)),
        # label is declared int32, but the real data is string
        _replace_field(fresh(), "label", ["cat", "dog", "bird", "mouse", "tiger", "lion"]),
        # bytes field is string
        _replace_field(fresh(), "data", [record["data"].decode("UTF-8") for record in fresh()]),
        # field is not numpy type
        _replace_field(fresh(), "mask", [record["mask"].tolist() for record in fresh()]),
        # not enough field
        _drop_field(fresh(), "label"),
    ]

    for records in invalid_datasets:
        with pytest.raises(Exception, match="Failed to write dataset"):
            _write_dataset(mindrecord_file_name, records)

    # more field is ok
    _write_dataset(mindrecord_file_name, _replace_field(fresh(), "test", range(6)))

    remove_one_file(mindrecord_file_name)
    remove_one_file(mindrecord_file_name + ".db")