diff --git a/tests/milvus_python_test/collection/test_collection.py b/tests/milvus_python_test/collection/test_collection.py deleted file mode 100644 index fe46e47521ec1e1e00c65989828e1845e151718c..0000000000000000000000000000000000000000 --- a/tests/milvus_python_test/collection/test_collection.py +++ /dev/null @@ -1,583 +0,0 @@ -import pdb -import pytest -import logging -import itertools -from time import sleep -from multiprocessing import Process -from milvus import IndexType, MetricType -from utils import * - -dim = 128 -default_segment_size = 1024 -drop_collection_interval_time = 3 -segment_size = 10 -vectors = gen_vectors(100, dim) -default_fields = gen_default_fields() - - -class TestCollection: - - """ - ****************************************************************** - The following cases are used to test `create_collection` function - ****************************************************************** - """ - @pytest.fixture( - scope="function", - params=gen_single_filter_fields() - ) - def get_filter_field(self, request): - yield request.param - - @pytest.fixture( - scope="function", - params=gen_single_vector_fields() - ) - def get_vector_field(self, request): - yield request.param - - @pytest.fixture( - scope="function", - params=gen_segment_sizes() - ) - def get_segment_size(self, request): - yield request.param - - """ - ****************************************************************** - The following cases are used to test `get_collection_info` function - ****************************************************************** - """ - - def test_collection_describe_result(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 
'metric_type': MetricType.L2} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.L2 - - @pytest.mark.level(2) - def test_collection_get_collection_info_name_ip(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.IP} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.IP - - @pytest.mark.level(2) - def test_collection_get_collection_info_name_jaccard(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.JACCARD} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.JACCARD - - @pytest.mark.level(2) - def test_collection_get_collection_info_name_hamming(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': 
collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.HAMMING} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.HAMMING - - def test_collection_get_collection_info_name_substructure(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.SUBSTRUCTURE} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.SUBSTRUCTURE - - def test_collection_get_collection_info_name_superstructure(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.SUPERSTRUCTURE} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.collection_name == collection_name - assert res.metric_type == MetricType.SUPERSTRUCTURE - - # TODO: enable - @pytest.mark.level(2) - def _test_collection_get_collection_info_name_multiprocessing(self, connect, args): - ''' - target: test describe collection created with multiprocess - method: create collection, assert the value returned by describe method - expected: collection_name equals with the collection 
name created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.L2} - connect.create_collection(param) - - def describecollection(milvus): - status, res = milvus.get_collection_info(collection_name) - assert res.collection_name == collection_name - - process_num = 4 - processes = [] - for i in range(process_num): - milvus = get_milvus(args["ip"], args["port"], handler=args["handler"]) - p = Process(target=describecollection, args=(milvus,)) - processes.append(p) - p.start() - for p in processes: - p.join() - - def test_collection_describe_dimension(self, connect): - ''' - target: test describe collection created with correct params - method: create collection, assert the dimention value returned by describe method - expected: dimention equals with dimention when created - ''' - collection_name = gen_unique_str("test_collection") - param = {'collection_name': collection_name, - 'dimension': dim+1, - 'segment_size': segment_size, - 'metric_type': MetricType.L2} - connect.create_collection(param) - status, res = connect.get_collection_info(collection_name) - assert res.dimension == dim+1 - - """ - ****************************************************************** - The following cases are used to test `drop_collection` function - ****************************************************************** - """ - - def test_drop_collection(self, connect, collection): - ''' - target: test delete collection created with correct params - method: create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - status = connect.drop_collection(collection) - assert not assert_has_collection(connect, collection) - - @pytest.mark.level(2) - def test_drop_collection_ip(self, connect, ip_collection): - ''' - target: test delete collection created with correct params - method: 
create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - status = connect.drop_collection(ip_collection) - assert not assert_has_collection(connect, ip_collection) - - @pytest.mark.level(2) - def test_drop_collection_jaccard(self, connect, jac_collection): - ''' - target: test delete collection created with correct params - method: create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - status = connect.drop_collection(jac_collection) - assert not assert_has_collection(connect, jac_collection) - - @pytest.mark.level(2) - def test_drop_collection_hamming(self, connect, ham_collection): - ''' - target: test delete collection created with correct params - method: create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - status = connect.drop_collection(ham_collection) - assert not assert_has_collection(connect, ham_collection) - - # @pytest.mark.level(2) - # def test_collection_delete_without_connection(self, collection, dis_connect): - # ''' - # target: test describe collection, without connection - # method: describe collection with correct params, with a disconnected instance - # expected: describe raise exception - # ''' - # with pytest.raises(Exception) as e: - # status = dis_connect.drop_collection(collection) - - def test_drop_collection_not_existed(self, connect): - ''' - target: test delete collection not in index - method: delete all collections, and delete collection again, - assert the value returned by delete method - expected: status not ok - ''' - collection_name = gen_unique_str("test_collection") - status = connect.drop_collection(collection_name) - assert not status.OK() - - def test_delete_create_collection_repeatedly(self, connect): - ''' - target: test delete and create the same collection 
repeatedly - method: try to create the same collection and delete repeatedly, - assert the value returned by delete method - expected: create ok and delete ok - ''' - loops = 2 - timeout = 5 - for i in range(loops): - collection_name = "test_collection" - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': segment_size, - 'metric_type': MetricType.L2} - connect.create_collection(param) - status = None - while i < timeout: - status = connect.drop_collection(collection_name) - time.sleep(1) - i += 1 - if status.OK(): - break - if i > timeout: - assert False - - # TODO: enable - @pytest.mark.level(2) - def _test_drop_collection_multiprocessing(self, args): - ''' - target: test delete collection with multiprocess - method: create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - process_num = 6 - processes = [] - def deletecollection(milvus): - status = milvus.drop_collection(collection) - # assert not status.code==0 - assert assert_has_collection(milvus, collection) - assert status.OK() - - for i in range(process_num): - milvus = get_milvus(args["ip"], args["port"], handler=args["handler"]) - p = Process(target=deletecollection, args=(milvus,)) - processes.append(p) - p.start() - for p in processes: - p.join() - - # TODO: enable - @pytest.mark.level(2) - def _test_drop_collection_multiprocessing_multicollection(self, connect): - ''' - target: test delete collection with multiprocess - method: create collection and then delete, - assert the value returned by delete method - expected: status ok, and no collection in collections - ''' - process_num = 5 - loop_num = 2 - processes = [] - - collection = [] - j = 0 - while j < (process_num*loop_num): - collection_name = gen_unique_str("test_drop_collection_with_multiprocessing") - collection.append(collection_name) - param = {'collection_name': collection_name, - 'dimension': dim, - 'segment_size': 
segment_size, - 'metric_type': MetricType.L2} - connect.create_collection(param) - j = j + 1 - - def delete(connect,ids): - i = 0 - while i < loop_num: - status = connect.drop_collection(collection[ids*process_num+i]) - time.sleep(2) - assert status.OK() - assert not assert_has_collection(connect, collection[ids*process_num+i]) - i = i + 1 - - for i in range(process_num): - ids = i - p = Process(target=delete, args=(connect,ids)) - processes.append(p) - p.start() - for p in processes: - p.join() - - """ - ****************************************************************** - The following cases are used to test `load_collection` function - ****************************************************************** - """ - - """ - generate valid create_index params - """ - @pytest.fixture( - scope="function", - params=gen_simple_index() - ) - def get_simple_index(self, request, connect): - if str(connect._cmd("mode")[1]) == "CPU": - if request.param["index_type"] == IndexType.IVF_SQ8H: - pytest.skip("sq8h not support in cpu mode") - if request.param["index_type"] == IndexType.IVF_PQ: - pytest.skip("Skip PQ Temporary") - return request.param - - @pytest.mark.level(1) - def test_load_collection(self, connect, collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status, ids = connect.insert(collection, vectors) - status = connect.create_index(collection, index_type, index_param) - status = connect.load_collection(collection) - assert status.OK() - - @pytest.mark.level(1) - def test_load_collection_ip(self, connect, ip_collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status, ids = connect.insert(ip_collection, vectors) - status = connect.create_index(ip_collection, index_type, index_param) - status = connect.load_collection(ip_collection) - assert status.OK() - - @pytest.mark.level(1) - def test_load_collection_jaccard(self, 
connect, jac_collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status, ids = connect.insert(jac_collection, vectors) - status = connect.create_index(jac_collection, index_type, index_param) - status = connect.load_collection(jac_collection) - assert status.OK() - - @pytest.mark.level(1) - def test_load_collection_hamming(self, connect, ham_collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status, ids = connect.insert(ham_collection, vectors) - status = connect.create_index(ham_collection, index_type, index_param) - status = connect.load_collection(ham_collection) - assert status.OK() - - @pytest.mark.level(2) - def test_load_collection_not_existed(self, connect, collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - collection_name = gen_unique_str() - status, ids = connect.insert(collection, vectors) - status = connect.create_index(collection, index_type, index_param) - status = connect.load_collection(collection_name) - assert not status.OK() - - @pytest.mark.level(2) - def test_load_collection_not_existed_ip(self, connect, ip_collection, get_simple_index): - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - collection_name = gen_unique_str() - status, ids = connect.insert(ip_collection, vectors) - status = connect.create_index(ip_collection, index_type, index_param) - status = connect.load_collection(collection_name) - assert not status.OK() - - @pytest.mark.level(1) - def test_load_collection_no_vectors(self, connect, collection): - status = connect.load_collection(collection) - assert status.OK() - - @pytest.mark.level(2) - def test_load_collection_no_vectors_ip(self, connect, ip_collection): - status = connect.load_collection(ip_collection) - assert status.OK() - - # TODO: psutils get memory 
usage - @pytest.mark.level(1) - def test_load_collection_memory_usage(self, connect, collection): - pass - - -def create_collection(connect, **params): - param = {'collection_name': params["collection_name"], - 'dimension': params["dimension"], - 'segment_size': segment_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - return status - -def search_collection(connect, **params): - status, result = connect.search( - params["collection_name"], - params["top_k"], - params["query_vectors"], - params={"nprobe": params["nprobe"]}) - return status - -def load_collection(connect, **params): - status = connect.load_collection(params["collection_name"]) - return status - -def has(connect, **params): - status, result = connect.has_collection(params["collection_name"]) - return status - -def show(connect, **params): - status, result = connect.list_collections() - return status - -def delete(connect, **params): - status = connect.drop_collection(params["collection_name"]) - return status - -def describe(connect, **params): - status, result = connect.get_collection_info(params["collection_name"]) - return status - -def rowcount(connect, **params): - status, result = connect.count_entities(params["collection_name"]) - return status - -def create_index(connect, **params): - status = connect.create_index(params["collection_name"], params["index_type"], params["index_param"]) - return status - -func_map = { - # 0:has, - 1:show, - 10:create_collection, - 11:describe, - 12:rowcount, - 13:search_collection, - 14:load_collection, - 15:create_index, - 30:delete -} - -def gen_sequence(): - raw_seq = func_map.keys() - result = itertools.permutations(raw_seq) - for x in result: - yield x - -class TestCollectionLogic(object): - @pytest.mark.parametrize("logic_seq", gen_sequence()) - @pytest.mark.level(2) - def test_logic(self, connect, logic_seq, args): - if args["handler"] == "HTTP": - pytest.skip("Skip in http mode") - if self.is_right(logic_seq): - 
self.execute(logic_seq, connect) - else: - self.execute_with_error(logic_seq, connect) - self.tear_down(connect) - - def is_right(self, seq): - if sorted(seq) == seq: - return True - - not_created = True - has_deleted = False - for i in range(len(seq)): - if seq[i] > 10 and not_created: - return False - elif seq [i] > 10 and has_deleted: - return False - elif seq[i] == 10: - not_created = False - elif seq[i] == 30: - has_deleted = True - - return True - - def execute(self, logic_seq, connect): - basic_params = self.gen_params() - for i in range(len(logic_seq)): - # logging.getLogger().info(logic_seq[i]) - f = func_map[logic_seq[i]] - status = f(connect, **basic_params) - assert status.OK() - - def execute_with_error(self, logic_seq, connect): - basic_params = self.gen_params() - - error_flag = False - for i in range(len(logic_seq)): - f = func_map[logic_seq[i]] - status = f(connect, **basic_params) - if not status.OK(): - # logging.getLogger().info(logic_seq[i]) - error_flag = True - break - assert error_flag == True - - def tear_down(self, connect): - names = connect.list_collections()[1] - for name in names: - connect.drop_collection(name) - - def gen_params(self): - collection_name = gen_unique_str("test_collection") - top_k = 1 - vectors = gen_vectors(2, dim) - param = {'collection_name': collection_name, - 'dimension': dim, - 'metric_type': MetricType.L2, - 'nprobe': 1, - 'top_k': top_k, - 'index_type': IndexType.IVF_SQ8, - 'index_param': { - 'nlist': 16384 - }, - 'query_vectors': vectors} - return param diff --git a/tests/milvus_python_test/collection/test_collection_logic.py b/tests/milvus_python_test/collection/test_collection_logic.py new file mode 100644 index 0000000000000000000000000000000000000000..612c8725d6a5b8a7351fd239b56ddf83ed565b8e --- /dev/null +++ b/tests/milvus_python_test/collection/test_collection_logic.py @@ -0,0 +1,145 @@ +import pdb +import pytest +import logging +import itertools +from time import sleep +from multiprocessing import 
Process +from milvus import IndexType, MetricType +from utils import * + +dim = 128 +default_segment_size = 1024 +drop_collection_interval_time = 3 +segment_size = 10 +vectors = gen_vectors(100, dim) +default_fields = gen_default_fields() + + +def create_collection(connect, **params): + connect.create_collection(params["collection_name"], default_fields) + +def search_collection(connect, **params): + status, result = connect.search( + params["collection_name"], + params["top_k"], + params["query_vectors"], + params={"nprobe": params["nprobe"]}) + return status + +def load_collection(connect, **params): + connect.load_collection(params["collection_name"]) + +def has(connect, **params): + status, result = connect.has_collection(params["collection_name"]) + return status + +def show(connect, **params): + status, result = connect.list_collections() + return status + +def delete(connect, **params): + status = connect.drop_collection(params["collection_name"]) + return status + +def describe(connect, **params): + status, result = connect.get_collection_info(params["collection_name"]) + return status + +def rowcount(connect, **params): + status, result = connect.count_entities(params["collection_name"]) + return status + +def create_index(connect, **params): + status = connect.create_index(params["collection_name"], params["index_type"], params["index_param"]) + return status + +func_map = { + # 0:has, + 1:show, + 10:create_collection, + 11:describe, + 12:rowcount, + 13:search_collection, + 14:load_collection, + 15:create_index, + 30:delete +} + +def gen_sequence(): + raw_seq = func_map.keys() + result = itertools.permutations(raw_seq) + for x in result: + yield x + + +class TestCollectionLogic(object): + @pytest.mark.parametrize("logic_seq", gen_sequence()) + @pytest.mark.level(2) + def _test_logic(self, connect, logic_seq, args): + if args["handler"] == "HTTP": + pytest.skip("Skip in http mode") + if self.is_right(logic_seq): + self.execute(logic_seq, connect) + else: + 
self.execute_with_error(logic_seq, connect) + self.tear_down(connect) + + def is_right(self, seq): + if sorted(seq) == seq: + return True + + not_created = True + has_deleted = False + for i in range(len(seq)): + if seq[i] > 10 and not_created: + return False + elif seq [i] > 10 and has_deleted: + return False + elif seq[i] == 10: + not_created = False + elif seq[i] == 30: + has_deleted = True + + return True + + def execute(self, logic_seq, connect): + basic_params = self.gen_params() + for i in range(len(logic_seq)): + # logging.getLogger().info(logic_seq[i]) + f = func_map[logic_seq[i]] + status = f(connect, **basic_params) + assert status.OK() + + def execute_with_error(self, logic_seq, connect): + basic_params = self.gen_params() + + error_flag = False + for i in range(len(logic_seq)): + f = func_map[logic_seq[i]] + status = f(connect, **basic_params) + if not status.OK(): + # logging.getLogger().info(logic_seq[i]) + error_flag = True + break + assert error_flag == True + + def tear_down(self, connect): + names = connect.list_collections()[1] + for name in names: + connect.drop_collection(name) + + def gen_params(self): + collection_name = gen_unique_str("test_collection") + top_k = 1 + vectors = gen_vectors(2, dim) + param = {'collection_name': collection_name, + 'dimension': dim, + 'metric_type': MetricType.L2, + 'nprobe': 1, + 'top_k': top_k, + 'index_type': IndexType.IVF_SQ8, + 'index_param': { + 'nlist': 16384 + }, + 'query_vectors': vectors} + return param diff --git a/tests/milvus_python_test/collection/test_create_collection.py b/tests/milvus_python_test/collection/test_create_collection.py index b1c06d23e7dfcc54fe99d795229e10443b71d669..7b2c55363c907674877d7c52d7e3d599bfbf097a 100644 --- a/tests/milvus_python_test/collection/test_create_collection.py +++ b/tests/milvus_python_test/collection/test_create_collection.py @@ -1,9 +1,11 @@ import pdb -import pytest +import copy import logging import itertools from time import sleep from multiprocessing 
import Process + +import pytest from milvus import IndexType, MetricType from utils import * @@ -15,7 +17,7 @@ vectors = gen_vectors(100, dim) default_fields = gen_default_fields() -class TestCollection: +class TestCreateCollection: """ ****************************************************************** @@ -104,6 +106,27 @@ class TestCollection: finally: enable_flush(connect) + def test_create_collection_after_insert(self, connect, collection): + ''' + target: test insert vector, then create collection again + method: insert vector and create collection + expected: error raised + ''' + connect.insert(collection, entities) + with pytest.raises(Exception) as e: + connect.create_collection(collection, default_fields) + + def test_create_collection_after_insert_flush(self, connect, collection): + ''' + target: test insert vector, then create collection again + method: insert vector and create collection + expected: error raised + ''' + connect.insert(collection, entities) + connect.flush([collection]) + with pytest.raises(Exception) as e: + connect.create_collection(collection, default_fields) + # TODO: assert exception @pytest.mark.level(2) def test_create_collection_without_connection(self, dis_connect): @@ -182,11 +205,17 @@ class TestCreateCollectionInvalid(object): @pytest.fixture( scope="function", - params=gen_invalid_collection_names() + params=gen_invalid_strings() ) - def get_collection_name(self, request): + def get_invalid_string(self, request): yield request.param + @pytest.fixture( + scope="function", + params=gen_invalid_field_types() + ) + def get_field_type(self, request): + yield request.param @pytest.mark.level(2) def test_create_collection_with_invalid_segment_size(self, connect, get_segment_size): @@ -209,12 +238,12 @@ class TestCreateCollectionInvalid(object): dimension = get_dim collection_name = gen_unique_str() fields = copy.deepcopy(default_fields) - fields["fields"][-1]["dimension"] = dimension + 
fields["fields"][-1]["extra_params"]["dimension"] = dimension with pytest.raises(Exception) as e: connect.create_collection(collection_name, fields) @pytest.mark.level(2) - def test_create_collection_with_invalid_collectionname(self, connect, get_collection_name): + def test_create_collection_with_invalid_collectionname(self, connect, get_invalid_string): collection_name = get_collection_name with pytest.raises(Exception) as e: connect.create_collection(collection_name, default_fields) @@ -248,7 +277,7 @@ class TestCreateCollectionInvalid(object): ''' collection_name = gen_unique_str("test_collection") fields = copy.deepcopy(default_fields) - fields["fields"][-1].pop("dimension") + fields["fields"][-1]["extra_params"].pop("dimension") with pytest.raises(Exception) as e: connect.create_collection(collection_name, fields) @@ -279,3 +308,35 @@ class TestCreateCollectionInvalid(object): res = connect.get_collection_info(collection_name) logging.getLogger().info(res) # assert result.metric_type == MetricType.L2 + + # TODO: assert exception + def test_create_collection_limit_fields(self, connect): + collection_name = gen_unique_str("test_collection") + limit_num = 64 + fields = copy.deepcopy(default_fields) + for i in range(limit_num): + field_name = gen_unique_str("field_name") + field = {"field": field_name, "type": DataType.INT8} + fields["fields"].append(field) + with pytest.raises(Exception) as e: + connect.create_collection(collection_name, fields) + + # TODO: assert exception + def test_create_collection_invalid_field_name(self, connect, get_invalid_string): + collection_name = gen_unique_str("test_collection") + fields = copy.deepcopy(default_fields) + field_name = get_invalid_string + field = {"field": field_name, "type": DataType.INT8} + fields["fields"].append(field) + with pytest.raises(Exception) as e: + connect.create_collection(collection_name, fields) + + # TODO: assert exception + def test_create_collection_invalid_field_type(self, connect, 
get_field_type): + collection_name = gen_unique_str("test_collection") + fields = copy.deepcopy(default_fields) + field_type = get_field_type + field = {"field": "test_field", "type": field_type} + fields["fields"].append(field) + with pytest.raises(Exception) as e: + connect.create_collection(collection_name, fields) diff --git a/tests/milvus_python_test/collection/test_drop_collection.py b/tests/milvus_python_test/collection/test_drop_collection.py new file mode 100644 index 0000000000000000000000000000000000000000..af457c4cae788f82d3289bee16808698de0bfa58 --- /dev/null +++ b/tests/milvus_python_test/collection/test_drop_collection.py @@ -0,0 +1,80 @@ +import pdb +import pytest +import logging +import itertools +from time import sleep +from multiprocessing import Process +from milvus import IndexType, MetricType +from utils import * + +uniq_id = "test_drop_collection" +default_fields = gen_default_fields() + + +class TestDropCollection: + + """ + ****************************************************************** + The following cases are used to test `drop_collection` function + ****************************************************************** + """ + def test_drop_collection(self, connect, collection): + ''' + target: test delete collection created with correct params + method: create collection and then delete, + assert the value returned by delete method + expected: status ok, and no collection in collections + ''' + connect.drop_collection(collection) + assert not assert_has_collection(connect, collection) + + @pytest.mark.level(2) + def test_drop_collection_without_connection(self, collection, dis_connect): + ''' + target: test describe collection, without connection + method: drop collection with correct params, with a disconnected instance + expected: drop raise exception + ''' + with pytest.raises(Exception) as e: + dis_connect.drop_collection(collection) + + def test_drop_collection_not_existed(self, connect): + ''' + target: test if collection not 
created + method: random a collection name, which not existed in db, + assert the exception raised returned by drp_collection method + expected: False + ''' + collection_name = gen_unique_str(uniq_id) + with pytest.raises(Exception) as e: + assert not assert_has_collection(connect, collection_name) + + +class TestDropCollectionInvalid(object): + """ + Test has collection with invalid params + """ + @pytest.fixture( + scope="function", + params=gen_invalid_collection_names() + ) + def get_collection_name(self, request): + yield request.param + + @pytest.mark.level(2) + def test_drop_collection_with_invalid_collectionname(self, connect, get_collection_name): + collection_name = get_collection_name + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) + + @pytest.mark.level(2) + def test_drop_collection_with_empty_collectionname(self, connect): + collection_name = '' + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) + + @pytest.mark.level(2) + def test_drop_collection_with_none_collectionname(self, connect): + collection_name = None + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) diff --git a/tests/milvus_python_test/collection/test_load_collection.py b/tests/milvus_python_test/collection/test_load_collection.py new file mode 100644 index 0000000000000000000000000000000000000000..c6b31815ea074108c5488af13c746783fa8aa097 --- /dev/null +++ b/tests/milvus_python_test/collection/test_load_collection.py @@ -0,0 +1,99 @@ +import pdb +import pytest +import logging +import itertools +from time import sleep +from multiprocessing import Process +from milvus import IndexType, MetricType +from utils import * + +uniq_id = "test_load_collection" +index_name = "load_index_name" +default_fields = gen_default_fields() +entities = gen_entities(6000) + + +class TestLoadCollection: + + """ + ****************************************************************** + The following cases are used to test 
`load_collection` function + ****************************************************************** + """ + @pytest.fixture( + scope="function", + params=gen_simple_index() + ) + def get_simple_index(self, request, connect): + if str(connect._cmd("mode")[1]) == "CPU": + if request.param["index_type"] == IndexType.IVF_SQ8H: + pytest.skip("sq8h not support in cpu mode") + if request.param["index_type"] == IndexType.IVF_PQ: + pytest.skip("Skip PQ Temporary") + return request.param + + def test_load_collection_after_index(self, connect, collection, get_simple_index): + ''' + target: test load collection, after index created + method: insert and create index, load collection with correct params + expected: describe raise exception + ''' + connect.insert(collection, entities) + connect.flush([collection]) + field_name = "fload_vector" + connect.create_index(collection, field_name, index_name, get_simple_index) + connect.load_collection(collection) + + def load_empty_collection(self, connect, collection): + ''' + target: test load collection + method: no entities in collection, load collection with correct params + expected: load success + ''' + connect.load_collection(collection) + + @pytest.mark.level(1) + def test_load_collection_dis_connect(self, dis_connect, collection): + ''' + target: test load collection, without connection + method: load collection with correct params, with a disconnected instance + expected: load raise exception + ''' + with pytest.raises(Exception) as e: + dis_connect.load_collection(collection) + + @pytest.mark.level(2) + def test_load_collection_not_existed(self, connect, collection): + collection_name = gen_unique_str() + with pytest.raises(Exception) as e: + connect.load_collection(collection_name) + + +class TestLoadCollectionInvalid(object): + """ + Test load collection with invalid params + """ + @pytest.fixture( + scope="function", + params=gen_invalid_collection_names() + ) + def get_collection_name(self, request): + yield request.param + 
+ @pytest.mark.level(2) + def test_load_collection_with_invalid_collectionname(self, connect, get_collection_name): + collection_name = get_collection_name + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) + + @pytest.mark.level(2) + def test_load_collection_with_empty_collectionname(self, connect): + collection_name = '' + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) + + @pytest.mark.level(2) + def test_load_collection_with_none_collectionname(self, connect): + collection_name = None + with pytest.raises(Exception) as e: + connect.has_collection(collection_name) diff --git a/tests/milvus_python_test/entity/test_delete.py b/tests/milvus_python_test/entity/test_delete.py index 148291c377f9167f686da11cbd29fce1c996a4cc..eaef96e855ac8c6fb720bbfbb1a134b777b2b702 100644 --- a/tests/milvus_python_test/entity/test_delete.py +++ b/tests/milvus_python_test/entity/test_delete.py @@ -1,6 +1,7 @@ import time import random import pdb +import copy import threading import logging from multiprocessing import Pool, Process @@ -10,14 +11,30 @@ from utils import * dim = 128 -index_file_size = 10 +segment_size = 10 collection_id = "test_delete" DELETE_TIMEOUT = 60 -nprobe = 1 -epsilon = 0.001 tag = "1970-01-01" -top_k = 1 nb = 6000 +field_name = "float_vector" +default_index_name = "insert_index" +entity = gen_entities(1) +binary_entity = gen_binary_entities(1) +entities = gen_entities(nb) +raw_vectors, binary_entities = gen_binary_entities(nb) +default_single_query = { + "bool": { + "must": [ + {"vector": {field_name: {"topk": 10, "query": gen_single_vector(dim), "params": {"nprobe": 10}}}} + ] + } +} + +def query_with_index(index_name): + query = copy.deepcopy(default_single_query) + query["bool"]["must"][0]["vector"]["params"].update({"index_name": default_index_name}) + return query + class TestDeleteBase: """ @@ -25,7 +42,6 @@ class TestDeleteBase: The following cases are used to test `delete_entity_by_id` function 
****************************************************************** """ - @pytest.fixture( scope="function", params=gen_simple_index() @@ -39,482 +55,388 @@ class TestDeleteBase: pytest.skip("CPU not support index_type: ivf_sq8h") return request.param - def test_delete_vector_search(self, connect, collection, get_simple_index): - ''' - target: test delete vector - method: add vector and delete - expected: status ok, vector deleted - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.delete_entity_by_id(collection, ids) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, vector, params=search_param) - logging.getLogger().info(res) - assert status.OK() - assert len(res[0]) == 0 - - def test_delete_vector_multi_same_ids(self, connect, collection, get_simple_index): - ''' - target: test delete vector, with some same ids - method: add vector and delete - expected: status ok, vector deleted - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vectors(nb, dim) - connect.insert(collection, vectors, ids=[1 for i in range(nb)]) - status = connect.flush([collection]) - # Bloom filter error - assert status.OK() - status = connect.delete_entity_by_id(collection, [1]) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, [vectors[0]], params=search_param) - logging.getLogger().info(res) - assert status.OK() - assert len(res[0]) == 0 - - def test_delete_vector_collection_count(self, connect, collection): - 
''' - target: test delete vector - method: add vector and delete - expected: status ok, vector deleted - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.delete_entity_by_id(collection, ids) - assert status.OK() - status = connect.flush([collection]) - status, res = connect.count_entities(collection) - assert status.OK() - assert res == 0 - - def test_delete_vector_collection_count_no_flush(self, connect, collection): - ''' - target: test delete vector - method: add vector and delete, no flush(using auto flush) - expected: status ok, vector deleted - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.delete_entity_by_id(collection, ids) - assert status.OK() - time.sleep(2) - status, res = connect.count_entities(collection) - assert status.OK() - assert res == 0 - - def test_delete_vector_id_not_exised(self, connect, collection, get_simple_index): - ''' - target: test delete vector, params vector_id not existed - method: add vector and delete - expected: status ok, search with vector have result - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + @pytest.fixture( + scope="function", + params=[ + 1, + 6000 + ], + ) + def insert_count(self, request): + yield request.param + + def test_delete_entity_id_not_exised(self, connect, collection): + ''' + target: test delete entity, params entity_id not existed + method: add entity and delete + expected: status DELETED + ''' + ids = connect.insert(collection, entity) + 
connect.flush([collection]) + status = connect.delete_entity_by_id(collection, [0]) + assert status + + # TODO + def test_delete_empty_collection(self, connect, collection): + ''' + target: test delete entity, params collection_name not existed + method: add entity and delete + expected: status DELETED + ''' status = connect.delete_entity_by_id(collection, [0]) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, vector, params=search_param) - assert status.OK() - assert res[0][0].id == ids[0] - - def test_delete_vector_collection_not_existed(self, connect, collection): - ''' - target: test delete vector, params collection_name not existed - method: add vector and delete - expected: status not ok - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + assert status + + def test_delete_entity_collection_not_existed(self, connect, collection): + ''' + target: test delete entity, params collection_name not existed + method: add entity and delete + expected: error raised + ''' collection_new = gen_unique_str() - status = connect.delete_entity_by_id(collection_new, [0]) - assert not status.OK() + with pytest.raises(Exception) as e: + status = connect.delete_entity_by_id(collection_new, [0]) + + def test_delete_entity_collection_not_existed(self, connect, collection): + ''' + target: test delete entity, params collection_name not existed + method: add entity and delete + expected: error raised + ''' + ids = connect.insert(collection, entity) + connect.flush([collection]) + collection_new = gen_unique_str() + with pytest.raises(Exception) as e: + status = connect.delete_entity_by_id(collection_new, [0]) + + # TODO: + def test_insert_delete(self, connect, collection, insert_count): + ''' + target: test delete entity + method: add entities and delete + 
expected: no error raised + ''' + entities = gen_entities(insert_count) + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = [ids[0]] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + + def test_insert_delete_A(self, connect, collection): + ''' + target: test delete entity + method: add entities and delete one in collection, and one not in collection + expected: no error raised + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = [ids[0], 1] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == nb - 1 - def test_insert_delete_vector(self, connect, collection, get_simple_index): + def test_insert_delete_B(self, connect, collection): ''' - method: add vectors and delete - expected: status ok, vectors deleted + target: test delete entity + method: add entities with the same ids, and delete the id in collection + expected: no error raised, all entities deleted ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + ids = [1 for i in range(nb)] + res_ids = connect.insert(collection, entities, ids) + connect.flush([collection]) + delete_ids = [1] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == 0 + + def test_delete_exceed_limit(self, connect, collection): + ''' + target: test delete entity + method: add one entity and delete two ids + expected: error raised + ''' + ids = connect.insert(collection, entity) + connect.flush([collection]) + delete_ids = [ids[0], ids[-1]] + with pytest.raises(Exception) 
as e: + status = connect.delete_entity_by_id(collection, delete_ids) + + # TODO: + def test_delete_limit_ids(self, connect, collection): + ''' + target: test delete entity + method: add one entity and delete two ids + expected: error raised + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = ids + with pytest.raises(Exception) as e: + status = connect.delete_entity_by_id(collection, delete_ids) + + # TODO + def test_flush_after_delete(self, connect, collection): + ''' + target: test delete entity + method: add entities and delete, then flush + expected: entity deleted and no error raised + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(res) - assert res[0][0].distance > epsilon - assert res[1][0].distance < epsilon - assert res[1][0].id == ids[1] - assert res[2][0].distance > epsilon + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == nb - len(delete_ids) - def test_create_index_after_delete(self, connect, collection, get_simple_index): + # TODO + def test_flush_after_delete_ip(self, connect, ip_collection): + ''' + target: test delete entity + method: add entities and delete, then flush + expected: entity deleted and no error raised + ''' + ids = connect.insert(ip_collection, entities) + connect.flush([ip_collection]) + delete_ids = [ids[0], ids[-1]] + status = connect.delete_entity_by_id(ip_collection, delete_ids) + assert status + connect.flush([ip_collection]) + res_count = connect.count_collection(ip_collection) + assert res_count == nb - 
len(delete_ids) + + # TODO + def test_flush_after_delete_jac(self, connect, jac_collection): ''' - method: add vectors and delete, then create index - expected: status ok, vectors deleted, index created + target: test delete entity + method: add entities and delete, then flush + expected: entity deleted and no error raised ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + ids = connect.insert(ip_collection, bianry_entities) + connect.flush([jac_collection]) + delete_ids = [ids[0], ids[-1]] + status = connect.delete_entity_by_id(jac_collection, delete_ids) + assert status + connect.flush([jac_collection]) + res_count = connect.count_collection(jac_collection) + assert res_count == nb - len(delete_ids) + + # TODO + def test_insert_same_ids_after_delete(self, connect, collection): + ''' + method: add entities and delete + expected: status DELETED + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - status = connect.flush([collection]) - status = connect.create_index(collection, index_type, index_param) - assert status.OK() - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(res) - logging.getLogger().info(ids[0]) - logging.getLogger().info(ids[1]) - logging.getLogger().info(ids[-1]) - assert res[0][0].id != ids[0] - assert res[1][0].id == ids[1] - assert res[2][0].id != ids[-1] - - def test_add_vector_after_delete(self, connect, collection, get_simple_index): - ''' - method: add vectors and delete, then add vector - expected: status ok, 
vectors deleted, vector added - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + assert status + new_ids = connect.insert(collection, entity, [ids[0]]) + assert new_ids == [ids[0]] + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == nb + + # TODO + @pytest.mark.level(2) + def test_insert_same_ids_after_delete_ip(self, connect, ip_collection): + ''' + method: add entities and delete + expected: status DELETED + ''' + ids = connect.insert(ip_collection, entities) + connect.flush([ip_collection]) + delete_ids = [ids[0], ids[-1]] + status = connect.delete_entity_by_id(ip_collection, delete_ids) + assert status + new_ids = connect.insert(ip_collection, entity, [ids[0]]) + assert new_ids == [ids[0]] + connect.flush([ip_collection]) + res_count = connect.count_collection(ip_collection) + assert res_count == nb + + # TODO + @pytest.mark.level(2) + def test_insert_same_ids_after_delete_jac(self, connect, jac_collection): + ''' + method: add entities and delete + expected: status DELETED + ''' + ids = connect.insert(jac_collection, binary_entities) + connect.flush([jac_collection]) + delete_ids = [ids[0], ids[-1]] + status = connect.delete_entity_by_id(jac_collection, delete_ids) + assert status + new_ids = connect.insert(jac_collection, entity, [ids[0]]) + assert new_ids == [ids[0]] + connect.flush([jac_collection]) + res_count = connect.count_collection(jac_collection) + assert res_count == nb + + # TODO: + def test_search_after_delete(self, connect, collection): + ''' + target: test delete entity + method: add entities and delete, then search + expected: entity deleted and no error raised + ''' + ids = connect.insert(collection, entities) + 
connect.flush([collection]) + delete_ids = [ids[0], ids[-1]] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + query = copy.deepcopy(default_single_query) + query["bool"]["must"][0]["vector"][field_name]["query"] = [entity, entities[-1]["values"][0], entities[-1]["values"][-1]] + res = connect.search(collection, default_single_query) + logging.getLogger().debug(res) + assert not res + # assert res[0][0].distance > epsilon + # assert res[1][0].distance < epsilon + # assert res[1][0].id == ids[1] + # assert res[2][0].distance > epsilon + + # TODO + def test_create_index_after_delete(self, connect, collection, get_simple_index): + ''' + method: add entitys and delete, then create index + expected: vectors deleted, index created + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - status = connect.flush([collection]) - status, tmp_ids = connect.insert(collection, [vectors[0], vectors[-1]]) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(res) - assert res[0][0].id == tmp_ids[0] - assert res[0][0].distance < epsilon - assert res[1][0].distance < epsilon - assert res[2][0].id == tmp_ids[-1] - assert res[2][0].distance < epsilon + connect.create_index(collection, field_name, index_name, get_simple_index) + # assert index info + # TODO def test_delete_multiable_times(self, connect, collection): ''' - method: add vectors and delete id serveral times - expected: status ok, vectors deleted, and status ok for next delete operation + method: add entities and delete id serveral times + expected: entities deleted ''' - vectors = gen_vector(nb, dim) - status, ids = 
connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - status = connect.flush([collection]) + assert status + connect.flush([collection]) for i in range(10): status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() + assert status - def test_delete_no_flush_multiable_times(self, connect, collection): + # TODO + def test_index_insert_batch_delete_get(self, connect, collection, get_simple_index): ''' - method: add vectors and delete id serveral times - expected: status ok, vectors deleted, and status ok for next delete operation + method: create index, insert entities, and delete + expected: entities deleted ''' - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() + connect.create_index(collection, field_name, index_name, get_simple_index) + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - for i in range(10): - status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - assert status.OK() - + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == nb - len(delete_ids) + res_get = connect.get_entity_by_id(collection, delete_ids) + assert not res_get + + # TODO + def test_index_insert_single_delete_get(self, connect, collection, get_simple_index): + ''' + method: create index, insert entities, and delete + expected: entities deleted + ''' + 
connect.create_index(collection, field_name, default_index_name, get_simple_index)
get_search_param(index_type) - status, res = connect.search(collection, top_k, vector, params=search_param) - logging.getLogger().info(res) - assert status.OK() - assert len(res[0]) == 0 - - def test_insert_delete_vector(self, connect, collection, get_simple_index): - ''' - method: add vectors and delete - expected: status ok, vectors deleted - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.create_index(collection, index_type, index_param) - assert status.OK() + # TODO: + def test_insert_tag_delete(self, connect, collection): + ''' + method: add entitys with given tag, delete entities with the return ids + expected: entities deleted + ''' + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] status = connect.delete_entity_by_id(collection, delete_ids) - assert status.OK() - status = connect.flush([collection]) - search_param = get_search_param(index_type) - status, res = connect.search(collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(ids[0]) - logging.getLogger().info(ids[1]) - logging.getLogger().info(ids[-1]) - logging.getLogger().info(res) - assert res[0][0].id != ids[0] - assert res[1][0].id == ids[1] - assert res[2][0].id != ids[-1] - - -class TestDeleteBinary: - """ - ****************************************************************** - The following cases are used to test `delete_entity_by_id` function - ****************************************************************** - """ - @pytest.fixture( - scope="function", - params=gen_simple_index() - ) - def 
get_simple_index(self, request, connect): - logging.getLogger().info(request.param) - if request.param["index_type"] == IndexType.IVFLAT or request.param["index_type"] == IndexType.FLAT: - return request.param - else: - pytest.skip("Skip index Temporary") - - def test_delete_vector_search(self, connect, jac_collection, get_simple_index): - ''' - target: test delete vector - method: add vector and delete - expected: status ok, vector deleted - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status = connect.delete_entity_by_id(jac_collection, ids) - assert status.OK() - status = connect.flush([jac_collection]) - search_param = get_search_param(index_type) - status, res = connect.search(jac_collection, top_k, vector, params=search_param) - logging.getLogger().info(res) - assert status.OK() - assert len(res[0]) == 0 - - # TODO: soft delete - def test_delete_vector_collection_count(self, connect, jac_collection): - ''' - target: test delete vector - method: add vector and delete - expected: status ok, vector deleted - ''' - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status = connect.delete_entity_by_id(jac_collection, ids) - assert status.OK() - status = connect.flush([jac_collection]) - status, res = connect.count_entities(jac_collection) - assert status.OK() - assert res == 0 - - def test_delete_vector_id_not_exised(self, connect, jac_collection, get_simple_index): - ''' - target: test delete vector, params vector_id not existed - method: add vector and delete - expected: status ok, search with vector have result - ''' - index_param = get_simple_index["index_param"] - index_type = 
get_simple_index["index_type"] - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status = connect.delete_entity_by_id(jac_collection, [0]) - assert status.OK() - status = connect.flush([jac_collection]) - status = connect.flush([jac_collection]) - search_param = get_search_param(index_type) - status, res = connect.search(jac_collection, top_k, vector, params=search_param) - assert status.OK() - assert res[0][0].id == ids[0] - - def test_delete_vector_collection_not_existed(self, connect, jac_collection): - ''' - target: test delete vector, params collection_name not existed - method: add vector and delete - expected: status not ok - ''' - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - collection_new = gen_unique_str() - status = connect.delete_entity_by_id(collection_new, [0]) - collection_new = gen_unique_str() - status = connect.delete_entity_by_id(collection_new, [0]) - assert not status.OK() + assert status - def test_insert_delete_vector(self, connect, jac_collection, get_simple_index): + # TODO: + def test_insert_default_tag_delete(self, connect, collection): ''' - method: add vectors and delete - expected: status ok, vectors deleted + method: add entitys, delete entities with the return ids + expected: entities deleted ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - tmp, vectors = gen_binary_vectors(nb, dim) - status, ids = connect.insert(jac_collection, vectors) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] - status = connect.delete_entity_by_id(jac_collection, delete_ids) - assert status.OK() - status = 
connect.flush([jac_collection]) - search_param = get_search_param(index_type) - status, res = connect.search(jac_collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(res) - assert res[0][0].id != ids[0] - assert res[1][0].id == ids[1] - assert res[2][0].id != ids[-1] - - def test_add_after_delete_vector(self, connect, jac_collection, get_simple_index): - ''' - method: add vectors and delete, add - expected: status ok, vectors added - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - tmp, vectors = gen_binary_vectors(nb, dim) - status, ids = connect.insert(jac_collection, vectors) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities) + connect.flush([collection]) delete_ids = [ids[0], ids[-1]] - query_vecs = [vectors[0], vectors[1], vectors[-1]] - status = connect.delete_entity_by_id(jac_collection, delete_ids) - assert status.OK() - status = connect.flush([jac_collection]) - status, tmp_ids = connect.insert(jac_collection, [vectors[0], vectors[-1]]) - assert status.OK() - status = connect.flush([jac_collection]) - search_param = get_search_param(index_type) - status, res = connect.search(jac_collection, top_k, query_vecs, params=search_param) - assert status.OK() - logging.getLogger().info(res) - assert res[0][0].id == tmp_ids[0] - assert res[1][0].id == ids[1] - assert res[2][0].id == tmp_ids[-1] - assert res[2][0].id == tmp_ids[-1] - - -class TestDeleteIdsIngalid(object): + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + + # TODO: + def test_insert_tags_delete(self, connect, collection): + ''' + method: add entitys with given two tags, delete entities with the return ids + expected: entities deleted + ''' + tag_new = "tag_new" + connect.create_partition(collection, tag) + connect.create_partition(collection, tag_new) + 
ids = connect.insert(collection, entities, partition_tag=tag) + ids_new = connect.insert(collection, entities, partition_tag=tag_new) + connect.flush([collection]) + delete_ids = [ids[0], ids_new[0]] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == 2 * (nb - 2) + + # TODO: + def test_insert_tags_index_delete(self, connect, collection, get_simple_index): + ''' + method: add entitys with given tag, create index, delete entities with the return ids + expected: entities deleted + ''' + tag_new = "tag_new" + connect.create_partition(collection, tag) + connect.create_partition(collection, tag_new) + ids = connect.insert(collection, entities, partition_tag=tag) + ids_new = connect.insert(collection, entities, partition_tag=tag_new) + connect.flush([collection]) + connect.create_index(collection, field_name, index_name, get_simple_index) + delete_ids = [ids[0], ids_new[0]] + status = connect.delete_entity_by_id(collection, delete_ids) + assert status + connect.flush([collection]) + res_count = connect.count_collection(collection) + assert res_count == 2 * (nb - 1) + + +class TestDeleteInvalid(object): single_vector = gen_single_vector(dim) """ @@ -522,38 +444,32 @@ class TestDeleteIdsIngalid(object): """ @pytest.fixture( scope="function", - params=gen_invalid_vector_ids() + params=gen_invalid_ints() ) - def gen_invalid_id(self, request): + def gen_entity_id(self, request): + yield request.param + + @pytest.fixture( + scope="function", + params=gen_invalid_strs() + ) + def get_collection_name(self, request): yield request.param @pytest.mark.level(1) - def test_delete_vector_id_invalid(self, connect, collection, gen_invalid_id): - invalid_id = gen_invalid_id + def test_delete_entity_id_invalid(self, connect, collection, gen_entity_id): + invalid_id = gen_entity_id with pytest.raises(Exception) as e: status = 
connect.delete_entity_by_id(collection, [invalid_id]) @pytest.mark.level(2) - def test_delete_vector_ids_invalid(self, connect, collection, gen_invalid_id): - invalid_id = gen_invalid_id + def test_delete_entity_ids_invalid(self, connect, collection, gen_entity_id): + invalid_id = gen_entity_id with pytest.raises(Exception) as e: status = connect.delete_entity_by_id(collection, [1, invalid_id]) - -class TestCollectionNameInvalid(object): - """ - Test adding vectors with invalid collection names - """ - @pytest.fixture( - scope="function", - params=gen_invalid_collection_names() - ) - def get_collection_name(self, request): - yield request.param - @pytest.mark.level(2) - def test_delete_vectors_with_invalid_collection_name(self, connect, get_collection_name): + def test_delete_entity_with_invalid_collection_name(self, connect, get_collection_name): collection_name = get_collection_name - status = connect.delete_entity_by_id(collection_name, [1]) - assert not status.OK() - + with pytest.raises(Exception) as e: + status = connect.delete_entity_by_id(collection_name, [1]) \ No newline at end of file diff --git a/tests/milvus_python_test/entity/test_get_entity_by_id.py b/tests/milvus_python_test/entity/test_get_entity_by_id.py index 16b6f15a112710780b0fcae2c4b87b7068bb39ec..f41d407dad2720eb8f357bad1dfefd7d17d81033 100644 --- a/tests/milvus_python_test/entity/test_get_entity_by_id.py +++ b/tests/milvus_python_test/entity/test_get_entity_by_id.py @@ -1,467 +1,582 @@ import time import random import pdb +import copy import threading import logging from multiprocessing import Pool, Process -import concurrent.futures import pytest from milvus import IndexType, MetricType from utils import * dim = 128 -index_file_size = 10 -collection_id = "get_entity_by_id" +segment_size = 10 +collection_id = "test_get" DELETE_TIMEOUT = 60 -nprobe = 1 tag = "1970-01-01" -top_k = 1 nb = 6000 -tag = "tag" +field_name = "float_entity" +default_index_name = "insert_index" +entity = 
gen_entities(1) +binary_entity = gen_binary_entities(1) +entities = gen_entities(nb) +bianry_entities = gen_binary_entities(nb) +default_single_query = { + "bool": { + "must": [ + {"vector": {field_name: {"topk": 10, "query": gen_single_entity(dim), "params": {"nprobe": 10}}}} + ] + } +} class TestGetBase: """ ****************************************************************** - The following cases are used to test .get_entity_by_id` function + The following cases are used to test `get_entity_by_id` function ****************************************************************** """ - def test_get_vector_A(self, connect, collection): + @pytest.fixture( + scope="function", + params=gen_simple_index() + ) + def get_simple_index(self, request, connect): + if str(connect._cmd("mode")[1]) == "GPU": + if request.param["index_type"] not in [IndexType.IVF_SQ8, IndexType.IVFLAT, IndexType.FLAT, IndexType.IVF_PQ, IndexType.IVF_SQ8H]: + pytest.skip("Only support index_type: idmap/ivf") + elif str(connect._cmd("mode")[1]) == "CPU": + if request.param["index_type"] in [IndexType.IVF_SQ8H]: + pytest.skip("CPU not support index_type: ivf_sq8h") + return request.param + + @pytest.fixture( + scope="function", + params=[ + 1, + 10, + 100, + 500 + ], + ) + def get_pos(self, request): + yield request.param + + def test_get_entity(self, connect, collection, get_pos): ''' - target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, ids) - assert status.OK() - assert_equal_vector(res[0], vector[0]) + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[get_pos]] + res = 
connect.get_entity_by_id(collection, get_ids) + assert_equal_entity(res[get_pos], entities[get_pos]) - def test_get_vector_B(self, connect, collection): + def test_get_entity_multi_ids(self, connect, collection, get_pos): ''' - target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned - ''' - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - length = 100 - status, res = connect.get_entity_by_id(collection, ids[:length]) - assert status.OK() - for i in range(length): - assert_equal_vector(res[i], vectors[i]) - - def test_get_vector_C_limit(self, connect, collection, args): + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) + + def test_get_entity_parts_ids(self, connect, collection): + ''' + target: test.get_entity_by_id, some ids in collection, some ids not + method: add entity, and get + expected: entity returned equals insert + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[0], 1, ids[-1]] + res = connect.get_entity_by_id(collection, get_ids) + assert_equal_entity(res[0], entities[0]) + assert_equal_entity(res[-1], entities[-1]) + assert not len(res[1]) + + def test_get_entity_limit(self, connect, collection, args): ''' target: test.get_entity_by_id - method: add vector, and get, limit > 1000 - expected: status ok, vector returned + method: add entity, and get, limit > 1000 + expected: entity returned ''' if args["handler"] == "HTTP": pytest.skip("skip in http mode") - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - 
status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, ids) - assert not status.OK() + ids = connect.insert(collection, entities) + connect.flush([collection]) + with pytest.raises(Exception) as e: + res = connect.get_entity_by_id(collection, ids) + + def test_get_entity_same_ids(self, connect, collection): + ''' + target: test.get_entity_by_id, with the same ids + method: add entity, and get one id + expected: entity returned equals insert + ''' + ids = [1 for i in range(nb)] + res_ids = connect.insert(collection, entities, ids) + connect.flush([collection]) + get_ids = [ids[0]] + res = connect.get_entity_by_id(collection, get_ids) + assert len(res) == 1 + assert_equal_entity(res[0], entities[0]) + + def test_get_entity_params_same_ids(self, connect, collection): + ''' + target: test.get_entity_by_id, with the same ids + method: add entity, and get entity with the same ids + expected: entity returned equals insert + ''' + ids = [1] + res_ids = connect.insert(collection, entity, ids) + connect.flush([collection]) + get_ids = [1, 1] + res = connect.get_entity_by_id(collection, get_ids) + assert len(res) == len(get_ids) + for i in range(len(get_ids)): + assert_equal_entity(res[i], entity) + + def test_get_entities_params_same_ids(self, connect, collection): + ''' + target: test.get_entity_by_id, with the same ids + method: add entities, and get entity with the same ids + expected: entity returned equals insert + ''' + res_ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [res_ids[0], res_ids[0]] + res = connect.get_entity_by_id(collection, get_ids) + assert len(res) == len(get_ids) + for i in range(len(get_ids)): + assert_equal_entity(res[i], entities[0]) - def test_get_vector_partition(self, connect, collection): - ''' - target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned - ''' - vectors = gen_vectors(nb, dim) - status = 
connect.create_partition(collection, tag) - assert status.OK() - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - length = 100 - status, res = connect.get_entity_by_id(collection, ids[:length]) - assert status.OK() - for i in range(length): - assert_equal_vector(res[i], vectors[i]) - - def test_get_vector_multi_same_ids(self, connect, collection): + """ + ****************************************************************** + The following cases are used to test `get_entity_by_id` function, with different metric type + ****************************************************************** + """ + + def test_get_entity_parts_ids_ip(self, connect, ip_collection): ''' - target: test.get_entity_by_id - method: add vectors, with the same id, get vector by the given id - expected: status ok, get one vector + target: test.get_entity_by_id, some ids in ip_collection, some ids not + method: add entity, and get + expected: entity returned equals insert ''' - vectors = gen_vectors(nb, dim) - ids = [i for i in range(nb)] - ids[1] = 0; ids[-1] = 0 - status, ids = connect.insert(collection, vectors, ids=ids) - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, [0]) - assert status.OK() - assert_equal_vector(res[0], vectors[0]) + ids = connect.insert(ip_collection, entities) + connect.flush([ip_collection]) + get_ids = [ids[0], 1, ids[-1]] + res = connect.get_entity_by_id(ip_collection, get_ids) + assert_equal_entity(res[0], entities[0]) + assert_equal_entity(res[-1], entities[-1]) + assert not len(res[1]) - @pytest.fixture( - scope="function", - params=[ - 1, - 10, - 100, - 1000, - -1 - ], - ) - def get_id(self, request): - yield request.param + def test_get_entity_parts_ids_jac(self, connect, jac_collection): + ''' + target: test.get_entity_by_id, some ids in jac_collection, some ids not + method: add entity, and get + 
expected: entity returned equals insert + ''' + ids = connect.insert(jac_collection, binary_entities) + connect.flush([jac_collection]) + get_ids = [ids[0], 1, ids[-1]] + res = connect.get_entity_by_id(jac_collection, get_ids) + assert_equal_entity(res[0], binary_entities[0]) + assert_equal_entity(res[-1], binary_entities[-1]) + assert not len(res[1]) - def test_get_vector_after_delete(self, connect, collection, get_id): + """ + ****************************************************************** + The following cases are used to test `get_entity_by_id` function, with tags + ****************************************************************** + """ + def test_get_entities_tag(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vectors, and delete, get vector by the given id - expected: status ok, get one vector - ''' - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - id = get_id - status = connect.delete_entity_by_id(collection, [ids[id]]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, [ids[id]]) - assert status.OK() - assert not len(res[0]) + method: add entities with tag, get + expected: entity returned + ''' + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) - def test_get_vector_after_delete_with_partition(self, connect, collection, get_id): + def test_get_entities_tag_default(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vectors into partition, and delete, get vector by the given id - expected: status ok, get one vector - ''' - vectors = gen_vectors(nb, dim) - 
status = connect.create_partition(collection, tag) - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - id = get_id - status = connect.delete_entity_by_id(collection, [ids[id]]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, [ids[id]]) - assert status.OK() - assert not len(res[0]) + method: add entities with default tag, get + expected: entity returned + ''' + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) - def test_get_vector_id_not_exised(self, connect, collection): + def test_get_entities_tags_default(self, connect, collection, get_pos): ''' - target: test get vector, params vector_id not existed - method: add vector and get - expected: status ok, empty result + target: test.get_entity_by_id + method: create partitions, add entities with default tag, get + expected: entity returned ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, [1]) - assert status.OK() - assert not len(res[0]) + tag_new = "tag_new" + connect.create_partition(collection, tag) + connect.create_partition(collection, tag_new) + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) - def test_get_vector_collection_not_existed(self, connect, collection): + def test_get_entities_tags_A(self, connect, collection, get_pos): ''' - target: test get vector, params 
collection_name not existed - method: add vector and get - expected: status not ok + target: test.get_entity_by_id + method: create partitions, add entities with default tag, get + expected: entity returned ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - collection_new = gen_unique_str() - status, res = connect.get_entity_by_id(collection_new, [1]) - assert not status.OK() - - @pytest.mark.timeout(60) - def test_get_vector_by_id_multithreads(self, connect, collection): - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - status = connect.flush([collection]) - assert status.OK() - get_id = ids[100:200] - def get(): - status, res = connect.get_entity_by_id(collection, get_id) - assert status.OK() - assert len(res) == len(get_id) - for i in range(len(res)): - assert_equal_vector(res[i], vectors[100+i]) - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - future_results = {executor.submit( - get): i for i in range(10)} - for future in concurrent.futures.as_completed(future_results): - future.result() - - # TODO: autoflush - def _test_get_vector_by_id_after_delete_no_flush(self, connect, collection): - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - status = connect.flush([collection]) - assert status.OK() - get_id = ids[100:200] - status = connect.delete_entity_by_id(collection, get_id) - assert status.OK() - status, res = connect.get_entity_by_id(collection, get_id) - assert status.OK() - assert len(res) == len(get_id) - for i in range(len(res)): - assert_equal_vector(res[i], vectors[100+i]) + tag_new = "tag_new" + connect.create_partition(collection, tag) + connect.create_partition(collection, tag_new) + ids = connect.insert(collection, entities, partition_tag=tag) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = 
connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) + def test_get_entities_tags_B(self, connect, collection, get_pos): + ''' + target: test.get_entity_by_id + method: create partitions, add entities with default tag, get + expected: entity returned + ''' + tag_new = "tag_new" + connect.create_partition(collection, tag) + connect.create_partition(collection, tag_new) + ids = connect.insert(collection, entities, partition_tag=tag) + ids_new = connect.insert(collection, entities, partition_tag=tag_new) + connect.flush([collection]) + get_ids = ids[:get_pos] + get_ids.extend(ids_new[:get_pos]) + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) + for i in range(get_pos, get_pos*2): + assert_equal_entity(res[i], entities[i]) + + def test_get_entities_indexed_tag(self, connect, collection, get_simple_index, get_pos): + ''' + target: test.get_entity_by_id + method: add entities with tag, get + expected: entity returned + ''' + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + connect.flush([collection]) + connect.create_index(collection, field_name, index_name, get_simple_index) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) -class TestGetIndexedVectors: """ ****************************************************************** - The following cases are used to test .get_entity_by_id` function + The following cases are used to test `get_entity_by_id` function, with fields params ****************************************************************** """ - @pytest.fixture( - scope="function", - params=gen_simple_index() - ) - def get_simple_index(self, request, connect): - if str(connect._cmd("mode")[1]) == "GPU": - if request.param["index_type"] not in [IndexType.IVF_SQ8, 
IndexType.IVFLAT, IndexType.FLAT, IndexType.IVF_PQ, IndexType.IVF_SQ8H]: - pytest.skip("Only support index_type: idmap/ivf") - elif str(connect._cmd("mode")[1]) == "CPU": - if request.param["index_type"] in [IndexType.IVF_SQ8H]: - pytest.skip("CPU not support index_type: ivf_sq8h") - - return request.param - - @pytest.fixture( - scope="function", - params=[ - 1, - 10, - 100, - 1000, - -1 - ], - ) - def get_id(self, request): - yield request.param + # TODO: + def test_get_entity_field(self, connect, collection, get_pos): + ''' + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[get_pos]] + fields = ["int8"] + res = connect.get_entity_by_id(collection, get_ids, fields = fields) + # assert fields + + # TODO: + def test_get_entity_fields(self, connect, collection, get_pos): + ''' + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[get_pos]] + fields = ["int8", "int64", "float", "float_vector"] + res = connect.get_entity_by_id(collection, get_ids, fields = fields) + # assert fields + + # TODO: assert exception + def test_get_entity_field_not_match(self, connect, collection, get_pos): + ''' + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[get_pos]] + fields = ["int1288"] + with pytest.raises(Exception) as e: + res = connect.get_entity_by_id(collection, get_ids, fields = fields) - def test_get_vectors_after_index_created(self, connect, collection, get_simple_index, get_id): + # TODO: assert exception + def test_get_entity_fields_not_match(self, connect, collection, get_pos): ''' - target: test get 
vector after index created - method: add vector, create index and get vector - expected: status ok + target: test.get_entity_by_id, get one + method: add entity, and get + expected: entity returned equals insert ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vector(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.create_index(collection, index_type, index_param) - assert status.OK() - id = get_id - status, res = connect.get_entity_by_id(collection, [ids[id]]) - assert status.OK() - assert_equal_vector(res[0], vectors[id]) + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_ids = [ids[get_pos]] + fields = ["int1288", "int8"] + with pytest.raises(Exception) as e: + res = connect.get_entity_by_id(collection, get_ids, fields = fields) - def test_get_vector_after_delete(self, connect, collection, get_simple_index, get_id): + def test_get_entity_id_not_exised(self, connect, collection): ''' - target: test.get_entity_by_id - method: add vectors, and delete, get vector by the given id - expected: status ok, get one vector - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vectors(nb, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.create_index(collection, index_type, index_param) - assert status.OK() - id = get_id - status = connect.delete_entity_by_id(collection, [ids[id]]) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status, res = connect.get_entity_by_id(collection, [ids[id]]) - assert status.OK() - assert not len(res[0]) + target: test get entity, params entity_id not existed + method: add entity and get + expected: empty result + ''' + ids = 
connect.insert(collection, entity) + connect.flush([collection]) + res = connect.get_entity_by_id(collection, [1]) + assert not res - def test_get_vector_partition(self, connect, collection, get_simple_index, get_id): + def test_get_entity_collection_not_existed(self, connect, collection): ''' - target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vectors = gen_vectors(nb, dim) - status = connect.create_partition(collection, tag) - ids = [i for i in range(nb)] - status, ids = connect.insert(collection, vectors, ids, partition_tag=tag) - assert status.OK() - status = connect.flush([collection]) - assert status.OK() - status = connect.create_index(collection, index_type, index_param) - assert status.OK() - id = get_id - status, res = connect.get_entity_by_id(collection, [ids[id]]) - assert status.OK() - assert_equal_vector(res[0], vectors[id]) - - -class TestGetBinary: + target: test get entity, params collection_name not existed + method: add entity and get + expected: error raised + ''' + ids = connect.insert(collection, entity) + connect.flush([collection]) + collection_new = gen_unique_str() + with pytest.raises(Exception) as e: + res = connect.get_entity_by_id(collection_new, [ids[0]]) + """ ****************************************************************** - The following cases are used to test .get_entity_by_id` function + The following cases are used to test `get_entity_by_id` function, after deleted ****************************************************************** """ - def test_get_vector_A(self, connect, jac_collection): + def test_get_entity_after_delete(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned - ''' - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert 
status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status, res = connect.get_entity_by_id(jac_collection, [ids[0]]) - assert status.OK() - assert_equal_vector(res[0], vector[0]) + method: add entities, and delete, get entity by the given id + expected: empty result + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = [ids[get_pos]] + status = connect.delete_entity_by_id(collection, delete_ids) + connect.flush([collection]) + get_ids = [ids[get_pos]] + res = connect.get_entity_by_id(collection, get_ids) + assert not len(res[0]) - def test_get_vector_B(self, connect, jac_collection): + def test_get_entities_after_delete(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned + method: add entities, and delete, get entity by the given id + expected: empty result ''' - tmp, vectors = gen_binary_vectors(nb, dim) - status, ids = connect.insert(jac_collection, vectors) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status, res = connect.get_entity_by_id(jac_collection, [ids[0]]) - assert status.OK() - assert_equal_vector(res[0], vectors[0]) + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = ids[:get_pos] + status = connect.delete_entity_by_id(collection, delete_ids) + connect.flush([collection]) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert not len(res[i]) - def test_get_vector_multi_same_ids(self, connect, jac_collection): + def test_get_entities_after_delete_compact(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vectors, with the same id, get vector by the given id - expected: status ok, get one vector + method: add entities, and delete, get entity by the given id + expected: empty result + ''' + ids = connect.insert(collection, 
entities) + connect.flush([collection]) + delete_ids = ids[:get_pos] + status = connect.delete_entity_by_id(collection, delete_ids) + connect.flush([collection]) + connect.compact(collection) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert not len(res[i]) + + def test_get_entities_indexed_batch(self, connect, collection, get_simple_index, get_pos): ''' - tmp, vectors = gen_binary_vectors(nb, dim) - ids = [i for i in range(nb)] - ids[0] = 0; ids[-1] = 0 - status, ids = connect.insert(jac_collection, vectors, ids=ids) - status = connect.flush([jac_collection]) - assert status.OK() - status, res = connect.get_entity_by_id(jac_collection, [0]) - assert status.OK() - assert_equal_vector(res[0], vectors[0]) - - def test_get_vector_id_not_exised(self, connect, jac_collection): - ''' - target: test get vector, params vector_id not existed - method: add vector and get - expected: status ok, empty result - ''' - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status, res = connect.get_entity_by_id(jac_collection, [1]) - assert status.OK() - assert not len(res[0]) + target: test.get_entity_by_id + method: add entities batch, create index, get + expected: entity returned + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + connect.create_index(collection, field_name, index_name, get_simple_index) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) - def test_get_vector_collection_not_existed(self, connect, jac_collection): + def test_get_entities_indexed_single(self, connect, collection, get_simple_index, get_pos): ''' - target: test get vector, params collection_name not existed - method: add vector and get - expected: status not ok + target: 
test.get_entity_by_id + method: add entities 1 entity/per request, create index, get + expected: entity returned ''' - tmp, vector = gen_binary_vectors(1, dim) - status, ids = connect.insert(jac_collection, vector) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - collection_new = gen_unique_str() - status, res = connect.get_entity_by_id(collection_new, [1]) - assert not status.OK() + for i in range(nb): + ids = connect.insert(collection, entity, ids=[i]) + connect.flush([collection]) + connect.create_index(collection, field_name, index_name, get_simple_index) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) - def test_get_vector_partition(self, connect, jac_collection): + def test_get_entities_after_delete_disable_autoflush(self, connect, collection, get_pos): ''' target: test.get_entity_by_id - method: add vector, and get - expected: status ok, vector returned + method: disable autoflush, add entities, and delete, get entity by the given id + expected: empty result + ''' + ids = connect.insert(collection, entities) + connect.flush([collection]) + delete_ids = ids[:get_pos] + try: + disable_flush(connect) + status = connect.delete_entity_by_id(collection, delete_ids) + get_ids = ids[:get_pos] + res = connect.get_entity_by_id(collection, get_ids) + for i in range(get_pos): + assert_equal_entity(res[i], entities[i]) + finally: + enable_flush(connect) + + def test_get_entities_after_delete_same_ids(self, connect, collection): + ''' + target: test.get_entity_by_id + method: add entities with the same ids, and delete, get entity by the given id + expected: empty result + ''' + ids = [i for i in range(nb)] + ids[0] = 1 + res_ids = connect.insert(collection, entities, ids) + connect.flush([collection]) + status = connect.delete_entity_by_id(collection, [1]) + connect.flush([collection]) + get_ids = [1] + res = 
connect.get_entity_by_id(collection, get_ids) + assert not len(res[0]) + + def test_get_entity_after_delete_with_partition(self, connect, collection, get_pos): ''' - tmp, vectors = gen_binary_vectors(nb, dim) - status = connect.create_partition(jac_collection, tag) - status, ids = connect.insert(jac_collection, vectors, partition_tag=tag) - assert status.OK() - status = connect.flush([jac_collection]) - assert status.OK() - status, res = connect.get_entity_by_id(jac_collection, [ids[0]]) - assert status.OK() - assert_equal_vector(res[0], vectors[0]) + target: test.get_entity_by_id + method: add entities into partition, and delete, get entity by the given id + expected: get one entity + ''' + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + connect.flush([collection]) + status = connect.delete_entity_by_id(collection, [ids[get_pos]]) + connect.flush([collection]) + res = connect.get_entity_by_id(collection, [ids[get_pos]]) + assert not len(res[0]) + @pytest.mark.timeout(60) + def test_get_entity_by_id_multithreads(self, connect, collection): + ids = connect.insert(collection, entities) + connect.flush([collection]) + get_id = ids[100:200] + def get(): + res = connect.get_entity_by_id(collection, get_id) + assert len(res) == len(get_id) + for i in range(len(res)): + assert_equal_entity(res[i], entities[100+i]) + with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + future_results = {executor.submit( + get): i for i in range(10)} + for future in concurrent.futures.as_completed(future_results): + future.result() -class TestGetVectorIdIngalid(object): - single_vector = gen_single_vector(dim) +class TestGetInvalid(object): """ - Test adding vectors with invalid vectors + Test get entities with invalid params """ @pytest.fixture( scope="function", - params=gen_invalid_vector_ids() + params=gen_invalid_strs() ) - def gen_invalid_id(self, request): + def get_collection_name(self, request): yield 
request.param - @pytest.mark.level(2) - def test_get_vector_id_invalid(self, connect, collection, gen_invalid_id): - invalid_id = gen_invalid_id - with pytest.raises(Exception) as e: - status = connect.get_entity_by_id(collection, [invalid_id]) - - -class TestCollectionNameInvalid(object): - """ - Test adding vectors with invalid collection names - """ @pytest.fixture( scope="function", - params=gen_invalid_collection_names() + params=gen_invalid_ints() ) - def get_collection_name(self, request): + def get_entity_id(self, request): yield request.param @pytest.mark.level(2) - def test_get_vectors_with_invalid_collection_name(self, connect, get_collection_name): + def test_insert_ids_invalid(self, connect, collection, get_entity_id): + ''' + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception + ''' + entity_id = get_entity_id + ids = [entity_id for _ in range(nb)] + with pytest.raises(Exception): + connect.get_entity_by_id(collection, ids) + + @pytest.mark.level(2) + def test_insert_parts_ids_invalid(self, connect, collection, get_entity_id): + ''' + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception + ''' + entity_id = get_entity_id + ids = [i for i in range(nb)] + ids[-1] = entity_id + with pytest.raises(Exception): + connect.get_entity_by_id(collection, ids) + + @pytest.mark.level(2) + def test_get_entities_with_invalid_collection_name(self, connect, get_collection_name): collection_name = get_collection_name - vectors = gen_vectors(1, dim) - status, result = connect.get_entity_by_id(collection_name, [1]) - assert not status.OK() + ids = [1] + with pytest.raises(Exception): + res = connect.get_entity_by_id(collection_name, ids) + + @pytest.mark.level(2) + def test_get_entities_with_invalid_field_name(self, connect, get_field_name): + field_name = get_field_name + ids 
= [1] + fields = [field_name] + with pytest.raises(Exception): + res = connect.get_entity_by_id(collection_name, ids, fields=fields) \ No newline at end of file diff --git a/tests/milvus_python_test/entity/test_insert.py b/tests/milvus_python_test/entity/test_insert.py index f860450065e84012883fe5dddd0e339e545320b8..656c000e84801cb7c9f58df7eb62301863e4ed3e 100644 --- a/tests/milvus_python_test/entity/test_insert.py +++ b/tests/milvus_python_test/entity/test_insert.py @@ -1,24 +1,36 @@ import time import pdb +import copy import threading import logging -import threading from multiprocessing import Pool, Process import pytest from milvus import IndexType, MetricType from utils import * - dim = 128 -index_file_size = 10 -collection_id = "test_add" +segment_size = 10 +collection_id = "test_insert" ADD_TIMEOUT = 60 tag = "1970-01-01" -add_interval_time = 1.5 +insert_interval_time = 1.5 nb = 6000 - - -class TestAddBase: +field_name = "float_vector" +default_index_name = "insert_index" +entity = gen_entities(1) +binary_entity = gen_binary_entities(1) +entities = gen_entities(nb) +raw_vectors, binary_entities = gen_binary_entities(nb) +default_single_query = { + "bool": { + "must": [ + {"vector": {field_name: {"topk": 10, "query": gen_single_vector(dim), "params": {"index_name": default_index_name, "nprobe": 10}}}} + ] + } +} + + +class TestInsertBase: """ ****************************************************************** The following cases are used to test `insert` function @@ -32,607 +44,437 @@ class TestAddBase: if str(connect._cmd("mode")[1]) == "CPU": if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in cpu mode") - if request.param["index_type"] == IndexType.IVF_PQ: - pytest.skip("Skip PQ Temporary") return request.param - def test_add_vector_create_collection(self, connect, collection): - ''' - target: test add vector, then create collection again - method: add vector and create collection - expected: status not ok - ''' - vector = 
gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - param = {'collection_name': collection, - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - assert not status.OK() - - def test_add_vector_has_collection(self, connect, collection): - ''' - target: test add vector, then check collection existence - method: add vector and call Hascollection - expected: collection exists, status ok - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert assert_has_collection(connect, collection) - - @pytest.mark.timeout(ADD_TIMEOUT) - def test_drop_collection_add_vector(self, connect, collection): - ''' - target: test add vector after collection deleted - method: delete collection and add vector - expected: status not ok - ''' - status = connect.drop_collection(collection) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert not status.OK() + @pytest.fixture( + scope="function", + params=gen_single_filter_fields() + ) + def get_filter_field(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_drop_collection_add_vector_another(self, connect, collection): - ''' - target: test add vector to collection_1 after collection_2 deleted - method: delete collection_2 and add vector to collection_1 - expected: status ok - ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - status = connect.drop_collection(collection) - vector = gen_single_vector(dim) - status, ids = connect.insert(param['collection_name'], vector) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_single_vector_fields() + ) + def get_vector_field(self, request): + yield request.param @pytest.mark.timeout(ADD_TIMEOUT) - def 
test_add_vector_drop_collection(self, connect, collection): + def test_insert_collection_not_existed(self, connect): ''' - target: test delete collection after add vector - method: add vector and delete collection - expected: status ok + target: test insert, with collection not existed + method: insert entity into a random named collection + expected: error raised ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - status = connect.drop_collection(collection) - assert status.OK() + collection_name = gen_unique_str(collection_id) + with pytest.raises(Exception) as e: + connect.insert(collection_name, entities) @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_delete_another_collection(self, connect, collection): + def test_insert_drop_collection(self, connect, collection): ''' - target: test delete collection_1 collection after add vector to collection_2 - method: add vector and delete collection - expected: status ok + target: test delete collection after insert vector + method: insert vector and delete collection + expected: no error raised ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() + ids = connect.insert(collection, entity) + assert len(ids) == 1 + connect.drop_collection(collection) @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_drop_collection(self, connect, collection): + def test_insert_sleep_drop_collection(self, connect, collection): ''' - target: test delete collection after add vector for a while - method: add vector, sleep, and delete collection - expected: status ok + target: test delete collection after insert vector for a while + method: insert vector, sleep, and delete collection + expected: no error raised ''' - vector = gen_single_vector(dim) - 
status, ids = connect.insert(collection, vector) - assert status.OK() + ids = connect.insert(collection, entity) + assert len(ids) == 1 connect.flush([collection]) - status = connect.drop_collection(collection) - assert status.OK() + connect.drop_collection(collection) @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_delete_another_collection(self, connect, collection): + def test_insert_create_index(self, connect, collection, get_simple_index): ''' - target: test delete collection_1 collection after add vector to collection_2 for a while - method: add vector , sleep, and delete collection - expected: status ok + target: test build index insert after vector + method: insert vector and build index + expected: no error raised ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) + ids = connect.insert(collection, entities) + assert len(ids) == nb connect.flush([collection]) - status = connect.drop_collection(param['collection_name']) - assert status.OK() + connect.create_index(collection, field_name, index_name, get_simple_index) @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, collection, get_simple_index): + def test_insert_after_create_index(self, connect, collection, get_simple_index): ''' - target: test add vector after build index - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status = connect.create_index(collection, index_type, index_param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - - @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, collection, get_simple_index): + target: test 
build index insert after vector + method: insert vector and build index + expected: no error raised ''' - target: test add vector to collection_2 after build index for collection_1 - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - status = connect.create_index(collection, index_type, index_param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - connect.drop_collection(param['collection_name']) - assert status.OK() + connect.create_index(collection, field_name, index_name, get_simple_index) + ids = connect.insert(collection, entities) + assert len(ids) == nb @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, collection, get_simple_index): + def test_insert_search(self, connect, collection): ''' - target: test build index add after vector - method: add vector and build index - expected: status ok + target: test search vector after insert vector after a while + method: insert vector, sleep, and search collection + expected: no error raised ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - logging.getLogger().info(index_param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - status = connect.create_index(collection, index_type, index_param) - assert status.OK() + ids = connect.insert(collection, entities) + connect.flush([collection]) + res = connect.search(collection, default_single_query) + logging.getLogger().debug(res) + assert not res - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, collection, get_simple_index): - ''' - target: test add vector to collection_2 after build 
index for collection_1 - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - status = connect.create_index(param['collection_name'], index_type, index_param) - assert status.OK() + @pytest.fixture( + scope="function", + params=[ + 1, + 6000 + ], + ) + def insert_count(self, request): + yield request.param @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, collection, get_simple_index): + def test_insert_ids(self, connect, collection, insert_count): ''' - target: test build index add after vector for a while - method: add vector and build index - expected: status ok + target: test insert vectors in collection, use customize ids + method: create collection and insert vectors in it, check the ids returned and the collection length after vectors inserted + expected: the length of ids and the collection row count ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) + nb = insert_count + ids = [i for i in range(nb)] + res_ids = connect.insert(collection, gen_entities(nb), ids) connect.flush([collection]) - status = connect.create_index(collection, index_type, index_param) - assert status.OK() - - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, collection, get_simple_index): - ''' - target: test add vector to collection_2 after build index for collection_1 for a while - method: build index and add vector - expected: status ok + assert len(res_ids) == nb + assert res_ids == ids + res_count = 
connect.count_collection(collection) + assert res_count == nb + + @pytest.mark.timeout(ADD_TIMEOUT) + def test_insert_the_same_ids(self, connect, collection, insert_count): + ''' + target: test insert vectors in collection, use customize the same ids + method: create collection and insert vectors in it, check the ids returned and the collection length after vectors inserted + expected: the length of ids and the collection row count ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) + nb = insert_count + ids = [1 for i in range(nb)] + res_ids = connect.insert(collection, gen_entities(nb), ids) connect.flush([collection]) - status = connect.create_index(param['collection_name'], index_type, index_param) - assert status.OK() + assert len(res_ids) == nb + assert res_ids == ids + res_count = connect.count_collection(collection) + assert res_count == nb + # TODO @pytest.mark.timeout(ADD_TIMEOUT) - def test_search_vector_add_vector(self, connect, collection): + def test_insert_ids_fields(self, connect, get_filter_field, get_vector_field): + ''' + target: test create normal collection with different fields, insert entities into id with ids + method: create collection with diff fields: metric/field_type/..., insert, and count + expected: row count correct + ''' + nb = 5 + filter_field = get_filter_field + vector_field = get_vector_field + collection_name = gen_unique_str("test_collection") + fields = { + "fields": [filter_field, vector_field], + "segment_size": segment_size + } + connect.create_collection(collection_name, fields) + ids = [i for i in range(nb)] + entities = gen_entities_by_fields(fields) + res_ids = connect.insert(collection_name, entities, ids) + 
assert res_ids == ids + res_count = connect.count_collection(collection) + assert res_count == nb + + # TODO: assert exception + @pytest.mark.timeout(ADD_TIMEOUT) + def test_insert_twice_ids_no_ids(self, connect, collection): ''' - target: test add vector after search collection - method: search collection and add vector - expected: status ok + target: check the result of insert, with params ids and no ids + method: test insert vectors twice, use customize ids first, and then use no ids + expected: error raised ''' - vector = gen_single_vector(dim) - status, result = connect.search(collection, 1, vector) - status, ids = connect.insert(collection, vector) - assert status.OK() + ids = [i for i in range(nb)] + res_ids = connect.insert(collection, entities, ids) + with pytest.raises(Exception) as e: + res_ids_new = connect.insert(collection, entities) @pytest.mark.timeout(ADD_TIMEOUT) - def test_search_vector_add_vector_another(self, connect, collection): + def test_insert_twice_not_ids_ids(self, connect, collection): ''' - target: test add vector to collection_1 after search collection_2 - method: search collection and add vector - expected: status ok + target: check the result of insert, with params ids and no ids + method: test insert vectors twice, use not ids first, and then use customize ids + expected: error raised ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, result = connect.search(collection, 1, vector) - status, ids = connect.insert(param['collection_name'], vector) - assert status.OK() + res_ids = connect.insert(collection, entities) + ids = [i for i in range(nb)] + with pytest.raises(Exception) as e: + res_ids_new = connect.insert(collection, entities, ids) @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_search_vector(self, connect, collection): + def 
test_insert_ids_length_not_match(self, connect, collection): ''' - target: test search vector after add vector - method: add vector and search collection - expected: status ok + target: test insert vectors in collection, use customize ids, len(ids) != len(vectors) + method: create collection and insert vectors in it + expected: raise an exception ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - assert status.OK() - connect.flush([collection]) - status, result = connect.search(collection, 1, vector) - assert status.OK() + ids = [i for i in range(1, nb)] + with pytest.raises(Exception) as e: + res_ids = connect.insert(collection, entities, ids) + # TODO @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_search_vector_another(self, connect, collection): - ''' - target: test add vector to collection_1 after search collection_2 - method: search collection and add vector - expected: status ok - ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - status, result = connect.search(param['collection_name'], 1, vector) - assert status.OK() + def test_insert_ids_fields(self, connect, get_filter_field, get_vector_field): + ''' + target: test create normal collection with different fields, insert entities into id without ids + method: create collection with diff fields: metric/field_type/..., insert, and count + expected: row count correct + ''' + nb = 5 + filter_field = get_filter_field + vector_field = get_vector_field + collection_name = gen_unique_str("test_collection") + fields = { + "fields": [filter_field, vector_field], + "segment_size": segment_size + } + connect.create_collection(collection_name, fields) + entities = gen_entities_by_fields(fields) + res_ids = connect.insert(collection_name, entities) + 
res_count = connect.count_collection(collection) + assert res_count == nb @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_search_vector(self, connect, collection): + def test_insert_tag(self, connect, collection): ''' - target: test search vector after add vector after a while - method: add vector, sleep, and search collection - expected: status ok + target: test insert entities in collection created before + method: create collection and insert entities in it, with the partition_tag param + expected: the collection row count equals to nq ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - connect.flush([collection]) - status, result = connect.search(collection, 1, vector) - assert status.OK() + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + assert len(ids) == nb @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_search_vector_another(self, connect, collection): + def test_insert_tag_with_ids(self, connect, collection): ''' - target: test add vector to collection_1 after search collection_2 a while - method: search collection , sleep, and add vector - expected: status ok + target: test insert entities in collection created before, insert with ids + method: create collection and insert entities in it, with the partition_tag param + expected: the collection row count equals to nq ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(collection, vector) - connect.flush([collection]) - status, result = connect.search(param['collection_name'], 1, vector) - assert status.OK() - - """ - ****************************************************************** - The following cases are used to test `insert` function - 
****************************************************************** - """ + connect.create_partition(collection, tag) + ids = [i for i in range(nb)] + res_ids = connect.insert(collection, entities, ids, partition_tag=tag) + assert res_ids == ids @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_ids(self, connect, collection): + def test_insert_default_tag(self, connect, collection): ''' - target: test add vectors in collection, use customize ids - method: create collection and add vectors in it, check the ids returned and the collection length after vectors added - expected: the length of ids and the collection row count + target: test insert entities into default partition + method: create partition and insert info collection without tag params + expected: the collection row count equals to nb ''' - nq = 5; top_k = 1; - vectors = gen_vectors(nq, dim) - ids = [i for i in range(nq)] - status, ids = connect.insert(collection, vectors, ids) - connect.flush([collection]) - assert status.OK() - assert len(ids) == nq - status, result = connect.search(collection, top_k, query_records=vectors) - logging.getLogger().info(result) - assert len(result) == nq - for i in range(nq): - assert result[i][0].id == i + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities) + assert len(ids) == nb + res_count = connect.count_collection(collection) + assert res_count == nb @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_twice_ids_no_ids(self, connect, collection): + def test_insert_tag_not_existed(self, connect, collection): ''' - target: check the result of insert, with params ids and no ids - method: test add vectors twice, use customize ids first, and then use no ids - expected: status not OK + target: test insert entities in collection created before + method: create collection and insert entities in it, with the not existed partition_tag param + expected: error raised ''' - nq = 5; top_k = 1; - vectors = gen_vectors(nq, dim) - ids = [i for i in 
range(nq)] - status, ids = connect.insert(collection, vectors, ids) - assert status.OK() - status, ids = connect.insert(collection, vectors) - logging.getLogger().info(status) - logging.getLogger().info(ids) - assert not status.OK() + tag = gen_unique_str() + with pytest.raises(Exception) as e: + ids = connect.insert(collection, entities, partition_tag=tag) @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_twice_not_ids_ids(self, connect, collection): + def test_insert_tag_existed(self, connect, collection): ''' - target: check the result of insert, with params ids and no ids - method: test add vectors twice, use not ids first, and then use customize ids - expected: status not OK + target: test insert entities in collection created before + method: create collection and insert entities in it repeatly, with the partition_tag param + expected: the collection row count equals to nq ''' - nq = 5; top_k = 1; - vectors = gen_vectors(nq, dim) - ids = [i for i in range(nq)] - status, ids = connect.insert(collection, vectors) - assert status.OK() - status, ids = connect.insert(collection, vectors, ids) - logging.getLogger().info(status) - logging.getLogger().info(ids) - assert not status.OK() + connect.create_partition(collection, tag) + ids = connect.insert(collection, entities, partition_tag=tag) + ids = connect.insert(collection, entities, partition_tag=tag) + res_count = connect.count_collection(collection) + assert res_count == 2*nb - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_ids_length_not_match(self, connect, collection): + @pytest.mark.level(2) + def test_insert_without_connect(self, dis_connect, collection): ''' - target: test add vectors in collection, use customize ids, len(ids) != len(vectors) - method: create collection and add vectors in it - expected: raise an exception + target: test insert entities without connection + method: create collection and insert entities in it, check if inserted successfully + expected: raise exception ''' - nq = 5 - 
vectors = gen_vectors(nq, dim) - ids = [i for i in range(1, nq)] with pytest.raises(Exception) as e: - status, ids = connect.insert(collection, vectors, ids) - - @pytest.fixture( - scope="function", - params=gen_invalid_vector_ids() - ) - def get_vector_id(self, request): - yield request.param + ids = dis_connect.insert(collection, entities) - @pytest.mark.level(2) - def test_insert_ids_invalid(self, connect, collection, get_vector_id): + def test_insert_collection_not_existed(self, connect): ''' - target: test add vectors in collection, use customize ids, which are not int64 - method: create collection and add vectors in it - expected: raise an exception + target: test insert entities in collection, which not existed before + method: insert entities collection not existed, check the status + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - vector_id = get_vector_id - ids = [vector_id for _ in range(nq)] - with pytest.raises(Exception): - connect.insert(collection, vectors, ids) + with pytest.raises(Exception) as e: + ids = connect.insert(gen_unique_str("not_exist_collection"), entities) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert(self, connect, collection): + def test_insert_dim_not_matched(self, connect, collection): ''' - target: test add vectors in collection created before - method: create collection and add vectors in it, check the ids returned and the collection length after vectors added - expected: the length of ids and the collection row count + target: test insert entities, the vector dimension is not equal to the collection dimension + method: the entities dimension is half of the collection dimension, check the status + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status, ids = connect.insert(collection, vectors) - assert status.OK() - assert len(ids) == nq + vectors = gen_vectors(nb, int(dim)//2) + insert_entities = copy.deepcopy(entities) + insert_entities[-1]["values"] = vectors + with 
pytest.raises(Exception) as e: + ids = connect.insert(collection, insert_entities) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_tag(self, connect, collection): + def test_insert_with_field_name_not_match(self, connect, collection): ''' - target: test add vectors in collection created before - method: create collection and add vectors in it, with the partition_tag param - expected: the collection row count equals to nq + target: test insert entities, with the entity field name updated + method: update entity field name + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status = connect.create_partition(collection, tag) - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert status.OK() - assert len(ids) == nq + tmp_entity = update_field_name(copy.deepcopy(entity), "int8", "int8new") + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_tag_A(self, connect, collection): + def test_insert_with_field_type_not_match(self, connect, collection): ''' - target: test add vectors in collection created before - method: create partition and add vectors in it - expected: the collection row count equals to nq + target: test insert entities, with the entity field type updated + method: update entity field type + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status = connect.create_partition(collection, tag) - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert status.OK() - assert len(ids) == nq + tmp_entity = update_field_type(copy.deepcopy(entity), DataType.INT8, DataType.FLOAT) + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_tag_not_existed(self, connect, collection): + def test_insert_with_field_value_not_match(self, connect, collection): ''' - target: test add vectors in collection created before - method: 
create collection and add vectors in it, with the not existed partition_tag param - expected: status not ok + target: test insert entities, with the entity field value updated + method: update entity field value + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert not status.OK() + tmp_entity = update_field_value(copy.deepcopy(entity), 'int8', 's') + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_tag_not_existed_A(self, connect, collection): + def test_insert_with_field_more(self, connect, collection): ''' - target: test add vectors in collection created before - method: create partition, add vectors with the not existed partition_tag param - expected: status not ok + target: test insert entities, with more fields than collection schema + method: add entity field + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - new_tag = "new_tag" - status = connect.create_partition(collection, tag) - status, ids = connect.insert(collection, vectors, partition_tag=new_tag) - assert not status.OK() + tmp_entity = add_field(copy.deepcopy(entity)) + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_tag_existed(self, connect, collection): + def test_insert_with_field_vector_more(self, connect, collection): ''' - target: test add vectors in collection created before - method: create collection and add vectors in it repeatly, with the partition_tag param - expected: the collection row count equals to nq + target: test insert entities, with more fields than collection schema + method: add entity vector field + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status = connect.create_partition(collection, tag) - status, ids = connect.insert(collection, vectors, partition_tag=tag) - for i 
in range(5): - status, ids = connect.insert(collection, vectors, partition_tag=tag) - assert status.OK() - assert len(ids) == nq + tmp_entity = add_vector_field(copy.deepcopy(entity)) + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - @pytest.mark.level(2) - def test_insert_without_connect(self, dis_connect, collection): + def test_insert_with_field_less(self, connect, collection): ''' - target: test add vectors without connection - method: create collection and add vectors in it, check if added successfully - expected: raise exception + target: test insert entities, with less fields than collection schema + method: remove entity field + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - with pytest.raises(Exception) as e: - status, ids = dis_connect.insert(collection, vectors) + tmp_entity = remove_field(copy.deepcopy(entity)) + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - def test_add_collection_not_existed(self, connect): + def test_insert_with_field_vector_less(self, connect, collection): ''' - target: test add vectors in collection, which not existed before - method: add vectors collection not existed, check the status - expected: status not ok + target: test insert entities, with less fields than collection schema + method: remove entity vector field + expected: error raised ''' - nq = 5 - vector = gen_single_vector(dim) - status, ids = connect.insert(gen_unique_str("not_exist_collection"), vector) - assert not status.OK() - assert not ids + tmp_entity = remove_vector_field(copy.deepcopy(entity)) + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - def test_add_vector_dim_not_matched(self, connect, collection): + def test_insert_with_no_field_vector_value(self, connect, collection): ''' - target: test add vector, the vector dimension is not equal to the collection dimension - method: the vector dimension is half of the collection dimension, check 
the status - expected: status not ok + target: test insert entities, with no vector field value + method: remove entity vector field + expected: error raised ''' - vector = gen_single_vector(int(dim)//2) - status, ids = connect.insert(collection, vector) - assert not status.OK() + tmp_entity = copy.deepcopy(entity) + del tmp_entity[-1]["values"] + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - def test_insert_dim_not_matched(self, connect, collection): + def test_insert_with_no_field_vector_type(self, connect, collection): ''' - target: test add vectors, the vector dimension is not equal to the collection dimension - method: the vectors dimension is half of the collection dimension, check the status - expected: status not ok + target: test insert entities, with no vector field type + method: remove entity vector field + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, int(dim)//2) - status, ids = connect.insert(collection, vectors) - assert not status.OK() + tmp_entity = copy.deepcopy(entity) + del tmp_entity[-1]["type"] + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) - def test_add_vector_query_after_sleep(self, connect, collection): + def test_insert_with_no_field_vector_name(self, connect, collection): ''' - target: test add vectors, and search it after sleep - method: set vector[0][1] as query vectors - expected: status ok and result length is 1 + target: test insert entities, with no vector field name + method: remove entity vector field + expected: error raised ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status, ids = connect.insert(collection, vectors) - connect.flush([collection]) - status, result = connect.search(collection, 1, [vectors[0]]) - assert status.OK() - assert len(result) == 1 + tmp_entity = copy.deepcopy(entity) + del tmp_entity[-1]["name"] + with pytest.raises(Exception): + connect.insert(collection_name, tmp_entity) @pytest.mark.level(2) @pytest.mark.timeout(30) 
- def test_collection_add_rows_count_multi_threading(self, args, collection): + def test_collection_insert_rows_count_multi_threading(self, args, collection): ''' target: test collection rows_count is correct or not with multi threading - method: create collection and add vectors in it(idmap), - assert the value returned by count_entities method is equal to length of vectors - expected: the count is equal to the length of vectors + method: create collection and insert entities in it(idmap), + assert the value returned by count_entities method is equal to length of entities + expected: the count is equal to the length of entities ''' if args["handler"] == "HTTP": pytest.skip("Skip test in http mode") thread_num = 8 threads = [] milvus = get_milvus(host=args["ip"], port=args["port"], handler=args["handler"], try_connect=False) - vectors = gen_vectors(nb, dim) - def add(thread_i): + def insert(thread_i): logging.getLogger().info("In thread-%d" % thread_i) - # milvus = get_milvus(host=args["ip"], port=args["port"], handler=args["handler"]) - # assert milvus - status, result = milvus.insert(collection, records=vectors) - assert status.OK() - status = milvus.flush([collection]) - assert status.OK() + res_ids = milvus.insert(collection, entities) + milvus.flush([collection]) for i in range(thread_num): - x = threading.Thread(target=add, args=(i, )) + x = threading.Thread(target=insert, args=(i, )) threads.append(x) x.start() for th in threads: th.join() - status, res = milvus.count_entities(collection) - assert res == thread_num * nb + res_count = milvus.count_entities(collection) + assert res_count == thread_num * nb - def test_add_vector_multi_collections(self, connect): - ''' - target: test add vectors is correct or not with multiple collections of L2 - method: create 50 collections and add vectors into them in turn - expected: status ok - ''' - nq = 100 - vectors = gen_vectors(nq, dim) - collection_list = [] - for i in range(20): - collection_name = 
gen_unique_str('test_add_vector_multi_collections') - collection_list.append(collection_name) - param = {'collection_name': collection_name, - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - connect.create_collection(param) - for j in range(5): - for i in range(20): - status, ids = connect.insert(collection_name=collection_list[i], records=vectors) - assert status.OK() class TestAddAsync: @pytest.fixture(scope="function", autouse=True) @@ -650,114 +492,103 @@ class TestAddAsync: def insert_count(self, request): yield request.param - def check_status(self, status, result): + def check_status(self, result): logging.getLogger().info("In callback check status") - assert status.OK() + assert not result def check_status_not_ok(self, status, result): logging.getLogger().info("In callback check status") assert not status.OK() - def test_insert_async(self, connect, collection, insert_count): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' nb = insert_count - insert_vec_list = gen_vectors(nb, dim) - future = connect.insert(collection, insert_vec_list, _async=True) - status, ids = future.result() + future = connect.insert(collection, gen_entities(nb), _async=True) + ids = future.result() connect.flush([collection]) assert len(ids) == nb - assert status.OK() @pytest.mark.level(2) def test_insert_async_false(self, connect, collection, insert_count): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' nb = insert_count - insert_vec_list = 
gen_vectors(nb, dim) - status, ids = connect.insert(collection, insert_vec_list, _async=False) + future = connect.insert(collection, gen_entities(nb), _async=False) + ids = future.result() connect.flush([collection]) assert len(ids) == nb - assert status.OK() def test_insert_async_callback(self, connect, collection, insert_count): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' nb = insert_count - insert_vec_list = gen_vectors(nb, dim) - future = connect.insert(collection, insert_vec_list, _async=True, _callback=self.check_status) + future = connect.insert(collection, gen_entities(nb), _async=True, _callback=self.check_status) future.done() @pytest.mark.level(2) def test_insert_async_long(self, connect, collection): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' nb = 50000 - insert_vec_list = gen_vectors(nb, dim) - future = connect.insert(collection, insert_vec_list, _async=True, _callback=self.check_status) - status, result = future.result() - assert status.OK() + future = connect.insert(collection, gen_entities(nb), _async=True, _callback=self.check_status) + result = future.result() assert len(result) == nb connect.flush([collection]) - status, count = connect.count_entities(collection) - assert status.OK() - logging.getLogger().info(status) + count = connect.count_entities(collection) logging.getLogger().info(count) assert count == nb def test_insert_async_callback_timeout(self, connect, collection): ''' - target: test add vectors with different length of vectors - 
method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' nb = 100000 - insert_vec_list = gen_vectors(nb, dim) - future = connect.insert(collection, insert_vec_list, _async=True, _callback=self.check_status, timeout=1) - future.done() + future = connect.insert(collection, gen_entities(nb), _async=True, _callback=self.check_status, timeout=1) + with pytest.raises(Exception) as e: + result = future.result() - def test_insert_async_invalid_params(self, connect, collection): + def test_insert_async_invalid_params(self, connect): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' - insert_vec_list = gen_vectors(nb, dim) collection_new = gen_unique_str() - future = connect.insert(collection_new, insert_vec_list, _async=True) - status, result = future.result() - assert not status.OK() + future = connect.insert(collection_new, entities, _async=True) + with pytest.raises(Exception) as e: + result = future.result() - # TODO: add assertion + # TODO: assertion def test_insert_async_invalid_params_raise_exception(self, connect, collection): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params + target: test insert vectors with different length of vectors + method: set different vectors as insert method params expected: length of ids is equal to the length of vectors ''' - insert_vec_list = [] - collection_new = gen_unique_str() + entities = [] with pytest.raises(Exception) as e: - future = connect.insert(collection_new, insert_vec_list, _async=True) + future = connect.insert(collection, entities, 
_async=True) -class TestAddIP: +class TestInsertMultiCollections: """ ****************************************************************** - The following cases are used to test `insert / index / search / delete` mixed function + The following cases are used to test `insert` function ****************************************************************** """ @pytest.fixture( @@ -768,648 +599,451 @@ class TestAddIP: if str(connect._cmd("mode")[1]) == "CPU": if request.param["index_type"] == IndexType.IVF_SQ8H: pytest.skip("sq8h not support in cpu mode") - if request.param["index_type"] == IndexType.IVF_PQ: - pytest.skip("Skip PQ Temporary") - return request.param - def test_add_vector_create_collection(self, connect, ip_collection): + def test_insert_vector_multi_collections(self, connect): ''' - target: test add vector, then create collection again - method: add vector and create collection - expected: status not ok + target: test insert entities + method: create 10 collections and insert entities into them in turn + expected: row count ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - param = {'collection_name': ip_collection, - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - assert not status.OK() - - def test_add_vector_has_collection(self, connect, ip_collection): - ''' - target: test add vector, then check collection existence - method: add vector and call Hascollection - expected: collection exists, status ok - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert assert_has_collection(connect, ip_collection) + collection_num = 10 + vectors = gen_vectors(nq, dim) + collection_list = [] + for i in range(collection_num): + collection_name = gen_unique_str(collection_id) + collection_list.append(collection_name) + connect.create_collection(collection_name, default_fields) + ids = 
connect.insert(collection_name, entities) + connect.flush([collection]) + assert len(ids) == nb + count = connect.count_entities(collection) + assert count == nb @pytest.mark.timeout(ADD_TIMEOUT) - def test_drop_collection_add_vector(self, connect, ip_collection): + def test_drop_collection_insert_vector_another(self, connect, collection): ''' - target: test add vector after collection deleted - method: delete collection and add vector - expected: status not ok + target: test insert vector to collection_1 after collection_2 deleted + method: delete collection_2 and insert vector to collection_1 + expected: row count equals the length of entities inserted ''' - status = connect.drop_collection(ip_collection) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert not status.OK() + collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + connect.drop_collection(collection) + ids = connect.insert(collection_name, entity) + connect.flush([collection_name]) + assert len(ids) == 1 @pytest.mark.timeout(ADD_TIMEOUT) - def test_drop_collection_add_vector_another(self, connect, ip_collection): + def test_create_index_insert_vector_another(self, connect, collection, get_simple_index): ''' - target: test add vector to collection_1 after collection_2 deleted - method: delete collection_2 and add vector to collection_1 + target: test insert vector to collection_2 after build index for collection_1 + method: build index and insert vector expected: status ok ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - status = connect.drop_collection(ip_collection) - vector = gen_single_vector(dim) - status, ids = connect.insert(param['collection_name'], vector) - assert status.OK() + collection_name = gen_unique_str(collection_id) + 
connect.create_collection(collection_name, default_fields) + connect.create_index(collection, field_name, index_name, get_simple_index) + ids = connect.insert(collection, entity) + connect.drop_collection(collection_name) @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_drop_collection(self, connect, ip_collection): + def test_insert_vector_create_index_another(self, connect, collection, get_simple_index): ''' - target: test delete collection after add vector - method: add vector and delete collection + target: test insert vector to collection_2 after build index for collection_1 + method: build index and insert vector expected: status ok ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - status = connect.drop_collection(ip_collection) - assert status.OK() + collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + ids = connect.insert(collection, entity) + connect.create_index(collection_name, field_name, index_name, get_simple_index) + count = connect.count_entities(collection_name) + assert count == 1 @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_delete_another_collection(self, connect, ip_collection): + def test_insert_vector_sleep_create_index_another(self, connect, collection, get_simple_index): ''' - target: test delete collection_1 collection after add vector to collection_2 - method: add vector and delete collection + target: test insert vector to collection_2 after build index for collection_1 for a while + method: build index and insert vector expected: status ok ''' - param = {'collection_name': 'test_add_vector_delete_another_collection', - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - status = connect.drop_collection(param['collection_name']) - assert status.OK() + 
collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + ids = connect.insert(collection, entity) + connect.flush([collection]) + connect.create_index(collection_name, field_name, index_name, get_simple_index) + count = connect.count_entities(collection_name) + assert count == 1 @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_drop_collection(self, connect, ip_collection): + def test_search_vector_insert_vector_another(self, connect, collection): ''' - target: test delete collection after add vector for a while - method: add vector, sleep, and delete collection + target: test insert vector to collection_1 after search collection_2 + method: search collection and insert vector expected: status ok ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - connect.flush([ip_collection]) - status = connect.drop_collection(ip_collection) - assert status.OK() + collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + res = connect.search(collection, default_single_query) + logging.getLogger().debug(res) + ids = connect.insert(collection_name, entity) + connect.flush([collection]) + count = connect.count_entities(collection_name) + assert count == 1 @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_delete_another_collection(self, connect, ip_collection): + def test_insert_vector_search_vector_another(self, connect, collection): ''' - target: test delete collection_1 collection after add vector to collection_2 for a while - method: add vector , sleep, and delete collection + target: test insert vector to collection_1 after search collection_2 + method: search collection and insert vector expected: status ok ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = 
gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - connect.flush([ip_collection]) - status = connect.drop_collection(param['collection_name']) - assert status.OK() + collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + ids = connect.insert(collection, entity) + result = connect.search(collection_name, default_single_query) @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector(self, connect, ip_collection, get_simple_index): + def test_insert_vector_sleep_search_vector_another(self, connect, collection): ''' - target: test add vector after build index - method: build index and add vector + target: test insert vector to collection_1 after search collection_2 a while + method: search collection , sleep, and insert vector expected: status ok ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - status = connect.create_index(ip_collection, index_type, index_param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() + collection_name = gen_unique_str(collection_id) + connect.create_collection(collection_name, default_fields) + ids = connect.insert(collection, entity) + connect.flush([collection]) + result = connect.search(collection_name, default_single_query) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_create_index_add_vector_another(self, connect, ip_collection, get_simple_index): - ''' - target: test add vector to collection_2 after build index for collection_1 - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - status = connect.create_index(ip_collection, index_type, 
index_param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index(self, connect, ip_collection, get_simple_index): - ''' - target: test build index add after vector - method: add vector and build index - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - status, mode = connect._cmd("mode") - assert status.OK() - status = connect.create_index(ip_collection, index_type, index_param) - if str(mode) == "GPU" and (index_type == IndexType.IVF_PQ): - assert not status.OK() - else: - assert status.OK() - - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_create_index_another(self, connect, ip_collection, get_simple_index): - ''' - target: test add vector to collection_2 after build index for collection_1 - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - status = connect.create_index(param['collection_name'], index_type, index_param) - assert status.OK() +class TestInsertInvalid(object): + """ + Test inserting vectors with invalid collection names + """ + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_collection_name(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index(self, connect, ip_collection, get_simple_index): - ''' - target: test build index add after vector for a while - method: add vector and build index - 
expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - if index_type == IndexType.IVF_PQ: - pytest.skip("Skip some PQ cases") - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() - time.sleep(add_interval_time) - status = connect.create_index(ip_collection, index_type, index_param) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_tag_name(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_create_index_another(self, connect, ip_collection, get_simple_index): - ''' - target: test add vector to collection_2 after build index for collection_1 for a while - method: build index and add vector - expected: status ok - ''' - index_param = get_simple_index["index_param"] - index_type = get_simple_index["index_type"] - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - connect.flush([ip_collection]) - status = connect.create_index(param['collection_name'], index_type, index_param) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_name(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_search_vector_add_vector(self, connect, ip_collection): - ''' - target: test add vector after search collection - method: search collection and add vector - expected: status ok - ''' - vector = gen_single_vector(dim) - status, result = connect.search(ip_collection, 1, vector) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_int_value(self, 
request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_search_vector_add_vector_another(self, connect, ip_collection): - ''' - target: test add vector to collection_1 after search collection_2 - method: search collection and add vector - expected: status ok - ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, result = connect.search(ip_collection, 1, vector) - status, ids = connect.insert(param['collection_name'], vector) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_ints() + ) + def get_entity_id(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_search_vector(self, connect, ip_collection): - ''' - target: test search vector after add vector - method: add vector and search collection - expected: status ok - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() - connect.flush([ip_collection]) - status, result = connect.search(ip_collection, 1, vector) - assert status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_vectors() + ) + def get_field_vectors_value(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_search_vector_another(self, connect, ip_collection): + def test_insert_ids_invalid(self, connect, collection, get_entity_id): ''' - target: test add vector to collection_1 after search collection_2 - method: search collection and add vector - expected: status ok + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = 
connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - connect.flush([ip_collection]) - status, result = connect.search(param['collection_name'], 1, vector) - assert status.OK() + entity_id = get_entity_id + ids = [entity_id for _ in range(nb)] + with pytest.raises(Exception): + connect.insert(collection, entities, ids) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_search_vector(self, connect, ip_collection): - ''' - target: test search vector after add vector after a while - method: add vector, sleep, and search collection - expected: status ok - ''' - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - time.sleep(add_interval_time) - status, result = connect.search(ip_collection, 1, vector) - assert status.OK() + def test_insert_with_invalid_collection_name(self, connect, get_collection_name): + collection_name = get_collection_name + with pytest.raises(Exception): + connect.insert(collection, entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_add_vector_sleep_search_vector_another(self, connect, ip_collection): - ''' - target: test add vector to collection_1 after search collection_2 a while - method: search collection , sleep, and add vector - expected: status ok - ''' - param = {'collection_name': gen_unique_str(), - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.L2} - status = connect.create_collection(param) - vector = gen_single_vector(dim) - status, ids = connect.insert(ip_collection, vector) - assert status.OK() - time.sleep(add_interval_time) - status, result = connect.search(param['collection_name'], 1, vector) - assert status.OK() + def test_insert_with_invalid_tag_name(self, connect, collection, get_tag_name): + tag_name = get_tag_name + connect.create_partition(collection, tag) + with pytest.raises(Exception): + connect.insert(collection, entity, partition_tag=tag_name) - """ - 
****************************************************************** - The following cases are used to test `insert` function - ****************************************************************** - """ + def test_insert_with_invalid_field_name(self, connect, collection, get_field_name): + field_name = get_field_name + tmp_entity = update_field_name(copy.deepcopy(entity), "int8", get_field_name) + with pytest.raises(Exception): + connect.insert(collection, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_ids(self, connect, ip_collection): - ''' - target: test add vectors in collection, use customize ids - method: create collection and add vectors in it, check the ids returned and the collection length after vectors added - expected: the length of ids and the collection row count - ''' - nq = 5; top_k = 1 - vectors = gen_vectors(nq, dim) - ids = [i for i in range(nq)] - status, ids = connect.insert(ip_collection, vectors, ids) - assert status.OK() - connect.flush([ip_collection]) - assert len(ids) == nq - # check search result - status, result = connect.search(ip_collection, top_k, vectors) - logging.getLogger().info(result) - assert len(result) == nq - for i in range(nq): - assert result[i][0].id == i + def test_insert_with_invalid_field_type(self, connect, collection, get_field_type): + field_type = get_field_type + tmp_entity = update_field_type(copy.deepcopy(entity), DataType.FLOAT, field_type) + with pytest.raises(Exception): + connect.insert(collection, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_twice_ids_no_ids(self, connect, ip_collection): - ''' - target: check the result of insert, with params ids and no ids - method: test add vectors twice, use customize ids first, and then use no ids - expected: status not OK - ''' - nq = 5; top_k = 1 - vectors = gen_vectors(nq, dim) - ids = [i for i in range(nq)] - status, ids = connect.insert(ip_collection, vectors, ids) - assert status.OK() - status, ids = 
connect.insert(ip_collection, vectors) - logging.getLogger().info(status) - logging.getLogger().info(ids) - assert not status.OK() + def test_insert_with_invalid_field_value(self, connect, collection, get_field_int_value): + field_value = get_field_int_value + tmp_entity = update_field_type(copy.deepcopy(entity), 'int8', field_value) + with pytest.raises(Exception): + connect.insert(collection, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_twice_not_ids_ids(self, connect, ip_collection): - ''' - target: check the result of insert, with params ids and no ids - method: test add vectors twice, use not ids first, and then use customize ids - expected: status not OK - ''' - nq = 5; top_k = 1 - vectors = gen_vectors(nq, dim) - ids = [i for i in range(nq)] - status, ids = connect.insert(ip_collection, vectors) - assert status.OK() - status, ids = connect.insert(ip_collection, vectors, ids) - logging.getLogger().info(status) - logging.getLogger().info(ids) - assert not status.OK() + def test_insert_with_invalid_field_vector_value(self, connect, collection, get_field_vectors_value): + tmp_entity = copy.deepcopy(entity) + src_vector = tmp_entity[-1]["values"] + src_vector[0][1] = get_field_vectors_value + with pytest.raises(Exception): + connect.insert(collection, tmp_entity) - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert_ids_length_not_match(self, connect, ip_collection): - ''' - target: test add vectors in collection, use customize ids, len(ids) != len(vectors) - method: create collection and add vectors in it - expected: raise an exception - ''' - nq = 5 - vectors = gen_vectors(nq, dim) - ids = [i for i in range(1, nq)] - with pytest.raises(Exception) as e: - status, ids = connect.insert(ip_collection, vectors, ids) +class TestInsertInvalidIP(object): + """ + Test inserting vectors with invalid collection names + """ @pytest.fixture( scope="function", - params=gen_invalid_vector_ids() + params=gen_invalid_strings() ) - def get_vector_id(self, 
request): + def get_collection_name(self, request): yield request.param - @pytest.mark.level(2) - def test_insert_ids_invalid(self, connect, ip_collection, get_vector_id): - ''' - target: test add vectors in collection, use customize ids, which are not int64 - method: create collection and add vectors in it - expected: raise an exception - ''' - nq = 5 - vectors = gen_vectors(nq, dim) - vector_id = get_vector_id - ids = [vector_id for i in range(nq)] - with pytest.raises(Exception) as e: - status, ids = connect.insert(ip_collection, vectors, ids) + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_tag_name(self, request): + yield request.param - @pytest.mark.timeout(ADD_TIMEOUT) - def test_insert(self, connect, ip_collection): - ''' - target: test add vectors in collection created before - method: create collection and add vectors in it, check the ids returned and the collection length after vectors added - expected: the length of ids and the collection row count - ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status, ids = connect.insert(ip_collection, vectors) - assert status.OK() - assert len(ids) == nq - - # @pytest.mark.level(2) - # def test_insert_without_connect(self, dis_connect, ip_collection): - # ''' - # target: test add vectors without connection - # method: create collection and add vectors in it, check if added successfully - # expected: raise exception - # ''' - # nq = 5 - # vectors = gen_vectors(nq, dim) - # with pytest.raises(Exception) as e: - # status, ids = dis_connect.insert(ip_collection, vectors) - - def test_add_vector_dim_not_matched(self, connect, ip_collection): - ''' - target: test add vector, the vector dimension is not equal to the collection dimension - method: the vector dimension is half of the collection dimension, check the status - expected: status not ok - ''' - vector = gen_single_vector(int(dim)//2) - status, ids = connect.insert(ip_collection, vector) - assert not status.OK() + 
@pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_name(self, request): + yield request.param - def test_insert_dim_not_matched(self, connect, ip_collection): - ''' - target: test add vectors, the vector dimension is not equal to the collection dimension - method: the vectors dimension is half of the collection dimension, check the status - expected: status not ok - ''' - nq = 5 - vectors = gen_vectors(nq, int(dim)//2) - status, ids = connect.insert(ip_collection, vectors) - assert not status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_int_value(self, request): + yield request.param - def test_add_vector_query_after_sleep(self, connect, ip_collection): - ''' - target: test add vectors, and search it after sleep - method: set vector[0][1] as query vectors - expected: status ok and result length is 1 - ''' - nq = 5 - vectors = gen_vectors(nq, dim) - status, ids = connect.insert(ip_collection, vectors) - time.sleep(add_interval_time) - status, result = connect.search(ip_collection, 1, [vectors[0]]) - assert status.OK() - assert len(result) == 1 + @pytest.fixture( + scope="function", + params=gen_invalid_ints() + ) + def get_entity_id(self, request): + yield request.param - def test_add_vector_multi_collections(self, connect): - ''' - target: test add vectors is correct or not with multiple collections of IP - method: create 50 collections and add vectors into them in turn - expected: status ok - ''' - nq = 100 - vectors = gen_vectors(nq, dim) - collection_list = [] - for i in range(20): - collection_name = gen_unique_str('test_add_vector_multi_collections') - collection_list.append(collection_name) - param = {'collection_name': collection_name, - 'dimension': dim, - 'index_file_size': index_file_size, - 'metric_type': MetricType.IP} - connect.create_collection(param) - for j in range(10): - for i in range(20): - status, ids = connect.insert(collection_name=collection_list[i], 
records=vectors) - assert status.OK() - -class TestAddAdvance: @pytest.fixture( scope="function", - params=[ - 1, - 1000, - 6000 - ], + params=gen_invalid_vectors() ) - def insert_count(self, request): + def get_field_vectors_value(self, request): yield request.param - def test_insert_much(self, connect, collection, insert_count): + @pytest.mark.level(2) + def test_insert_ids_invalid(self, connect, ip_collection, get_entity_id): ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params - expected: length of ids is equal to the length of vectors + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception ''' - nb = insert_count - insert_vec_list = gen_vectors(nb, dim) - status, ids = connect.insert(collection, insert_vec_list) - assert len(ids) == nb - assert status.OK() + entity_id = get_entity_id + ids = [entity_id for _ in range(nb)] + with pytest.raises(Exception): + connect.insert(ip_collection, entities, ids) - def test_insert_much_ip(self, connect, ip_collection, insert_count): - ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params - expected: length of ids is equal to the length of vectors - ''' - nb = insert_count - insert_vec_list = gen_vectors(nb, dim) - status, ids = connect.insert(ip_collection, insert_vec_list) - assert len(ids) == nb - assert status.OK() + @pytest.mark.level(2) + def test_insert_with_invalid_tag_name(self, connect, ip_collection, get_tag_name): + tag_name = get_tag_name + connect.create_partition(ip_collection, tag) + with pytest.raises(Exception): + connect.insert(ip_collection, entity, partition_tag=tag_name) - def test_insert_much_jaccard(self, connect, jac_collection, insert_count): - ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params - expected: length 
of ids is equal to the length of vectors - ''' - nb = insert_count - tmp, insert_vec_list = gen_binary_vectors(nb, dim) - status, ids = connect.insert(jac_collection, insert_vec_list) - assert len(ids) == nb - assert status.OK() + @pytest.mark.level(2) + def test_insert_with_invalid_field_name(self, connect, ip_collection, get_field_name): + field_name = get_field_name + tmp_entity = update_field_name(copy.deepcopy(entity), "int8", get_field_name) + with pytest.raises(Exception): + connect.insert(ip_collection, tmp_entity) - def test_insert_much_hamming(self, connect, ham_collection, insert_count): - ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params - expected: length of ids is equal to the length of vectors - ''' - nb = insert_count - tmp, insert_vec_list = gen_binary_vectors(nb, dim) - status, ids = connect.insert(ham_collection, insert_vec_list) - assert len(ids) == nb - assert status.OK() + @pytest.mark.level(2) + def test_insert_with_invalid_field_type(self, connect, ip_collection, get_field_type): + field_type = get_field_type + tmp_entity = update_field_type(copy.deepcopy(entity), DataType.FLOAT, field_type) + with pytest.raises(Exception): + connect.insert(ip_collection, tmp_entity) - def test_insert_much_tanimoto(self, connect, tanimoto_collection, insert_count): - ''' - target: test add vectors with different length of vectors - method: set different vectors as add method params - expected: length of ids is equal to the length of vectors - ''' - nb = insert_count - tmp, insert_vec_list = gen_binary_vectors(nb, dim) - status, ids = connect.insert(tanimoto_collection, insert_vec_list) - assert len(ids) == nb - assert status.OK() + @pytest.mark.level(2) + def test_insert_with_invalid_field_value(self, connect, ip_collection, get_field_int_value): + field_value = get_field_int_value + tmp_entity = update_field_type(copy.deepcopy(entity), 'int8', field_value) + with pytest.raises(Exception): + 
connect.insert(ip_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_vector_value(self, connect, ip_collection, get_field_vectors_value): + tmp_entity = copy.deepcopy(entity) + src_vector = tmp_entity[-1]["values"] + src_vector[0][1] = get_field_vectors_value + with pytest.raises(Exception): + connect.insert(ip_collection, tmp_entity) -class TestNameInvalid(object): +class TestInsertInvalidBinary(object): """ - Test adding vectors with invalid collection names + Test inserting vectors with invalid collection names """ @pytest.fixture( scope="function", - params=gen_invalid_collection_names() + params=gen_invalid_strings() ) def get_collection_name(self, request): yield request.param @pytest.fixture( scope="function", - params=gen_invalid_collection_names() + params=gen_invalid_strings() ) def get_tag_name(self, request): yield request.param - @pytest.mark.level(2) - def test_insert_with_invalid_collection_name(self, connect, get_collection_name): - collection_name = get_collection_name - vectors = gen_vectors(1, dim) - status, result = connect.insert(collection_name, vectors) - assert not status.OK() - - @pytest.mark.level(2) - def test_insert_with_invalid_tag_name(self, connect, get_collection_name, get_tag_name): - collection_name = get_collection_name - tag_name = get_tag_name - vectors = gen_vectors(1, dim) - status, result = connect.insert(collection_name, vectors, partition_tag=tag_name) - assert not status.OK() + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_name(self, request): + yield request.param + @pytest.fixture( + scope="function", + params=gen_invalid_strings() + ) + def get_field_int_value(self, request): + yield request.param -class TestAddCollectionVectorsInvalid(object): - single_vector = gen_single_vector(dim) - vectors = gen_vectors(2, dim) + @pytest.fixture( + scope="function", + params=gen_invalid_ints() + ) + def get_entity_id(self, request): + yield request.param 
- """ - Test adding vectors with invalid vectors - """ @pytest.fixture( scope="function", params=gen_invalid_vectors() ) - def gen_vector(self, request): + def get_field_vectors_value(self, request): yield request.param @pytest.mark.level(2) - def test_add_vector_with_invalid_vectors(self, connect, collection, gen_vector): - tmp_single_vector = copy.deepcopy(self.single_vector) - tmp_single_vector[0][1] = gen_vector - with pytest.raises(Exception) as e: - status, result = connect.insert(collection, tmp_single_vector) + def test_insert_ids_invalid(self, connect, jac_collection, get_entity_id): + ''' + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception + ''' + entity_id = get_entity_id + ids = [entity_id for _ in range(nb)] + with pytest.raises(Exception): + connect.insert(jac_collection, binary_entities, ids) @pytest.mark.level(2) - def test_insert_with_invalid_vectors(self, connect, collection, gen_vector): - tmp_vectors = copy.deepcopy(self.vectors) - tmp_vectors[1][1] = gen_vector - with pytest.raises(Exception) as e: - status, result = connect.insert(collection, tmp_vectors) + def test_insert_with_invalid_tag_name(self, connect, jac_collection, get_tag_name): + tag_name = get_tag_name + connect.create_partition(jac_collection, tag) + with pytest.raises(Exception): + connect.insert(jac_collection, binary_entity, partition_tag=tag_name) @pytest.mark.level(2) - def test_insert_with_invalid_vectors_jaccard(self, connect, jac_collection, gen_vector): - tmp_vectors = copy.deepcopy(self.vectors) - tmp_vectors[1][1] = gen_vector - with pytest.raises(Exception) as e: - status, result = connect.insert(jac_collection, tmp_vectors) + def test_insert_with_invalid_field_name(self, connect, jac_collection, get_field_name): + field_name = get_field_name + tmp_entity = update_field_name(copy.deepcopy(binary_entity), "int8", get_field_name) + with pytest.raises(Exception): + 
connect.insert(jac_collection, tmp_entity) @pytest.mark.level(2) - def test_insert_with_invalid_vectors_hamming(self, connect, ham_collection, gen_vector): - tmp_vectors = copy.deepcopy(self.vectors) - tmp_vectors[1][1] = gen_vector - with pytest.raises(Exception) as e: - status, result = connect.insert(ham_collection, tmp_vectors) + def test_insert_with_invalid_field_type(self, connect, jac_collection, get_field_type): + field_type = get_field_type + tmp_entity = update_field_type(copy.deepcopy(binary_entity), DataType.FLOAT, field_type) + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_value(self, connect, jac_collection, get_field_int_value): + field_value = get_field_int_value + tmp_entity = update_field_type(copy.deepcopy(binary_entity), 'int8', field_value) + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_vector_value(self, connect, jac_collection, get_field_vectors_value): + tmp_entity = copy.deepcopy(binary_entity) + src_vector = tmp_entity[-1]["values"] + src_vector[0][1] = get_field_vectors_value + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_ids_invalid(self, connect, jac_collection, get_entity_id): + ''' + target: test insert, with using customize ids, which are not int64 + method: create collection and insert entities in it + expected: raise an exception + ''' + entity_id = get_entity_id + ids = [entity_id for _ in range(nb)] + with pytest.raises(Exception): + connect.insert(jac_collection, binary_entities, ids) + + @pytest.mark.level(2) + def test_insert_with_invalid_tag_name(self, connect, jac_collection, get_tag_name): + tag_name = get_tag_name + connect.create_partition(jac_collection, tag) + with pytest.raises(Exception): + connect.insert(jac_collection, binary_entity, partition_tag=tag_name) + 
+ @pytest.mark.level(2) + def test_insert_with_invalid_field_name(self, connect, jac_collection, get_field_name): + field_name = get_field_name + tmp_entity = update_field_name(copy.deepcopy(binary_entity), "int8", get_field_name) + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_type(self, connect, jac_collection, get_field_type): + field_type = get_field_type + tmp_entity = update_field_type(copy.deepcopy(binary_entity), DataType.FLOAT, field_type) + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_value(self, connect, jac_collection, get_field_int_value): + field_value = get_field_int_value + tmp_entity = update_field_type(copy.deepcopy(binary_entity), 'int8', field_value) + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) + + @pytest.mark.level(2) + def test_insert_with_invalid_field_vector_value(self, connect, jac_collection, get_field_vectors_value): + tmp_entity = copy.deepcopy(binary_entity) + src_vector = tmp_entity[-1]["values"] + src_vector[0][1] = get_field_vectors_value + with pytest.raises(Exception): + connect.insert(jac_collection, tmp_entity) \ No newline at end of file diff --git a/tests/milvus_python_test/entity/test_search.py b/tests/milvus_python_test/entity/test_search.py index aa1ad881f163902132f22c54dbffddd0c9911d05..e47b49f5809bd99bfbd2e620380c25c4ce82e9bd 100644 --- a/tests/milvus_python_test/entity/test_search.py +++ b/tests/milvus_python_test/entity/test_search.py @@ -1,73 +1,83 @@ +import time import pdb -import struct -from random import sample - -import pytest +import copy import threading -import datetime import logging -from time import sleep -from multiprocessing import Process -import numpy -import sklearn.preprocessing +from multiprocessing import Pool, Process +import pytest from milvus import IndexType, MetricType from 
utils import * dim = 128 -collection_id = "test_search" -add_interval_time = 2 -vectors = gen_vectors(6000, dim) -vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') -vectors = vectors.tolist() +segment_size = 10 +collection_id = "test_insert" +ADD_TIMEOUT = 60 +tag = "1970-01-01" +insert_interval_time = 1.5 +nb = 6000 top_k = 1 nprobe = 1 epsilon = 0.001 -tag = "1970-01-01" -raw_vectors, binary_vectors = gen_binary_vectors(6000, dim) - +field_name = "float_vector" +default_index_name = "insert_index" +entity = gen_entities(1, is_normal=True) +binary_entity = gen_binary_entities(1) +entities = gen_entities(nb, is_normal=True) +raw_vectors, binary_entities = gen_binary_entities(nb) +default_single_query = { + "bool": { + "must": [ + {"vector": {field_name: {"topk": 10, "query": entity, "params": {"index_name": default_index_name, "nprobe": 10}}}} + ] + } +} + +query = { + "bool": { + "must": [ + {"term": {"A": {"values": [1, 2, 5]}}}, + {"range": {"B": {"ranges": {"GT": 1, "LT": 100}}}}, + {"vector": {"Vec": {"topk": 10, "query": vec[: 1], "params": {"index_name": Indextype.IVF_FLAT, "nprobe": 10}}}} + ], + }, +} class TestSearchBase: def init_data(self, connect, collection, nb=6000, partition_tags=None): ''' - Generate vectors and add it in collection, before search vectors + Generate entities and add it in collection ''' - global vectors + global entities if nb == 6000: - add_vectors = vectors + insert_entities = entities else: - add_vectors = gen_vectors(nb, dim) - add_vectors = sklearn.preprocessing.normalize(add_vectors, axis=1, norm='l2') - add_vectors = add_vectors.tolist() + insert_entities = gen_entities(nb, is_normal=True) if partition_tags is None: - status, ids = connect.insert(collection, add_vectors) - assert status.OK() + ids = connect.insert(collection, insert_entities) else: - status, ids = connect.insert(collection, add_vectors, partition_tag=partition_tags) - assert status.OK() + ids = connect.insert(collection, insert_entities, 
partition_tag=partition_tags) connect.flush([collection]) - return add_vectors, ids + return insert_entities, ids def init_binary_data(self, connect, collection, nb=6000, insert=True, partition_tags=None): ''' - Generate vectors and add it in collection, before search vectors + Generate entities and add it in collection ''' ids = [] - global binary_vectors + global binary_entities global raw_vectors if nb == 6000: - add_vectors = binary_vectors - add_raw_vectors = raw_vectors + insert_entities = binary_entities + insert_raw_vectors = raw_vectors else: - add_raw_vectors, add_vectors = gen_binary_vectors(nb, dim) + insert_raw_vectors, insert_entities = gen_binary_entities(nb) if insert is True: if partition_tags is None: - status, ids = connect.insert(collection, add_vectors) - assert status.OK() + ids = connect.insert(collection, add_vectors) else: - status, ids = connect.insert(collection, add_vectors, partition_tag=partition_tags) - assert status.OK() + ids = connect.insert(collection, add_vectors, partition_tag=partition_tags) connect.flush([collection]) - return add_raw_vectors, add_vectors, ids + return insert_raw_vectors, insert_entities, ids """ generate valid create_index params diff --git a/tests/milvus_python_test/utils.py b/tests/milvus_python_test/utils.py index 5f34af10dd739840b874b630c9f028b8eccb3b85..13c9e483bf74e6f048a46a513d88ffdcdcd2613e 100644 --- a/tests/milvus_python_test/utils.py +++ b/tests/milvus_python_test/utils.py @@ -29,6 +29,36 @@ all_index_types = [ ] +def jaccard(x, y): + x = np.asarray(x, np.bool) + y = np.asarray(y, np.bool) + return 1 - np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()) + + +def hamming(x, y): + x = np.asarray(x, np.bool) + y = np.asarray(y, np.bool) + return np.bitwise_xor(x, y).sum() + + +def tanimoto(x, y): + x = np.asarray(x, np.bool) + y = np.asarray(y, np.bool) + return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())) + + +def substructure(x, y): 
+ x = np.asarray(x, np.bool) + y = np.asarray(y, np.bool) + return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y) + + +def superstructure(x, y): + x = np.asarray(x, np.bool) + y = np.asarray(y, np.bool) + return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x) + + def get_milvus(host, port, uri=None, handler=None, **kwargs): if handler is None: handler = "GRPC" @@ -58,8 +88,16 @@ def gen_inaccuracy(num): return num / 255.0 -def gen_vectors(num, dim): - return [[random.random() for _ in range(dim)] for _ in range(num)] +def gen_vectors(num, dim, is_normal=False): + vectors = [[random.random() for _ in range(dim)] for _ in range(num)] + vectors = sklearn.preprocessing.normalize(vectors, axis=1, norm='l2') + return vectors.tolist() + + +def gen_vectors(nb, d, seed=np.random.RandomState(1234), is_normal=False): + xb = seed.rand(nb, d).astype("float32") + xb = klearn.preprocessing.normalize(xb, axis=1, norm='l2') + return xb.tolist() def gen_binary_vectors(num, dim): @@ -72,36 +110,6 @@ def gen_binary_vectors(num, dim): return raw_vectors, binary_vectors -def jaccard(x, y): - x = np.asarray(x, np.bool) - y = np.asarray(y, np.bool) - return 1 - np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()) - - -def hamming(x, y): - x = np.asarray(x, np.bool) - y = np.asarray(y, np.bool) - return np.bitwise_xor(x, y).sum() - - -def tanimoto(x, y): - x = np.asarray(x, np.bool) - y = np.asarray(y, np.bool) - return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())) - - -def substructure(x, y): - x = np.asarray(x, np.bool) - y = np.asarray(y, np.bool) - return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y) - - -def superstructure(x, y): - x = np.asarray(x, np.bool) - y = np.asarray(y, np.bool) - return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x) - - def gen_binary_sub_vectors(vectors, length): raw_vectors = [] binary_vectors = [] @@ -127,34 +135,21 @@ def 
gen_binary_super_vectors(vectors, length): raw_vectors.append(raw_vector) binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist())) return raw_vectors, binary_vectors - -def gen_single_vector(dim): - return [[random.random() for _ in range(dim)]] def gen_int_attr(row_num): return [random.randint(0, 255) for _ in range(row_num)] + def gen_float_attr(row_num): return [random.uniform(0, 255) for _ in range(row_num)] -def gen_vector(nb, d, seed=np.random.RandomState(1234)): - xb = seed.rand(nb, d).astype("float32") - return xb.tolist() - def gen_unique_str(str_value=None): prefix = "".join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) return "test_" + prefix if str_value is None else str_value + "_" + prefix -def gen_long_str(num): - string = '' - for _ in range(num): - char = random.choice('tomorrow') - string += char - - def gen_single_filter_fields(): fields = [] for data_type in [i.value for i in DataType]: @@ -168,7 +163,7 @@ def gen_single_vector_fields(): for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]: if metric_type in [MetricType.L2, MetricType.IP] and data_type == DataType.BINARY_VECTOR: continue - field = {"field": data_type.name, "type": data_type, "dimension": dimension, "extra_params": {"metric_type": metric_type}} + field = {"field": data_type.name, "type": data_type, "extra_params": {"metric_type": metric_type, "dimension": dimension}} fields.append(field) return fields @@ -179,13 +174,98 @@ def gen_default_fields(): {"field": "int8", "type": DataType.INT8}, {"field": "int64", "type": DataType.INT64}, {"field": "float", "type": DataType.FLOAT}, - {"field": "float_vector", "type": DataType.FLOAT_VECTOR, "dimension": dimension, "extra_params": {"metric_type": MetricType.L2} + {"field": "float_vector", "type": DataType.FLOAT_VECTOR, "extra_params": {"metric_type": MetricType.L2, "dimension": dimension}} ], "segment_size": segment_size } return default_fields +def gen_entities(nb, is_normal=False): + 
vectors = gen_vectors(nb, dimension, is_normal) + entities = [ + {"field": "int8", "type": DataType.INT8, "values": [1 for i in range(nb)]}, + {"field": "int64", "type": DataType.INT64, "values": [2 for i in range(nb)]}, + {"field": "float", "type": DataType.FLOAT, "values": [3.0 for i in range(nb)]}, + {"field": "float_vector", "type": DataType.FLOAT_VECTOR, "values": vectors} + ] + return entities + + +def gen_binary_entities(nb): + raw_vectors, vectors = gen_binary_vectors(nb, dimension) + entities = [ + {"field": "int8", "type": DataType.INT8, "values": [1 for i in range(nb)]}, + {"field": "int64", "type": DataType.INT64, "values": [2 for i in range(nb)]}, + {"field": "float", "type": DataType.FLOAT, "values": [3.0 for i in range(nb)]}, + {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "values": vectors} + ] + return raw_vectors, entities + + +def add_field(entities): + field = { + "field": gen_unique_str(), + "type": DataType.INT8, + "values": [1 for i in range(nb)] + } + entities.append(field) + return entities + + +def add_vector_field(entities, is_normal=False): + vectors = gen_vectors(nb, dimension, is_normal) + field = { + "field": gen_unique_str(), + "type": DataType.FLOAT_VECTOR, + "values": vectors + } + entities.append(field) + return entities + + +def remove_field(entities): + del entities[0] + return entities + + +def remove_vector_field(entities): + del entities[-1] + return entities + + +def update_field_name(entities, old_name, new_name): + for item in entities: + if item["field"] == old_name: + item["field"] = new_name + return entities + + +def update_field_type(entities, old_name, new_name): + for item in entities: + if item["type"] == old_name: + item["type"] = new_name + return entities + + +def update_field_value(entities, old_type, new_value): + for item in entities: + if item["type"] == old_type: + for i in item["values"]: + item["values"][i] = new_value + return entities + + +def add_float_vector_field(nb, dimension): + 
field_name = gen_unique_str() + field = { + "field": field_name, + "type": DataType.FLOAT_VECTOR, + "values": gen_vectors(nb, dimension) + } + return field_name + + def gen_segment_sizes(): sizes = [ 1, @@ -216,23 +296,6 @@ def gen_invalid_ips(): return ips -def gen_invalid_ports(): - ports = [ - # empty - " ", - -1, - # too big port - 100000, - # not correct port - 39540, - "BB。A", - " siede ", - "(mn)", - "中文" - ] - return ports - - def gen_invalid_uris(): ip = None uris = [ @@ -262,8 +325,11 @@ def gen_invalid_uris(): return uris -def gen_invalid_collection_names(): - collection_names = [ +def gen_invalid_strs(): + strings = [ + 1, + [1], + None, "12-s", " ", # "", @@ -278,65 +344,26 @@ def gen_invalid_collection_names(): "中文", "a".join("a" for i in range(256)) ] - return collection_names + return strings -def gen_invalid_ints(): - top_ks = [ - 0, - -1, - None, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", +def gen_invalid_field_types(): + field_types = [ + 1, "=c", - "中文", - "a".join("a" for i in range(256)) - ] - return top_ks - - -def gen_invalid_dims(): - dims = [ 0, - -1, - 100001, - 1000000000000001, None, - False, - [1,2,3], - (1,2), - {"a": 1}, - " ", "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文", "a".join("a" for i in range(256)) ] - return dims + return field_types -def gen_invalid_file_sizes(): - file_sizes = [ - 0, - -1, - 1000000000000001, +def gen_invalid_ints(): + top_ks = [ + 1.0, None, - False, + "stringg", [1,2,3], (1,2), {"a": 1}, @@ -352,33 +379,7 @@ def gen_invalid_file_sizes(): "中文", "a".join("a" for i in range(256)) ] - return file_sizes - - -def gen_invalid_index_types(): - invalid_types = [ - 0, - -1, - 100, - 1000000000000001, - # None, - False, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文", - "a".join("a" for i in range(256)) - ] - return invalid_types + 
return top_ks def gen_invalid_params(): @@ -403,52 +404,6 @@ def gen_invalid_params(): return params -def gen_invalid_nprobes(): - nprobes = [ - 0, - -1, - 1000000000000001, - None, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文" - ] - return nprobes - - -def gen_invalid_metric_types(): - metric_types = [ - 0, - -1, - 1000000000000001, - # None, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文" - ] - return metric_types - - def gen_invalid_vectors(): invalid_vectors = [ "1*2", @@ -476,70 +431,6 @@ def gen_invalid_vectors(): return invalid_vectors -def gen_invalid_vector_ids(): - invalid_vector_ids = [ - 1.0, - -1.0, - None, - # int 64 - 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, - " ", - "", - "String", - "BB。A", - " siede ", - "(mn)", - "=c", - "中文", - ] - return invalid_vector_ids - - -def gen_invalid_cache_config(): - invalid_configs = [ - 0, - -1, - 9223372036854775808, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文", - "'123'", - "さようなら" - ] - return invalid_configs - - -def gen_invalid_gpu_config(): - invalid_configs = [ - -1, - [1,2,3], - (1,2), - {"a": 1}, - " ", - "", - "String", - "12-s", - "BB。A", - " siede ", - "(mn)", - "pip+", - "=c", - "中文", - "'123'", - ] - return invalid_configs - - def gen_invaild_search_params(): invalid_search_key = 100 search_params = [] @@ -572,39 +463,39 @@ def gen_invaild_search_params(): def gen_invalid_index(): index_params = [] for index_type in gen_invalid_index_types(): - index_param = {"index_type": index_type, "index_param": {"nlist": 1024}} + index_param = {"index_type": index_type, "params": {"nlist": 1024}} index_params.append(index_param) for nlist in gen_invalid_params(): - index_param = {"index_type": IndexType.IVFLAT, 
"index_param": {"nlist": nlist}} + index_param = {"index_type": IndexType.IVFLAT, "params": {"nlist": nlist}} index_params.append(index_param) for M in gen_invalid_params(): - index_param = {"index_type": IndexType.HNSW, "index_param": {"M": M, "efConstruction": 100}} + index_param = {"index_type": IndexType.HNSW, "params": {"M": M, "efConstruction": 100}} index_params.append(index_param) for efConstruction in gen_invalid_params(): - index_param = {"index_type": IndexType.HNSW, "index_param": {"M": 16, "efConstruction": efConstruction}} + index_param = {"index_type": IndexType.HNSW, "params": {"M": 16, "efConstruction": efConstruction}} index_params.append(index_param) for search_length in gen_invalid_params(): index_param = {"index_type": IndexType.RNSG, - "index_param": {"search_length": search_length, "out_degree": 40, "candidate_pool_size": 50, + "params": {"search_length": search_length, "out_degree": 40, "candidate_pool_size": 50, "knng": 100}} index_params.append(index_param) for out_degree in gen_invalid_params(): index_param = {"index_type": IndexType.RNSG, - "index_param": {"search_length": 100, "out_degree": out_degree, "candidate_pool_size": 50, + "params": {"search_length": 100, "out_degree": out_degree, "candidate_pool_size": 50, "knng": 100}} index_params.append(index_param) for candidate_pool_size in gen_invalid_params(): - index_param = {"index_type": IndexType.RNSG, "index_param": {"search_length": 100, "out_degree": 40, + index_param = {"index_type": IndexType.RNSG, "params": {"search_length": 100, "out_degree": 40, "candidate_pool_size": candidate_pool_size, "knng": 100}} index_params.append(index_param) - index_params.append({"index_type": IndexType.IVF_FLAT, "index_param": {"invalid_key": 1024}}) - index_params.append({"index_type": IndexType.HNSW, "index_param": {"invalid_key": 16, "efConstruction": 100}}) + index_params.append({"index_type": IndexType.IVF_FLAT, "params": {"invalid_key": 1024}}) + index_params.append({"index_type": 
IndexType.HNSW, "params": {"invalid_key": 16, "efConstruction": 100}}) index_params.append({"index_type": IndexType.RNSG, - "index_param": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300, + "params": {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300, "knng": 100}}) for invalid_n_trees in gen_invalid_params(): - index_params.append({"index_type": IndexType.ANNOY, "index_param": {"n_trees": invalid_n_trees}}) + index_params.append({"index_type": IndexType.ANNOY, "params": {"n_trees": invalid_n_trees}}) return index_params @@ -663,7 +554,7 @@ def gen_simple_index(): ] index_params = [] for i in range(len(all_index_types)): - index_params.append({"index_type": all_index_types[i], "index_param": params[i]}) + index_params.append({"index_type": all_index_types[i], "params": params[i]}) return index_params