Unverified commit 840d9cfc authored by del-zhenwu, committed by GitHub

[skip ci] remove ip_collection (#3122)

* [skip ci] remove ip_collection
Signed-off-by: zw <zw@milvus.io>

* [skip ci] move index params
Signed-off-by: zw <zw@milvus.io>

* [skip ci] move metric_type outside
Signed-off-by: zw <zw@milvus.io>
Co-authored-by: zw <zw@milvus.io>
Co-authored-by: Wang Xiangyu <xy.wang@zilliz.com>
Parent 9aa364ce
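The net effect of the diff below: the metric-specific fixtures (ip_collection, jac_collection, ham_collection, tanimoto_collection, substructure_collection, superstructure_collection) are dropped in favor of the generic collection and binary_collection fixtures, with the metric injected into the index params or the search query instead. A minimal sketch of the resulting pattern, assuming the suite's connect fixture and its module-level field_name and entities (the test name here is illustrative):

def test_create_index_ip_sketch(connect, collection, get_simple_index):
    # the metric now rides on the index params instead of a dedicated IP fixture
    get_simple_index["metric_type"] = "IP"
    ids = connect.insert(collection, entities)
    connect.create_index(collection, field_name, get_simple_index)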
@@ -106,20 +106,6 @@ class TestStatsBase:
assert stats["partitions"][0]["tag"] == "_default"
assert stats["partitions"][0]["row_count"] == nb
def test_get_collection_stats_batch_ip(self, connect, ip_collection):
'''
target: get row count with collection_stats
method: add entities, check count in collection info
expected: count as expected
'''
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
stats = connect.get_collection_stats(ip_collection)
assert stats["row_count"] == nb
assert len(stats["partitions"]) == 1
assert stats["partitions"][0]["tag"] == "_default"
assert stats["partitions"][0]["row_count"] == nb
def test_get_collection_stats_single(self, connect, collection):
'''
target: get row count with collection_stats
@@ -247,31 +233,32 @@ class TestStatsBase:
assert stats["partitions"][0]["segments"][0]["row_count"] == nb
assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]
def test_get_collection_stats_after_index_created_ip(self, connect, ip_collection, get_simple_index):
def test_get_collection_stats_after_index_created_ip(self, connect, collection, get_simple_index):
'''
target: test collection info after index created
method: create collection, add vectors, create index and call collection_stats
expected: status ok, index created and shown in segments
'''
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
get_simple_index["metric_type"] = "IP"
ids = connect.insert(collection, entities)
connect.flush([collection])
get_simple_index.update({"metric_type": "IP"})
connect.create_index(ip_collection, field_name, get_simple_index)
stats = connect.get_collection_stats(ip_collection)
connect.create_index(collection, field_name, get_simple_index)
stats = connect.get_collection_stats(collection)
logging.getLogger().info(stats)
assert stats["partitions"][0]["segments"][0]["row_count"] == nb
assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]
def test_get_collection_stats_after_index_created_jac(self, connect, jac_collection, get_jaccard_index):
def test_get_collection_stats_after_index_created_jac(self, connect, binary_collection, get_jaccard_index):
'''
target: test collection info after index created
method: create collection, add binary entities, create index and call collection_stats
expected: status ok, index created and shown in segments
'''
ids = connect.insert(jac_collection, binary_entities)
connect.flush([jac_collection])
connect.create_index(jac_collection, "binary_vector", get_jaccard_index)
stats = connect.get_collection_stats(jac_collection)
ids = connect.insert(binary_collection, binary_entities)
connect.flush([binary_collection])
connect.create_index(binary_collection, "binary_vector", get_jaccard_index)
stats = connect.get_collection_stats(binary_collection)
logging.getLogger().info(stats)
assert stats["partitions"][0]["segments"][0]["row_count"] == nb
assert stats["partitions"][0]["segments"][0]["index_name"] == get_jaccard_index["index_type"]
......
@@ -114,31 +114,11 @@ def collection(request, connect):
@pytest.fixture(scope="function")
def ip_collection(request, connect):
def binary_collection(request, connect):
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1]["params"]["metric_type"] = "IP"
try:
connect.create_collection(collection_name, fields)
except Exception as e:
logging.getLogger().info(str(e))
pytest.exit(str(e))
def teardown():
collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
@pytest.fixture(scope="function")
def jac_collection(request, connect):
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension, "metric_type": "JACCARD"}}
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension}}
logging.getLogger().info(fields)
try:
connect.create_collection(collection_name, fields)
@@ -150,81 +130,4 @@ def jac_collection(request, connect):
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
@pytest.fixture(scope="function")
def ham_collection(request, connect):
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension, "metric_type": "HAMMING"}}
try:
connect.create_collection(collection_name, fields)
except Exception as e:
pytest.exit(str(e))
def teardown():
collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
@pytest.fixture(scope="function")
def tanimoto_collection(request, connect):
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension, "metric_type": "TANIMOTO"}}
try:
connect.create_collection(collection_name, fields)
except Exception as e:
pytest.exit(str(e))
def teardown():
collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
@pytest.fixture(scope="function")
def substructure_collection(request, connect):
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension, "metric_type": "SUBSTRUCTURE"}}
try:
connect.create_collection(collection_name, fields)
except Exception as e:
pytest.exit(str(e))
def teardown():
collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
@pytest.fixture(scope="function")
def superstructure_collection(request, connect):
dim = getattr(request.module, "dim", "128")
ori_collection_name = getattr(request.module, "collection_id", "test")
collection_name = gen_unique_str(ori_collection_name)
fields = gen_default_fields()
fields["fields"][-1] = {"field": "binary_vector", "type": DataType.BINARY_VECTOR, "params": {"dim": dimension, "metric_type": MetricType.SUPERSTRUCTURE}}
try:
connect.create_collection(collection_name, fields)
except Exception as e:
pytest.exit(str(e))
def teardown():
collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name, timeout=delete_timeout)
request.addfinalizer(teardown)
assert connect.has_collection(collection_name)
return collection_name
return collection_name
\ No newline at end of file
@@ -181,36 +181,19 @@ class TestDeleteBase:
# TODO
@pytest.mark.level(2)
def test_flush_after_delete_ip(self, connect, ip_collection):
def test_flush_after_delete_binary(self, connect, binary_collection):
'''
target: test delete entity
method: add entities and delete, then flush
expected: entity deleted and no error raised
'''
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
ids = connect.insert(binary_collection, binary_entities)
connect.flush([binary_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(ip_collection, delete_ids)
status = connect.delete_entity_by_id(binary_collection, delete_ids)
assert status
connect.flush([ip_collection])
res_count = connect.count_entities(ip_collection)
assert res_count == nb - len(delete_ids)
# TODO
@pytest.mark.level(2)
def test_flush_after_delete_jac(self, connect, jac_collection):
'''
target: test delete entity
method: add entities and delete, then flush
expected: entity deleted and no error raised
'''
ids = connect.insert(jac_collection, binary_entities)
connect.flush([jac_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(jac_collection, delete_ids)
assert status
connect.flush([jac_collection])
res_count = connect.count_entities(jac_collection)
connect.flush([binary_collection])
res_count = connect.count_entities(binary_collection)
assert res_count == nb - len(delete_ids)
# TODO
@@ -234,40 +217,21 @@ class TestDeleteBase:
# TODO
@pytest.mark.level(2)
def test_insert_same_ids_after_delete_ip(self, connect, ip_collection):
'''
method: add entities and delete
expected: status DELETED
'''
insert_ids = [i for i in range(nb)]
ids = connect.insert(ip_collection, entities, insert_ids)
connect.flush([ip_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(ip_collection, delete_ids)
assert status
new_ids = connect.insert(ip_collection, entity, [ids[0]])
assert new_ids == [ids[0]]
connect.flush([ip_collection])
res_count = connect.count_entities(ip_collection)
assert res_count == nb - 1
# TODO
@pytest.mark.level(2)
def test_insert_same_ids_after_delete_jac(self, connect, jac_collection):
def test_insert_same_ids_after_delete_binary(self, connect, binary_collection):
'''
method: add entities, with the same id and delete the ids
expected: status DELETED, all id deleted
'''
insert_ids = [i for i in range(nb)]
ids = connect.insert(jac_collection, binary_entities, insert_ids)
connect.flush([jac_collection])
ids = connect.insert(binary_collection, binary_entities, insert_ids)
connect.flush([binary_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(jac_collection, delete_ids)
status = connect.delete_entity_by_id(binary_collection, delete_ids)
assert status
new_ids = connect.insert(jac_collection, binary_entity, [ids[0]])
new_ids = connect.insert(binary_collection, binary_entity, [ids[0]])
assert new_ids == [ids[0]]
connect.flush([jac_collection])
res_count = connect.count_entities(jac_collection)
connect.flush([binary_collection])
res_count = connect.count_entities(binary_collection)
assert res_count == nb - 1
# TODO:
......
@@ -161,30 +161,16 @@ class TestGetBase:
******************************************************************
"""
def test_get_entity_parts_ids_ip(self, connect, ip_collection):
'''
target: test.get_entity_by_id, some ids in ip_collection, some ids not
method: add entity, and get
expected: entity returned equals insert
'''
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
get_ids = [ids[0], 1, ids[-1]]
res = connect.get_entity_by_id(ip_collection, get_ids)
assert_equal_vector(res[0].get(default_float_vec_field_name), entities[-1]["values"][0])
assert_equal_vector(res[-1].get(default_float_vec_field_name), entities[-1]["values"][-1])
assert res[1] is None
def test_get_entity_parts_ids_jac(self, connect, jac_collection):
def test_get_entity_parts_ids_binary(self, connect, binary_collection):
'''
target: test get_entity_by_id, some ids in binary_collection, some ids not
method: add entity, and get
expected: entity returned equals insert
'''
ids = connect.insert(jac_collection, binary_entities)
connect.flush([jac_collection])
ids = connect.insert(binary_collection, binary_entities)
connect.flush([binary_collection])
get_ids = [ids[0], 1, ids[-1]]
res = connect.get_entity_by_id(jac_collection, get_ids)
res = connect.get_entity_by_id(binary_collection, get_ids)
assert_equal_vector(res[0].get("binary_vector"), binary_entities[-1]["values"][0])
assert_equal_vector(res[-1].get("binary_vector"), binary_entities[-1]["values"][-1])
assert res[1] is None
......
@@ -219,9 +219,9 @@ class TestInsertBase:
res_count = connect.count_entities(collection_name)
assert res_count == nb
# TODO: assert exception
# TODO: assert exception && enable
@pytest.mark.timeout(ADD_TIMEOUT)
def test_insert_twice_ids_no_ids(self, connect, collection):
def _test_insert_twice_ids_no_ids(self, connect, collection):
'''
target: check the result of insert, with params ids and no ids
method: test insert vectors twice, use customize ids first, and then use no ids
@@ -232,8 +232,9 @@ class TestInsertBase:
with pytest.raises(Exception) as e:
res_ids_new = connect.insert(collection, entities)
# TODO: assert exception && enable
@pytest.mark.timeout(ADD_TIMEOUT)
def test_insert_twice_not_ids_ids(self, connect, collection):
def _test_insert_twice_not_ids_ids(self, connect, collection):
'''
target: check the result of insert, with params ids and no ids
method: test insert vectors twice, use not ids first, and then use customize ids
@@ -871,112 +872,6 @@ class TestInsertInvalid(object):
connect.insert(collection, tmp_entity)
class TestInsertInvalidIP(object):
"""
Test inserting vectors with invalid collection names
"""
@pytest.fixture(
scope="function",
params=gen_invalid_strs()
)
def get_collection_name(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_strs()
)
def get_tag_name(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_strs()
)
def get_field_name(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_strs()
)
def get_field_type(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_strs()
)
def get_field_int_value(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_ints()
)
def get_entity_id(self, request):
yield request.param
@pytest.fixture(
scope="function",
params=gen_invalid_vectors()
)
def get_field_vectors_value(self, request):
yield request.param
@pytest.mark.level(2)
def test_insert_ids_invalid(self, connect, ip_collection, get_entity_id):
'''
target: test insert, with using customize ids, which are not int64
method: create collection and insert entities in it
expected: raise an exception
'''
entity_id = get_entity_id
ids = [entity_id for _ in range(nb)]
with pytest.raises(Exception):
connect.insert(ip_collection, entities, ids)
@pytest.mark.level(2)
def test_insert_with_invalid_tag_name(self, connect, ip_collection, get_tag_name):
tag_name = get_tag_name
connect.create_partition(ip_collection, tag)
if tag_name is not None:
with pytest.raises(Exception):
connect.insert(ip_collection, entity, partition_tag=tag_name)
else:
connect.insert(ip_collection, entity, partition_tag=tag_name)
@pytest.mark.level(2)
def test_insert_with_invalid_field_name(self, connect, ip_collection, get_field_name):
field_name = get_field_name
tmp_entity = update_field_name(copy.deepcopy(entity), "int64", get_field_name)
with pytest.raises(Exception):
connect.insert(ip_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_type(self, connect, ip_collection, get_field_type):
field_type = get_field_type
tmp_entity = update_field_type(copy.deepcopy(entity), 'float', field_type)
with pytest.raises(Exception):
connect.insert(ip_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_value(self, connect, ip_collection, get_field_int_value):
field_value = get_field_int_value
tmp_entity = update_field_type(copy.deepcopy(entity), 'int64', field_value)
with pytest.raises(Exception):
connect.insert(ip_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_vector_value(self, connect, ip_collection, get_field_vectors_value):
tmp_entity = copy.deepcopy(entity)
src_vector = tmp_entity[-1]["values"]
src_vector[0][1] = get_field_vectors_value
with pytest.raises(Exception):
connect.insert(ip_collection, tmp_entity)
class TestInsertInvalidBinary(object):
"""
Test inserting vectors with invalid collection names
@@ -1032,7 +927,7 @@ class TestInsertInvalidBinary(object):
yield request.param
@pytest.mark.level(2)
def test_insert_ids_invalid(self, connect, jac_collection, get_entity_id):
def test_insert_ids_invalid(self, connect, binary_collection, get_entity_id):
'''
target: test insert, with using customize ids, which are not int64
method: create collection and insert entities in it
@@ -1041,42 +936,42 @@ class TestInsertInvalidBinary(object):
entity_id = get_entity_id
ids = [entity_id for _ in range(nb)]
with pytest.raises(Exception):
connect.insert(jac_collection, binary_entities, ids)
connect.insert(binary_collection, binary_entities, ids)
@pytest.mark.level(2)
def test_insert_with_invalid_tag_name(self, connect, jac_collection, get_tag_name):
def test_insert_with_invalid_tag_name(self, connect, binary_collection, get_tag_name):
tag_name = get_tag_name
connect.create_partition(jac_collection, tag)
connect.create_partition(binary_collection, tag)
if tag_name is not None:
with pytest.raises(Exception):
connect.insert(jac_collection, binary_entity, partition_tag=tag_name)
connect.insert(binary_collection, binary_entity, partition_tag=tag_name)
else:
connect.insert(jac_collection, binary_entity, partition_tag=tag_name)
connect.insert(binary_collection, binary_entity, partition_tag=tag_name)
@pytest.mark.level(2)
def test_insert_with_invalid_field_name(self, connect, jac_collection, get_field_name):
def test_insert_with_invalid_field_name(self, connect, binary_collection, get_field_name):
field_name = get_field_name
tmp_entity = update_field_name(copy.deepcopy(binary_entity), "int64", get_field_name)
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_value(self, connect, jac_collection, get_field_int_value):
def test_insert_with_invalid_field_value(self, connect, binary_collection, get_field_int_value):
field_value = get_field_int_value
tmp_entity = update_field_type(copy.deepcopy(binary_entity), 'int64', field_value)
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_vector_value(self, connect, jac_collection, get_field_vectors_value):
def test_insert_with_invalid_field_vector_value(self, connect, binary_collection, get_field_vectors_value):
tmp_entity = copy.deepcopy(binary_entity)
src_vector = tmp_entity[-1]["values"]
src_vector[0][1] = get_field_vectors_value
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_ids_invalid(self, connect, jac_collection, get_entity_id):
def test_insert_ids_invalid(self, connect, binary_collection, get_entity_id):
'''
target: test insert, with using customize ids, which are not int64
method: create collection and insert entities in it
@@ -1085,33 +980,33 @@ class TestInsertInvalidBinary(object):
entity_id = get_entity_id
ids = [entity_id for _ in range(nb)]
with pytest.raises(Exception):
connect.insert(jac_collection, binary_entities, ids)
connect.insert(binary_collection, binary_entities, ids)
@pytest.mark.level(2)
def test_insert_with_invalid_field_name(self, connect, jac_collection, get_field_name):
def test_insert_with_invalid_field_name(self, connect, binary_collection, get_field_name):
field_name = get_field_name
tmp_entity = update_field_name(copy.deepcopy(binary_entity), "int64", get_field_name)
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_type(self, connect, jac_collection, get_field_type):
def test_insert_with_invalid_field_type(self, connect, binary_collection, get_field_type):
field_type = get_field_type
tmp_entity = update_field_type(copy.deepcopy(binary_entity), 'int64', field_type)
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_value(self, connect, jac_collection, get_field_int_value):
def test_insert_with_invalid_field_value(self, connect, binary_collection, get_field_int_value):
field_value = get_field_int_value
tmp_entity = update_field_type(copy.deepcopy(binary_entity), 'int64', field_value)
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@pytest.mark.level(2)
def test_insert_with_invalid_field_vector_value(self, connect, jac_collection, get_field_vectors_value):
def test_insert_with_invalid_field_vector_value(self, connect, binary_collection, get_field_vectors_value):
tmp_entity = copy.deepcopy(binary_entities)
src_vector = tmp_entity[-1]["values"]
src_vector[1] = get_field_vectors_value
with pytest.raises(Exception):
connect.insert(jac_collection, tmp_entity)
connect.insert(binary_collection, tmp_entity)
@@ -196,116 +196,27 @@ class TestListIdInSegmentBase:
assert len(vector_ids) == 1
assert vector_ids[0] == ids[1]
class TestListIdInSegmentIP:
"""
******************************************************************
The following cases are used to test `list_id_in_segment` function
******************************************************************
"""
@pytest.mark.level(2)
def test_list_id_in_segment_without_index_A(self, connect, ip_collection):
'''
target: get vector ids when there is no index
method: call list_id_in_segment and check if the segment contains vectors
expected: status ok
'''
nb = 10
entities = gen_entities(nb)
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
stats = connect.get_collection_stats(ip_collection)
vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][0]["segments"][0]["id"])
# vector_ids should match ids
assert len(vector_ids) == nb
for i in range(nb):
assert vector_ids[i] == ids[i]
@pytest.mark.level(2)
def test_list_id_in_segment_without_index_B(self, connect, ip_collection):
'''
target: get vector ids when there is no index but with partition
method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
expected: status ok
'''
connect.create_partition(ip_collection, tag)
nb = 10
entities = gen_entities(nb)
ids = connect.insert(ip_collection, entities, partition_tag=tag)
connect.flush([ip_collection])
stats = connect.get_collection_stats(ip_collection)
assert stats["partitions"][1]["tag"] == tag
vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][1]["segments"][0]["id"])
# vector_ids should match ids
assert len(vector_ids) == nb
for i in range(nb):
assert vector_ids[i] == ids[i]
@pytest.fixture(
scope="function",
params=gen_simple_index()
)
def get_simple_index(self, request, connect):
if str(connect._cmd("mode")) == "CPU":
if request.param["index_type"] in index_cpu_not_support():
pytest.skip("CPU not support index_type: ivf_sq8h")
return request.param
@pytest.mark.level(2)
def test_list_id_in_segment_with_index_A(self, connect, ip_collection, get_simple_index):
def test_list_id_in_segment_with_index_ip(self, connect, collection, get_simple_index):
'''
target: get vector ids when there is index
method: call list_id_in_segment and check if the segment contains vectors
expected: status ok
'''
get_simple_index["metric_type"] = "IP"
ids, seg_id = get_segment_id(connect, ip_collection, nb=nb, index_params=get_simple_index)
vector_ids = connect.list_id_in_segment(ip_collection, seg_id)
ids, seg_id = get_segment_id(connect, collection, nb=nb, index_params=get_simple_index)
vector_ids = connect.list_id_in_segment(collection, seg_id)
# TODO:
@pytest.mark.level(2)
def test_list_id_in_segment_with_index_B(self, connect, ip_collection, get_simple_index):
'''
target: get vector ids when there is index and with partition
method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
expected: status ok
'''
connect.create_partition(ip_collection, tag)
ids = connect.insert(ip_collection, entities, partition_tag=tag)
connect.flush([ip_collection])
stats = connect.get_collection_stats(ip_collection)
assert stats["partitions"][1]["tag"] == tag
vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][1]["segments"][0]["id"])
# vector_ids should match ids
# TODO
@pytest.mark.level(2)
def test_list_id_in_segment_after_delete_vectors(self, connect, ip_collection, get_simple_index):
'''
target: get vector ids after vectors are deleted
method: add vectors and delete a few, call list_id_in_segment
expected: status ok, vector_ids decreased after vectors deleted
'''
nb = 2
get_simple_index["metric_type"] = "IP"
ids, seg_id = get_segment_id(connect, ip_collection, nb=nb)
delete_ids = [ids[0]]
status = connect.delete_entity_by_id(ip_collection, delete_ids)
connect.flush([ip_collection])
stats = connect.get_collection_stats(ip_collection)
vector_ids = connect.list_id_in_segment(ip_collection, stats["partitions"][0]["segments"][0]["id"])
assert len(vector_ids) == 1
assert vector_ids[0] == ids[1]
class TestListIdInSegmentJAC:
class TestListIdInSegmentBinary:
"""
******************************************************************
The following cases are used to test `list_id_in_segment` function
******************************************************************
"""
@pytest.mark.level(2)
def test_list_id_in_segment_without_index_A(self, connect, jac_collection):
def test_list_id_in_segment_without_index_A(self, connect, binary_collection):
'''
target: get vector ids when there is no index
method: call list_id_in_segment and check if the segment contains vectors
......@@ -313,29 +224,29 @@ class TestListIdInSegmentJAC:
'''
nb = 10
vectors, entities = gen_binary_entities(nb)
ids = connect.insert(jac_collection, entities)
connect.flush([jac_collection])
stats = connect.get_collection_stats(jac_collection)
vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["id"])
ids = connect.insert(binary_collection, entities)
connect.flush([binary_collection])
stats = connect.get_collection_stats(binary_collection)
vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][0]["segments"][0]["id"])
# vector_ids should match ids
assert len(vector_ids) == nb
for i in range(nb):
assert vector_ids[i] == ids[i]
@pytest.mark.level(2)
def test_list_id_in_segment_without_index_B(self, connect, jac_collection):
def test_list_id_in_segment_without_index_B(self, connect, binary_collection):
'''
target: get vector ids when there is no index but with partition
method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
expected: status ok
'''
connect.create_partition(jac_collection, tag)
connect.create_partition(binary_collection, tag)
nb = 10
vectors, entities = gen_binary_entities(nb)
ids = connect.insert(jac_collection, entities, partition_tag=tag)
connect.flush([jac_collection])
stats = connect.get_collection_stats(jac_collection)
vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][1]["segments"][0]["id"])
ids = connect.insert(binary_collection, entities, partition_tag=tag)
connect.flush([binary_collection])
stats = connect.get_collection_stats(binary_collection)
vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][1]["segments"][0]["id"])
# vector_ids should match ids
assert len(vector_ids) == nb
for i in range(nb):
@@ -348,49 +259,48 @@ class TestListIdInSegmentJAC:
def get_jaccard_index(self, request, connect):
logging.getLogger().info(request.param)
if request.param["index_type"] in binary_support():
request.param["metric_type"] = "JACCARD"
return request.param
else:
pytest.skip("not support")
def test_list_id_in_segment_with_index_A(self, connect, jac_collection, get_jaccard_index):
def test_list_id_in_segment_with_index_A(self, connect, binary_collection, get_jaccard_index):
'''
target: get vector ids when there is index
method: call list_id_in_segment and check if the segment contains vectors
expected: status ok
'''
get_jaccard_index["metric_type"] = "JACCARD"
ids, seg_id = get_segment_id(connect, jac_collection, nb=nb, index_params=get_jaccard_index, vec_type='binary')
vector_ids = connect.list_id_in_segment(jac_collection, seg_id)
vector_ids = connect.list_id_in_segment(binary_collection, seg_id)
# TODO:
def test_list_id_in_segment_with_index_B(self, connect, jac_collection, get_jaccard_index):
def test_list_id_in_segment_with_index_B(self, connect, binary_collection, get_jaccard_index):
'''
target: get vector ids when there is index and with partition
method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
expected: status ok
'''
connect.create_partition(jac_collection, tag)
ids = connect.insert(jac_collection, entities, partition_tag=tag)
connect.flush([jac_collection])
stats = connect.get_collection_stats(jac_collection)
connect.create_partition(binary_collection, tag)
ids = connect.insert(binary_collection, entities, partition_tag=tag)
connect.flush([binary_collection])
stats = connect.get_collection_stats(binary_collection)
assert stats["partitions"][1]["tag"] == tag
vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][1]["segments"][0]["id"])
vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][1]["segments"][0]["id"])
# vector_ids should match ids
# TODO
def test_list_id_in_segment_after_delete_vectors(self, connect, jac_collection, get_jaccard_index):
def test_list_id_in_segment_after_delete_vectors(self, connect, binary_collection, get_jaccard_index):
'''
target: get vector ids after vectors are deleted
method: add vectors and delete a few, call list_id_in_segment
expected: status ok, vector_ids decreased after vectors deleted
'''
nb = 2
get_jaccard_index["metric_type"] = "JACCARD"
ids, seg_id = get_segment_id(connect, jac_collection, nb=nb, vec_type='binary', index_params=get_jaccard_index)
ids, seg_id = get_segment_id(connect, binary_collection, nb=nb, vec_type='binary', index_params=get_jaccard_index)
delete_ids = [ids[0]]
status = connect.delete_entity_by_id(jac_collection, delete_ids)
connect.flush([jac_collection])
stats = connect.get_collection_stats(jac_collection)
vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["id"])
status = connect.delete_entity_by_id(binary_collection, delete_ids)
connect.flush([binary_collection])
stats = connect.get_collection_stats(binary_collection)
vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][0]["segments"][0]["id"])
assert len(vector_ids) == 1
assert vector_ids[0] == ids[1]
@@ -29,6 +29,11 @@ default_single_query = {
}
}
def ip_query():
query = copy.deepcopy(default_single_query)
query["bool"]["must"][0]["vector"][field_name].update({"metric_type": "IP"})
return query
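This helper centralizes the query-side metric override: it deep-copies default_single_query and merges {"metric_type": "IP"} into the vector clause, so the metric travels with the query rather than with a dedicated IP collection. A minimal usage sketch, mirroring test_search_after_compact_ip further down (the query vectors here are placeholders drawn from the inserted entities):

query = ip_query()
query["bool"]["must"][0]["vector"][field_name]["query"] = entities[-1]["values"][:1]
res = connect.search(collection, query)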
class TestCompactBase:
"""
@@ -684,253 +689,23 @@ class TestCompactJAC:
res = connect.search(jac_collection, query)
assert abs(res[0]._distances[0]-distance) <= epsilon
class TestCompactIP:
"""
******************************************************************
The following cases are used to test `compact` function
******************************************************************
"""
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_add_entity_and_compact(self, connect, ip_collection):
'''
target: test add entity and compact
method: add entity and compact collection
expected: status ok, entity added
'''
# vector = gen_single_vector(dim)
ids = connect.insert(ip_collection, entity)
assert len(ids) == 1
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
size_before = info["partitions"][0]["segments"][0]["data_size"]
status = connect.compact(ip_collection)
assert status.OK()
connect.flush([ip_collection])
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
size_after = info["partitions"][0]["segments"][0]["data_size"]
assert(size_before == size_after)
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_insert_and_compact(self, connect, ip_collection):
'''
target: test add entities and compact
method: add entities and compact collection
expected: status ok, entities added
'''
ids = connect.insert(ip_collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
size_before = info["partitions"][0]["segments"][0]["data_size"]
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
size_after = info["partitions"][0]["segments"][0]["data_size"]
assert(size_before == size_after)
@pytest.mark.timeout(COMPACT_TIMEOUT)
@pytest.mark.skip(reason="delete not support yet")
def test_insert_delete_part_and_compact(self, connect, ip_collection):
'''
target: test add entities, delete part of them and compact
method: add entities, delete a few and compact collection
expected: status ok, data size is smaller after compact
'''
ids = connect.insert(ip_collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(ip_collection, delete_ids)
assert status.OK()
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
logging.getLogger().info(info["partitions"])
size_before = info["partitions"][0]["segments"][0]["data_size"]
logging.getLogger().info(size_before)
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
logging.getLogger().info(info["partitions"])
size_after = info["partitions"][0]["segments"][0]["data_size"]
logging.getLogger().info(size_after)
assert(size_before >= size_after)
@pytest.mark.timeout(COMPACT_TIMEOUT)
@pytest.mark.skip(reason="delete not support yet")
def test_insert_delete_all_and_compact(self, connect, ip_collection):
'''
target: test add entities, delete them and compact
method: add entities, delete all and compact collection
expected: status ok, no data size in collection info because collection is empty
'''
ids = connect.insert(ip_collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
status = connect.delete_entity_by_id(ip_collection, ids)
assert status.OK()
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
logging.getLogger().info(info["partitions"])
assert not info["partitions"][0]["segments"]
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_add_entity_and_compact_twice(self, connect, ip_collection):
'''
target: test add vector and compact twice
method: add vector and compact collection twice
expected: status ok
'''
ids = connect.insert(ip_collection, entity)
assert len(ids) == 1
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
size_before = info["partitions"][0]["segments"][0]["data_size"]
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
size_after = info["partitions"][0]["segments"][0]["data_size"]
assert(size_before == size_after)
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact twice
info = connect.get_collection_stats(ip_collection)
size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
assert(size_after == size_after_twice)
@pytest.mark.timeout(COMPACT_TIMEOUT)
@pytest.mark.skip(reason="delete not support yet")
def test_insert_delete_part_and_compact_twice(self, connect, ip_collection):
'''
target: test add entities, delete part of them and compact twice
method: add entities, delete part and compact collection twice
expected: status ok, data size smaller after first compact, no change after second
'''
ids = connect.insert(ip_collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
delete_ids = [ids[0], ids[-1]]
status = connect.delete_entity_by_id(ip_collection, delete_ids)
assert status.OK()
connect.flush([ip_collection])
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
size_before = info["partitions"][0]["segments"][0]["data_size"]
status = connect.compact(ip_collection)
assert status.OK()
connect.flush([ip_collection])
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
size_after = info["partitions"][0]["segments"][0]["data_size"]
assert(size_before >= size_after)
status = connect.compact(ip_collection)
assert status.OK()
connect.flush([ip_collection])
# get collection info after compact twice
info = connect.get_collection_stats(ip_collection)
size_after_twice = info["partitions"][0]["segments"][0]["data_size"]
assert(size_after == size_after_twice)
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_compact_multi_collections(self, connect):
'''
target: test compact works or not with multiple collections
method: create 50 collections, add entities into them and compact in turn
expected: status ok
'''
nq = 100
num_collections = 50
entities = gen_entities(nq)
collection_list = []
for i in range(num_collections):
collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
collection_list.append(collection_name)
# param = {'collection_name': collection_name,
# 'dimension': dim,
# 'index_file_size': index_file_size,
# 'metric_type': MetricType.IP}
connect.create_collection(collection_name, default_fields)
time.sleep(6)
for i in range(num_collections):
ids = connect.insert(collection_list[i], entities)
assert len(ids) == nq
status = connect.compact(collection_list[i])
assert status.OK()
@pytest.mark.level(2)
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_add_entity_after_compact(self, connect, ip_collection):
'''
target: test add entity after compact
method: after compact operation, add entity
expected: status ok, entity added
'''
ids = connect.insert(ip_collection, entities)
status = connect.flush([ip_collection])
assert len(ids) == nb
# get collection info before compact
info = connect.get_collection_stats(ip_collection)
size_before = info["partitions"][0]["segments"][0]["data_size"]
status = connect.compact(ip_collection)
assert status.OK()
# get collection info after compact
info = connect.get_collection_stats(ip_collection)
size_after = info["partitions"][0]["segments"][0]["data_size"]
assert(size_before == size_after)
# vector = gen_single_vector(dim)
ids = connect.insert(ip_collection, entity)
connect.flush([ip_collection])
res = connect.count_entities(ip_collection)
assert res == nb + 1
@pytest.mark.timeout(COMPACT_TIMEOUT)
@pytest.mark.skip(reason="delete not support yet")
def test_delete_entities_after_compact(self, connect, ip_collection):
'''
target: test delete entities after compact
method: after compact operation, delete entities
expected: status ok, entities deleted
'''
ids = connect.insert(ip_collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
status = connect.compact(ip_collection)
assert status.OK()
status = connect.delete_entity_by_id(ip_collection, ids)
assert status.OK()
connect.flush([ip_collection])
assert connect.count_entities(ip_collection) == 0
# TODO:
@pytest.mark.skip(reason="search not support yet")
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_search_after_compact(self, connect, ip_collection):
def test_search_after_compact_ip(self, connect, collection):
'''
target: test search after compact
method: after compact operation, search vector
expected: status ok
'''
ids = connect.insert(ip_collection, entities)
ids = connect.insert(collection, entities)
assert len(ids) == nb
connect.flush([ip_collection])
status = connect.compact(ip_collection)
query = copy.deepcopy(default_single_query)
connect.flush([collection])
status = connect.compact(collection)
query = ip_query()
query["bool"]["must"][0]["vector"][field_name]["query"] = [entity[-1]["values"][0], entities[-1]["values"][0],
entities[-1]["values"][-1]]
res = connect.search(ip_collection, query)
res = connect.search(collection, query)
logging.getLogger().info(res)
assert len(res) == len(query["bool"]["must"][0]["vector"][field_name]["query"])
assert res[0]._distances[0] < 1 - epsilon
......
@@ -25,8 +25,8 @@ entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)
query, query_vecs = gen_query_vectors_inside_entities(field_name, entities, top_k, 1)
default_index = {"index_type": "IVF_FLAT", "nlist": 1024, "metric_type": "L2"}
query, query_vecs = gen_query_vectors(field_name, entities, top_k, 1)
default_index = {"index_type": "IVF_FLAT", "params": {"nlist": 1024}, "metric_type": "L2"}
class TestIndexBase:
@@ -124,7 +124,7 @@ class TestIndexBase:
nq = get_nq
index_type = get_simple_index["index_type"]
search_param = get_search_param(index_type)
query, vecs = gen_query_vectors_inside_entities(field_name, entities, top_k, nq, search_params=search_param)
query, vecs = gen_query_vectors(field_name, entities, top_k, nq, search_params=search_param)
res = connect.search(collection, query)
assert len(res) == nq
@@ -204,83 +204,84 @@ class TestIndexBase:
assert stats["row_count"] == nb
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_ip(self, connect, ip_collection, get_simple_index):
def test_create_index_ip(self, connect, collection, get_simple_index):
'''
target: test create index interface
method: create collection and add entities in it, create index
expected: return search success
'''
ids = connect.insert(ip_collection, entities)
ids = connect.insert(collection, entities)
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_no_vectors_ip(self, connect, ip_collection, get_simple_index):
def test_create_index_no_vectors_ip(self, connect, collection, get_simple_index):
'''
target: test create index interface
method: create collection and add entities in it, create index
expected: return search success
'''
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_partition_ip(self, connect, ip_collection, get_simple_index):
def test_create_index_partition_ip(self, connect, collection, get_simple_index):
'''
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
'''
connect.create_partition(ip_collection, tag)
ids = connect.insert(ip_collection, entities, partition_tag=tag)
connect.flush([ip_collection])
connect.create_partition(collection, tag)
ids = connect.insert(collection, entities, partition_tag=tag)
connect.flush([collection])
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_partition_flush_ip(self, connect, ip_collection, get_simple_index):
def test_create_index_partition_flush_ip(self, connect, collection, get_simple_index):
'''
target: test create index interface
method: create collection, create partition, and add entities in it, create index
expected: return search success
'''
connect.create_partition(ip_collection, tag)
ids = connect.insert(ip_collection, entities, partition_tag=tag)
connect.create_partition(collection, tag)
ids = connect.insert(collection, entities, partition_tag=tag)
connect.flush()
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_search_with_query_vectors_ip(self, connect, ip_collection, get_simple_index, get_nq):
def test_create_index_search_with_query_vectors_ip(self, connect, collection, get_simple_index, get_nq):
'''
target: test create index interface, search with more query vectors
method: create collection and add entities in it, create index
expected: return search success
'''
ids = connect.insert(ip_collection, entities)
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
logging.getLogger().info(connect.get_collection_stats(ip_collection))
metric_type = "IP"
ids = connect.insert(collection, entities)
get_simple_index["metric_type"] = metric_type
connect.create_index(collection, field_name, get_simple_index)
logging.getLogger().info(connect.get_collection_stats(collection))
nq = get_nq
index_type = get_simple_index["index_type"]
search_param = get_search_param(index_type)
query, vecs = gen_query_vectors_inside_entities(field_name, entities, top_k, nq, search_params=search_param)
res = connect.search(ip_collection, query)
query, vecs = gen_query_vectors(field_name, entities, top_k, nq, metric_type=metric_type, search_params=search_param)
res = connect.search(collection, query)
assert len(res) == nq
@pytest.mark.timeout(BUILD_TIMEOUT)
@pytest.mark.level(2)
def test_create_index_multithread_ip(self, connect, ip_collection, args):
def test_create_index_multithread_ip(self, connect, collection, args):
'''
target: test create index interface with multiprocess
method: create collection and add entities in it, create index
expected: return search success
'''
ids = connect.insert(ip_collection, entities)
ids = connect.insert(collection, entities)
def build(connect):
default_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, default_index)
connect.create_index(collection, field_name, default_index)
threads_num = 8
threads = []
@@ -293,7 +294,7 @@ class TestIndexBase:
for t in threads:
t.join()
def test_create_index_collection_not_existed_ip(self, connect):
def test_create_index_collection_not_existed_ip(self, connect, collection):
'''
target: test create index interface when collection name not existed
method: create collection and add entities in it, create index
@@ -303,47 +304,47 @@ class TestIndexBase:
collection_name = gen_unique_str(collection_id)
default_index["metric_type"] = "IP"
with pytest.raises(Exception) as e:
connect.create_index(ip_collection, field_name, default_index)
connect.create_index(collection, field_name, default_index)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_index_no_vectors_insert_ip(self, connect, ip_collection, get_simple_index):
def test_create_index_no_vectors_insert_ip(self, connect, collection, get_simple_index):
'''
target: test create index interface when there are no vectors in the collection, and that it does not affect the subsequent process
method: create collection and add no vectors in it, and then create index, add entities in it
expected: return code equals to 0
'''
default_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
ids = connect.insert(ip_collection, entities)
connect.flush([ip_collection])
count = connect.count_entities(ip_collection)
connect.create_index(collection, field_name, get_simple_index)
ids = connect.insert(collection, entities)
connect.flush([collection])
count = connect.count_entities(collection)
assert count == nb
@pytest.mark.level(2)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_same_index_repeatedly_ip(self, connect, ip_collection, get_simple_index):
def test_create_same_index_repeatedly_ip(self, connect, collection, get_simple_index):
'''
target: check if index can be created repeatedly, with the same create_index params
method: create index after index have been built
expected: return code success, and search ok
'''
default_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(ip_collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
connect.create_index(collection, field_name, get_simple_index)
@pytest.mark.level(2)
@pytest.mark.timeout(BUILD_TIMEOUT)
def test_create_different_index_repeatedly_ip(self, connect, ip_collection):
def test_create_different_index_repeatedly_ip(self, connect, collection):
'''
target: check if index can be created repeatedly, with the different create_index params
method: create another index with different index_params after index have been built
expected: return code 0, and describe index result equals with the second index params
'''
ids = connect.insert(ip_collection, entities)
indexs = [default_index, {"index_type": "FLAT", "nlist": 1024, "metric_type": "IP"}]
ids = connect.insert(collection, entities)
indexs = [default_index, {"index_type": "FLAT", "params": {"nlist": 1024}, "metric_type": "IP"}]
for index in indexs:
connect.create_index(ip_collection, field_name, index)
stats = connect.get_collection_stats(ip_collection)
connect.create_index(collection, field_name, index)
stats = connect.get_collection_stats(collection)
assert stats["partitions"][0]["segments"][0]["index_name"] == index["index_type"]
assert stats["row_count"] == nb
@@ -424,7 +425,7 @@ class TestIndexBase:
connect.create_index(collection, field_name, get_simple_index)
connect.drop_index(collection, field_name)
def test_drop_index_ip(self, connect, ip_collection, get_simple_index):
def test_drop_index_ip(self, connect, collection, get_simple_index):
'''
target: test drop index interface
method: create collection and add entities in it, create index, call drop index
@@ -432,40 +433,40 @@ class TestIndexBase:
'''
# ids = connect.insert(collection, entities)
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
connect.drop_index(ip_collection, field_name)
stats = connect.get_collection_stats(ip_collection)
connect.create_index(collection, field_name, get_simple_index)
connect.drop_index(collection, field_name)
stats = connect.get_collection_stats(collection)
# assert stats["partitions"][0]["segments"][0]["index_name"] == default_index_type
assert not stats["partitions"][0]["segments"]
@pytest.mark.level(2)
def test_drop_index_repeatly_ip(self, connect, ip_collection, get_simple_index):
def test_drop_index_repeatly_ip(self, connect, collection, get_simple_index):
'''
target: test drop index repeatedly
method: create index, call drop index, and drop again
expected: return code 0
'''
get_simple_index["metric_type"] = "IP"
connect.create_index(ip_collection, field_name, get_simple_index)
stats = connect.get_collection_stats(ip_collection)
connect.drop_index(ip_collection, field_name)
connect.drop_index(ip_collection, field_name)
stats = connect.get_collection_stats(ip_collection)
connect.create_index(collection, field_name, get_simple_index)
stats = connect.get_collection_stats(collection)
connect.drop_index(collection, field_name)
connect.drop_index(collection, field_name)
stats = connect.get_collection_stats(collection)
logging.getLogger().info(stats)
# assert stats["partitions"][0]["segments"][0]["index_name"] == default_index_type
assert not stats["partitions"][0]["segments"]
@pytest.mark.level(2)
def test_drop_index_without_connect_ip(self, dis_connect, ip_collection):
def test_drop_index_without_connect_ip(self, dis_connect, collection):
'''
target: test drop index without connection
method: drop index, and check if drop successfully
expected: raise exception
'''
with pytest.raises(Exception) as e:
dis_connect.drop_index(ip_collection, field_name)
dis_connect.drop_index(collection, field_name)
def test_drop_index_collection_not_create_ip(self, connect, ip_collection):
def test_drop_index_collection_not_create_ip(self, connect, collection):
'''
target: test drop index interface when index not created
method: create collection and add entities in it, create index
@@ -473,10 +474,10 @@ class TestIndexBase:
'''
# ids = connect.insert(collection, entities)
# no create index
connect.drop_index(ip_collection, field_name)
connect.drop_index(collection, field_name)
@pytest.mark.level(2)
def test_create_drop_index_repeatly_ip(self, connect, ip_collection, get_simple_index):
def test_create_drop_index_repeatly_ip(self, connect, collection, get_simple_index):
'''
target: test create / drop index repeatedly, using the same index params
method: create index, drop index, four times
@@ -484,8 +485,8 @@ class TestIndexBase:
'''
get_simple_index["metric_type"] = "IP"
for i in range(4):
connect.create_index(ip_collection, field_name, get_simple_index)
connect.drop_index(ip_collection, field_name)
connect.create_index(collection, field_name, get_simple_index)
connect.drop_index(collection, field_name)
class TestIndexJAC:
@@ -501,11 +502,14 @@ class TestIndexJAC:
@pytest.fixture(
scope="function",
params=gen_binary_index()
params=gen_simple_index()
)
def get_jaccard_index(self, request, connect):
request.param["params"].update({"metric_type": "JACCARD"})
return request.param
if request.param["index_type"] in binary_support():
request.param["metric_type"] = "JACCARD"
return request.param
else:
pytest.skip("Skip index")
@pytest.fixture(
scope="function",
@@ -557,7 +561,7 @@ class TestIndexJAC:
pdb.set_trace()
ids = connect.insert(jac_collection, binary_entities)
connect.create_index(jac_collection, binary_field_name, get_jaccard_index)
query, vecs = gen_query_vectors_inside_entities(binary_field_name, binary_entities, top_k, nq)
query, vecs = gen_query_vectors(binary_field_name, binary_entities, top_k, nq)
search_param = get_search_param(get_jaccard_index["index_type"])
res = connect.search(jac_collection, query, search_params=search_param)
logging.getLogger().info(res)
@@ -606,35 +610,34 @@ class TestIndexJAC:
******************************************************************
"""
def test_drop_index(self, connect, jac_collection, get_jaccard_index):
def test_drop_index(self, connect, binary_collection, get_jaccard_index):
'''
target: test drop index interface
method: create collection and add entities in it, create index, call drop index
expected: return code 0, and default index param
'''
# ids = connect.insert(ip_collection, vectors)
connect.create_index(jac_collection, binary_field_name, get_jaccard_index)
stats = connect.get_collection_stats(jac_collection)
connect.create_index(binary_collection, binary_field_name, get_jaccard_index)
stats = connect.get_collection_stats(binary_collection)
logging.getLogger().info(stats)
connect.drop_index(jac_collection, binary_field_name)
stats = connect.get_collection_stats(jac_collection)
connect.drop_index(binary_collection, binary_field_name)
stats = connect.get_collection_stats(binary_collection)
# assert stats["partitions"][0]["segments"][0]["index_name"] == default_index_type
assert not stats["partitions"][0]["segments"]
def test_drop_index_partition(self, connect, jac_collection, get_jaccard_index):
def test_drop_index_partition(self, connect, binary_collection, get_jaccard_index):
'''
target: test drop index interface
method: create collection, create partition and add entities in it, create index on collection, call drop collection index
expected: return code 0, and default index param
'''
connect.create_partition(jac_collection, tag)
ids = connect.insert(jac_collection, binary_entities, partition_tag=tag)
connect.flush([jac_collection])
connect.create_index(jac_collection, binary_field_name, get_jaccard_index)
stats = connect.get_collection_stats(jac_collection)
connect.create_partition(binary_collection, tag)
ids = connect.insert(binary_collection, binary_entities, partition_tag=tag)
connect.flush([binary_collection])
connect.create_index(binary_collection, binary_field_name, get_jaccard_index)
stats = connect.get_collection_stats(binary_collection)
logging.getLogger().info(stats)
connect.drop_index(jac_collection, binary_field_name)
stats = connect.get_collection_stats(jac_collection)
connect.drop_index(binary_collection, binary_field_name)
stats = connect.get_collection_stats(binary_collection)
logging.getLogger().info(stats)
assert stats["partitions"][1]["segments"][0]["index_name"] == default_index_type
......
@@ -129,7 +129,7 @@ def gen_inaccuracy(num):
return num / 255.0
def gen_vectors(num, dim, is_normal=False):
def gen_vectors(num, dim, is_normal=True):
vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
if is_normal:
    vectors = preprocessing.normalize(vectors, axis=1, norm='l2').tolist()
return vectors
@@ -259,26 +259,18 @@ def assert_equal_entity(a, b):
pass
def gen_query_vectors_inside_entities(field_name, entities, top_k, nq, search_params={"nprobe": 10, "metric_type": "L2"}):
query_vectors = entities[-1]["values"][:nq]
query = {
"bool": {
"must": [
{"vector": {field_name: {"topk": top_k, "query": query_vectors, "params": search_params}}}
]
}
}
return query, query_vectors
def gen_query_vectors_rand_entities(field_name, entities, top_k, nq, search_params={"nprobe": 10, "metric_type": "L2"}):
dimension = len(entities[-1]["values"][0])
query_vectors = gen_vectors(nq, dimension)
def gen_query_vectors(field_name, entities, top_k, nq, search_params={"nprobe": 10}, rand_vector=False, metric_type=None):
if rand_vector is True:
dimension = len(entities[-1]["values"][0])
query_vectors = gen_vectors(nq, dimension)
else:
query_vectors = entities[-1]["values"][:nq]
must_param = {"vector": {field_name: {"topk": top_k, "query": query_vectors, "params": search_params}}}
if metric_type is not None:
must_param["vector"]["field_name"]["metric_type"] = metric_type
query = {
"bool": {
"must": [
{"vector": {field_name: {"topk": top_k, "query": query_vectors, "params": search_params}}}
]
"must": [must_param]
}
}
return query, query_vectors
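The merged gen_query_vectors covers both former variants: inside-entities queries by default and random vectors via rand_vector=True. A usage sketch, assuming the module-level entities, field_name, and top_k defined elsewhere in this file:

# query vectors drawn from the inserted entities (old gen_query_vectors_inside_entities)
query, vecs = gen_query_vectors(field_name, entities, top_k, 1)
# random query vectors with the metric overridden (old gen_query_vectors_rand_entities)
query_ip, _ = gen_query_vectors(field_name, entities, top_k, 1, rand_vector=True, metric_type="IP")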
@@ -677,7 +669,7 @@ def gen_simple_index():
if all_index_types[i] in binary_support():
continue
dic = {"index_type": all_index_types[i], "metric_type": "L2"}
dic.update(default_index_params[i])
dic.update({"params": default_index_params[i]})
index_params.append(dic)
return index_params
@@ -687,7 +679,7 @@ def gen_binary_index():
for i in range(len(all_index_types)):
if all_index_types[i] in binary_support():
dic = {"index_type": all_index_types[i]}
dic.update(default_index_params[i])
dic.update({"params": default_index_params[i]})
index_params.append(dic)
return index_params
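With "params" nested, the dicts generated here line up with the shape of default_index used in the index tests; for example (the index names are illustrative members of all_index_types):

{"index_type": "IVF_FLAT", "params": {"nlist": 1024}, "metric_type": "L2"}   # gen_simple_index entry
{"index_type": "BIN_IVF_FLAT", "params": {"nlist": 1024}}                    # gen_binary_index entry; tests set metric_type themselves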
......