Unverified · commit 52a59396 authored by del-zhenwu and committed by GitHub

[skip ci] update segment_size to segment_row_count (#3068)

* [skip ci] update segment_size to segment_row_count
Signed-off-by: zw <zw@milvus.io>

* [skip ci] remove metric type in collection params
Signed-off-by: zw <zw@milvus.io>
Co-authored-by: zw <zw@milvus.io>
Co-authored-by: Wang XiangYu <xy.wang@zilliz.com>
Parent 86652416
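For context, the collection-creation payload these tests build after the rename looks like the sketch below. It mirrors the gen_default_fields() helper updated further down in this diff: the "segment_size" key becomes "segment_row_count", and "metric_type" is no longer part of the vector field's params. The concrete values and the commented-out create_collection call are illustrative only, not part of the change itself.

```python
# Minimal sketch of the new collection params, based on gen_default_fields()
# as updated in this diff. Values are illustrative.
from milvus import DataType

dimension = 128
segment_row_count = 5000  # replaces the old "segment_size" key

default_fields = {
    "fields": [
        {"field": "int64", "type": DataType.INT64},
        {"field": "float", "type": DataType.FLOAT},
        # "metric_type" is dropped from the collection-level params
        {"field": "vector", "type": DataType.FLOAT_VECTOR, "params": {"dim": dimension}},
    ],
    "segment_row_count": segment_row_count,
}

# With a connected client (the `connect` fixture in these tests):
# connect.create_collection("example_collection", default_fields)
```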
......@@ -15,7 +15,7 @@ dim = 128
tag = "tag"
collection_id = "count_collection"
add_interval_time = 3
segment_size = 10
segment_row_count = 5000
default_fields = gen_default_fields()
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
......@@ -31,7 +31,7 @@ class TestCollectionCount:
scope="function",
params=[
1,
5000,
4000,
6001
],
)
......@@ -186,7 +186,7 @@ class TestCollectionCountIP:
scope="function",
params=[
1,
5000,
4000,
6001
],
)
......@@ -341,7 +341,7 @@ class TestCollectionCountBinary:
scope="function",
params=[
1,
5000,
4000,
6001
],
)
......@@ -507,7 +507,7 @@ class TestCollectionMultiCollections:
scope="function",
params=[
1,
5000,
4000,
6001
],
)
......@@ -564,7 +564,8 @@ class TestCollectionMultiCollections:
res = connect.count_entities(collection_list[i])
assert res == insert_count
def test_collection_count_multi_collections_binary(self, connect, jac_collection, insert_count):
# TODO:
def _test_collection_count_multi_collections_binary(self, connect, jac_collection, insert_count):
'''
target: test collection rows_count is correct or not with multiple collections of JACCARD
method: create collection and add entities in it,
......@@ -587,7 +588,8 @@ class TestCollectionMultiCollections:
res = connect.count_entities(collection_list[i])
assert res == insert_count
def test_collection_count_multi_collections_mix(self, connect):
# TODO:
def _test_collection_count_multi_collections_mix(self, connect):
'''
target: test collection rows_count is correct or not with multiple collections of JACCARD
method: create collection and add entities in it,
......
......@@ -7,9 +7,9 @@ from multiprocessing import Process
from utils import *
dim = 128
default_segment_size = 1024
default_segment_row_count = 100000
drop_collection_interval_time = 3
segment_size = 10
segment_row_count = 5000
collection_id = "logic"
vectors = gen_vectors(100, dim)
default_fields = gen_default_fields()
......
......@@ -7,7 +7,7 @@ import pytest
from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
nprobe = 1
top_k = 1
epsilon = 0.0001
......
......@@ -13,9 +13,9 @@ from utils import *
nb = 1
dim = 128
collection_id = "create_collection"
default_segment_size = 1024
default_segment_row_count = 100000
drop_collection_interval_time = 3
segment_size = 10
segment_row_count = 5000
default_fields = gen_default_fields()
entities = gen_entities(nb)
......@@ -42,9 +42,9 @@ class TestCreateCollection:
@pytest.fixture(
scope="function",
params=gen_segment_sizes()
params=gen_segment_row_counts()
)
def get_segment_size(self, request):
def get_segment_row_count(self, request):
yield request.param
def test_create_collection_fields(self, connect, get_filter_field, get_vector_field):
......@@ -59,7 +59,7 @@ class TestCreateCollection:
collection_name = gen_unique_str(collection_id)
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
logging.getLogger().info(fields)
connect.create_collection(collection_name, fields)
......@@ -77,20 +77,20 @@ class TestCreateCollection:
collection_name = gen_unique_str(collection_id)
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_name, fields)
assert connect.has_collection(collection_name)
def test_create_collection_segment_size(self, connect, get_segment_size):
def test_create_collection_segment_row_count(self, connect, get_segment_row_count):
'''
target: test create normal collection with different fields
method: create collection with diff segment_size
method: create collection with diff segment_row_count
expected: no exception raised
'''
collection_name = gen_unique_str(collection_id)
fields = copy.deepcopy(default_fields)
fields["segment_size"] = get_segment_size
fields["segment_row_count"] = get_segment_row_count
connect.create_collection(collection_name, fields)
assert connect.has_collection(collection_name)
......@@ -197,7 +197,7 @@ class TestCreateCollectionInvalid(object):
scope="function",
params=gen_invalid_ints()
)
def get_segment_size(self, request):
def get_segment_row_count(self, request):
yield request.param
@pytest.fixture(
......@@ -222,20 +222,20 @@ class TestCreateCollectionInvalid(object):
yield request.param
@pytest.mark.level(2)
def test_create_collection_with_invalid_segment_size(self, connect, get_segment_size):
def test_create_collection_with_invalid_segment_row_count(self, connect, get_segment_row_count):
collection_name = gen_unique_str()
fields = copy.deepcopy(default_fields)
fields["segment_size"] = get_segment_size
fields["segment_row_count"] = get_segment_row_count
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
@pytest.mark.level(2)
def test_create_collection_with_invalid_metric_type(self, connect, get_metric_type):
collection_name = gen_unique_str()
fields = copy.deepcopy(default_fields)
fields["fields"][-1]["params"]["metric_type"] = get_metric_type
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
# @pytest.mark.level(2)
# def test_create_collection_with_invalid_metric_type(self, connect, get_metric_type):
# collection_name = gen_unique_str()
# fields = copy.deepcopy(default_fields)
# fields["fields"][-1]["params"]["metric_type"] = get_metric_type
# with pytest.raises(Exception) as e:
# connect.create_collection(collection_name, fields)
@pytest.mark.level(2)
def test_create_collection_with_invalid_dimension(self, connect, get_dim):
......@@ -285,34 +285,33 @@ class TestCreateCollectionInvalid(object):
with pytest.raises(Exception) as e:
connect.create_collection(collection_name, fields)
def test_create_collection_no_segment_size(self, connect):
'''
target: test create collection with no segment_size params
method: create collection with correct params
expected: use default default_segment_size
'''
collection_name = gen_unique_str(collection_id)
fields = copy.deepcopy(default_fields)
fields.pop("segment_size")
connect.create_collection(collection_name, fields)
res = connect.get_collection_info(collection_name)
logging.getLogger().info(res)
assert res["segment_size"] == default_segment_size
# TODO:
def _test_create_collection_no_metric_type(self, connect):
def test_create_collection_no_segment_row_count(self, connect):
'''
target: test create collection with no metric_type params
target: test create collection with no segment_row_count params
method: create collection with correct params
expected: use default L2
expected: use default default_segment_row_count
'''
collection_name = gen_unique_str(collection_id)
fields = copy.deepcopy(default_fields)
fields["fields"][-1]["params"].pop("metric_type")
fields.pop("segment_row_count")
connect.create_collection(collection_name, fields)
res = connect.get_collection_info(collection_name)
logging.getLogger().info(res)
assert res["metric_type"] == "L2"
assert res["segment_row_count"] == default_segment_row_count
# def _test_create_collection_no_metric_type(self, connect):
# '''
# target: test create collection with no metric_type params
# method: create collection with correct params
# expected: use default L2
# '''
# collection_name = gen_unique_str(collection_id)
# fields = copy.deepcopy(default_fields)
# fields["fields"][-1]["params"].pop("metric_type")
# connect.create_collection(collection_name, fields)
# res = connect.get_collection_info(collection_name)
# logging.getLogger().info(res)
# assert res["metric_type"] == "L2"
# TODO: assert exception
def test_create_collection_limit_fields(self, connect):
......
......@@ -9,7 +9,7 @@ from utils import *
collection_id = "info"
default_fields = gen_default_fields()
segment_size = 10
segment_row_count = 5000
class TestInfoBase:
......@@ -30,9 +30,9 @@ class TestInfoBase:
@pytest.fixture(
scope="function",
params=gen_segment_sizes()
params=gen_segment_row_counts()
)
def get_segment_size(self, request):
def get_segment_row_count(self, request):
yield request.param
"""
......@@ -53,7 +53,7 @@ class TestInfoBase:
collection_name = gen_unique_str(collection_id)
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_name, fields)
res = connect.get_collection_info(collection_name)
......@@ -64,15 +64,15 @@ class TestInfoBase:
# assert dimension
# TODO
def test_create_collection_segment_size(self, connect, get_segment_size):
def test_create_collection_segment_row_count(self, connect, get_segment_row_count):
'''
target: test create normal collection with different fields
method: create collection with diff segment_size
method: create collection with diff segment_row_count
expected: no exception raised
'''
collection_name = gen_unique_str(collection_id)
fields = copy.deepcopy(default_fields)
fields["segment_size"] = get_segment_size
fields["segment_row_count"] = get_segment_row_count
connect.create_collection(collection_name, fields)
# assert segment size
......@@ -141,7 +141,7 @@ class TestInfoBase:
collection_name = gen_unique_str(collection_id)
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_name, fields)
# insert
......@@ -153,15 +153,15 @@ class TestInfoBase:
# assert dimension
# TODO
def test_create_collection_segment_size_after_insert(self, connect, get_segment_size):
def test_create_collection_segment_row_count_after_insert(self, connect, get_segment_row_count):
'''
target: test create normal collection with different fields
method: create collection with diff segment_size
method: create collection with diff segment_row_count
expected: no exception raised
'''
collection_name = gen_unique_str(collection_id)
fields = copy.deepcopy(default_fields)
fields["segment_size"] = get_segment_size
fields["segment_row_count"] = get_segment_row_count
connect.create_collection(collection_name, fields)
# insert
# assert segment size
......
......@@ -10,7 +10,7 @@ from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
collection_id = "test_delete"
DELETE_TIMEOUT = 60
tag = "1970-01-01"
......
......@@ -11,7 +11,7 @@ from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
collection_id = "test_get"
DELETE_TIMEOUT = 60
tag = "1970-01-01"
......
......@@ -9,7 +9,7 @@ from milvus import DataType
from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
collection_id = "test_insert"
ADD_TIMEOUT = 60
tag = "1970-01-01"
......@@ -209,7 +209,7 @@ class TestInsertBase:
collection_name = gen_unique_str("test_collection")
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_name, fields)
ids = [i for i in range(nb)]
......@@ -283,7 +283,7 @@ class TestInsertBase:
collection_name = gen_unique_str("test_collection")
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_name, fields)
entities = gen_entities_by_fields(fields["fields"], nb, dim)
......
......@@ -8,7 +8,7 @@ import pytest
from utils import *
dim = 128
segment_size = 100
segment_row_count = 100000
nb = 6000
tag = "1970-01-01"
field_name = "float_vector"
......
......@@ -11,7 +11,7 @@ from milvus import DataType
from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
top_k_limit = 2048
collection_id = "search"
tag = "1970-01-01"
......
......@@ -13,7 +13,7 @@ nprobe = 1
top_k = 1
tag = "1970-01-01"
nb = 6000
segment_size = 10
segment_row_count = 5000
entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
......
......@@ -7,7 +7,7 @@ import pytest
from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
index_file_size = 10
collection_id = "test_flush"
DELETE_TIMEOUT = 60
......@@ -155,7 +155,7 @@ class TestFlushBase:
collection_new = gen_unique_str("test_flush")
fields = {
"fields": [filter_field, vector_field],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
connect.create_collection(collection_new, fields)
connect.create_partition(collection, tag)
......
......@@ -9,7 +9,7 @@ from utils import *
dim = 128
segment_size = 10
segment_row_count = 5000
collection_id = "partition"
nprobe = 1
tag = "1970-01-01"
......
......@@ -8,7 +8,7 @@ from utils import *
dim = 128
collection_id = "test_wal"
segment_size = 10
segment_row_count = 5000
WAL_TIMEOUT = 60
tag = "1970-01-01"
insert_interval_time = 1.5
......
......@@ -16,7 +16,7 @@ epsilon = 0.000001
default_flush_interval = 1
big_flush_interval = 1000
dimension = 128
segment_size = 10
segment_row_count = 5000
# TODO:
all_index_types = [
......@@ -199,14 +199,9 @@ def gen_single_filter_fields():
def gen_single_vector_fields():
fields = []
for metric_type in ['HAMMING', 'IP', 'JACCARD', 'L2', 'SUBSTRUCTURE', 'SUPERSTRUCTURE', 'TANIMOTO']:
for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
if metric_type in ["L2", "IP"] and data_type == DataType.BINARY_VECTOR:
continue
if metric_type not in ["L2", "IP"] and data_type == DataType.FLOAT_VECTOR:
continue
field = {"field": data_type.name, "type": data_type, "params": {"metric_type": metric_type, "dim": dimension}}
fields.append(field)
for data_type in [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR]:
field = {"field": data_type.name, "type": data_type, "params": {"dim": dimension}}
fields.append(field)
return fields
......@@ -215,9 +210,9 @@ def gen_default_fields():
"fields": [
{"field": "int64", "type": DataType.INT64},
{"field": "float", "type": DataType.FLOAT},
{"field": "vector", "type": DataType.FLOAT_VECTOR, "params": {"metric_type": "L2", "dim": dimension}}
{"field": "vector", "type": DataType.FLOAT_VECTOR, "params": {"dim": dimension}}
],
"segment_size": segment_size
"segment_row_count": segment_row_count
}
return default_fields
......@@ -311,14 +306,14 @@ def add_vector_field(entities, is_normal=False):
return entities
def update_fields_metric_type(fields, metric_type):
tmp_fields = copy.deepcopy(fields)
if metric_type in ["L2", "IP"]:
tmp_fields["fields"][-1]["type"] = DataType.FLOAT_VECTOR
else:
tmp_fields["fields"][-1]["type"] = DataType.BINARY_VECTOR
tmp_fields["fields"][-1]["params"]["metric_type"] = metric_type
return tmp_fields
# def update_fields_metric_type(fields, metric_type):
# tmp_fields = copy.deepcopy(fields)
# if metric_type in ["L2", "IP"]:
# tmp_fields["fields"][-1]["type"] = DataType.FLOAT_VECTOR
# else:
# tmp_fields["fields"][-1]["type"] = DataType.BINARY_VECTOR
# tmp_fields["fields"][-1]["params"]["metric_type"] = metric_type
# return tmp_fields
def remove_field(entities):
......@@ -363,7 +358,7 @@ def add_vector_field(nb, dimension=dimension):
return field_name
def gen_segment_sizes():
def gen_segment_row_counts():
sizes = [
1,
2,
......