未验证 提交 4e562181 编写于 作者: B binbin 提交者: GitHub

Add test cases for search after dev changes (#6092)

Signed-off-by: NBinbin Lv <binbin.lv@zilliz.com>
上级 f633d48c
......@@ -143,7 +143,8 @@ class TestcaseBase(Base):
**kwargs)
return partition_wrap
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb, partition_num=0, is_binary=False):
def init_collection_general(self, prefix, insert_data=False, nb=ct.default_nb,
partition_num=0, is_binary=False, is_all_data_type=False):
"""
target: create specified collections
method: 1. create collections (binary/non-binary)
......@@ -158,10 +159,11 @@ class TestcaseBase(Base):
vectors = []
binary_raw_vectors = []
# 1 create collection
default_schema = cf.gen_default_collection_schema()
if is_binary:
default_schema = cf.gen_default_binary_collection_schema()
else:
default_schema = cf.gen_default_collection_schema()
if is_all_data_type:
default_schema = cf.gen_collection_schema_all_datatype()
log.info("init_collection_general: collection creation")
collection_w = self.init_collection_wrap(name=collection_name,
schema=default_schema)
......@@ -170,11 +172,10 @@ class TestcaseBase(Base):
cf.gen_partitions(collection_w, partition_num)
# 3 insert data if specified
if insert_data:
collection_w, vectors, binary_raw_vectors = cf.insert_data(collection_w, nb, is_binary)
if nb <= 32000:
conn.flush([collection_w.name])
assert collection_w.is_empty == False
assert collection_w.num_entities == nb
collection_w, vectors, binary_raw_vectors = \
cf.insert_data(collection_w, nb, is_binary, is_all_data_type)
assert collection_w.is_empty == False
assert collection_w.num_entities == nb
collection_w.load()
return collection_w, vectors, binary_raw_vectors
......@@ -25,6 +25,30 @@ def gen_str_by_length(length=8):
return "".join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
def gen_bool_field(name=ct.default_bool_field_name, description=ct.default_desc, is_primary=False, **kwargs):
bool_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.BOOL, description=description,
is_primary=is_primary, **kwargs)
return bool_field
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
is_primary=is_primary, **kwargs)
return int8_field
def gen_int16_field(name=ct.default_int16_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int16_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT16, description=description,
is_primary=is_primary, **kwargs)
return int16_field
def gen_int32_field(name=ct.default_int32_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int32_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT32, description=description,
is_primary=is_primary, **kwargs)
return int32_field
def gen_int64_field(name=ct.default_int64_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int64_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT64, description=description,
is_primary=is_primary, **kwargs)
......@@ -37,6 +61,12 @@ def gen_float_field(name=ct.default_float_field_name, is_primary=False, descript
return float_field
def gen_double_field(name=ct.default_double_field_name, is_primary=False, description=ct.default_desc):
double_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.DOUBLE,
description=description, is_primary=is_primary)
return double_field
def gen_float_vec_field(name=ct.default_float_vec_field_name, is_primary=False, dim=ct.default_dim,
description=ct.default_desc):
float_vec_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.FLOAT_VECTOR,
......@@ -61,6 +91,16 @@ def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.
return schema
def gen_collection_schema_all_datatype(description=ct.default_desc,
primary_field=ct.default_int64_field_name,
auto_id=False):
fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
gen_bool_field(), gen_float_field(), gen_double_field(), gen_float_vec_field()]
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id)
return schema
def gen_collection_schema(fields, primary_field=None, description=ct.default_desc, auto_id=False):
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, primary_field=primary_field,
description=description, auto_id=auto_id)
......@@ -101,6 +141,27 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
})
return df
def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0):
int64_values = pd.Series(data=[i for i in range(start, start + nb)])
int32_values = pd.Series(data=[np.int32(i) for i in range(start, start + nb)], dtype="int32")
int16_values = pd.Series(data=[np.int16(i) for i in range(start, start + nb)], dtype="int16")
int8_values = pd.Series(data=[np.int8(i) for i in range(start, start + nb)], dtype="int8")
bool_values = pd.Series(data=[np.bool(i) for i in range(start, start + nb)], dtype="bool")
float_values = pd.Series(data=[float(i) for i in range(start, start + nb)], dtype="float32")
double_values = pd.Series(data=[np.double(i) for i in range(start, start + nb)], dtype="double")
float_vec_values = gen_vectors(nb, dim)
df = pd.DataFrame({
ct.default_int64_field_name: int64_values,
ct.default_int32_field_name: int32_values,
ct.default_int16_field_name: int16_values,
ct.default_int8_field_name: int8_values,
ct.default_bool_field_name: bool_values,
ct.default_float_field_name: float_values,
ct.default_double_field_name: double_values,
ct.default_float_vec_field_name: float_vec_values
})
return df
def gen_default_binary_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0):
int_values = pd.Series(data=[i for i in range(start, start + nb)])
......@@ -190,7 +251,6 @@ def gen_normal_expressions():
]
return expressions
def jaccard(x, y):
x = np.asarray(x, np.bool)
y = np.asarray(y, np.bool)
......@@ -278,7 +338,7 @@ def gen_partitions(collection_w, partition_num=1):
log.info("gen_partitions: created partitions %s" % par)
def insert_data(collection_w, nb=3000, is_binary=False):
def insert_data(collection_w, nb=3000, is_binary=False, is_all_data_type=False):
"""
target: insert non-binary/binary data
method: insert non-binary/binary data into partitions if any
......@@ -291,11 +351,12 @@ def insert_data(collection_w, nb=3000, is_binary=False):
log.info("insert_data: inserting data into collection %s (num_entities: %s)"
% (collection_w.name, nb))
for i in range(num):
default_data = gen_default_dataframe_data(nb // num)
if is_binary:
default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num)
binary_raw_vectors.extend(binary_raw_data)
else:
default_data = gen_default_dataframe_data(nb // num)
if is_all_data_type:
default_data = gen_dataframe_all_data_type(nb // num)
collection_w.insert(default_data, par[i].name)
vectors.append(default_data)
log.info("insert_data: inserted data into collection %s (num_entities: %s)"
......
......@@ -17,8 +17,13 @@ max_partition_num = 4096 # 256
default_segment_row_limit = 1000
default_server_segment_row_limit = 1024 * 512
default_alias = "default"
default_bool_field_name = "bool"
default_int8_field_name = "int8"
default_int16_field_name = "int16"
default_int32_field_name = "int32"
default_int64_field_name = "int64"
default_float_field_name = "float"
default_double_field_name = "double"
default_float_vec_field_name = "float_vector"
default_binary_vec_field_name = "binary_vector"
default_partition_name = "_default"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册