Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
d97e4213
M
milvus
项目概览
milvus
/
milvus
10 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
d97e4213
编写于
5月 31, 2023
作者:
Z
zhuwenxing
提交者:
GitHub
5月 31, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[test]Add dynamic schema bulk insert test cases (#24481)
Signed-off-by:
N
zhuwenxing
<
wenxing.zhu@zilliz.com
>
上级
3fb9334b
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
162 additions
and
21 deletions
+162
-21
tests/python_client/common/common_func.py
tests/python_client/common/common_func.py
+6
-4
tests/python_client/testcases/test_bulk_insert.py
tests/python_client/testcases/test_bulk_insert.py
+139
-0
tests/python_client/testcases/test_query.py
tests/python_client/testcases/test_query.py
+5
-5
tests/python_client/testcases/test_search.py
tests/python_client/testcases/test_search.py
+12
-12
未找到文件。
tests/python_client/common/common_func.py
浏览文件 @
d97e4213
...
...
@@ -171,9 +171,11 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
return
schema
def
gen_collection_schema
(
fields
,
primary_field
=
None
,
description
=
ct
.
default_desc
,
auto_id
=
False
):
def
gen_collection_schema
(
fields
,
primary_field
=
None
,
description
=
ct
.
default_desc
,
auto_id
=
False
,
enable_dynamic_field
=
False
):
schema
,
_
=
ApiCollectionSchemaWrapper
().
init_collection_schema
(
fields
=
fields
,
primary_field
=
primary_field
,
description
=
description
,
auto_id
=
auto_id
)
description
=
description
,
auto_id
=
auto_id
,
enable_dynamic_field
=
enable_dynamic_field
)
return
schema
...
...
@@ -396,7 +398,7 @@ def gen_invalid_field_types():
return
field_types
def
gen_inva
il
d_search_params_type
():
def
gen_inva
li
d_search_params_type
():
invalid_search_key
=
100
search_params
=
[]
for
index_type
in
ct
.
all_index_types
:
...
...
@@ -543,7 +545,7 @@ def gen_normal_string_expressions(field):
return
expressions
def
gen_inva
il
d_string_expressions
():
def
gen_inva
li
d_string_expressions
():
expressions
=
[
"varchar in [0,
\"
1
\"
]"
,
"varchar not in [
\"
0
\"
, 1, 2]"
...
...
tests/python_client/testcases/test_bulk_insert.py
浏览文件 @
d97e4213
import
logging
import
random
import
time
import
pytest
import
numpy
as
np
...
...
@@ -6,10 +7,12 @@ from pathlib import Path
from
base.client_base
import
TestcaseBase
from
common
import
common_func
as
cf
from
common
import
common_type
as
ct
from
common.minio_comm
import
copy_files_to_minio
from
common.milvus_sys
import
MilvusSys
from
common.common_type
import
CaseLabel
,
CheckTasks
from
utils.util_log
import
test_log
as
log
from
common.bulk_insert_data
import
(
data_source
,
prepare_bulk_insert_json_files
,
prepare_bulk_insert_numpy_files
,
DataField
as
df
,
...
...
@@ -837,3 +840,139 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
ids
=
hits
.
ids
results
,
_
=
self
.
collection_wrap
.
query
(
expr
=
f
"
{
df
.
pk_field
}
in
{
ids
}
"
)
assert
len
(
results
)
==
len
(
ids
)
@pytest.mark.parametrize("auto_id", [True, False])
def test_dynamic_schema_with_json(self, auto_id):
    """
    target: test bulk insert of a row-based JSON file into a collection created with
            enable_dynamic_field=True
    method: 1. create a collection that declares only a pk field and a float vector field
            2. write nb rows to a JSON file under the "rows" key; every row also carries
               "name", "age" and 0-3 randomly named keys that are not declared in the schema
               (the pk value is removed from the rows when auto_id is True)
            3. copy the file to minio, create index, load, then run bulk insert on the file
            4. wait for the bulk insert task and index building, reload with _refresh=True
               and query with output_fields "name" and "age"
    expected: the bulk insert task succeeds and the query returns nb rows
    """
    import json

    self._connect()
    c_name = cf.gen_unique_str("dynamic_schema")
    dim = 128
    nb = 100
    fields = [
        cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
        cf.gen_float_vec_field(name=df.vec_field, dim=dim),
    ]
    schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=True)
    self.collection_wrap.init_collection(c_name, schema=schema)

    rows = []
    for i in range(nb):
        row = {
            "name": f"test_{i}",
            "age": i,
            df.pk_field: i,
            df.vec_field: list(range(dim)),
        }
        if auto_id is True:
            # the pk is not supplied in the file when auto_id is enabled
            del row[df.pk_field]
        # add a random number of extra keys so rows do not all share the same key set
        for _ in range(random.randint(0, 3)):
            random_key = cf.gen_unique_str("random_key")
            random_value = cf.gen_unique_str("random_value")
            row[random_key] = random_value
        rows.append(row)

    # generate json file for bulk insert
    file_name = "dynamic_schema.json"
    json_data = {
        "rows": rows,
    }
    with open(f"{data_source}/{file_name}", "w") as json_file:
        json.dump(json_data, json_file)

    # upload data to minio
    files = [file_name]
    copy_files_to_minio(self.minio_endpoint, data_source, files, self.bucket_name, force=True)

    index_params = ct.default_index
    self.collection_wrap.create_index(field_name=df.vec_field, index_params=index_params)
    # load collection
    self.collection_wrap.load()

    t0 = time.time()
    task_id, _ = self.utility_wrap.do_bulk_insert(collection_name=c_name, files=files)
    # use the suite logger (not the root logger) so this line lands with the other test logs
    log.info(f"bulk insert task ids:{task_id}")
    success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed(task_ids=[task_id], timeout=90)
    tt = time.time() - t0
    log.info(f"bulk insert state:{success} in {tt} with states:{states}")
    assert success

    time.sleep(2)
    self.utility_wrap.wait_for_index_building_complete(c_name, timeout=120)
    progress, _ = self.utility_wrap.index_building_progress(c_name)
    log.info(f"index building progress: {progress}")

    self.collection_wrap.load(_refresh=True)
    log.info("wait for load finished and be ready for search")
    res, _ = self.collection_wrap.query(expr=f"{df.pk_field} >= 0", output_fields=["name", "age"])
    log.debug(f"query result: {res}")
    assert len(res) == nb
@pytest.mark.parametrize("auto_id", [True, False])
def test_dynamic_schema_with_numpy(self, auto_id):
    """
    target: test bulk insert of numpy files into a collection created with
            enable_dynamic_field=True
    method: 1. create a collection that declares only a pk field and a float vector field
            2. save one .npy file per declared field (the pk file is skipped when auto_id
               is True) plus "$meta.npy", which holds nb JSON strings; each JSON object has
               "name", "age" and 0-3 randomly named keys
            3. copy the files to minio, create index, load, then run bulk insert on the files
            4. wait for the bulk insert task and index building, reload with _refresh=True
               and query with output_fields "name" and "age"
    expected: the bulk insert task succeeds and the query returns nb rows
    """
    import json

    self._connect()
    c_name = cf.gen_unique_str("dynamic_schema")
    dim = 128
    nb = 100
    fields = [
        cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id),
        cf.gen_float_vec_field(name=df.vec_field, dim=dim),
    ]
    schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id, enable_dynamic_field=True)
    self.collection_wrap.init_collection(c_name, schema=schema)

    pk_file = f"{df.pk_field}.npy"
    vec_file = f"{df.vec_field}.npy"
    meta_file = "$meta.npy"
    if auto_id is True:
        # the pk is not supplied when auto_id is enabled
        files = [vec_file, meta_file]
    else:
        files = [pk_file, vec_file, meta_file]

    for file_name in files:
        target_path = f"{data_source}/{file_name}"
        if file_name == meta_file:
            meta_rows = []
            for i in range(nb):
                tmp = {
                    "name": f"test_{i}",
                    "age": i,
                }
                # add a random number of extra keys so rows do not all share the same key set
                for _ in range(random.randint(0, 3)):
                    random_key = cf.gen_unique_str("random_key")
                    random_value = cf.gen_unique_str("random_value")
                    tmp[random_key] = random_value
                # each entry is stored as a JSON-encoded string
                meta_rows.append(json.dumps(tmp))
            np.save(target_path, meta_rows)
        elif file_name == pk_file:
            np.save(target_path, np.arange(nb))
        elif file_name == vec_file:
            # nb identical float32 rows of [0, 1, ..., dim - 1]
            vectors = np.tile(np.arange(dim, dtype=np.float32), (nb, 1))
            log.debug(f"vec data: {vectors}")
            np.save(target_path, vectors)
        else:
            raise Exception(f"unknown file with {files}")

    copy_files_to_minio(self.minio_endpoint, data_source, files, self.bucket_name, force=True)

    index_params = ct.default_index
    self.collection_wrap.create_index(field_name=df.vec_field, index_params=index_params)
    # load collection
    self.collection_wrap.load()

    t0 = time.time()
    task_id, _ = self.utility_wrap.do_bulk_insert(collection_name=c_name, files=files)
    # use the suite logger (not the root logger) so this line lands with the other test logs
    log.info(f"bulk insert task ids:{task_id}")
    success, states = self.utility_wrap.wait_for_bulk_insert_tasks_completed(task_ids=[task_id], timeout=90)
    tt = time.time() - t0
    log.info(f"bulk insert state:{success} in {tt} with states:{states}")
    assert success

    time.sleep(2)
    self.utility_wrap.wait_for_index_building_complete(c_name, timeout=120)
    progress, _ = self.utility_wrap.index_building_progress(c_name)
    log.info(f"index building progress: {progress}")

    self.collection_wrap.load(_refresh=True)
    log.info("wait for load finished and be ready for search")
    res, _ = self.collection_wrap.query(expr=f"{df.pk_field} >= 0", output_fields=["name", "age"])
    log.debug(f"query result: {res}")
    assert len(res) == nb
tests/python_client/testcases/test_query.py
浏览文件 @
d97e4213
...
...
@@ -18,7 +18,7 @@ prefix = "query"
exp_res
=
"exp_res"
default_term_expr
=
f
'
{
ct
.
default_int64_field_name
}
in [0, 1]'
default_mix_expr
=
"int64 >= 0 && varchar >=
\"
0
\"
"
default_inva
il
d_expr
=
"varchar >= 0"
default_inva
li
d_expr
=
"varchar >= 0"
default_string_term_expr
=
f
'
{
ct
.
default_string_field_name
}
in [
\"
0
\"
,
\"
1
\"
]'
default_index_params
=
{
"index_type"
:
"IVF_SQ8"
,
"metric_type"
:
"L2"
,
"params"
:
{
"nlist"
:
64
}}
binary_index_params
=
{
"index_type"
:
"BIN_IVF_FLAT"
,
"metric_type"
:
"JACCARD"
,
"params"
:
{
"nlist"
:
64
}}
...
...
@@ -880,7 +880,7 @@ class TestQueryParams(TestcaseBase):
collection_w
.
create_index
(
ct
.
default_float_vec_field_name
,
index_params
=
ct
.
default_flat_index
)
collection_w
.
load
()
partition_names
=
cf
.
gen_unique_str
()
error
=
{
ct
.
err_code
:
1
,
ct
.
err_msg
:
f
'PartitonName:
{
partition_names
}
not found'
}
error
=
{
ct
.
err_code
:
1
,
ct
.
err_msg
:
f
'Partit
i
onName:
{
partition_names
}
not found'
}
collection_w
.
query
(
default_term_expr
,
partition_names
=
[
partition_names
],
check_task
=
CheckTasks
.
err_res
,
check_items
=
error
)
...
...
@@ -1538,7 +1538,7 @@ class TestqueryString(TestcaseBase):
check_task
=
CheckTasks
.
check_query_results
,
check_items
=
{
exp_res
:
res
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L1
)
@
pytest
.
mark
.
parametrize
(
"expression"
,
cf
.
gen_inva
il
d_string_expressions
())
@
pytest
.
mark
.
parametrize
(
"expression"
,
cf
.
gen_inva
li
d_string_expressions
())
def
test_query_with_invalid_string_expr
(
self
,
expression
):
"""
target: test query data
...
...
@@ -1579,10 +1579,10 @@ class TestqueryString(TestcaseBase):
check_task
=
CheckTasks
.
check_query_results
,
check_items
=
{
exp_res
:
res
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L1
)
def
test_query_string_with_inva
il
d_prefix_expr
(
self
):
def
test_query_string_with_inva
li
d_prefix_expr
(
self
):
"""
target: test query with invalid prefix string expression
method: specify string primary field, use inva
il
d prefix string expr
method: specify string primary field, use inva
li
d prefix string expr
expected: raise error
"""
collection_w
=
self
.
init_collection_general
(
prefix
,
insert_data
=
True
)[
0
]
...
...
tests/python_client/testcases/test_search.py
浏览文件 @
d97e4213
...
...
@@ -29,8 +29,8 @@ default_limit = ct.default_limit
default_search_exp
=
"int64 >= 0"
default_search_string_exp
=
"varchar >=
\"
0
\"
"
default_search_mix_exp
=
"int64 >= 0 && varchar >=
\"
0
\"
"
default_inva
il
d_string_exp
=
"varchar >= 0"
p
er
fix_expr
=
'varchar like "0%"'
default_inva
li
d_string_exp
=
"varchar >= 0"
p
re
fix_expr
=
'varchar like "0%"'
default_search_field
=
ct
.
default_float_vec_field_name
default_search_params
=
ct
.
default_search_params
default_int64_field_name
=
ct
.
default_int64_field_name
...
...
@@ -318,7 +318,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
collection_w
.
create_index
(
"float_vector"
,
default_index
)
collection_w
.
load
()
# 3. search
invalid_search_params
=
cf
.
gen_inva
il
d_search_params_type
()
invalid_search_params
=
cf
.
gen_inva
li
d_search_params_type
()
message
=
"Search params check failed"
for
invalid_search_param
in
invalid_search_params
:
if
index
==
invalid_search_param
[
"index_type"
]:
...
...
@@ -681,7 +681,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
[
deleted_par_name
],
check_task
=
CheckTasks
.
err_res
,
check_items
=
{
"err_code"
:
1
,
"err_msg"
:
"PartitonName: %s not found"
%
deleted_par_name
})
"err_msg"
:
"Partit
i
onName: %s not found"
%
deleted_par_name
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L3
)
@
pytest
.
mark
.
parametrize
(
"index, params"
,
...
...
@@ -734,7 +734,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
default_limit
,
default_search_exp
,
[
partition_name
],
check_task
=
CheckTasks
.
err_res
,
check_items
=
{
"err_code"
:
1
,
"err_msg"
:
"PartitonName: %s not found"
%
partition_name
})
"err_msg"
:
"Partit
i
onName: %s not found"
%
partition_name
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L2
)
@
pytest
.
mark
.
xfail
(
reason
=
"issue 15407"
)
...
...
@@ -774,7 +774,7 @@ class TestCollectionSearchInvalid(TestcaseBase):
search_params
,
default_limit
,
"int64 >= 0"
,
check_task
=
CheckTasks
.
err_res
,
check_items
=
{
"err_code"
:
1
,
"err_msg"
:
"Data type and metric type mis-match"
})
"err_msg"
:
"Data type and metric type mis
s
-match"
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L2
)
def
test_search_with_output_fields_not_exist
(
self
):
...
...
@@ -3170,7 +3170,7 @@ class TestSearchBase(TestcaseBase):
partition_num
=
1
,
dim
=
dim
,
is_index
=
True
)[
0
:
5
]
vectors
=
[[
random
.
random
()
for
_
in
range
(
dim
)]
for
_
in
range
(
nq
)]
# 2. create patition
# 2. create pa
r
tition
partition_name
=
"search_partition_empty"
collection_w
.
create_partition
(
partition_name
=
partition_name
,
description
=
"search partition empty"
)
par
=
collection_w
.
partitions
...
...
@@ -3219,7 +3219,7 @@ class TestSearchBase(TestcaseBase):
partition_num
=
1
,
dim
=
dim
,
is_index
=
True
)[
0
:
5
]
vectors
=
[[
random
.
random
()
for
_
in
range
(
dim
)]
for
_
in
range
(
nq
)]
# 2. create patition
# 2. create pa
r
tition
partition_name
=
ct
.
default_partition_name
par
=
collection_w
.
partitions
# collection_w.load()
...
...
@@ -3336,7 +3336,7 @@ class TestSearchBase(TestcaseBase):
partition_num
=
1
,
dim
=
dim
,
is_index
=
True
)[
0
:
5
]
vectors
=
[[
random
.
random
()
for
_
in
range
(
dim
)]
for
_
in
range
(
nq
)]
# 2. create patition
# 2. create pa
r
tition
partition_name
=
"search_partition_empty"
collection_w
.
create_partition
(
partition_name
=
partition_name
,
description
=
"search partition empty"
)
par
=
collection_w
.
partitions
...
...
@@ -3379,7 +3379,7 @@ class TestSearchBase(TestcaseBase):
partition_num
=
1
,
dim
=
dim
,
is_index
=
True
)[
0
:
5
]
vectors
=
[[
random
.
random
()
for
_
in
range
(
dim
)]
for
_
in
range
(
nq
)]
# 2. create patition
# 2. create pa
r
tition
par_name
=
collection_w
.
partitions
[
0
].
name
# collection_w.load()
# 3. create different index
...
...
@@ -3631,7 +3631,7 @@ class TestsearchString(TestcaseBase):
vectors
=
[[
random
.
random
()
for
_
in
range
(
default_dim
)]
for
_
in
range
(
default_nq
)]
collection_w
.
search
(
vectors
[:
default_nq
],
default_search_field
,
default_search_params
,
default_limit
,
default_inva
il
d_string_exp
,
default_inva
li
d_string_exp
,
check_task
=
CheckTasks
.
err_res
,
check_items
=
{
"err_code"
:
1
,
"err_msg"
:
"failed to create query plan: type mismatch"
}
...
...
@@ -3812,7 +3812,7 @@ class TestsearchString(TestcaseBase):
collection_w
.
search
(
vectors
[:
default_nq
],
default_search_field
,
# search all buckets
{
"metric_type"
:
"L2"
,
"params"
:
{
"nprobe"
:
100
}},
default_limit
,
p
er
fix_expr
,
p
re
fix_expr
,
output_fields
=
output_fields
,
_async
=
_async
,
travel_timestamp
=
0
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录