Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
8dc43fa8
M
milvus
项目概览
milvus
/
milvus
12 个月 前同步成功
通知
261
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
8dc43fa8
编写于
5月 24, 2022
作者:
Y
yanliang567
提交者:
GitHub
5月 24, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add test for numpy files in diff folders (#17175)
Signed-off-by:
N
yanliang567
<
yanliang.qiao@zilliz.com
>
上级
ea8e1623
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
63 addition
and
23 deletion
+63
-23
tests/python_client/bulk_load/test_bulk_load.py
tests/python_client/bulk_load/test_bulk_load.py
+63
-23
未找到文件。
tests/python_client/bulk_load/test_bulk_load.py
浏览文件 @
8dc43fa8
...
...
@@ -847,11 +847,10 @@ class TestBulkLoad(TestcaseBase):
"limit"
:
1
})
@
pytest
.
mark
.
tags
(
CaseLabel
.
L3
)
@
pytest
.
mark
.
parametrize
(
"auto_id"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"auto_id"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"dim"
,
[
6
])
@
pytest
.
mark
.
parametrize
(
"entities"
,
[
10
])
@
pytest
.
mark
.
parametrize
(
"file_nums"
,
[
2
])
# 32, max task nums 32? need improve
@
pytest
.
mark
.
xfail
(
reason
=
"only one numpy file imported successfully, issue #16992"
)
@
pytest
.
mark
.
parametrize
(
"entities"
,
[
1000
])
@
pytest
.
mark
.
parametrize
(
"file_nums"
,
[
10
])
def
test_multi_numpy_files_from_diff_folders
(
self
,
auto_id
,
dim
,
entities
,
file_nums
):
"""
collection schema 1: [pk, float_vector]
...
...
@@ -859,18 +858,10 @@ class TestBulkLoad(TestcaseBase):
Steps:
1. create collection
2. import data
3. if row_based: verify import failed
4. if column_based:
4.1 verify the data entities equal the import data
4.2 verify search and query successfully
3. verify that import numpy files in a loop
"""
row_based
=
False
# numpy files supports only column based
data_fields
=
[
df
.
vec_field
]
if
not
auto_id
:
data_fields
.
append
(
df
.
pk_field
)
files
=
prepare_bulk_load_numpy_files
(
rows
=
entities
,
dim
=
dim
,
data_fields
=
data_fields
,
file_nums
=
file_nums
,
force
=
True
)
self
.
_connect
()
c_name
=
cf
.
gen_unique_str
()
fields
=
[
cf
.
gen_int64_field
(
name
=
df
.
pk_field
,
is_primary
=
True
),
...
...
@@ -882,16 +873,22 @@ class TestBulkLoad(TestcaseBase):
self
.
collection_wrap
.
create_index
(
field_name
=
df
.
vec_field
,
index_params
=
index_params
)
# load collection
self
.
collection_wrap
.
load
()
t0
=
time
.
time
()
task_ids
,
_
=
self
.
utility_wrap
.
bulk_load
(
collection_name
=
c_name
,
row_based
=
row_based
,
files
=
files
)
data_fields
=
[
df
.
vec_field
]
if
not
auto_id
:
data_fields
.
append
(
df
.
pk_field
)
for
i
in
range
(
file_nums
):
files
=
prepare_bulk_load_numpy_files
(
rows
=
entities
,
dim
=
dim
,
data_fields
=
data_fields
,
file_nums
=
1
,
force
=
True
)
task_ids
,
_
=
self
.
utility_wrap
.
bulk_load
(
collection_name
=
c_name
,
row_based
=
row_based
,
files
=
files
)
success
,
states
=
self
.
utility_wrap
.
\
wait_for_bulk_load_tasks_completed
(
task_ids
=
task_ids
,
target_state
=
BulkLoadStates
.
BulkLoadPersisted
,
timeout
=
30
)
tt
=
time
.
time
()
-
t0
log
.
info
(
f
"bulk load state:
{
success
}
in
{
tt
}
"
)
log
.
info
(
f
"bulk load state:
{
success
}
"
)
assert
success
log
.
info
(
f
" collection entities:
{
self
.
collection_wrap
.
num_entities
}
"
)
...
...
@@ -1734,6 +1731,51 @@ class TestBulkLoadInvalidParams(TestcaseBase):
# res, _ = self.collection_wrap.query(expr=f"{float_field} in [1.0]", output_fields=[float_field])
# assert res[0].get(float_field, 0) == 1.0
@pytest.mark.tags(CaseLabel.L3)
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("dim", [6])
@pytest.mark.parametrize("entities", [10])
@pytest.mark.parametrize("file_nums", [2])
def test_multi_numpy_files_from_diff_folders_in_one_request(self, auto_id, dim, entities, file_nums):
    """
    Submit .npy files located in different folders in one bulk load request
    and verify that the request is rejected.

    collection schema 1: [pk, float_vector]
    data file: .npy files in different folders
    Steps:
    1. create collection
    2. import data with a single bulk load request
    3. verify every task fails with a "duplicate file" reason and that
       no entities were inserted
    """
    # numpy files are imported column based only, so row_based stays off
    column_only = False

    # The pk column is only supplied by the data files when ids are not
    # auto-generated.
    npy_fields = [df.vec_field] if auto_id else [df.vec_field, df.pk_field]
    npy_files = prepare_bulk_load_numpy_files(
        rows=entities,
        dim=dim,
        data_fields=npy_fields,
        file_nums=file_nums,
        force=True,
    )

    # Create the target collection: [pk, float_vector].
    self._connect()
    c_name = cf.gen_unique_str()
    pk_schema = cf.gen_int64_field(name=df.pk_field, is_primary=True)
    vec_schema = cf.gen_float_vec_field(name=df.vec_field, dim=dim)
    schema = cf.gen_collection_schema(fields=[pk_schema, vec_schema], auto_id=auto_id)
    self.collection_wrap.init_collection(c_name, schema=schema)

    # Send all files in one request and wait for the tasks to settle.
    started_at = time.time()
    task_ids, _ = self.utility_wrap.bulk_load(
        collection_name=c_name,
        row_based=column_only,
        files=npy_files,
    )
    success, states = self.utility_wrap.wait_for_bulk_load_tasks_completed(
        task_ids=task_ids,
        target_state=BulkLoadStates.BulkLoadPersisted,
        timeout=30,
    )
    tt = time.time() - started_at
    log.info(f"bulk load state:{success} in {tt}")

    # The import must fail, and each task must report the expected reason.
    assert not success
    expected_reason = "duplicate file"
    for task_state in states.values():
        assert task_state.state_name == "BulkLoadFailed"
        assert expected_reason in task_state.infos.get("failed_reason", "")

    # Nothing may have been written to the collection.
    assert self.collection_wrap.num_entities == 0
# TODO: string data on float field
...
...
@@ -1810,6 +1852,4 @@ class TestBulkLoadAdvanced(TestcaseBase):
"limit"
:
1
})
# self.collection_wrap.query(expr=f"id in {ids}")
"""Validate data consistency and availability during import"""
"""Validate data consistency and availability during import"""
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录