Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
95105bfd
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
95105bfd
编写于
3月 09, 2020
作者:
X
xiaojun.lin
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'official/master' into fix_1564
Signed-off-by:
N
xiaojun.lin
<
xiaojun.lin@zilliz.com
>
上级
f891a590
7cd727aa
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed file
with
210 addition
and
113 deletion
+210
-113
CHANGELOG.md
CHANGELOG.md
+1
-0
core/src/db/insert/MemTable.cpp
core/src/db/insert/MemTable.cpp
+24
-14
core/src/wrapper/VecIndex.h
core/src/wrapper/VecIndex.h
+4
-3
core/unittest/db/test_delete.cpp
core/unittest/db/test_delete.cpp
+144
-62
core/unittest/db/test_search_by_id.cpp
core/unittest/db/test_search_by_id.cpp
+37
-34
未找到文件。
CHANGELOG.md
浏览文件 @
95105bfd
...
...
@@ -47,6 +47,7 @@ Please mark all change in change log and use the issue from GitHub
-
\#
1556 Index file not created after table and index created
-
\#
1560 Search crashed with Super-high dimensional binary vector
-
\#
1564 Too low recall for glove-200-angular, ivf_pq index
-
\#
1574 Set all existing bitset in cache when applying deletes
## Feature
-
\#
216 Add CLI to get server info
...
...
core/src/db/insert/MemTable.cpp
浏览文件 @
95105bfd
...
...
@@ -236,11 +236,27 @@ MemTable::ApplyDeletes() {
utils
::
GetParentPath
(
table_file
.
location_
,
segment_dir
);
segment
::
SegmentReader
segment_reader
(
segment_dir
);
auto
index
=
std
::
static_pointer_cast
<
VecIndex
>
(
cache
::
CpuCacheMgr
::
GetInstance
()
->
GetIndex
(
table_file
.
location_
));
faiss
::
ConcurrentBitsetPtr
blacklist
=
nullptr
;
if
(
index
!=
nullptr
)
{
status
=
index
->
GetBlacklist
(
blacklist
);
auto
&
segment_id
=
table_file
.
segment_id_
;
meta
::
TableFilesSchema
segment_files
;
status
=
meta_
->
GetTableFilesBySegmentId
(
segment_id
,
segment_files
);
if
(
!
status
.
ok
())
{
break
;
}
// Get all index that contains blacklist in cache
std
::
vector
<
VecIndexPtr
>
indexes
;
std
::
vector
<
faiss
::
ConcurrentBitsetPtr
>
blacklists
;
for
(
auto
&
file
:
segment_files
)
{
auto
index
=
std
::
static_pointer_cast
<
VecIndex
>
(
cache
::
CpuCacheMgr
::
GetInstance
()
->
GetIndex
(
file
.
location_
));
faiss
::
ConcurrentBitsetPtr
blacklist
=
nullptr
;
if
(
index
!=
nullptr
)
{
index
->
GetBlacklist
(
blacklist
);
if
(
blacklist
!=
nullptr
)
{
indexes
.
emplace_back
(
index
);
blacklists
.
emplace_back
(
blacklist
);
}
}
}
std
::
vector
<
segment
::
doc_id_t
>
uids
;
...
...
@@ -293,7 +309,7 @@ MemTable::ApplyDeletes() {
id_bloom_filter_ptr
->
Remove
(
uids
[
i
]);
}
if
(
blacklist
!=
nullptr
)
{
for
(
auto
&
blacklist
:
blacklists
)
{
if
(
!
blacklist
->
test
(
i
))
{
blacklist
->
set
(
i
);
}
...
...
@@ -308,8 +324,8 @@ MemTable::ApplyDeletes() {
<<
find_diff
.
count
()
<<
" s in total"
;
ENGINE_LOG_DEBUG
<<
"Setting deleted docs and bloom filter took "
<<
set_diff
.
count
()
<<
" s in total"
;
if
(
index
!=
nullptr
)
{
index
->
SetBlacklist
(
blacklist
);
for
(
auto
i
=
0
;
i
<
indexes
.
size
();
++
i
)
{
index
es
[
i
]
->
SetBlacklist
(
blacklists
[
i
]
);
}
start
=
std
::
chrono
::
high_resolution_clock
::
now
();
...
...
@@ -339,12 +355,6 @@ MemTable::ApplyDeletes() {
<<
" s"
;
// Update table file row count
auto
&
segment_id
=
table_file
.
segment_id_
;
meta
::
TableFilesSchema
segment_files
;
status
=
meta_
->
GetTableFilesBySegmentId
(
segment_id
,
segment_files
);
if
(
!
status
.
ok
())
{
break
;
}
for
(
auto
&
file
:
segment_files
)
{
if
(
file
.
file_type_
==
meta
::
TableFileSchema
::
RAW
||
file
.
file_type_
==
meta
::
TableFileSchema
::
TO_INDEX
||
file
.
file_type_
==
meta
::
TableFileSchema
::
INDEX
||
file
.
file_type_
==
meta
::
TableFileSchema
::
BACKUP
)
{
...
...
core/src/wrapper/VecIndex.h
浏览文件 @
95105bfd
...
...
@@ -12,10 +12,10 @@
#pragma once
#include <faiss/utils/ConcurrentBitset.h>
#include <thirdparty/nlohmann/json.hpp>
#include <memory>
#include <string>
#include <thirdparty/nlohmann/json.hpp>
#include <utility>
#include <vector>
...
...
@@ -180,13 +180,14 @@ class VecIndex : public cache::DataObj {
virtual
Status
SetBlacklist
(
faiss
::
ConcurrentBitsetPtr
list
)
{
ENGINE_LOG_ERROR
<<
"SetBlacklist not support"
;
//
ENGINE_LOG_ERROR << "SetBlacklist not support";
return
Status
::
OK
();
}
virtual
Status
GetBlacklist
(
faiss
::
ConcurrentBitsetPtr
&
list
)
{
ENGINE_LOG_ERROR
<<
"GetBlacklist not support"
;
// ENGINE_LOG_ERROR << "GetBlacklist not support";
ENGINE_LOG_WARNING
<<
"Deletion on unsupported index type"
;
return
Status
::
OK
();
}
...
...
core/unittest/db/test_delete.cpp
浏览文件 @
95105bfd
此差异已折叠。
点击以展开。
core/unittest/db/test_search_by_id.cpp
浏览文件 @
95105bfd
...
...
@@ -68,7 +68,7 @@ TEST_F(SearchByIdTest, basic) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -81,7 +81,7 @@ TEST_F(SearchByIdTest, basic) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -108,7 +108,8 @@ TEST_F(SearchByIdTest, basic) {
milvus
::
engine
::
ResultIds
result_ids
;
milvus
::
engine
::
ResultDistances
result_distances
;
stat
=
db_
->
QueryByID
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
stat
=
db_
->
QueryByID
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
ASSERT_EQ
(
result_ids
[
0
],
i
);
ASSERT_LT
(
result_distances
[
0
],
1e-4
);
}
...
...
@@ -119,7 +120,7 @@ TEST_F(SearchByIdTest, with_index) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -132,7 +133,7 @@ TEST_F(SearchByIdTest, with_index) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -153,7 +154,7 @@ TEST_F(SearchByIdTest, with_index) {
milvus
::
engine
::
TableIndex
index
;
index
.
engine_type_
=
(
int
)
milvus
::
engine
::
EngineType
::
FAISS_IVFSQ8
;
index
.
extra_params_
=
{{
"nlist"
,
10
}};
stat
=
db_
->
CreateIndex
(
GetTableName
()
,
index
);
stat
=
db_
->
CreateIndex
(
table_info
.
table_id_
,
index
);
ASSERT_TRUE
(
stat
.
ok
());
const
int
topk
=
10
,
nprobe
=
10
;
...
...
@@ -165,7 +166,8 @@ TEST_F(SearchByIdTest, with_index) {
milvus
::
engine
::
ResultIds
result_ids
;
milvus
::
engine
::
ResultDistances
result_distances
;
stat
=
db_
->
QueryByID
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
stat
=
db_
->
QueryByID
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
ASSERT_EQ
(
result_ids
[
0
],
i
);
ASSERT_LT
(
result_distances
[
0
],
1e-3
);
}
...
...
@@ -176,7 +178,7 @@ TEST_F(SearchByIdTest, with_delete) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -189,7 +191,7 @@ TEST_F(SearchByIdTest, with_delete) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -211,7 +213,7 @@ TEST_F(SearchByIdTest, with_delete) {
for
(
auto
&
id
:
ids_to_search
)
{
ids_to_delete
.
emplace_back
(
id
);
}
stat
=
db_
->
DeleteVectors
(
GetTableName
()
,
ids_to_delete
);
stat
=
db_
->
DeleteVectors
(
table_info
.
table_id_
,
ids_to_delete
);
stat
=
db_
->
Flush
();
ASSERT_TRUE
(
stat
.
ok
());
...
...
@@ -225,7 +227,8 @@ TEST_F(SearchByIdTest, with_delete) {
milvus
::
engine
::
ResultIds
result_ids
;
milvus
::
engine
::
ResultDistances
result_distances
;
stat
=
db_
->
QueryByID
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
stat
=
db_
->
QueryByID
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
i
,
result_ids
,
result_distances
);
ASSERT_EQ
(
result_ids
[
0
],
-
1
);
ASSERT_EQ
(
result_distances
[
0
],
std
::
numeric_limits
<
float
>::
max
());
}
...
...
@@ -236,7 +239,7 @@ TEST_F(GetVectorByIdTest, basic) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -249,7 +252,7 @@ TEST_F(GetVectorByIdTest, basic) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -277,11 +280,11 @@ TEST_F(GetVectorByIdTest, basic) {
milvus
::
engine
::
ResultDistances
result_distances
;
milvus
::
engine
::
VectorsData
vector
;
stat
=
db_
->
GetVectorByID
(
GetTableName
()
,
id
,
vector
);
stat
=
db_
->
GetVectorByID
(
table_info
.
table_id_
,
id
,
vector
);
ASSERT_TRUE
(
stat
.
ok
());
stat
=
db_
->
Query
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
stat
=
db_
->
Query
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
result_ids
[
0
],
id
);
ASSERT_LT
(
result_distances
[
0
],
1e-4
);
...
...
@@ -293,7 +296,7 @@ TEST_F(GetVectorByIdTest, with_index) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -306,7 +309,7 @@ TEST_F(GetVectorByIdTest, with_index) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -327,7 +330,7 @@ TEST_F(GetVectorByIdTest, with_index) {
milvus
::
engine
::
TableIndex
index
;
index
.
extra_params_
=
{{
"nlist"
,
10
}};
index
.
engine_type_
=
(
int
)
milvus
::
engine
::
EngineType
::
FAISS_IVFSQ8
;
stat
=
db_
->
CreateIndex
(
GetTableName
()
,
index
);
stat
=
db_
->
CreateIndex
(
table_info
.
table_id_
,
index
);
ASSERT_TRUE
(
stat
.
ok
());
const
int
topk
=
10
,
nprobe
=
10
;
...
...
@@ -340,11 +343,11 @@ TEST_F(GetVectorByIdTest, with_index) {
milvus
::
engine
::
ResultDistances
result_distances
;
milvus
::
engine
::
VectorsData
vector
;
stat
=
db_
->
GetVectorByID
(
GetTableName
()
,
id
,
vector
);
stat
=
db_
->
GetVectorByID
(
table_info
.
table_id_
,
id
,
vector
);
ASSERT_TRUE
(
stat
.
ok
());
stat
=
db_
->
Query
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
stat
=
db_
->
Query
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
ASSERT_EQ
(
result_ids
[
0
],
id
);
ASSERT_LT
(
result_distances
[
0
],
1e-3
);
}
...
...
@@ -355,7 +358,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
auto
stat
=
db_
->
CreateTable
(
table_info
);
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -368,7 +371,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
xb
.
id_array_
.
push_back
(
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
xb
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
xb
);
ASSERT_TRUE
(
stat
.
ok
());
std
::
random_device
rd
;
...
...
@@ -390,7 +393,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
for
(
auto
&
id
:
ids_to_search
)
{
ids_to_delete
.
emplace_back
(
id
);
}
stat
=
db_
->
DeleteVectors
(
GetTableName
()
,
ids_to_delete
);
stat
=
db_
->
DeleteVectors
(
table_info
.
table_id_
,
ids_to_delete
);
stat
=
db_
->
Flush
();
ASSERT_TRUE
(
stat
.
ok
());
...
...
@@ -402,7 +405,7 @@ TEST_F(GetVectorByIdTest, with_delete) {
milvus
::
engine
::
ResultDistances
result_distances
;
milvus
::
engine
::
VectorsData
vector
;
stat
=
db_
->
GetVectorByID
(
GetTableName
()
,
id
,
vector
);
stat
=
db_
->
GetVectorByID
(
table_info
.
table_id_
,
id
,
vector
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_TRUE
(
vector
.
float_data_
.
empty
());
ASSERT_EQ
(
vector
.
vector_count_
,
0
);
...
...
@@ -419,7 +422,7 @@ TEST_F(SearchByIdTest, BINARY) {
ASSERT_TRUE
(
stat
.
ok
());
milvus
::
engine
::
meta
::
TableSchema
table_info_get
;
table_info_get
.
table_id_
=
GetTableName
()
;
table_info_get
.
table_id_
=
table_info
.
table_id_
;
stat
=
db_
->
DescribeTable
(
table_info_get
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
table_info_get
.
dimension_
,
TABLE_DIM
);
...
...
@@ -445,7 +448,7 @@ TEST_F(SearchByIdTest, BINARY) {
vectors
.
id_array_
.
emplace_back
(
k
*
nb
+
i
);
}
stat
=
db_
->
InsertVectors
(
GetTableName
()
,
""
,
vectors
);
stat
=
db_
->
InsertVectors
(
table_info
.
table_id_
,
""
,
vectors
);
ASSERT_TRUE
(
stat
.
ok
());
}
...
...
@@ -465,7 +468,7 @@ TEST_F(SearchByIdTest, BINARY) {
ASSERT_TRUE
(
stat
.
ok
());
uint64_t
row_count
;
stat
=
db_
->
GetTableRowCount
(
GetTableName
()
,
row_count
);
stat
=
db_
->
GetTableRowCount
(
table_info
.
table_id_
,
row_count
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
row_count
,
nb
*
insert_loop
);
...
...
@@ -479,12 +482,12 @@ TEST_F(SearchByIdTest, BINARY) {
milvus
::
engine
::
ResultDistances
result_distances
;
milvus
::
engine
::
VectorsData
vector
;
stat
=
db_
->
GetVectorByID
(
GetTableName
()
,
id
,
vector
);
stat
=
db_
->
GetVectorByID
(
table_info
.
table_id_
,
id
,
vector
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
vector
.
vector_count_
,
1
);
stat
=
db_
->
Query
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
stat
=
db_
->
Query
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
vector
,
result_ids
,
result_distances
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
result_ids
[
0
],
id
);
ASSERT_LT
(
result_distances
[
0
],
1e-4
);
...
...
@@ -493,8 +496,8 @@ TEST_F(SearchByIdTest, BINARY) {
result_ids
.
clear
();
result_distances
.
clear
();
stat
=
db_
->
QueryByID
(
dummy_context_
,
GetTableName
(),
tags
,
topk
,
json_params
,
id
,
result_ids
,
result_distances
);
stat
=
db_
->
QueryByID
(
dummy_context_
,
table_info
.
table_id_
,
tags
,
topk
,
json_params
,
id
,
result_ids
,
result_distances
);
ASSERT_TRUE
(
stat
.
ok
());
ASSERT_EQ
(
result_ids
[
0
],
id
);
ASSERT_LT
(
result_distances
[
0
],
1e-4
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录