Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
53360cda
M
milvus
项目概览
milvus
/
milvus
10 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
53360cda
编写于
9月 25, 2020
作者:
G
groot
提交者:
GitHub
9月 25, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
reduce uid copy during search (#3867)
Signed-off-by:
N
groot
<
yihua.mo@zilliz.com
>
上级
7fcaa5be
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
43 additions
and
8 deletions
+43
-8
core/src/segment/SegmentReader.cpp
core/src/segment/SegmentReader.cpp
+40
-8
core/src/segment/SegmentReader.h
core/src/segment/SegmentReader.h
+3
-0
未找到文件。
core/src/segment/SegmentReader.cpp
浏览文件 @
53360cda
...
...
@@ -244,6 +244,8 @@ SegmentReader::LoadUids(std::vector<engine::idx_t>& uids) {
return
Status
(
DB_ERROR
,
err_msg
);
}
TimeRecorderAuto
recorder
(
"SegmentReader::LoadUids"
);
uids
.
clear
();
uids
.
resize
(
raw
->
data_
.
size
()
/
sizeof
(
engine
::
idx_t
));
memcpy
(
uids
.
data
(),
raw
->
data_
.
data
(),
raw
->
data_
.
size
());
...
...
@@ -269,12 +271,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
return
Status
(
DB_ERROR
,
"Field is not vector type"
);
}
// load uids
std
::
vector
<
int64_t
>
uids
;
STATUS_CHECK
(
LoadUids
(
uids
));
// load deleted doc
faiss
::
ConcurrentBitsetPtr
concurrent_bitset_ptr
=
std
::
make_shared
<
faiss
::
ConcurrentBitset
>
(
uids
.
size
());
int64_t
row_count
=
GetRowCount
();
faiss
::
ConcurrentBitsetPtr
concurrent_bitset_ptr
=
std
::
make_shared
<
faiss
::
ConcurrentBitset
>
(
row_count
);
segment
::
DeletedDocsPtr
deleted_docs_ptr
;
LoadDeletedDocs
(
deleted_docs_ptr
);
if
(
deleted_docs_ptr
!=
nullptr
)
{
...
...
@@ -307,7 +306,11 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
engine
::
BinaryDataPtr
raw
;
STATUS_CHECK
(
LoadField
(
field_name
,
raw
,
false
));
auto
dataset
=
knowhere
::
GenDataset
(
uids
.
size
(),
dimension
,
raw
->
data_
.
data
());
// load uids
std
::
vector
<
int64_t
>
uids
;
STATUS_CHECK
(
LoadUids
(
uids
));
auto
dataset
=
knowhere
::
GenDataset
(
row_count
,
dimension
,
raw
->
data_
.
data
());
// construct IDMAP index
knowhere
::
VecIndexFactory
&
vec_index_factory
=
knowhere
::
VecIndexFactory
::
GetInstance
();
...
...
@@ -326,9 +329,9 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
segment_ptr_
->
SetVectorIndex
(
field_name
,
index_ptr
);
cache
::
CpuCacheMgr
::
GetInstance
().
InsertItem
(
temp_index_path
,
index_ptr
);
recorder
.
RecordSection
(
"construct temp IDMAP index"
);
}
recorder
.
RecordSection
(
"create temp IDMAP index"
);
return
Status
::
OK
();
}
...
...
@@ -377,11 +380,16 @@ SegmentReader::LoadVectorIndex(const std::string& field_name, knowhere::VecIndex
STATUS_CHECK
(
ss_codec
.
GetVectorIndexFormat
()
->
ConstructIndex
(
index_type
,
index_data
,
raw_data
,
compress_data
,
index_ptr
));
// load uids
std
::
vector
<
int64_t
>
uids
;
STATUS_CHECK
(
LoadUids
(
uids
));
index_ptr
->
SetUids
(
uids
);
index_ptr
->
SetBlacklist
(
concurrent_bitset_ptr
);
segment_ptr_
->
SetVectorIndex
(
field_name
,
index_ptr
);
cache
::
CpuCacheMgr
::
GetInstance
().
InsertItem
(
index_file_path
,
index_ptr
);
// put into cache
recorder
.
RecordSection
(
"construct index"
);
}
catch
(
std
::
exception
&
e
)
{
std
::
string
err_msg
=
"Failed to load vector index: "
+
std
::
string
(
e
.
what
());
LOG_ENGINE_ERROR_
<<
err_msg
;
...
...
@@ -506,7 +514,7 @@ SegmentReader::LoadBloomFilter(segment::IdBloomFilterPtr& id_bloom_filter_ptr) {
Status
SegmentReader
::
LoadDeletedDocs
(
segment
::
DeletedDocsPtr
&
deleted_docs_ptr
)
{
try
{
TimeRecorder
recorder
(
"SegmentReader::LoadDeletedDocs"
);
TimeRecorder
Auto
recorder
(
"SegmentReader::LoadDeletedDocs"
);
deleted_docs_ptr
=
segment_ptr_
->
GetDeletedDocs
();
if
(
deleted_docs_ptr
!=
nullptr
)
{
...
...
@@ -611,6 +619,30 @@ SegmentReader::GetTempIndexPath(const std::string& field_name, std::string& path
return
Status
::
OK
();
}
int64_t
SegmentReader
::
GetRowCount
()
{
engine
::
BinaryDataPtr
raw
;
auto
status
=
LoadField
(
engine
::
FIELD_UID
,
raw
);
if
(
!
status
.
ok
())
{
LOG_ENGINE_ERROR_
<<
status
.
message
();
return
0
;
}
if
(
raw
==
nullptr
)
{
LOG_ENGINE_ERROR_
<<
"Failed to load id field"
;
return
0
;
}
if
(
raw
->
data_
.
size
()
%
sizeof
(
engine
::
idx_t
)
!=
0
)
{
std
::
string
err_msg
=
"Failed to load uids: illegal file size"
;
LOG_ENGINE_ERROR_
<<
err_msg
;
return
0
;
}
int64_t
count
=
raw
->
data_
.
size
()
/
sizeof
(
engine
::
idx_t
);
return
count
;
}
Status
SegmentReader
::
ClearCache
()
{
TimeRecorderAuto
recorder
(
"SegmentReader::ClearCache"
);
...
...
core/src/segment/SegmentReader.h
浏览文件 @
53360cda
...
...
@@ -95,6 +95,9 @@ class SegmentReader {
return
segment_visitor_
;
}
// Returns the segment's row count, computed from the UID field's byte size
// divided by sizeof(engine::idx_t). Logs an error and returns 0 if the UID
// field cannot be loaded or its size is not a multiple of the id width.
int64_t
GetRowCount
();
// clear cache from cache manager, use this method for segment merge/compact and collection/partition drop
Status
ClearCache
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录