Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
07f08daf
M
milvus
项目概览
milvus
/
milvus
9 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
07f08daf
编写于
8月 07, 2023
作者:
Y
yah01
提交者:
GitHub
8月 07, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix failed to load index due to lost binary (#26135)
Signed-off-by:
N
yah01
<
yang.cen@zilliz.com
>
上级
4e1b65d3
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
64 addition
and
57 deletion
+64
-57
internal/core/src/index/VectorMemIndex.cpp
internal/core/src/index/VectorMemIndex.cpp
+64
-17
internal/core/src/storage/MemFileManagerImpl.cpp
internal/core/src/storage/MemFileManagerImpl.cpp
+0
-35
internal/core/src/storage/MemFileManagerImpl.h
internal/core/src/storage/MemFileManagerImpl.h
+0
-5
未找到文件。
internal/core/src/index/VectorMemIndex.cpp
浏览文件 @
07f08daf
...
...
@@ -37,6 +37,7 @@
#include "storage/FieldData.h"
#include "storage/MemFileManagerImpl.h"
#include "storage/ThreadPools.h"
#include "storage/Util.h"
namespace
milvus
::
index
{
...
...
@@ -104,32 +105,78 @@ VectorMemIndex::Load(const Config& config) {
AssertInfo
(
index_files
.
has_value
(),
"index file paths is empty when load index"
);
LOG_SEGCORE_INFO_
<<
"load index files: "
<<
index_files
.
value
().
size
();
auto
parallel_degree
=
static_cast
<
uint64_t
>
(
DEFAULT_FIELD_MAX_MEMORY_LIMIT
/
FILE_SLICE_SIZE
);
std
::
map
<
std
::
string
,
storage
::
FieldDataChannelPtr
>
channels
;
for
(
const
auto
&
file
:
index_files
.
value
())
{
auto
key
=
file
.
substr
(
file
.
find_last_of
(
'/'
)
+
1
)
;
LOG_SEGCORE_INFO_
<<
"loading index file "
<<
key
;
if
(
channels
.
find
(
key
)
==
channels
.
end
())
{
channels
.
emplace
(
std
::
move
(
key
),
std
::
make_shared
<
storage
::
FieldDataChannel
>
(
parallel_degree
*
2
))
;
std
::
map
<
std
::
string
,
storage
::
FieldDataPtr
>
index_datas
{};
// try to read slice meta first
std
::
string
slice_meta_filepath
;
for
(
auto
&
file
:
index_files
.
value
())
{
auto
file_name
=
file
.
substr
(
file
.
find_last_of
(
'/'
)
+
1
);
if
(
file_name
==
INDEX_FILE_SLICE_META
)
{
slice_meta_filepath
=
file
;
break
;
}
}
auto
&
pool
=
ThreadPools
::
GetThreadPool
(
milvus
::
ThreadPoolPriority
::
MIDDLE
);
auto
future
=
pool
.
Submit
(
[
&
]
{
file_manager_
->
LoadFileStream
(
index_files
.
value
(),
channels
);
});
if
(
slice_meta_filepath
.
empty
())
{
// no slice meta, we could simply load all these files
index_datas
=
file_manager_
->
LoadIndexToMemory
(
index_files
.
value
());
AssembleIndexDatas
(
index_datas
);
}
else
{
// load with the slice meta info, then we can load batch by batch
std
::
string
index_file_prefix
=
slice_meta_filepath
.
substr
(
0
,
slice_meta_filepath
.
find_last_of
(
'/'
)
+
1
);
std
::
vector
<
std
::
string
>
batch
{};
batch
.
reserve
(
parallel_degree
);
auto
result
=
file_manager_
->
LoadIndexToMemory
({
slice_meta_filepath
});
auto
raw_slice_meta
=
result
[
INDEX_FILE_SLICE_META
];
Config
meta_data
=
Config
::
parse
(
std
::
string
(
static_cast
<
const
char
*>
(
raw_slice_meta
->
Data
()),
raw_slice_meta
->
Size
()));
for
(
auto
&
item
:
meta_data
[
META
])
{
std
::
string
prefix
=
item
[
NAME
];
int
slice_num
=
item
[
SLICE_NUM
];
auto
total_len
=
static_cast
<
size_t
>
(
item
[
TOTAL_LEN
]);
auto
new_field_data
=
milvus
::
storage
::
CreateFieldData
(
DataType
::
INT8
,
1
,
total_len
);
auto
HandleBatch
=
[
&
](
int
index
)
{
auto
batch_data
=
file_manager_
->
LoadIndexToMemory
(
batch
);
for
(
int
j
=
index
-
batch
.
size
()
+
1
;
j
<=
index
;
j
++
)
{
std
::
string
file_name
=
GenSlicedFileName
(
prefix
,
j
);
AssertInfo
(
batch_data
.
find
(
file_name
)
!=
batch_data
.
end
(),
"lost index slice data"
);
auto
data
=
batch_data
[
file_name
];
new_field_data
->
FillFieldData
(
data
->
Data
(),
data
->
Size
());
}
batch
.
clear
();
};
for
(
auto
i
=
0
;
i
<
slice_num
;
++
i
)
{
std
::
string
file_name
=
GenSlicedFileName
(
prefix
,
i
);
batch
.
push_back
(
index_file_prefix
+
file_name
);
if
(
batch
.
size
()
>=
parallel_degree
)
{
HandleBatch
(
i
);
}
}
if
(
batch
.
size
()
>
0
)
{
HandleBatch
(
slice_num
-
1
);
}
LOG_SEGCORE_INFO_
<<
"assemble index data..."
;
std
::
unordered_map
<
std
::
string
,
storage
::
FieldDataPtr
>
result
;
AssembleIndexDatas
(
channels
,
result
);
LOG_SEGCORE_INFO_
<<
"assemble index data done"
;
AssertInfo
(
new_field_data
->
IsFull
(),
"index len is inconsistent after disassemble and assemble"
);
index_datas
[
prefix
]
=
new_field_data
;
}
}
LOG_SEGCORE_INFO_
<<
"construct binary set..."
;
BinarySet
binary_set
;
for
(
auto
&
[
key
,
data
]
:
result
)
{
for
(
auto
&
[
key
,
data
]
:
index_datas
)
{
LOG_SEGCORE_INFO_
<<
"add index data to binary set: "
<<
key
;
auto
size
=
data
->
Size
();
auto
deleter
=
[
&
](
uint8_t
*
)
{};
// avoid repeated deconstruction
...
...
internal/core/src/storage/MemFileManagerImpl.cpp
浏览文件 @
07f08daf
...
...
@@ -119,41 +119,6 @@ MemFileManagerImpl::LoadIndexToMemory(
return
file_to_index_data
;
}
// Loads the given remote files batch by batch and streams the results into
// per-file channels.
//
// Paths from `remote_files` are grouped, in order, into batches of at most
// DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE entries. Each batch is
// handed to GetObjectData, and the i-th returned item is pushed into the
// channel registered under the base name of the i-th path of that batch
// (the part after the last '/'; the whole path if it contains no '/').
// After all batches have been pushed, every channel in `channels` is closed.
//
// remote_files: paths of the files to load.
// channels:     map from file base name to the channel that receives the
//               data of that file. It is only looked up, never extended.
//
// Throws std::out_of_range if a loaded file has no channel registered under
// its base name. (Previously operator[] silently inserted a null channel
// into the caller's map and then dereferenced it.)
void
MemFileManagerImpl::LoadFileStream(
    const std::vector<std::string>& remote_files,
    std::map<std::string, storage::FieldDataChannelPtr>& channels) {
    auto parallel_degree =
        static_cast<uint64_t>(DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE);

    std::vector<std::string> batch_files;

    // Loads the current batch and dispatches each result to its channel.
    auto LoadBatchIndexFiles = [&]() {
        auto index_datas = GetObjectData(rcm_.get(), batch_files);
        for (size_t i = 0; i < index_datas.size(); i++) {
            auto file_name =
                batch_files[i].substr(batch_files[i].find_last_of('/') + 1);
            // at() instead of operator[]: never mutate the caller's map and
            // fail loudly when no channel exists for this file.
            auto& channel = channels.at(file_name);
            channel->push(index_datas[i]);
        }
    };

    for (auto& file : remote_files) {
        // Flush a full batch before queueing the next file.
        if (batch_files.size() >= parallel_degree) {
            LoadBatchIndexFiles();
            batch_files.clear();
        }
        batch_files.emplace_back(file);
    }

    // Flush the trailing, possibly partial, batch.
    if (batch_files.size() > 0) {
        LoadBatchIndexFiles();
    }

    // Close every channel now that all batches have been pushed.
    for (auto& [_, channel] : channels) {
        channel->close();
    }
}
std
::
vector
<
FieldDataPtr
>
MemFileManagerImpl
::
CacheRawDataToMemory
(
std
::
vector
<
std
::
string
>
remote_files
)
{
...
...
internal/core/src/storage/MemFileManagerImpl.h
浏览文件 @
07f08daf
...
...
@@ -55,11 +55,6 @@ class MemFileManagerImpl : public FileManagerImpl {
std
::
map
<
std
::
string
,
storage
::
FieldDataPtr
>
LoadIndexToMemory
(
const
std
::
vector
<
std
::
string
>&
remote_files
);
// Loads `remote_files` in batches (via GetObjectData) and pushes each loaded
// item into the channel stored in `channels` under the file's base name
// (the part of the path after the last '/'). All channels in `channels` are
// closed once every batch has been pushed.
void
LoadFileStream
(
const
std
::
vector
<
std
::
string
>&
remote_files
,
std
::
map
<
std
::
string
,
storage
::
FieldDataChannelPtr
>&
channels
);
std
::
vector
<
FieldDataPtr
>
CacheRawDataToMemory
(
std
::
vector
<
std
::
string
>
remote_files
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录