Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
4a0c682b
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4a0c682b
编写于
6月 25, 2019
作者:
G
groot
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
MS-110 - Avoid huge file size
Former-commit-id: a36dd97a21b2e0d6e5cf99250a7b884500991708
上级
0086b464
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
79 addition
and
50 deletion
+79
-50
cpp/CHANGELOG.md
cpp/CHANGELOG.md
+1
-0
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+6
-4
cpp/src/db/MemManager.cpp
cpp/src/db/MemManager.cpp
+58
-33
cpp/src/db/MemManager.h
cpp/src/db/MemManager.h
+14
-13
未找到文件。
cpp/CHANGELOG.md
浏览文件 @
4a0c682b
...
...
@@ -11,6 +11,7 @@ Please mark all change in change log and use the ticket from JIRA.
-
MS-89 - Fix compile failed, libgpufaiss.a link missing
-
MS-90 - Fix arch match incorrect on ARM
-
MS-99 - Fix compilation bug
-
MS-110 - Avoid huge file size
## Improvement
-
MS-82 - Update server startup welcome message
...
...
cpp/src/db/DBImpl.cpp
浏览文件 @
4a0c682b
...
...
@@ -472,7 +472,7 @@ void DBImpl::StartCompactionTask() {
}
//serialize memory data
std
::
vector
<
std
::
string
>
temp_table_ids
;
std
::
set
<
std
::
string
>
temp_table_ids
;
mem_mgr_
->
Serialize
(
temp_table_ids
);
for
(
auto
&
id
:
temp_table_ids
)
{
compact_table_ids_
.
insert
(
id
);
...
...
@@ -543,7 +543,8 @@ Status DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date,
ENGINE_LOG_DEBUG
<<
"New merged file "
<<
table_file
.
file_id_
<<
" of size="
<<
index
->
PhysicalSize
()
/
(
1024
*
1024
)
<<
" M"
;
index
->
Cache
();
//current disable this line to avoid memory
//index->Cache();
return
status
;
}
...
...
@@ -659,7 +660,8 @@ Status DBImpl::BuildIndex(const meta::TableFileSchema& file) {
<<
index
->
PhysicalSize
()
/
(
1024
*
1024
)
<<
" M"
<<
" from file "
<<
to_remove
.
file_id_
;
index
->
Cache
();
//current disable this line to avoid memory
//index->Cache();
}
catch
(
std
::
exception
&
ex
)
{
return
Status
::
Error
(
"Build index encounter exception"
,
ex
.
what
());
...
...
@@ -698,7 +700,7 @@ Status DBImpl::Size(uint64_t& result) {
DBImpl
::~
DBImpl
()
{
shutting_down_
.
store
(
true
,
std
::
memory_order_release
);
bg_timer_thread_
.
join
();
std
::
vector
<
std
::
string
>
ids
;
std
::
set
<
std
::
string
>
ids
;
mem_mgr_
->
Serialize
(
ids
);
}
...
...
cpp/src/db/MemManager.cpp
浏览文件 @
4a0c682b
...
...
@@ -20,36 +20,54 @@ namespace engine {
MemVectors
::
MemVectors
(
const
std
::
shared_ptr
<
meta
::
Meta
>&
meta_ptr
,
const
meta
::
TableFileSchema
&
schema
,
const
Options
&
options
)
:
pM
eta_
(
meta_ptr
),
:
m
eta_
(
meta_ptr
),
options_
(
options
),
schema_
(
schema
),
pIdG
enerator_
(
new
SimpleIDGenerator
()),
pEE
_
(
EngineFactory
::
Build
(
schema_
.
dimension_
,
schema_
.
location_
,
(
EngineType
)
schema_
.
engine_type_
))
{
id_g
enerator_
(
new
SimpleIDGenerator
()),
active_engine
_
(
EngineFactory
::
Build
(
schema_
.
dimension_
,
schema_
.
location_
,
(
EngineType
)
schema_
.
engine_type_
))
{
}
void
MemVectors
::
Add
(
size_t
n_
,
const
float
*
vectors_
,
IDNumbers
&
vector_ids_
)
{
Status
MemVectors
::
Add
(
size_t
n_
,
const
float
*
vectors_
,
IDNumbers
&
vector_ids_
)
{
if
(
active_engine_
==
nullptr
)
{
return
Status
::
Error
(
"index engine is null"
);
}
auto
start_time
=
METRICS_NOW_TIME
;
pIdG
enerator_
->
GetNextIDNumbers
(
n_
,
vector_ids_
);
pEE
_
->
AddWithIds
(
n_
,
vectors_
,
vector_ids_
.
data
());
id_g
enerator_
->
GetNextIDNumbers
(
n_
,
vector_ids_
);
Status
status
=
active_engine
_
->
AddWithIds
(
n_
,
vectors_
,
vector_ids_
.
data
());
auto
end_time
=
METRICS_NOW_TIME
;
auto
total_time
=
METRICS_MICROSECONDS
(
start_time
,
end_time
);
server
::
Metrics
::
GetInstance
().
AddVectorsPerSecondGaugeSet
(
static_cast
<
int
>
(
n_
),
static_cast
<
int
>
(
schema_
.
dimension_
),
total_time
);
return
status
;
}
size_t
MemVectors
::
Total
()
const
{
return
pEE_
->
Count
();
size_t
MemVectors
::
RowCount
()
const
{
if
(
active_engine_
==
nullptr
)
{
return
0
;
}
return
active_engine_
->
Count
();
}
size_t
MemVectors
::
ApproximateSize
()
const
{
return
pEE_
->
Size
();
size_t
MemVectors
::
Size
()
const
{
if
(
active_engine_
==
nullptr
)
{
return
0
;
}
return
active_engine_
->
Size
();
}
Status
MemVectors
::
Serialize
(
std
::
string
&
table_id
)
{
if
(
active_engine_
==
nullptr
)
{
return
Status
::
Error
(
"index engine is null"
);
}
table_id
=
schema_
.
table_id_
;
auto
size
=
Approximate
Size
();
auto
size
=
Size
();
auto
start_time
=
METRICS_NOW_TIME
;
pEE
_
->
Serialize
();
active_engine
_
->
Serialize
();
auto
end_time
=
METRICS_NOW_TIME
;
auto
total_time
=
METRICS_MICROSECONDS
(
start_time
,
end_time
);
schema_
.
size_
=
size
;
...
...
@@ -59,20 +77,20 @@ Status MemVectors::Serialize(std::string& table_id) {
schema_
.
file_type_
=
(
size
>=
options_
.
index_trigger_size
)
?
meta
::
TableFileSchema
::
TO_INDEX
:
meta
::
TableFileSchema
::
RAW
;
auto
status
=
pM
eta_
->
UpdateTableFile
(
schema_
);
auto
status
=
m
eta_
->
UpdateTableFile
(
schema_
);
LOG
(
DEBUG
)
<<
"New "
<<
((
schema_
.
file_type_
==
meta
::
TableFileSchema
::
RAW
)
?
"raw"
:
"to_index"
)
<<
" file "
<<
schema_
.
file_id_
<<
" of size "
<<
(
double
)(
pEE
_
->
Size
())
/
(
double
)
meta
::
M
<<
" M"
;
<<
" file "
<<
schema_
.
file_id_
<<
" of size "
<<
(
double
)(
active_engine
_
->
Size
())
/
(
double
)
meta
::
M
<<
" M"
;
pEE
_
->
Cache
();
active_engine
_
->
Cache
();
return
status
;
}
MemVectors
::~
MemVectors
()
{
if
(
pIdG
enerator_
!=
nullptr
)
{
delete
pIdG
enerator_
;
pIdG
enerator_
=
nullptr
;
if
(
id_g
enerator_
!=
nullptr
)
{
delete
id_g
enerator_
;
id_g
enerator_
=
nullptr
;
}
}
...
...
@@ -81,20 +99,20 @@ MemVectors::~MemVectors() {
*/
MemManager
::
MemVectorsPtr
MemManager
::
GetMemByTable
(
const
std
::
string
&
table_id
)
{
auto
memIt
=
mem
M
ap_
.
find
(
table_id
);
if
(
memIt
!=
mem
M
ap_
.
end
())
{
auto
memIt
=
mem
_id_m
ap_
.
find
(
table_id
);
if
(
memIt
!=
mem
_id_m
ap_
.
end
())
{
return
memIt
->
second
;
}
meta
::
TableFileSchema
table_file
;
table_file
.
table_id_
=
table_id
;
auto
status
=
pM
eta_
->
CreateTableFile
(
table_file
);
auto
status
=
m
eta_
->
CreateTableFile
(
table_file
);
if
(
!
status
.
ok
())
{
return
nullptr
;
}
mem
Map_
[
table_id
]
=
MemVectorsPtr
(
new
MemVectors
(
pM
eta_
,
table_file
,
options_
));
return
mem
M
ap_
[
table_id
];
mem
_id_map_
[
table_id
]
=
MemVectorsPtr
(
new
MemVectors
(
m
eta_
,
table_file
,
options_
));
return
mem
_id_m
ap_
[
table_id
];
}
Status
MemManager
::
InsertVectors
(
const
std
::
string
&
table_id_
,
...
...
@@ -114,37 +132,44 @@ Status MemManager::InsertVectorsNoLock(const std::string& table_id,
if
(
mem
==
nullptr
)
{
return
Status
::
NotFound
(
"Group "
+
table_id
+
" not found!"
);
}
mem
->
Add
(
n
,
vectors
,
vector_ids
);
return
Status
::
OK
();
//makesure each file size less than index_trigger_size
if
(
mem
->
Size
()
>
options_
.
index_trigger_size
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
serialization_mtx_
);
immu_mem_list_
.
push_back
(
mem
);
mem_id_map_
.
erase
(
table_id
);
return
InsertVectorsNoLock
(
table_id
,
n
,
vectors
,
vector_ids
);
}
else
{
return
mem
->
Add
(
n
,
vectors
,
vector_ids
);
}
}
Status
MemManager
::
ToImmutable
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
for
(
auto
&
kv
:
mem
M
ap_
)
{
imm
Mems
_
.
push_back
(
kv
.
second
);
for
(
auto
&
kv
:
mem
_id_m
ap_
)
{
imm
u_mem_list
_
.
push_back
(
kv
.
second
);
}
mem
M
ap_
.
clear
();
mem
_id_m
ap_
.
clear
();
return
Status
::
OK
();
}
Status
MemManager
::
Serialize
(
std
::
vector
<
std
::
string
>&
table_ids
)
{
Status
MemManager
::
Serialize
(
std
::
set
<
std
::
string
>&
table_ids
)
{
ToImmutable
();
std
::
unique_lock
<
std
::
mutex
>
lock
(
serialization_mtx_
);
std
::
string
table_id
;
table_ids
.
clear
();
for
(
auto
&
mem
:
imm
Mems
_
)
{
for
(
auto
&
mem
:
imm
u_mem_list
_
)
{
mem
->
Serialize
(
table_id
);
table_ids
.
push_back
(
table_id
);
table_ids
.
insert
(
table_id
);
}
imm
Mems
_
.
clear
();
imm
u_mem_list
_
.
clear
();
return
Status
::
OK
();
}
Status
MemManager
::
EraseMemVector
(
const
std
::
string
&
table_id
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
mem
M
ap_
.
erase
(
table_id
);
mem
_id_m
ap_
.
erase
(
table_id
);
return
Status
::
OK
();
}
...
...
cpp/src/db/MemManager.h
浏览文件 @
4a0c682b
...
...
@@ -15,6 +15,7 @@
#include <ctime>
#include <memory>
#include <mutex>
#include <set>
namespace
zilliz
{
namespace
milvus
{
...
...
@@ -32,11 +33,11 @@ public:
explicit
MemVectors
(
const
std
::
shared_ptr
<
meta
::
Meta
>&
,
const
meta
::
TableFileSchema
&
,
const
Options
&
);
void
Add
(
size_t
n_
,
const
float
*
vectors_
,
IDNumbers
&
vector_ids_
);
Status
Add
(
size_t
n_
,
const
float
*
vectors_
,
IDNumbers
&
vector_ids_
);
size_t
Total
()
const
;
size_t
RowCount
()
const
;
size_t
Approximate
Size
()
const
;
size_t
Size
()
const
;
Status
Serialize
(
std
::
string
&
table_id
);
...
...
@@ -49,11 +50,11 @@ private:
MemVectors
(
const
MemVectors
&
)
=
delete
;
MemVectors
&
operator
=
(
const
MemVectors
&
)
=
delete
;
MetaPtr
pM
eta_
;
MetaPtr
m
eta_
;
Options
options_
;
meta
::
TableFileSchema
schema_
;
IDGenerator
*
pIdG
enerator_
;
ExecutionEnginePtr
pEE
_
;
IDGenerator
*
id_g
enerator_
;
ExecutionEnginePtr
active_engine
_
;
};
// MemVectors
...
...
@@ -66,14 +67,14 @@ public:
using
Ptr
=
std
::
shared_ptr
<
MemManager
>
;
MemManager
(
const
std
::
shared_ptr
<
meta
::
Meta
>&
meta
,
const
Options
&
options
)
:
pM
eta_
(
meta
),
options_
(
options
)
{}
:
m
eta_
(
meta
),
options_
(
options
)
{}
MemVectorsPtr
GetMemByTable
(
const
std
::
string
&
table_id
);
Status
InsertVectors
(
const
std
::
string
&
table_id
,
size_t
n
,
const
float
*
vectors
,
IDNumbers
&
vector_ids
);
Status
Serialize
(
std
::
vector
<
std
::
string
>&
table_ids
);
Status
Serialize
(
std
::
set
<
std
::
string
>&
table_ids
);
Status
EraseMemVector
(
const
std
::
string
&
table_id
);
...
...
@@ -82,11 +83,11 @@ private:
size_t
n
,
const
float
*
vectors
,
IDNumbers
&
vector_ids
);
Status
ToImmutable
();
using
MemMap
=
std
::
map
<
std
::
string
,
MemVectorsPtr
>
;
using
ImmMemPool
=
std
::
vector
<
MemVectorsPtr
>
;
Mem
Map
memM
ap_
;
ImmMemPool
immMems
_
;
MetaPtr
pM
eta_
;
using
Mem
Id
Map
=
std
::
map
<
std
::
string
,
MemVectorsPtr
>
;
using
MemList
=
std
::
vector
<
MemVectorsPtr
>
;
Mem
IdMap
mem_id_m
ap_
;
MemList
immu_mem_list
_
;
MetaPtr
m
eta_
;
Options
options_
;
std
::
mutex
mutex_
;
std
::
mutex
serialization_mtx_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录