Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
qq_35024513
milvus
提交
85aca5fa
milvus
项目概览
qq_35024513
/
milvus
与 Fork 源项目一致
Fork自
milvus / milvus
通知
14
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
85aca5fa
编写于
11月 22, 2019
作者:
J
Jin Hai
提交者:
GitHub
11月 22, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #474 from yhmo/0.6.0
#470 raw files should not be build index
上级
0201dd96
e40c0674
变更
11
展开全部
隐藏空白更改
内联
并排
Showing
11 changed file
with
194 addition
and
132 deletion
+194
-132
CHANGELOG.md
CHANGELOG.md
+1
-0
core/src/db/DBImpl.cpp
core/src/db/DBImpl.cpp
+23
-4
core/src/db/DBImpl.h
core/src/db/DBImpl.h
+4
-0
core/src/db/meta/Meta.h
core/src/db/meta/Meta.h
+1
-2
core/src/db/meta/MetaConsts.h
core/src/db/meta/MetaConsts.h
+7
-0
core/src/db/meta/MySQLMetaImpl.cpp
core/src/db/meta/MySQLMetaImpl.cpp
+19
-8
core/src/db/meta/MySQLMetaImpl.h
core/src/db/meta/MySQLMetaImpl.h
+1
-1
core/src/db/meta/SqliteMetaImpl.cpp
core/src/db/meta/SqliteMetaImpl.cpp
+124
-103
core/src/db/meta/SqliteMetaImpl.h
core/src/db/meta/SqliteMetaImpl.h
+1
-1
core/unittest/db/test_meta.cpp
core/unittest/db/test_meta.cpp
+5
-5
core/unittest/db/test_meta_mysql.cpp
core/unittest/db/test_meta_mysql.cpp
+8
-8
未找到文件。
CHANGELOG.md
浏览文件 @
85aca5fa
...
...
@@ -44,6 +44,7 @@ Please mark all change in change log and use the ticket from JIRA.
-
\#
409 - Add a Fallback pass in optimizer
-
\#
433 - C++ SDK query result is not easy to use
-
\#
449 - Add ShowPartitions example for C++ SDK
-
\#
470 - Small raw files should not have an index built
## Task
...
...
core/src/db/DBImpl.cpp
浏览文件 @
85aca5fa
...
...
@@ -838,6 +838,25 @@ DBImpl::BackgroundBuildIndex() {
// ENGINE_LOG_TRACE << "Background build index thread exit";
}
Status
DBImpl::GetFilesToBuildIndex(const std::string& table_id, const std::vector<int>& file_types,
                             meta::TableFilesSchema& files) {
    // Collects the files of `table_id` whose type is listed in `file_types` and that are
    // worth building an index for.
    //
    // table_id   - id of the table whose files are queried.
    // file_types - file types to query from the meta store.
    // files      - output; cleared first, then filled with the matching files, minus RAW
    //              files whose row count is below meta::BUILD_INDEX_THRESHOLD.
    //
    // Returns the failing status of the meta query if it did not succeed (with `files`
    // left empty), otherwise Status::OK().
    files.clear();
    auto status = meta_ptr_->FilesByType(table_id, file_types, files);
    if (!status.ok()) {
        // Do not hand back a possibly partial, unfiltered list when the meta query failed.
        files.clear();
        return status;
    }

    // Only build index for files whose row count reaches the threshold: small RAW files are
    // dropped from the result, every other file is kept.
    for (auto it = files.begin(); it != files.end();) {
        const bool is_small_raw_file = it->file_type_ == static_cast<int>(meta::TableFileSchema::RAW) &&
                                       it->row_count_ < meta::BUILD_INDEX_THRESHOLD;
        if (is_small_raw_file) {
            // erase() returns the iterator following the removed element.
            it = files.erase(it);
        } else {
            ++it;
        }
    }

    return Status::OK();
}
Status
DBImpl
::
GetFilesToSearch
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
size_t
>&
file_ids
,
const
meta
::
DatesT
&
dates
,
meta
::
TableFilesSchema
&
files
)
{
...
...
@@ -946,18 +965,18 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
}
// get files to build index
std
::
vector
<
std
::
string
>
file_id
s
;
auto
status
=
meta_ptr_
->
FilesByType
(
table_id
,
file_types
,
file_id
s
);
meta
::
TableFilesSchema
table_file
s
;
auto
status
=
GetFilesToBuildIndex
(
table_id
,
file_types
,
table_file
s
);
int
times
=
1
;
while
(
!
file_id
s
.
empty
())
{
while
(
!
table_file
s
.
empty
())
{
ENGINE_LOG_DEBUG
<<
"Non index files detected! Will build index "
<<
times
;
if
(
index
.
engine_type_
!=
(
int
)
EngineType
::
FAISS_IDMAP
)
{
status
=
meta_ptr_
->
UpdateTableFilesToIndex
(
table_id
);
}
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
std
::
min
(
10
*
1000
,
times
*
100
)));
status
=
meta_ptr_
->
FilesByType
(
table_id
,
file_types
,
file_id
s
);
GetFilesToBuildIndex
(
table_id
,
file_types
,
table_file
s
);
times
++
;
}
...
...
core/src/db/DBImpl.h
浏览文件 @
85aca5fa
...
...
@@ -152,6 +152,10 @@ class DBImpl : public DB {
Status
MemSerialize
();
// Fills `files` with the files of table `table_id` whose type is listed in `file_types`,
// leaving out RAW files whose row count is below meta::BUILD_INDEX_THRESHOLD.
// `files` is cleared before it is filled.
Status
GetFilesToBuildIndex
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
meta
::
TableFilesSchema
&
files
);
Status
GetFilesToSearch
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
size_t
>&
file_ids
,
const
meta
::
DatesT
&
dates
,
meta
::
TableFilesSchema
&
files
);
...
...
core/src/db/meta/Meta.h
浏览文件 @
85aca5fa
...
...
@@ -109,8 +109,7 @@ class Meta {
FilesToIndex
(
TableFilesSchema
&
)
=
0
;
virtual
Status
FilesByType
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
std
::
vector
<
std
::
string
>&
file_ids
)
=
0
;
FilesByType
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
TableFilesSchema
&
table_files
)
=
0
;
virtual
Status
Size
(
uint64_t
&
result
)
=
0
;
...
...
core/src/db/meta/MetaConsts.h
浏览文件 @
85aca5fa
...
...
@@ -32,6 +32,13 @@ const size_t H_SEC = 60 * M_SEC;
const
size_t
D_SEC
=
24
*
H_SEC
;
const
size_t
W_SEC
=
7
*
D_SEC
;
// Minimum row count a raw file must reach before an index is built for it;
// raw files with fewer rows are ignored when building index.
// The reasons are:
// 1. Brute-force search over a small raw file can perform better than searching a small index file.
// 2. Small raw files can be merged into larger files, which reduces the number of fragmented files.
// The value was decided based on testing with small raw/index files.
const
size_t
BUILD_INDEX_THRESHOLD
=
5000
;
}
// namespace meta
}
// namespace engine
}
// namespace milvus
core/src/db/meta/MySQLMetaImpl.cpp
浏览文件 @
85aca5fa
...
...
@@ -959,6 +959,7 @@ MySQLMetaImpl::UpdateTableFilesToIndex(const std::string& table_id) {
updateTableFilesToIndexQuery
<<
"UPDATE "
<<
META_TABLEFILES
<<
" SET file_type = "
<<
std
::
to_string
(
TableFileSchema
::
TO_INDEX
)
<<
" WHERE table_id = "
<<
mysqlpp
::
quote
<<
table_id
<<
" AND row_count >= "
<<
std
::
to_string
(
meta
::
BUILD_INDEX_THRESHOLD
)
<<
" AND file_type = "
<<
std
::
to_string
(
TableFileSchema
::
RAW
)
<<
";"
;
ENGINE_LOG_DEBUG
<<
"MySQLMetaImpl::UpdateTableFilesToIndex: "
<<
updateTableFilesToIndexQuery
.
str
();
...
...
@@ -1527,13 +1528,13 @@ MySQLMetaImpl::FilesToIndex(TableFilesSchema& files) {
Status
MySQLMetaImpl
::
FilesByType
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
std
::
vector
<
std
::
string
>&
file_id
s
)
{
TableFilesSchema
&
table_file
s
)
{
if
(
file_types
.
empty
())
{
return
Status
(
DB_ERROR
,
"file types array is empty"
);
}
try
{
file_id
s
.
clear
();
table_file
s
.
clear
();
mysqlpp
::
StoreQueryResult
res
;
{
...
...
@@ -1553,9 +1554,10 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector<int>&
mysqlpp
::
Query
hasNonIndexFilesQuery
=
connectionPtr
->
query
();
// since table_id is a unique column we just need to check whether it exists or not
hasNonIndexFilesQuery
<<
"SELECT file_id, file_type"
<<
" FROM "
<<
META_TABLEFILES
<<
" WHERE table_id = "
<<
mysqlpp
::
quote
<<
table_id
<<
" AND file_type in ("
<<
types
<<
");"
;
hasNonIndexFilesQuery
<<
"SELECT id, engine_type, file_id, file_type, file_size, row_count, date, created_on"
<<
" FROM "
<<
META_TABLEFILES
<<
" WHERE table_id = "
<<
mysqlpp
::
quote
<<
table_id
<<
" AND file_type in ("
<<
types
<<
");"
;
ENGINE_LOG_DEBUG
<<
"MySQLMetaImpl::FilesByType: "
<<
hasNonIndexFilesQuery
.
str
();
...
...
@@ -1566,9 +1568,18 @@ MySQLMetaImpl::FilesByType(const std::string& table_id, const std::vector<int>&
int
raw_count
=
0
,
new_count
=
0
,
new_merge_count
=
0
,
new_index_count
=
0
;
int
to_index_count
=
0
,
index_count
=
0
,
backup_count
=
0
;
for
(
auto
&
resRow
:
res
)
{
std
::
string
file_id
;
resRow
[
"file_id"
].
to_string
(
file_id
);
file_ids
.
push_back
(
file_id
);
TableFileSchema
file_schema
;
file_schema
.
id_
=
resRow
[
"id"
];
file_schema
.
table_id_
=
table_id
;
file_schema
.
engine_type_
=
resRow
[
"engine_type"
];
resRow
[
"file_id"
].
to_string
(
file_schema
.
file_id_
);
file_schema
.
file_type_
=
resRow
[
"file_type"
];
file_schema
.
file_size_
=
resRow
[
"file_size"
];
file_schema
.
row_count_
=
resRow
[
"row_count"
];
file_schema
.
date_
=
resRow
[
"date"
];
file_schema
.
created_on_
=
resRow
[
"created_on"
];
table_files
.
emplace_back
(
file_schema
);
int32_t
file_type
=
resRow
[
"file_type"
];
switch
(
file_type
)
{
...
...
core/src/db/meta/MySQLMetaImpl.h
浏览文件 @
85aca5fa
...
...
@@ -108,7 +108,7 @@ class MySQLMetaImpl : public Meta {
Status
FilesByType
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
std
::
vector
<
std
::
string
>&
file_id
s
)
override
;
TableFilesSchema
&
table_file
s
)
override
;
Status
Archive
()
override
;
...
...
core/src/db/meta/SqliteMetaImpl.cpp
浏览文件 @
85aca5fa
此差异已折叠。
点击以展开。
core/src/db/meta/SqliteMetaImpl.h
浏览文件 @
85aca5fa
...
...
@@ -108,7 +108,7 @@ class SqliteMetaImpl : public Meta {
Status
FilesByType
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
int
>&
file_types
,
std
::
vector
<
std
::
string
>&
file_id
s
)
override
;
TableFilesSchema
&
table_file
s
)
override
;
Status
Size
(
uint64_t
&
result
)
override
;
...
...
core/unittest/db/test_meta.cpp
浏览文件 @
85aca5fa
...
...
@@ -306,9 +306,9 @@ TEST_F(MetaTest, TABLE_FILES_TEST) {
ASSERT_EQ
(
dated_files
[
table_file
.
date_
].
size
(),
0
);
std
::
vector
<
int
>
file_types
;
std
::
vector
<
std
::
string
>
file_id
s
;
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
file_id
s
);
ASSERT_TRUE
(
file_id
s
.
empty
());
milvus
::
engine
::
meta
::
TableFilesSchema
table_file
s
;
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
table_file
s
);
ASSERT_TRUE
(
table_file
s
.
empty
());
ASSERT_FALSE
(
status
.
ok
());
file_types
=
{
...
...
@@ -317,11 +317,11 @@ TEST_F(MetaTest, TABLE_FILES_TEST) {
milvus
::
engine
::
meta
::
TableFileSchema
::
INDEX
,
milvus
::
engine
::
meta
::
TableFileSchema
::
RAW
,
milvus
::
engine
::
meta
::
TableFileSchema
::
BACKUP
,
};
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
file_id
s
);
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
table_file
s
);
ASSERT_TRUE
(
status
.
ok
());
uint64_t
total_cnt
=
new_index_files_cnt
+
new_merge_files_cnt
+
backup_files_cnt
+
new_files_cnt
+
raw_files_cnt
+
to_index_files_cnt
+
index_files_cnt
;
ASSERT_EQ
(
file_id
s
.
size
(),
total_cnt
);
ASSERT_EQ
(
table_file
s
.
size
(),
total_cnt
);
status
=
impl_
->
DeleteTableFiles
(
table_id
);
ASSERT_TRUE
(
status
.
ok
());
...
...
core/unittest/db/test_meta_mysql.cpp
浏览文件 @
85aca5fa
...
...
@@ -169,9 +169,9 @@ TEST_F(MySqlMetaTest, ARCHIVE_TEST_DAYS) {
std
::
vector
<
int
>
file_types
=
{
(
int
)
milvus
::
engine
::
meta
::
TableFileSchema
::
NEW
,
};
std
::
vector
<
std
::
string
>
file_id
s
;
status
=
impl
.
FilesByType
(
table_id
,
file_types
,
file_id
s
);
ASSERT_FALSE
(
file_id
s
.
empty
());
milvus
::
engine
::
meta
::
TableFilesSchema
table_file
s
;
status
=
impl
.
FilesByType
(
table_id
,
file_types
,
table_file
s
);
ASSERT_FALSE
(
table_file
s
.
empty
());
status
=
impl
.
UpdateTableFilesToIndex
(
table_id
);
ASSERT_TRUE
(
status
.
ok
());
...
...
@@ -326,9 +326,9 @@ TEST_F(MySqlMetaTest, TABLE_FILES_TEST) {
ASSERT_EQ
(
dated_files
[
table_file
.
date_
].
size
(),
0
);
std
::
vector
<
int
>
file_types
;
std
::
vector
<
std
::
string
>
file_id
s
;
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
file_id
s
);
ASSERT_TRUE
(
file_id
s
.
empty
());
milvus
::
engine
::
meta
::
TableFilesSchema
table_file
s
;
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
table_file
s
);
ASSERT_TRUE
(
table_file
s
.
empty
());
ASSERT_FALSE
(
status
.
ok
());
file_types
=
{
...
...
@@ -337,11 +337,11 @@ TEST_F(MySqlMetaTest, TABLE_FILES_TEST) {
milvus
::
engine
::
meta
::
TableFileSchema
::
INDEX
,
milvus
::
engine
::
meta
::
TableFileSchema
::
RAW
,
milvus
::
engine
::
meta
::
TableFileSchema
::
BACKUP
,
};
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
file_id
s
);
status
=
impl_
->
FilesByType
(
table
.
table_id_
,
file_types
,
table_file
s
);
ASSERT_TRUE
(
status
.
ok
());
uint64_t
total_cnt
=
new_index_files_cnt
+
new_merge_files_cnt
+
backup_files_cnt
+
new_files_cnt
+
raw_files_cnt
+
to_index_files_cnt
+
index_files_cnt
;
ASSERT_EQ
(
file_id
s
.
size
(),
total_cnt
);
ASSERT_EQ
(
table_file
s
.
size
(),
total_cnt
);
status
=
impl_
->
DeleteTableFiles
(
table_id
);
ASSERT_TRUE
(
status
.
ok
());
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录