Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
b2cdb9e9
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b2cdb9e9
编写于
7月 01, 2019
作者:
Z
zhiru
浏览文件
操作
浏览文件
下载
差异文件
update
Former-commit-id: 0b15e2302d0bec551b1215285ff86b5f6cdf71de
上级
542a5212
ac03c4b2
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
0 additions
and
148 deletions
+0
-148
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+0
-140
cpp/src/db/DBImpl.h
cpp/src/db/DBImpl.h
+0
-8
未找到文件。
cpp/src/db/DBImpl.cpp
浏览文件 @
b2cdb9e9
...
...
@@ -158,10 +158,6 @@ Status DBImpl::Query(const std::string &table_id, uint64_t k, uint64_t nq,
Status
DBImpl
::
Query
(
const
std
::
string
&
table_id
,
uint64_t
k
,
uint64_t
nq
,
const
float
*
vectors
,
const
meta
::
DatesT
&
dates
,
QueryResults
&
results
)
{
#if 0
return QuerySync(table_id, k, nq, vectors, dates, results);
#else
//get all table files from table
meta
::
DatePartionedTableFilesSchema
files
;
auto
status
=
meta_ptr_
->
FilesToSearch
(
table_id
,
dates
,
files
);
...
...
@@ -175,7 +171,6 @@ Status DBImpl::Query(const std::string& table_id, uint64_t k, uint64_t nq,
}
return
QueryAsync
(
table_id
,
file_id_array
,
k
,
nq
,
vectors
,
dates
,
results
);
#endif
}
Status
DBImpl
::
Query
(
const
std
::
string
&
table_id
,
const
std
::
vector
<
std
::
string
>&
file_ids
,
...
...
@@ -203,141 +198,6 @@ Status DBImpl::Query(const std::string& table_id, const std::vector<std::string>
return
QueryAsync
(
table_id
,
files_array
,
k
,
nq
,
vectors
,
dates
,
results
);
}
/**
 * Searches every file of a table, one file after another inside this call,
 * and merges the per-file hits into one top-k list per query vector.
 *
 * Steps:
 *   1. Ask the meta store for the files of `table_id` that match `dates`.
 *   2. Split them into INDEX files and all remaining ("raw") files.
 *   3. Build and load an engine per file (raw files first, then index files),
 *      search it, and collect the returned ids/distances per query.
 *   4. Reduce each query's candidates to the `k` smallest distances and
 *      append one QueryResult per query to `results`.
 *
 * @param table_id  table whose files are searched
 * @param k         maximum number of hits returned per query
 * @param nq        number of queries; passed to the engine's Search()
 * @param vectors   query data, handed unchanged to the engine's Search()
 * @param dates     date filter forwarded to FilesToSearch()
 * @param results   output; one entry is appended per query
 * @return the FilesToSearch() status when it fails; OK when there is no file
 *         to search; NotFound when `results` is still empty after the
 *         search; OK otherwise
 */
Status DBImpl::QuerySync(const std::string& table_id, uint64_t k, uint64_t nq,
                         const float* vectors, const meta::DatesT& dates, QueryResults& results) {
    meta::DatePartionedTableFilesSchema files;
    auto status = meta_ptr_->FilesToSearch(table_id, dates, files);
    if (!status.ok()) {
        return status;
    }

    ENGINE_LOG_DEBUG << "Search DateT Size = " << files.size();

    meta::TableFilesSchema index_files;
    meta::TableFilesSchema raw_files;
    for (auto& day_files : files) {
        for (auto& file : day_files.second) {
            if (file.file_type_ == meta::TableFileSchema::INDEX) {
                index_files.push_back(file);
            } else {
                raw_files.push_back(file);
            }
        }
    }

    if (index_files.empty() && raw_files.empty()) {
        ENGINE_LOG_DEBUG << "no files to search";
        return Status::OK();
    }

    // One {ids, distances} pair per query, accumulated over all searched files.
    using SearchResult = std::pair<std::vector<long>, std::vector<float>>;
    std::vector<SearchResult> batchresult(nq);

    // Scratch buffers the engine writes its hits into. They are owned by
    // vectors so they are released on every exit path, including exceptions
    // thrown while building, loading or searching an engine.
    const uint64_t buffer_len = k * nq;
    std::vector<float> output_distance(buffer_len, 0.0f);
    std::vector<long> output_ids(buffer_len, 0);

    // Distributes a flat [nq x per_query_k] result block to the per-query
    // candidate lists. New hits are inserted at the front, so hits from
    // later files precede hits from earlier files.
    auto cluster = [&](const long* nns, const float* dis, uint64_t per_query_k) -> void {
        for (uint64_t q = 0; q < nq; ++q) {
            const uint64_t offset = q * per_query_k;
            auto& ids = batchresult[q].first;
            auto& distances = batchresult[q].second;
            ids.insert(ids.cbegin(), nns + offset, nns + offset + per_query_k);
            distances.insert(distances.cbegin(), dis + offset, dis + offset + per_query_k);
        }
    };

    long search_set_size = 0;  // accumulated PhysicalSize() of all searched files

    auto search_in_index = [&](meta::TableFilesSchema& file_vec) -> void {
        for (auto& file : file_vec) {
            ExecutionEnginePtr index = EngineFactory::Build(file.dimension_, file.location_,
                                                            static_cast<EngineType>(file.engine_type_));
            index->Load();
            auto file_size = index->PhysicalSize();
            search_set_size += file_size;

            ENGINE_LOG_DEBUG << "Search file_type " << file.file_type_ << " Of Size: "
                             << file_size / (1024 * 1024) << " M";

            // Never ask a file for more hits than it holds entries.
            const auto entry_count = index->Count();
            const int inner_k = static_cast<int>(entry_count < k ? entry_count : k);

            auto start_time = METRICS_NOW_TIME;
            index->Search(nq, vectors, inner_k, output_distance.data(), output_ids.data());
            auto end_time = METRICS_NOW_TIME;
            auto total_time = METRICS_MICROSECONDS(start_time, end_time);
            CollectFileMetrics(file.file_type_, file_size, total_time);

            cluster(output_ids.data(), output_distance.data(), static_cast<uint64_t>(inner_k));

            // Zero the scratch buffers so the next search starts from a clean state.
            output_distance.assign(buffer_len, 0.0f);
            output_ids.assign(buffer_len, 0);
        }
    };

    // Writes the (at most) top_k smallest distances of input_data, together
    // with their positions in input_data, to the scratch buffers and returns
    // how many entries were written. Equal distances keep ascending position
    // order.
    auto topk_cpu = [](const std::vector<float>& input_data, uint64_t top_k,
                       std::vector<float>& out_distance, std::vector<long>& out_positions) -> uint64_t {
        std::map<float, std::vector<long>> inverted_table;  // distance -> positions
        for (uint64_t pos = 0; pos < input_data.size(); ++pos) {
            inverted_table[input_data[pos]].push_back(static_cast<long>(pos));
        }

        uint64_t count = 0;
        for (const auto& item : inverted_table) {
            for (const long pos : item.second) {
                if (count == top_k) {
                    return count;
                }
                out_distance[count] = item.first;
                out_positions[count] = pos;
                ++count;
            }
        }
        return count;
    };

    search_in_index(raw_files);
    search_in_index(index_files);

    // search_set_size is accumulated in the unit PhysicalSize() returns;
    // scale it the same way as the per-file log so the " M" label matches.
    ENGINE_LOG_DEBUG << "Search Overall Set Size = " << search_set_size / (1024 * 1024) << " M";

    // Reduce each query's candidates to its final top-k list.
    for (auto& result_pair : batchresult) {
        const auto& nns = result_pair.first;
        const auto& dis = result_pair.second;

        const uint64_t hits = topk_cpu(dis, k, output_distance, output_ids);

        QueryResult res;
        for (uint64_t i = 0; i < hits; ++i) {
            // output_ids holds positions into this query's candidate list; map them back to ids.
            res.emplace_back(std::make_pair(nns[output_ids[i]], output_distance[i]));
        }
        results.push_back(res);
    }

    if (results.empty()) {
        return Status::NotFound("Group " + table_id + ", search result not found!");
    }
    return Status::OK();
}
Status
DBImpl
::
QueryAsync
(
const
std
::
string
&
table_id
,
const
meta
::
TableFilesSchema
&
files
,
uint64_t
k
,
uint64_t
nq
,
const
float
*
vectors
,
const
meta
::
DatesT
&
dates
,
QueryResults
&
results
)
{
...
...
cpp/src/db/DBImpl.h
浏览文件 @
b2cdb9e9
...
...
@@ -85,14 +85,6 @@ class DBImpl : public DB {
~
DBImpl
()
override
;
private:
// Searches the files of `table_id` selected by `dates`, one file after
// another, for up to `k` hits per query and appends one result entry per
// query to `results`. `nq` and `vectors` are passed through to the engine's
// Search() call (presumably `vectors` holds `nq` query vectors; confirm the
// layout against the engine interface).
// Returns the meta-store status on lookup failure, OK when there is no file
// to search, and NotFound when `results` is empty after searching.
Status
QuerySync
(
const
std
::
string
&
table_id
,
uint64_t
k
,
uint64_t
nq
,
const
float
*
vectors
,
const
meta
::
DatesT
&
dates
,
QueryResults
&
results
);
Status
QueryAsync
(
const
std
::
string
&
table_id
,
const
meta
::
TableFilesSchema
&
files
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录