Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
cf94a753
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
cf94a753
编写于
6月 09, 2019
作者:
G
groot
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
return search score
Former-commit-id: e141cb974769ee0e7ee06eb7635eb458622af60c
上级
9cb9fdc7
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
82 addition
and
27 deletion
+82
-27
cpp/CHANGELOG.md
cpp/CHANGELOG.md
+2
-1
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+7
-8
cpp/src/db/ExecutionEngine.h
cpp/src/db/ExecutionEngine.h
+2
-0
cpp/src/db/FaissExecutionEngine.cpp
cpp/src/db/FaissExecutionEngine.cpp
+4
-0
cpp/src/db/FaissExecutionEngine.h
cpp/src/db/FaissExecutionEngine.h
+2
-0
cpp/src/db/Types.h
cpp/src/db/Types.h
+1
-1
cpp/src/db/scheduler/SearchContext.h
cpp/src/db/scheduler/SearchContext.h
+2
-2
cpp/src/db/scheduler/SearchTaskQueue.cpp
cpp/src/db/scheduler/SearchTaskQueue.cpp
+55
-9
cpp/src/server/MegasearchTask.cpp
cpp/src/server/MegasearchTask.cpp
+3
-2
cpp/unittest/db/db_tests.cpp
cpp/unittest/db/db_tests.cpp
+2
-2
cpp/unittest/metrics/metrics_test.cpp
cpp/unittest/metrics/metrics_test.cpp
+2
-2
未找到文件。
cpp/CHANGELOG.md
浏览文件 @
cf94a753
...
...
@@ -12,7 +12,8 @@ Please mark all change in change log and use the ticket from JIRA.
-
MS-57 - Implement index load/search pipeline
-
MS-56 - Add version information when server is started
-
Ms-64 - Different table can have different index type
-
MS-64 - Different table can have different index type
-
MS-52 - Return search score
## Task
...
...
cpp/src/db/DBImpl.cpp
浏览文件 @
cf94a753
...
...
@@ -240,7 +240,7 @@ Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t nq,
int
inner_k
=
dis
.
size
()
<
k
?
dis
.
size
()
:
k
;
for
(
int
i
=
0
;
i
<
inner_k
;
++
i
)
{
res
.
emplace_back
(
nns
[
output_ids
[
i
]]
);
// mapping
res
.
emplace_back
(
std
::
make_pair
(
nns
[
output_ids
[
i
]],
output_distence
[
i
])
);
// mapping
}
results
.
push_back
(
res
);
// append to result list
res
.
clear
();
...
...
@@ -267,6 +267,8 @@ Status DBImpl::QuerySync(const std::string& table_id, size_t k, size_t nq,
Status
DBImpl
::
QueryAsync
(
const
std
::
string
&
table_id
,
size_t
k
,
size_t
nq
,
const
float
*
vectors
,
const
meta
::
DatesT
&
dates
,
QueryResults
&
results
)
{
//step 1: get files to search
meta
::
DatePartionedTableFilesSchema
files
;
auto
status
=
pMeta_
->
FilesToSearch
(
table_id
,
dates
,
files
);
if
(
!
status
.
ok
())
{
return
status
;
}
...
...
@@ -282,18 +284,15 @@ Status DBImpl::QueryAsync(const std::string& table_id, size_t k, size_t nq,
}
}
//step 2: put search task to scheduler
SearchScheduler
&
scheduler
=
SearchScheduler
::
GetInstance
();
scheduler
.
ScheduleSearchTask
(
context
);
context
->
WaitResult
();
//step 3: construct results
auto
&
context_result
=
context
->
GetResult
();
for
(
auto
&
topk_result
:
context_result
)
{
QueryResult
ids
;
for
(
auto
&
pair
:
topk_result
)
{
ids
.
push_back
(
pair
.
second
);
}
results
.
emplace_back
(
ids
);
}
results
.
swap
(
context_result
);
return
Status
::
OK
();
}
...
...
cpp/src/db/ExecutionEngine.h
浏览文件 @
cf94a753
...
...
@@ -32,6 +32,8 @@ public:
virtual
size_t
Size
()
const
=
0
;
virtual
size_t
Dimension
()
const
=
0
;
virtual
size_t
PhysicalSize
()
const
=
0
;
virtual
Status
Serialize
()
=
0
;
...
...
cpp/src/db/FaissExecutionEngine.cpp
浏览文件 @
cf94a753
...
...
@@ -54,6 +54,10 @@ size_t FaissExecutionEngine::Size() const {
return
(
size_t
)(
Count
()
*
pIndex_
->
d
)
*
sizeof
(
float
);
}
size_t
FaissExecutionEngine
::
Dimension
()
const
{
return
pIndex_
->
d
;
}
size_t
FaissExecutionEngine
::
PhysicalSize
()
const
{
return
(
size_t
)(
Count
()
*
pIndex_
->
d
)
*
sizeof
(
float
);
}
...
...
cpp/src/db/FaissExecutionEngine.h
浏览文件 @
cf94a753
...
...
@@ -38,6 +38,8 @@ public:
size_t
Size
()
const
override
;
size_t
Dimension
()
const
override
;
size_t
PhysicalSize
()
const
override
;
Status
Serialize
()
override
;
...
...
cpp/src/db/Types.h
浏览文件 @
cf94a753
...
...
@@ -15,7 +15,7 @@ typedef long IDNumber;
typedef
IDNumber
*
IDNumberPtr
;
typedef
std
::
vector
<
IDNumber
>
IDNumbers
;
typedef
std
::
vector
<
IDNumber
>
QueryResult
;
typedef
std
::
vector
<
std
::
pair
<
IDNumber
,
double
>
>
QueryResult
;
typedef
std
::
vector
<
QueryResult
>
QueryResults
;
...
...
cpp/src/db/scheduler/SearchContext.h
浏览文件 @
cf94a753
...
...
@@ -31,8 +31,8 @@ public:
using
Id2IndexMap
=
std
::
unordered_map
<
size_t
,
TableFileSchemaPtr
>
;
const
Id2IndexMap
&
GetIndexMap
()
const
{
return
map_index_files_
;
}
using
Score2IdMap
=
std
::
map
<
float
,
int64_t
>
;
using
ResultSet
=
std
::
vector
<
Score2Id
Map
>
;
using
Id2ScoreMap
=
std
::
vector
<
std
::
pair
<
int64_t
,
double
>
>
;
using
ResultSet
=
std
::
vector
<
Id2Score
Map
>
;
const
ResultSet
&
GetResult
()
const
{
return
result_
;
}
ResultSet
&
GetResult
()
{
return
result_
;
}
...
...
cpp/src/db/scheduler/SearchTaskQueue.cpp
浏览文件 @
cf94a753
...
...
@@ -19,12 +19,12 @@ void ClusterResult(const std::vector<long> &output_ids,
SearchContext
::
ResultSet
&
result_set
)
{
result_set
.
clear
();
for
(
auto
i
=
0
;
i
<
nq
;
i
++
)
{
SearchContext
::
Score2IdMap
score2id
;
SearchContext
::
Id2ScoreMap
id_score
;
for
(
auto
k
=
0
;
k
<
topk
;
k
++
)
{
uint64_t
index
=
i
*
nq
+
k
;
score2id
.
insert
(
std
::
make_pair
(
output_distence
[
index
],
output_ids
[
index
]));
id_score
.
push_back
(
std
::
make_pair
(
output_ids
[
index
],
output_distence
[
index
]));
}
result_set
.
emplace_back
(
score2id
);
result_set
.
emplace_back
(
id_score
);
}
}
...
...
@@ -42,18 +42,52 @@ void TopkResult(SearchContext::ResultSet &result_src,
}
for
(
size_t
i
=
0
;
i
<
result_src
.
size
();
i
++
)
{
SearchContext
::
Score2IdMap
&
score2id_src
=
result_src
[
i
];
SearchContext
::
Score2IdMap
&
score2id_target
=
result_target
[
i
];
for
(
auto
iter
=
score2id_src
.
begin
();
iter
!=
score2id_src
.
end
();
++
iter
)
{
score2id_target
.
insert
(
std
::
make_pair
(
iter
->
first
,
iter
->
second
));
SearchContext
::
Id2ScoreMap
&
score_src
=
result_src
[
i
];
SearchContext
::
Id2ScoreMap
&
score_target
=
result_target
[
i
];
for
(
auto
&
pair_src
:
score_src
)
{
for
(
auto
iter
=
score_target
.
begin
();
iter
!=
score_target
.
end
();
++
iter
)
{
if
(
pair_src
.
second
>
iter
->
second
)
{
score_target
.
insert
(
iter
,
pair_src
);
}
}
}
//remove unused items
while
(
score2id_target
.
size
()
>
topk
)
{
score2id_target
.
erase
(
score2id_target
.
rbegin
()
->
first
);
while
(
score_target
.
size
()
>
topk
)
{
auto
it_end
=
score_target
.
end
();
it_end
--
;
score_target
.
erase
(
it_end
);
}
}
}
void
CalcScore
(
uint64_t
vector_count
,
const
float
*
vectors_data
,
uint64_t
dimension
,
const
SearchContext
::
ResultSet
&
result_src
,
SearchContext
::
ResultSet
&
result_target
)
{
result_target
.
clear
();
if
(
result_src
.
empty
()){
return
;
}
int
vec_index
=
0
;
for
(
auto
&
result
:
result_src
)
{
const
float
*
vec_data
=
vectors_data
+
vec_index
*
dimension
;
double
vec_len
=
0
;
for
(
uint64_t
i
=
0
;
i
<
dimension
;
i
++
)
{
vec_len
+=
vec_data
[
i
]
*
vec_data
[
i
];
}
vec_index
++
;
SearchContext
::
Id2ScoreMap
score_array
;
for
(
auto
&
pair
:
result
)
{
score_array
.
push_back
(
std
::
make_pair
(
pair
.
first
,
(
1
-
pair
.
second
/
vec_len
)
*
100.0
));
}
result_target
.
emplace_back
(
score_array
);
}
}
}
...
...
@@ -78,10 +112,12 @@ bool SearchTask::DoSearch() {
std
::
vector
<
long
>
output_ids
;
std
::
vector
<
float
>
output_distence
;
for
(
auto
&
context
:
search_contexts_
)
{
//step 1: allocate memory
auto
inner_k
=
index_engine_
->
Count
()
<
context
->
topk
()
?
index_engine_
->
Count
()
:
context
->
topk
();
output_ids
.
resize
(
inner_k
*
context
->
nq
());
output_distence
.
resize
(
inner_k
*
context
->
nq
());
//step 2: search
try
{
index_engine_
->
Search
(
context
->
nq
(),
context
->
vectors
(),
inner_k
,
output_distence
.
data
(),
output_ids
.
data
());
...
...
@@ -93,11 +129,21 @@ bool SearchTask::DoSearch() {
rc
.
Record
(
"do search"
);
//step 3: cluster result
SearchContext
::
ResultSet
result_set
;
ClusterResult
(
output_ids
,
output_distence
,
context
->
nq
(),
inner_k
,
result_set
);
rc
.
Record
(
"cluster result"
);
//step 4: pick up topk result
TopkResult
(
result_set
,
inner_k
,
context
->
GetResult
());
rc
.
Record
(
"reduce topk"
);
//step 5: calculate score between 0 ~ 100
CalcScore
(
context
->
nq
(),
context
->
vectors
(),
index_engine_
->
Dimension
(),
context
->
GetResult
(),
result_set
);
context
->
GetResult
().
swap
(
result_set
);
rc
.
Record
(
"reduce topk"
);
//step 6: notify to send result to client
context
->
IndexSearchDone
(
index_id_
);
}
...
...
cpp/src/server/MegasearchTask.cpp
浏览文件 @
cf94a753
...
...
@@ -400,9 +400,10 @@ ServerError SearchVectorTask::OnExecute() {
const
auto
&
record
=
record_array_
[
i
];
thrift
::
TopKQueryResult
thrift_topk_result
;
for
(
auto
id
:
result
)
{
for
(
auto
&
pair
:
result
)
{
thrift
::
QueryResult
thrift_result
;
thrift_result
.
__set_id
(
id
);
thrift_result
.
__set_id
(
pair
.
first
);
thrift_result
.
__set_score
(
pair
.
second
);
thrift_topk_result
.
query_result_arrays
.
emplace_back
(
thrift_result
);
}
...
...
cpp/unittest/db/db_tests.cpp
浏览文件 @
cf94a753
...
...
@@ -164,11 +164,11 @@ TEST_F(DBTest, DB_TEST) {
ASSERT_STATS
(
stat
);
for
(
auto
k
=
0
;
k
<
qb
;
++
k
)
{
ASSERT_EQ
(
results
[
k
][
0
],
target_ids
[
k
]);
ASSERT_EQ
(
results
[
k
][
0
]
.
first
,
target_ids
[
k
]);
ss
.
str
(
""
);
ss
<<
"Result ["
<<
k
<<
"]:"
;
for
(
auto
result
:
results
[
k
])
{
ss
<<
result
<<
" "
;
ss
<<
result
.
first
<<
" "
;
}
/* LOG(DEBUG) << ss.str(); */
}
...
...
cpp/unittest/metrics/metrics_test.cpp
浏览文件 @
cf94a753
...
...
@@ -87,11 +87,11 @@ TEST_F(DBTest, Metric_Tes) {
ASSERT_STATS
(
stat
);
for
(
auto
k
=
0
;
k
<
qb
;
++
k
)
{
ASSERT_EQ
(
results
[
k
][
0
],
target_ids
[
k
]);
ASSERT_EQ
(
results
[
k
][
0
]
.
first
,
target_ids
[
k
]);
ss
.
str
(
""
);
ss
<<
"Result ["
<<
k
<<
"]:"
;
for
(
auto
result
:
results
[
k
])
{
ss
<<
result
<<
" "
;
ss
<<
result
.
first
<<
" "
;
}
/* LOG(DEBUG) << ss.str(); */
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录