Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
0bcb727b
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0bcb727b
编写于
4月 23, 2019
作者:
G
groot
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'jinhai' of 192.168.1.105:jinhai/vecwise_engine into jinhai
Former-commit-id: 086d172e4c9ba7db6a271940f2baa6ab5ede6069
上级
f642a7b3
393b3873
变更
3
隐藏空白更改
内联
并排
Showing 3 changed files with 175 additions and 43 deletions
+175
-43
cpp/src/db/DBImpl.cpp
cpp/src/db/DBImpl.cpp
+57
-39
cpp/unittest/db/db_tests.cpp
cpp/unittest/db/db_tests.cpp
+86
-4
cpp/unittest/faiss_wrapper/wrapper_test.cpp
cpp/unittest/faiss_wrapper/wrapper_test.cpp
+32
-0
未找到文件。
cpp/src/db/DBImpl.cpp
浏览文件 @
0bcb727b
...
...
@@ -56,6 +56,7 @@ Status DBImpl::add_vectors(const std::string& group_id_,
}
}
// TODO(XUPENG): add search range based on time
Status
DBImpl
::
search
(
const
std
::
string
&
group_id
,
size_t
k
,
size_t
nq
,
const
float
*
vectors
,
QueryResults
&
results
)
{
meta
::
DatePartionedGroupFilesSchema
files
;
...
...
@@ -63,75 +64,92 @@ Status DBImpl::search(const std::string &group_id, size_t k, size_t nq,
auto
status
=
_pMeta
->
files_to_search
(
group_id
,
partition
,
files
);
if
(
!
status
.
ok
())
{
return
status
;
}
// TODO: optimized
meta
::
GroupFilesSchema
index_files
;
meta
::
GroupFilesSchema
raw_files
;
for
(
auto
&
day_files
:
files
)
{
for
(
auto
&
file
:
day_files
.
second
)
{
file
.
file_type
==
meta
::
GroupFileSchema
::
RAW
?
raw_files
.
push_back
(
file
)
:
index_files
.
push_back
(
file
);
file
.
file_type
==
meta
::
GroupFileSchema
::
INDEX
?
index_files
.
push_back
(
file
)
:
raw_files
.
push_back
(
file
);
}
}
int
dim
=
raw_files
[
0
].
dimension
;
int
dim
=
0
;
if
(
!
index_files
.
empty
())
{
dim
=
index_files
[
0
].
dimension
;
}
else
if
(
!
raw_files
.
empty
())
{
dim
=
raw_files
[
0
].
dimension
;
}
else
{
return
Status
::
OK
();
}
// merge raw files
// merge raw files
and build flat index.
faiss
::
Index
*
index
(
faiss
::
index_factory
(
dim
,
"IDMap,Flat"
));
for
(
auto
&
file
:
raw_files
)
{
auto
file_index
=
dynamic_cast
<
faiss
::
IndexIDMap
*>
(
faiss
::
read_index
(
file
.
location
.
c_str
()));
index
->
add_with_ids
(
file_index
->
ntotal
,
dynamic_cast
<
faiss
::
IndexFlat
*>
(
file_index
->
index
)
->
xb
.
data
(),
index
->
add_with_ids
(
file_index
->
ntotal
,
dynamic_cast
<
faiss
::
IndexFlat
*>
(
file_index
->
index
)
->
xb
.
data
(),
file_index
->
id_map
.
data
());
}
float
*
xb
=
dynamic_cast
<
faiss
::
IndexFlat
*>
(
index
)
->
xb
.
data
();
int64_t
*
ids
=
dynamic_cast
<
faiss
::
IndexIDMap
*>
(
index
)
->
id_map
.
data
();
long
totoal
=
index
->
ntotal
;
std
::
vector
<
float
>
distence
;
std
::
vector
<
long
>
result_ids
;
{
// allocate memory
// [{ids, distence}, ...]
using
SearchResult
=
std
::
pair
<
std
::
vector
<
long
>
,
std
::
vector
<
float
>>
;
std
::
vector
<
SearchResult
>
batchresult
(
nq
);
// allocate nq cells.
auto
cluster
=
[
&
](
long
*
nns
,
float
*
dis
)
->
void
{
for
(
int
i
=
0
;
i
<
nq
;
++
i
)
{
auto
f_begin
=
batchresult
[
i
].
first
.
cbegin
();
auto
s_begin
=
batchresult
[
i
].
second
.
cbegin
();
batchresult
[
i
].
first
.
insert
(
f_begin
,
nns
+
i
*
k
,
nns
+
i
*
k
+
k
);
batchresult
[
i
].
second
.
insert
(
s_begin
,
dis
+
i
*
k
,
dis
+
i
*
k
+
k
);
}
};
// Allocate Memory
float
*
output_distence
;
long
*
output_ids
;
output_distence
=
(
float
*
)
malloc
(
k
*
sizeof
(
float
));
output_ids
=
(
long
*
)
malloc
(
k
*
sizeof
(
long
));
// build and search in raw file
// TODO: HardCode
auto
opd
=
std
::
make_shared
<
Operand
>
();
opd
->
index_type
=
"IDMap,Flat"
;
IndexBuilderPtr
builder
=
GetIndexBuilder
(
opd
);
auto
index
=
builder
->
build_all
(
totoal
,
xb
,
ids
);
output_distence
=
(
float
*
)
malloc
(
k
*
nq
*
sizeof
(
float
));
output_ids
=
(
long
*
)
malloc
(
k
*
nq
*
sizeof
(
long
));
memset
(
output_distence
,
0
,
k
*
nq
*
sizeof
(
float
));
memset
(
output_ids
,
0
,
k
*
nq
*
sizeof
(
long
));
// search in raw file
index
->
search
(
nq
,
vectors
,
k
,
output_distence
,
output_ids
);
distence
.
insert
(
distence
.
begin
(),
output_distence
,
output_distence
+
k
);
result_ids
.
insert
(
result_ids
.
begin
(),
output_ids
,
output_ids
+
k
);
memset
(
output_distence
,
0
,
k
*
sizeof
(
float
));
memset
(
output_ids
,
0
,
k
*
sizeof
(
long
));
cluster
(
output_ids
,
output_distence
);
// cluster to each query
memset
(
output_distence
,
0
,
k
*
nq
*
sizeof
(
float
));
memset
(
output_ids
,
0
,
k
*
nq
*
sizeof
(
long
));
//
s
earch in index file
//
S
earch in index file
for
(
auto
&
file
:
index_files
)
{
auto
index
=
read_index
(
file
.
location
.
c_str
());
index
->
search
(
nq
,
vectors
,
k
,
output_distence
,
output_ids
);
distence
.
insert
(
distence
.
begin
(),
output_distence
,
output_distence
+
k
);
result_ids
.
insert
(
result_ids
.
begin
(),
output_ids
,
output_ids
+
k
);
memset
(
output_distence
,
0
,
k
*
sizeof
(
float
));
memset
(
output_ids
,
0
,
k
*
sizeof
(
long
));
cluster
(
output_ids
,
output_distence
);
// cluster to each query
memset
(
output_distence
,
0
,
k
*
nq
*
sizeof
(
float
));
memset
(
output_ids
,
0
,
k
*
nq
*
sizeof
(
long
));
}
// TopK
TopK
(
distence
.
data
(),
distence
.
size
(),
k
,
output_distence
,
output_ids
);
distence
.
clear
();
result_ids
.
clear
();
distence
.
insert
(
distence
.
begin
(),
output_distence
,
output_distence
+
k
);
result_ids
.
insert
(
result_ids
.
begin
(),
output_ids
,
output_ids
+
k
);
auto
cluster_topk
=
[
&
]()
->
void
{
QueryResult
res
;
for
(
auto
&
result_pair
:
batchresult
)
{
auto
&
dis
=
result_pair
.
second
;
auto
&
nns
=
result_pair
.
first
;
TopK
(
dis
.
data
(),
dis
.
size
(),
k
,
output_distence
,
output_ids
);
for
(
int
i
=
0
;
i
<
k
;
++
i
)
{
res
.
emplace_back
(
nns
[
output_ids
[
i
]]);
// mapping
}
results
.
push_back
(
res
);
// append to result list
res
.
clear
();
}
};
cluster_topk
();
// free
free
(
output_distence
);
free
(
output_ids
);
}
if
(
results
.
empty
())
{
return
Status
::
NotFound
(
"Group "
+
group_id
+
", search result not found!"
);
}
return
Status
::
OK
();
}
...
...
cpp/unittest/db/db_tests.cpp
浏览文件 @
0bcb727b
...
...
@@ -4,8 +4,12 @@
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include <faiss/IndexFlat.h>
#include <faiss/MetaIndexes.h>
#include <faiss/AutoTune.h>
#include "db/DB.h"
#include "faiss/Index.h"
using
namespace
zilliz
::
vecwise
;
...
...
@@ -51,12 +55,90 @@ TEST(DBTest, DB_TEST) {
stat
=
db
->
add_vectors
(
group_name
,
1
,
vec_f
.
data
(),
vector_ids
);
ASSERT_STATS
(
stat
);
//engine::QueryResults results;
//std::vector<float> vec_s = vec_f;
//stat = db->search(group_name, 1, 1, vec_f.data(), results);
//ASSERT_STATS(stat);
//ASSERT_EQ(results.size(), 1);
//ASSERT_EQ(results[0][0], vector_ids[0]);
delete
db
;
}
TEST
(
SearchTest
,
DB_TEST
)
{
static
const
std
::
string
group_name
=
"test_group"
;
static
const
int
group_dim
=
256
;
engine
::
Options
opt
;
opt
.
meta
.
backend_uri
=
"http://127.0.0.1"
;
opt
.
meta
.
path
=
"/tmp/search_test"
;
opt
.
index_trigger_size
=
100000
*
group_dim
;
opt
.
memory_sync_interval
=
1
;
opt
.
merge_trigger_number
=
1
;
engine
::
DB
*
db
=
nullptr
;
engine
::
DB
::
Open
(
opt
,
&
db
);
ASSERT_TRUE
(
db
!=
nullptr
);
engine
::
meta
::
GroupSchema
group_info
;
group_info
.
dimension
=
group_dim
;
group_info
.
group_id
=
group_name
;
engine
::
Status
stat
=
db
->
add_group
(
group_info
);
//ASSERT_STATS(stat);
engine
::
meta
::
GroupSchema
group_info_get
;
group_info_get
.
group_id
=
group_name
;
stat
=
db
->
get_group
(
group_info_get
);
ASSERT_STATS
(
stat
);
ASSERT_EQ
(
group_info_get
.
dimension
,
group_dim
);
// prepare raw data
size_t
nb
=
25000
;
size_t
nq
=
10
;
size_t
k
=
5
;
std
::
vector
<
float
>
xb
(
nb
*
group_dim
);
std
::
vector
<
float
>
xq
(
nq
*
group_dim
);
std
::
vector
<
long
>
ids
(
nb
);
std
::
random_device
rd
;
std
::
mt19937
gen
(
rd
());
std
::
uniform_real_distribution
<>
dis_xt
(
-
1.0
,
1.0
);
for
(
size_t
i
=
0
;
i
<
nb
*
group_dim
;
i
++
)
{
xb
[
i
]
=
dis_xt
(
gen
);
if
(
i
<
nb
){
ids
[
i
]
=
i
;
}
}
for
(
size_t
i
=
0
;
i
<
nq
*
group_dim
;
i
++
)
{
xq
[
i
]
=
dis_xt
(
gen
);
}
// result data
//std::vector<long> nns_gt(k*nq);
std
::
vector
<
long
>
nns
(
k
*
nq
);
// nns = nearst neg search
//std::vector<float> dis_gt(k*nq);
std
::
vector
<
float
>
dis
(
k
*
nq
);
// prepare ground-truth
//faiss::Index* index_gt(faiss::index_factory(group_dim, "IDMap,Flat"));
//index_gt->add_with_ids(nb, xb.data(), ids.data());
//index_gt->search(nq, xq.data(), 1, dis_gt.data(), nns_gt.data());
// insert data
const
int
batch_size
=
100
;
for
(
int
j
=
0
;
j
<
nb
/
batch_size
;
++
j
)
{
stat
=
db
->
add_vectors
(
group_name
,
batch_size
,
xb
.
data
()
+
batch_size
*
j
*
group_dim
,
ids
);
ASSERT_STATS
(
stat
);
}
//sleep(10); // wait until build index finish
engine
::
QueryResults
results
;
std
::
vector
<
float
>
vec_s
=
vec_f
;
stat
=
db
->
search
(
group_name
,
1
,
1
,
vec_f
.
data
(),
results
);
stat
=
db
->
search
(
group_name
,
k
,
nq
,
xq
.
data
(),
results
);
ASSERT_STATS
(
stat
);
ASSERT_EQ
(
results
.
size
(),
1
);
ASSERT_EQ
(
results
[
0
][
0
],
vector_ids
[
0
]);
// TODO(linxj): add groundTruth assert
delete
db
;
}
\ No newline at end of file
cpp/unittest/faiss_wrapper/wrapper_test.cpp
浏览文件 @
0bcb727b
...
...
@@ -92,3 +92,35 @@ TEST(build_test, Wrapper_Test) {
delete
[]
result_ids
;
}
TEST
(
search_test
,
Wrapper_Test
)
{
const
int
dim
=
256
;
size_t
nb
=
25000
;
size_t
nq
=
100
;
size_t
k
=
100
;
std
::
vector
<
float
>
xb
(
nb
*
dim
);
std
::
vector
<
float
>
xq
(
nq
*
dim
);
std
::
vector
<
long
>
ids
(
nb
*
dim
);
std
::
random_device
rd
;
std
::
mt19937
gen
(
rd
());
std
::
uniform_real_distribution
<>
dis_xt
(
-
1.0
,
1.0
);
for
(
size_t
i
=
0
;
i
<
nb
*
dim
;
i
++
)
{
xb
[
i
]
=
dis_xt
(
gen
);
ids
[
i
]
=
i
;
}
for
(
size_t
i
=
0
;
i
<
nq
*
dim
;
i
++
)
{
xq
[
i
]
=
dis_xt
(
gen
);
}
// result data
std
::
vector
<
long
>
nns_gt
(
nq
*
k
);
// nns = nearst neg search
std
::
vector
<
long
>
nns
(
nq
*
k
);
std
::
vector
<
float
>
dis_gt
(
nq
*
k
);
std
::
vector
<
float
>
dis
(
nq
*
k
);
faiss
::
Index
*
index_gt
(
faiss
::
index_factory
(
dim
,
"IDMap,Flat"
));
index_gt
->
add_with_ids
(
nb
,
xb
.
data
(),
ids
.
data
());
index_gt
->
search
(
nq
,
xq
.
data
(),
10
,
dis_gt
.
data
(),
nns_gt
.
data
());
std
::
cout
<<
"data: "
<<
nns_gt
[
0
];
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录