Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
milvus
提交
eba5bdf3
milvus
项目概览
BaiXuePrincess
/
milvus
与 Fork 源项目一致
从无法访问的项目Fork
通知
7
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
eba5bdf3
编写于
7月 24, 2020
作者:
C
cqy123456
提交者:
GitHub
7月 24, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Improve IVF search performance when NQ and nProbe are both large (#3001)
Signed-off-by:
N
cqy
<
yaya645@126.com
>
上级
c94699fb
变更
5
隐藏空白更改
内联
并排
Showing
Showing
5 changed files
with
185 additions
and
151 deletions
+185
-151
CHANGELOG.md
CHANGELOG.md
+2
-0
core/src/index/thirdparty/faiss/utils/distances.cpp
core/src/index/thirdparty/faiss/utils/distances.cpp
+153
-151
core/src/index/thirdparty/faiss/utils/distances.h
core/src/index/thirdparty/faiss/utils/distances.h
+3
-0
core/src/index/thirdparty/faiss/utils/utils.cpp
core/src/index/thirdparty/faiss/utils/utils.cpp
+24
-0
core/src/index/thirdparty/faiss/utils/utils.h
core/src/index/thirdparty/faiss/utils/utils.h
+3
-0
未找到文件。
CHANGELOG.md
浏览文件 @
eba5bdf3
...
...
@@ -65,6 +65,7 @@ Please mark all changes in change log and use the issue from GitHub
-
\#
2544 Optimize unittest build
-
\#
2561 Clean util dependencies with other modules
-
\#
2612 Move all APIs in utils into namespace milvus
-
\#
2653 Improve IVF search performance when NQ and nProbe are both large
-
\#
2675 Print out system memory size when report invalid cpu cache size
-
\#
2828 Let Faiss not compile half float by default
-
\#
2841 Replace IndexType/EngineType/MetricType
...
...
@@ -901,3 +902,4 @@ Please mark all changes in change log and use the issue from GitHub
-
MS-1 Add CHANGELOG.md
-
MS-4 Refactor the vecwise_engine code structure
-
MS-62 Search range to all if no date specified
core/src/index/thirdparty/faiss/utils/distances.cpp
浏览文件 @
eba5bdf3
...
...
@@ -132,16 +132,11 @@ void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x)
/***************************************************************************
* KNN functions
***************************************************************************/
int
parallel_policy_threshold
=
65535
;
/* Find the nearest neighbors for nx queries in a set of ny vectors */
static
void
knn_inner_product_sse
(
const
float
*
x
,
...
...
@@ -151,101 +146,103 @@ static void knn_inner_product_sse (const float * x,
ConcurrentBitsetPtr
bitset
=
nullptr
)
{
size_t
k
=
res
->
k
;
size_t
thread_max_num
=
omp_get_max_threads
();
size_t
thread_heap_size
=
nx
*
k
;
size_t
all_heap_size
=
thread_heap_size
*
thread_max_num
;
float
*
value
=
new
float
[
all_heap_size
];
int64_t
*
labels
=
new
int64_t
[
all_heap_size
];
// init heap
for
(
size_t
i
=
0
;
i
<
all_heap_size
;
i
++
)
{
value
[
i
]
=
-
1.0
/
0.0
;
labels
[
i
]
=
-
1
;
}
#pragma omp parallel for
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
size_t
thread_no
=
omp_get_thread_num
();
const
float
*
y_j
=
y
+
j
*
d
;
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
float
ip
=
fvec_inner_product
(
x_i
,
y_j
,
d
);
float
*
val_
=
value
+
thread_no
*
thread_heap_size
+
i
*
k
;
int64_t
*
ids_
=
labels
+
thread_no
*
thread_heap_size
+
i
*
k
;
if
(
ip
>
val_
[
0
])
{
minheap_swap_top
(
k
,
val_
,
ids_
,
ip
,
j
);
}
if
(
ny
>
parallel_policy_threshold
)
{
size_t
block_x
=
std
::
min
(
get_L3_Size
()
/
(
d
*
sizeof
(
float
)
+
thread_max_num
*
k
*
(
sizeof
(
float
)
+
sizeof
(
int64_t
))),
nx
);
size_t
all_heap_size
=
block_x
*
k
*
thread_max_num
;
float
*
value
=
new
float
[
all_heap_size
];
int64_t
*
labels
=
new
int64_t
[
all_heap_size
];
for
(
size_t
x_from
=
0
,
x_to
;
x_from
<
nx
;
x_from
=
x_to
)
{
x_to
=
std
::
min
(
nx
,
x_from
+
block_x
);
int
size
=
x_to
-
x_from
;
int
thread_heap_size
=
size
*
k
;
// init heap
for
(
size_t
i
=
0
;
i
<
all_heap_size
;
i
++
)
{
value
[
i
]
=
-
1.0
/
0.0
;
labels
[
i
]
=
-
1
;
}
}
}
for
(
size_t
t
=
1
;
t
<
thread_max_num
;
t
++
)
{
// merge heap
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
float
*
__restrict
value_x
=
value
+
i
*
k
;
int64_t
*
__restrict
labels_x
=
labels
+
i
*
k
;
float
*
value_x_t
=
value_x
+
t
*
thread_heap_size
;
int64_t
*
labels_x_t
=
labels_x
+
t
*
thread_heap_size
;
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
if
(
value_x_t
[
j
]
>
value_x
[
0
])
{
minheap_swap_top
(
k
,
value_x
,
labels_x
,
value_x_t
[
j
],
labels_x_t
[
j
]);
#pragma omp parallel for schedule(static)
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
size_t
thread_no
=
omp_get_thread_num
();
const
float
*
y_j
=
y
+
j
*
d
;
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
float
disij
=
fvec_inner_product
(
x_i
,
y_j
,
d
);
float
*
val_
=
value
+
thread_no
*
thread_heap_size
+
(
i
-
x_from
)
*
k
;
int64_t
*
ids_
=
labels
+
thread_no
*
thread_heap_size
+
(
i
-
x_from
)
*
k
;
if
(
disij
>
val_
[
0
])
{
minheap_swap_top
(
k
,
val_
,
ids_
,
disij
,
j
);
}
}
}
}
}
}
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
float
*
value_x
=
value
+
i
*
k
;
int64_t
*
labels_x
=
labels
+
i
*
k
;
minheap_reorder
(
k
,
value_x
,
labels_x
);
}
// copy result
memcpy
(
res
->
val
,
value
,
thread_heap_size
*
sizeof
(
float
));
memcpy
(
res
->
ids
,
labels
,
thread_heap_size
*
sizeof
(
int64_t
));
for
(
size_t
t
=
1
;
t
<
thread_max_num
;
t
++
)
{
// merge heap
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
float
*
__restrict
value_x
=
value
+
(
i
-
x_from
)
*
k
;
int64_t
*
__restrict
labels_x
=
labels
+
(
i
-
x_from
)
*
k
;
float
*
value_x_t
=
value_x
+
t
*
thread_heap_size
;
int64_t
*
labels_x_t
=
labels_x
+
t
*
thread_heap_size
;
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
if
(
value_x_t
[
j
]
>
value_x
[
0
])
{
minheap_swap_top
(
k
,
value_x
,
labels_x
,
value_x_t
[
j
],
labels_x_t
[
j
]);
}
}
}
}
delete
[]
value
;
delete
[]
labels
;
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
float
*
value_x
=
value
+
(
i
-
x_from
)
*
k
;
int64_t
*
labels_x
=
labels
+
(
i
-
x_from
)
*
k
;
minheap_reorder
(
k
,
value_x
,
labels_x
);
}
/*
else {
size_t check_period = InterruptCallback::get_period_hint (ny * d);
check_period *= thread_max_num;
memcpy
(
res
->
val
+
x_from
*
k
,
value
,
thread_heap_size
*
sizeof
(
float
));
memcpy
(
res
->
ids
+
x_from
*
k
,
labels
,
thread_heap_size
*
sizeof
(
int64_t
));
}
delete
[]
value
;
delete
[]
labels
;
for (size_t i0 = 0; i0 < nx; i0 += check_period) {
size_t i1 = std::min(i0 + check_period, nx);
}
else
{
float
*
value
=
res
->
val
;
int64_t
*
labels
=
res
->
ids
;
#pragma omp parallel for
for (size_t i = i0; i < i1; i++) {
const float * x_i = x + i * d;
const float * y_j = y;
float * __restrict simi = res->get_val(i);
int64_t * __restrict idxi = res->get_ids (i);
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
const
float
*
y_j
=
y
;
minheap_heapify (k, simi, idxi);
float
*
__restrict
val_
=
value
+
i
*
k
;
int64_t
*
__restrict
ids_
=
labels
+
i
*
k
;
for (size_t j = 0; j < ny; j++) {
if(!bitset || !bitset->test(j)){
float ip = fvec_inner_product (x_i, y_j, d);
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
val_
[
j
]
=
-
1.0
/
0.0
;
ids_
[
j
]
=
-
1
;
}
if (ip > simi[0]) {
minheap_pop (k, simi, idxi);
minheap_push (k, simi, idxi, ip, j);
}
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
float
disij
=
fvec_inner_product
(
x_i
,
y_j
,
d
);
if
(
disij
>
val_
[
0
])
{
minheap_swap_top
(
k
,
val_
,
ids_
,
disij
,
j
);
}
y_j += d;
}
minheap_reorder (k, simi, idxi)
;
y_j
+=
d
;
}
InterruptCallback::check ();
minheap_reorder
(
k
,
val_
,
ids_
);
}
}
*/
}
static
void
knn_L2sqr_sse
(
...
...
@@ -256,100 +253,105 @@ static void knn_L2sqr_sse (
ConcurrentBitsetPtr
bitset
=
nullptr
)
{
size_t
k
=
res
->
k
;
size_t
thread_max_num
=
omp_get_max_threads
();
size_t
thread_heap_size
=
nx
*
k
;
size_t
all_heap_size
=
thread_heap_size
*
thread_max_num
;
float
*
value
=
new
float
[
all_heap_size
];
int64_t
*
labels
=
new
int64_t
[
all_heap_size
]
;
if
(
ny
>
parallel_policy_threshold
)
{
size_t
block_x
=
std
::
min
(
get_L3_Size
()
/
(
d
*
sizeof
(
float
)
+
thread_max_num
*
k
*
(
sizeof
(
float
)
+
sizeof
(
int64_t
))),
nx
)
;
// init heap
for
(
size_t
i
=
0
;
i
<
all_heap_size
;
i
++
)
{
value
[
i
]
=
1.0
/
0.0
;
labels
[
i
]
=
-
1
;
}
size_t
all_heap_size
=
block_x
*
k
*
thread_max_num
;
float
*
value
=
new
float
[
all_heap_size
];
int64_t
*
labels
=
new
int64_t
[
all_heap_size
];
#pragma omp parallel for
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
size_t
thread_no
=
omp_get_thread_num
();
const
float
*
y_j
=
y
+
j
*
d
;
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
float
disij
=
fvec_L2sqr
(
x_i
,
y_j
,
d
);
float
*
val_
=
value
+
thread_no
*
thread_heap_size
+
i
*
k
;
int64_t
*
ids_
=
labels
+
thread_no
*
thread_heap_size
+
i
*
k
;
if
(
disij
<
val_
[
0
])
{
maxheap_swap_top
(
k
,
val_
,
ids_
,
disij
,
j
);
for
(
size_t
x_from
=
0
,
x_to
;
x_from
<
nx
;
x_from
=
x_to
)
{
x_to
=
std
::
min
(
nx
,
x_from
+
block_x
);
int
size
=
x_to
-
x_from
;
int
thread_heap_size
=
size
*
k
;
// init heap
for
(
size_t
i
=
0
;
i
<
all_heap_size
;
i
++
)
{
value
[
i
]
=
1.0
/
0.0
;
labels
[
i
]
=
-
1
;
}
#pragma omp parallel for schedule(static)
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
size_t
thread_no
=
omp_get_thread_num
();
const
float
*
y_j
=
y
+
j
*
d
;
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
float
disij
=
fvec_L2sqr
(
x_i
,
y_j
,
d
);
float
*
val_
=
value
+
thread_no
*
thread_heap_size
+
(
i
-
x_from
)
*
k
;
int64_t
*
ids_
=
labels
+
thread_no
*
thread_heap_size
+
(
i
-
x_from
)
*
k
;
if
(
disij
<
val_
[
0
])
{
maxheap_swap_top
(
k
,
val_
,
ids_
,
disij
,
j
);
}
}
}
}
}
}
for
(
size_t
t
=
1
;
t
<
thread_max_num
;
t
++
)
{
// merge heap
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
float
*
__restrict
value_x
=
value
+
i
*
k
;
int64_t
*
__restrict
labels_x
=
labels
+
i
*
k
;
float
*
value_x_t
=
value_x
+
t
*
thread_heap_size
;
int64_t
*
labels_x_t
=
labels_x
+
t
*
thread_heap_size
;
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
if
(
value_x_t
[
j
]
<
value_x
[
0
])
{
maxheap_swap_top
(
k
,
value_x
,
labels_x
,
value_x_t
[
j
],
labels_x_t
[
j
]);
for
(
size_t
t
=
1
;
t
<
thread_max_num
;
t
++
)
{
// merge heap
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
float
*
__restrict
value_x
=
value
+
(
i
-
x_from
)
*
k
;
int64_t
*
__restrict
labels_x
=
labels
+
(
i
-
x_from
)
*
k
;
float
*
value_x_t
=
value_x
+
t
*
thread_heap_size
;
int64_t
*
labels_x_t
=
labels_x
+
t
*
thread_heap_size
;
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
if
(
value_x_t
[
j
]
<
value_x
[
0
])
{
maxheap_swap_top
(
k
,
value_x
,
labels_x
,
value_x_t
[
j
],
labels_x_t
[
j
]);
}
}
}
}
}
}
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
float
*
value_x
=
value
+
i
*
k
;
int64_t
*
labels_x
=
labels
+
i
*
k
;
maxheap_reorder
(
k
,
value_x
,
labels_x
);
}
for
(
size_t
i
=
x_from
;
i
<
x_to
;
i
++
)
{
float
*
value_x
=
value
+
(
i
-
x_from
)
*
k
;
int64_t
*
labels_x
=
labels
+
(
i
-
x_from
)
*
k
;
maxheap_reorder
(
k
,
value_x
,
labels_x
);
}
// copy result
memcpy
(
res
->
val
,
value
,
thread_heap_size
*
sizeof
(
float
));
memcpy
(
res
->
ids
,
labels
,
thread_heap_size
*
sizeof
(
int64_t
));
memcpy
(
res
->
val
+
x_from
*
k
,
value
,
thread_heap_size
*
sizeof
(
float
));
memcpy
(
res
->
ids
+
x_from
*
k
,
labels
,
thread_heap_size
*
sizeof
(
int64_t
));
}
delete
[]
value
;
delete
[]
labels
;
delete
[]
value
;
delete
[]
labels
;
/*
else {
size_t check_period = InterruptCallback::get_period_hint (ny * d);
check_period *= thread_max_num;
}
else
{
f
or (size_t i0 = 0; i0 < nx; i0 += check_period) {
size_t i1 = std::min(i0 + check_period, nx)
;
f
loat
*
value
=
res
->
val
;
int64_t
*
labels
=
res
->
ids
;
#pragma omp parallel for
for (size_t i = i0; i < i1; i++) {
const float * x_i = x + i * d;
const float * y_j = y;
float * simi = res->get_val(i);
int64_t * idxi = res->get_ids (i);
for
(
size_t
i
=
0
;
i
<
nx
;
i
++
)
{
const
float
*
x_i
=
x
+
i
*
d
;
const
float
*
y_j
=
y
;
maxheap_heapify (k, simi, idxi);
float
*
__restrict
val_
=
value
+
i
*
k
;
int64_t
*
__restrict
ids_
=
labels
+
i
*
k
;
for (size_t j = 0; j < ny; j++) {
if(!bitset || !bitset->test(j)){
float disij = fvec_L2sqr (x_i, y_j, d);
for
(
size_t
j
=
0
;
j
<
k
;
j
++
)
{
val_
[
j
]
=
1.0
/
0.0
;
ids_
[
j
]
=
-
1
;
}
if (disij < simi[0]) {
maxheap_pop (k, simi, idxi);
maxheap_push (k, simi, idxi, disij, j);
}
for
(
size_t
j
=
0
;
j
<
ny
;
j
++
)
{
if
(
!
bitset
||
!
bitset
->
test
(
j
))
{
float
disij
=
fvec_L2sqr
(
x_i
,
y_j
,
d
);
if
(
disij
<
val_
[
0
])
{
maxheap_swap_top
(
k
,
val_
,
ids_
,
disij
,
j
);
}
y_j += d;
}
maxheap_reorder (k, simi, idxi)
;
y_j
+=
d
;
}
InterruptCallback::check ();
maxheap_reorder
(
k
,
val_
,
ids_
);
}
}
*/
}
/** Find the nearest neighbors for nx queries in a set of ny vectors */
...
...
core/src/index/thirdparty/faiss/utils/distances.h
浏览文件 @
eba5bdf3
...
...
@@ -158,6 +158,9 @@ void pairwise_indexed_inner_product (
// threshold on nx above which we switch to BLAS to compute distances
extern
int
distance_compute_blas_threshold
;
// threshold on nx above which we switch to compute parallel on ny
extern
int
parallel_policy_threshold
;
/** Return the k nearest neighors of each of the nx vectors x among the ny
* vector y, w.r.t to max inner product
*
...
...
core/src/index/thirdparty/faiss/utils/utils.cpp
浏览文件 @
eba5bdf3
...
...
@@ -684,4 +684,28 @@ bool check_openmp() {
return
true
;
}
/** Return the size, in bytes, of the CPU L3 cache.
 *
 *  Reads /sys/devices/system/cpu/cpu0/cache/index3/size (Linux sysfs,
 *  value formatted like "12288K"). If the file cannot be opened, read,
 *  or parsed, falls back to a 12 MB default.
 *
 *  The value is computed once; C++11 guarantees the initializer of a
 *  function-local static runs exactly once even under concurrent calls,
 *  which fixes the data race of a hand-rolled `-1` sentinel.
 */
int64_t get_L3_Size () {
    static const int64_t l3_size = [] () -> int64_t {
        constexpr int64_t KB = 1024;
        constexpr int64_t default_size = 12 * KB * KB;  // 12M fallback
        FILE * file = fopen ("/sys/devices/system/cpu/cpu0/cache/index3/size", "r");
        if (!file) {
            return default_size;
        }
        char line[128] = {0};
        // %lu requires unsigned long*, so parse into a matching type and
        // widen afterwards (passing int64_t* here is undefined behavior
        // on platforms where long is 32-bit).
        unsigned long parsed = 0;
        int64_t size = 0;
        // Check fgets: on a read failure the buffer would otherwise be
        // scanned uninitialized.
        if (fgets (line, sizeof (line) - 1, file) != nullptr &&
            sscanf (line, "%luK", &parsed) == 1) {
            size = (int64_t) parsed * KB;
        }
        fclose (file);
        // A failed parse must not yield 0: callers divide the returned
        // size to compute block counts.
        return size > 0 ? size : default_size;
    } ();
    return l3_size;
}
}
// namespace faiss
core/src/index/thirdparty/faiss/utils/utils.h
浏览文件 @
eba5bdf3
...
...
@@ -160,6 +160,9 @@ uint64_t hash_bytes (const uint8_t *bytes, int64_t n);
/** Whether OpenMP annotations were respected. */
bool
check_openmp
();
/** get the size of L3 cache */
int64_t
get_L3_Size
();
}
// namespace faiss
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录