From 96c68edcb61f1d93b4a9d9e3d5009cee20d1b5cd Mon Sep 17 00:00:00 2001 From: "shengjun.li" <49774184+shengjun1985@users.noreply.github.com> Date: Thu, 28 May 2020 23:39:16 +0800 Subject: [PATCH] fix search by GPU (#2453) Signed-off-by: shengjun.li --- CHANGELOG.md | 1 + .../thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu | 7 ++----- .../index/thirdparty/faiss/gpu/impl/L2Select.cu | 9 +++------ .../faiss/gpu/utils/BlockSelectKernel.cuh | 16 +++++----------- core/src/scheduler/task/SearchTask.cpp | 9 --------- 5 files changed, 11 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index faf94d77..51986608 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Please mark all change in change log and use the issue from GitHub - \#2395 Fix large nq cudaMalloc error - \#2399 The nlist set by the user may not take effect - \#2403 MySQL max_idle_time is 10 by default +- \#2450 The deleted vectors may be found on GPU ## Feature diff --git a/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu b/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu index bbb32ad1..b575d3c0 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu @@ -75,10 +75,9 @@ pass1SelectLists(void** listIndices, topQueryToCentroid, opt); if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) { - heap.add(distanceStart[i], start + i); - } else { - heap.add((1.0 / 0.0), start + i); + heap.addThreadQ(distanceStart[i], start + i); } + heap.checkThreadQ(); } // Handle warp divergence separately @@ -91,8 +90,6 @@ pass1SelectLists(void** listIndices, opt); if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) { heap.addThreadQ(distanceStart[i], start + i); - } else { - heap.addThreadQ((1.0 / 0.0), start + i); } } diff --git a/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu b/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu index ed83c058..9ea70ec6 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu @@ -161,20 +161,17 @@ __global__ void l2SelectMinK(Tensor productDistances, if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { v = Math::add(centroidDistances[i], productDistances[row][i]); - } else { - v = (T)(1.0 / 0.0); + heap.addThreadQ(v, i); } - heap.add(v, i); + heap.checkThreadQ(); } if (i < productDistances.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { v = Math::add(centroidDistances[i], productDistances[row][i]); - } else { - v = (T)(1.0 / 0.0); + heap.addThreadQ(v, i); } - heap.addThreadQ(v, i); } heap.reduce(); diff --git a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh index 37342de2..238909d4 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh @@ -146,11 +146,10 @@ __global__ void blockSelect(Tensor in, for (; i < limit; i += ThreadsPerBlock) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { - heap.add(*inStart, (IndexType) i); - } else { - heap.add(-1.0, (IndexType) i); + heap.addThreadQ(*inStart, (IndexType) i); } - + heap.checkThreadQ(); + inStart += ThreadsPerBlock; } @@ -158,8 +157,6 @@ __global__ void blockSelect(Tensor in, if (i < in.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { heap.addThreadQ(*inStart, (IndexType) i); - } else { - heap.addThreadQ(-1.0, (IndexType) i); } } @@ -208,10 +205,9 @@ __global__ void blockSelectPair(Tensor inK, for (; i < limit; i += ThreadsPerBlock) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { - heap.add(*inKStart, *inVStart); - } else { - heap.add(-1.0, *inVStart); + heap.addThreadQ(*inKStart, *inVStart); } + heap.checkThreadQ(); inKStart += ThreadsPerBlock; inVStart += ThreadsPerBlock; @@ -221,8 +217,6 @@ __global__ void blockSelectPair(Tensor inK, if (i < inK.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { heap.addThreadQ(*inKStart, *inVStart); - } else { - heap.addThreadQ(-1.0, *inVStart); } } diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 36b41482..2c42c37e 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -288,15 +288,6 @@ XSearchTask::Execute() { { std::unique_lock lock(search_job->mutex()); - - if (search_job->GetResultIds().size() > spec_k) { - if (search_job->GetResultIds().front() == -1) { - // initialized results set - search_job->GetResultIds().resize(spec_k * nq); - search_job->GetResultDistances().resize(spec_k * nq); - } - } - search_job->vector_count() = nq; XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, ascending_reduce, search_job->GetResultIds(), search_job->GetResultDistances()); -- GitLab