diff --git a/CHANGELOG.md b/CHANGELOG.md index faf94d7769e7061980e7810b89fecd7c6c7537fd..519866081d967358152aa5f3aee537f54cca773e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Please mark all change in change log and use the issue from GitHub - \#2395 Fix large nq cudaMalloc error - \#2399 The nlist set by the user may not take effect - \#2403 MySQL max_idle_time is 10 by default +- \#2450 The deleted vectors may be found on GPU ## Feature diff --git a/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu b/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu index bbb32ad1d67301fc7812fc6e1f194559cd9b0c9e..b575d3c0a4ce9f6f2b9bc5450cb9c12c72a01cde 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu @@ -75,10 +75,9 @@ pass1SelectLists(void** listIndices, topQueryToCentroid, opt); if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) { - heap.add(distanceStart[i], start + i); - } else { - heap.add((1.0 / 0.0), start + i); + heap.addThreadQ(distanceStart[i], start + i); } + heap.checkThreadQ(); } // Handle warp divergence separately @@ -91,8 +90,6 @@ pass1SelectLists(void** listIndices, opt); if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) { heap.addThreadQ(distanceStart[i], start + i); - } else { - heap.addThreadQ((1.0 / 0.0), start + i); } } diff --git a/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu b/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu index ed83c058ca10a481eed3f4b2a5c048997e410fb0..9ea70ec651810a5f7761007087239fbb5ef4d803 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu @@ -161,20 +161,17 @@ __global__ void l2SelectMinK(Tensor productDistances, if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { v = Math::add(centroidDistances[i], productDistances[row][i]); - } else { - v = (T)(1.0 / 0.0); + heap.addThreadQ(v, i); } - heap.add(v, i); + heap.checkThreadQ(); } if (i < productDistances.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { v = Math::add(centroidDistances[i], productDistances[row][i]); - } else { - v = (T)(1.0 / 0.0); + heap.addThreadQ(v, i); } - heap.addThreadQ(v, i); } heap.reduce(); diff --git a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh index 37342de24b44a7a29bfa8b9bcc0da575b1030ade..238909d4b07e2692690044129176eb6a76fb8010 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh @@ -146,11 +146,10 @@ __global__ void blockSelect(Tensor in, for (; i < limit; i += ThreadsPerBlock) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { - heap.add(*inStart, (IndexType) i); - } else { - heap.add(-1.0, (IndexType) i); + heap.addThreadQ(*inStart, (IndexType) i); } - + heap.checkThreadQ(); + inStart += ThreadsPerBlock; } @@ -158,8 +157,6 @@ __global__ void blockSelect(Tensor in, if (i < in.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { heap.addThreadQ(*inStart, (IndexType) i); - } else { - heap.addThreadQ(-1.0, (IndexType) i); } } @@ -208,10 +205,9 @@ __global__ void blockSelectPair(Tensor inK, for (; i < limit; i += ThreadsPerBlock) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { - heap.add(*inKStart, *inVStart); - } else { - heap.add(-1.0, *inVStart); + heap.addThreadQ(*inKStart, *inVStart); } + heap.checkThreadQ(); inKStart += ThreadsPerBlock; inVStart += ThreadsPerBlock; @@ -221,8 +217,6 @@ __global__ void blockSelectPair(Tensor inK, if (i < inK.getSize(1)) { if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) { heap.addThreadQ(*inKStart, *inVStart); - } else { - heap.addThreadQ(-1.0, *inVStart); } } diff --git a/core/src/scheduler/task/SearchTask.cpp b/core/src/scheduler/task/SearchTask.cpp index 36b41482d422e8c815a7691c48755070ac25a699..2c42c37ec6d965a2f560e06ade850e47886ba2e9 100644 --- a/core/src/scheduler/task/SearchTask.cpp +++ b/core/src/scheduler/task/SearchTask.cpp @@ -288,15 +288,6 @@ XSearchTask::Execute() { { std::unique_lock lock(search_job->mutex()); - - if (search_job->GetResultIds().size() > spec_k) { - if (search_job->GetResultIds().front() == -1) { - // initialized results set - search_job->GetResultIds().resize(spec_k * nq); - search_job->GetResultDistances().resize(spec_k * nq); - } - } - search_job->vector_count() = nq; XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, ascending_reduce, search_job->GetResultIds(), search_job->GetResultDistances());