Unverified commit 88cf5e2b authored by Xiaohai Xu, committed by GitHub

Optimize functions (#1803)

Signed-off-by: sahuang <xiaohai.xu@zilliz.com>
Parent dbbf91db
......
@@ -212,13 +212,11 @@ GpuIndexIVFFlat::addImpl_(int n,
   // Data is already resident on the GPU
   Tensor<float, 2, true> data(const_cast<float*>(x), {n, (int) this->d});
 
-  auto bitset = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
-
   static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
   Tensor<long, 1, true> labels(const_cast<long*>(xids), {n});
 
   // Not all vectors may be able to be added (some may contain NaNs etc)
-  index_->classifyAndAddVectors(data, labels, bitset);
+  index_->classifyAndAddVectors(data, labels);
 
   // but keep the ntotal based on the total number of vectors that we attempted
   // to add
......
......
@@ -335,13 +335,11 @@ GpuIndexIVFPQ::addImpl_(int n,
   // Data is already resident on the GPU
   Tensor<float, 2, true> data(const_cast<float*>(x), {n, (int) this->d});
 
-  auto bitset = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
-
   static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
   Tensor<long, 1, true> labels(const_cast<long*>(xids), {n});
 
   // Not all vectors may be able to be added (some may contain NaNs etc)
-  index_->classifyAndAddVectors(data, labels, bitset);
+  index_->classifyAndAddVectors(data, labels);
 
   // but keep the ntotal based on the total number of vectors that we attempted
   // to add
......
......
@@ -309,13 +309,11 @@ GpuIndexIVFSQHybrid::addImpl_(int n,
   // Data is already resident on the GPU
   Tensor<float, 2, true> data(const_cast<float*>(x), {n, (int) this->d});
 
-  auto bitset = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
-
   static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
   Tensor<long, 1, true> labels(const_cast<long*>(xids), {n});
 
   // Not all vectors may be able to be added (some may contain NaNs etc)
-  index_->classifyAndAddVectors(data, labels, bitset);
+  index_->classifyAndAddVectors(data, labels);
 
   // but keep the ntotal based on the total number of vectors that we attempted
   // to add
......
......
@@ -244,13 +244,11 @@ GpuIndexIVFScalarQuantizer::addImpl_(int n,
   // Data is already resident on the GPU
   Tensor<float, 2, true> data(const_cast<float*>(x), {n, (int) this->d});
 
-  auto bitset = toDevice<uint8_t, 1>(resources_, device_, nullptr, stream, {0});
-
   static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
   Tensor<long, 1, true> labels(const_cast<long*>(xids), {n});
 
   // Not all vectors may be able to be added (some may contain NaNs etc)
-  index_->classifyAndAddVectors(data, labels, bitset);
+  index_->classifyAndAddVectors(data, labels);
 
   // but keep the ntotal based on the total number of vectors that we attempted
   // to add
......
......
@@ -157,14 +157,15 @@ IVFFlat::addCodeVectorsFromCpu(int listId,
 
 int
 IVFFlat::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
-                               Tensor<long, 1, true>& indices,
-                               Tensor<uint8_t, 1, true>& bitset) {
+                               Tensor<long, 1, true>& indices) {
   FAISS_ASSERT(vecs.getSize(0) == indices.getSize(0));
   FAISS_ASSERT(vecs.getSize(1) == dim_);
 
   auto& mem = resources_->getMemoryManagerCurrentDevice();
   auto stream = resources_->getDefaultStreamCurrentDevice();
 
+  DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
+
   // Number of valid vectors that we actually add; we return this
   int numAdded = 0;
......
......
@@ -44,8 +44,7 @@ class IVFFlat : public IVFBase {
   /// Returns the number of vectors successfully added. Vectors may
   /// not be able to be added because they contain NaNs.
   int classifyAndAddVectors(Tensor<float, 2, true>& vecs,
-                            Tensor<long, 1, true>& indices,
-                            Tensor<uint8_t, 1, true>& bitset);
+                            Tensor<long, 1, true>& indices);
 
   /// Find the approximate k nearest neigbors for `queries` against
......
......
@@ -110,8 +110,7 @@ IVFPQ::setPrecomputedCodes(bool enable) {
 
 int
 IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
-                             Tensor<long, 1, true>& indices,
-                             Tensor<uint8_t, 1, true>& bitset) {
+                             Tensor<long, 1, true>& indices) {
   FAISS_ASSERT(vecs.getSize(0) == indices.getSize(0));
   FAISS_ASSERT(vecs.getSize(1) == dim_);
......
@@ -119,6 +118,8 @@ IVFPQ::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
   auto& coarseCentroids = quantizer_->getVectorsFloat32Ref();
   auto& mem = resources_->getMemoryManagerCurrentDevice();
   auto stream = resources_->getDefaultStreamCurrentDevice();
 
+  DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream);
+
   // Number of valid vectors that we actually add; we return this
   int numAdded = 0;
......
......
@@ -52,8 +52,7 @@ class IVFPQ : public IVFBase {
   /// Returns the number of vectors successfully added. Vectors may
   /// not be able to be added because they contain NaNs.
   int classifyAndAddVectors(Tensor<float, 2, true>& vecs,
-                            Tensor<long, 1, true>& indices,
-                            Tensor<uint8_t, 1, true>& bitset);
+                            Tensor<long, 1, true>& indices);
 
   /// Find the approximate k nearest neigbors for `queries` against
   /// our database
......
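The net effect of the diff above is an API simplification: `classifyAndAddVectors` drops its `bitset` parameter and builds an empty device bitset internally (`DeviceTensor<uint8_t, 1, true> bitset(mem, {0}, stream)`), so each `addImpl_` caller no longer constructs and forwards one. The snippet below is a minimal, self-contained sketch of that refactoring pattern; `Bitset` and `ToyIndex` are hypothetical host-side stand-ins for illustration only, not the actual FAISS GPU classes.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for Tensor<uint8_t, 1, true>; the real code builds a
// DeviceTensor on the GPU via the memory manager and stream.
using Bitset = std::vector<uint8_t>;

struct ToyIndex {
  // Before the change the caller had to supply a bitset:
  //   int classifyAndAddVectors(const std::vector<float>& vecs,
  //                             const std::vector<long>& indices,
  //                             const Bitset& bitset);
  // After the change the parameter is gone; an empty bitset is created inside.
  int classifyAndAddVectors(const std::vector<float>& vecs,
                            const std::vector<long>& indices) {
    Bitset bitset;   // empty, so nothing is filtered out during add
    (void)bitset;    // the real implementation threads this through its kernels
    (void)vecs;
    // Return the number of vectors "added"; the real code may add fewer,
    // since vectors containing NaNs are skipped.
    return static_cast<int>(indices.size());
  }
};

int main() {
  ToyIndex index;
  std::vector<float> data{0.f, 1.f, 2.f, 3.f};  // two 2-d vectors
  std::vector<long> labels{42, 43};

  // Caller side after the change: no bitset is constructed or passed.
  std::cout << index.classifyAndAddVectors(data, labels) << " added\n";
  return 0;
}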