IVFPQ.cuh 5.7 KB
Newer Older
J
JinHai-CN 已提交
1 2 3 4 5 6 7 8 9 10
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */


#pragma once

C
Cai Yudong 已提交
11
#include <faiss/MetricType.h>
J
JinHai-CN 已提交
12 13 14 15 16 17 18 19 20
#include <faiss/gpu/impl/IVFBase.cuh>
#include <faiss/gpu/utils/Float16.cuh>

namespace faiss { namespace gpu {

/// Implementing class for IVFPQ on the GPU.
/// Derives from IVFBase and declares the interface for adding encoded
/// vectors to inverted lists, running k-nearest-neighbor queries, and
/// managing the product quantizer centroids and optional precomputed codes.
class IVFPQ : public IVFBase {
 public:
  /// Constructs the index from a coarse quantizer and the PQ parameters.
  /// NOTE(review): the meaning of each argument is inferred from its name and
  /// from the member comments in this class; confirm against the
  /// implementation file.
  ///   - metric / metricArg: distance metric selection and its argument
  ///   - quantizer: coarse quantizer; not owned by this object
  ///   - numSubQuantizers / bitsPerSubQuantizer: shape of the product
  ///     quantizer (see numSubQuantizers_ and bitsPerSubQuantizer_)
  ///   - pqCentroidData: initial PQ centroids (see setPQCentroids_ for the
  ///     layout that method expects)
  ///   - useFloat16LookupTables: see useFloat16LookupTables_
  IVFPQ(GpuResources* resources,
C
Cai Yudong 已提交
21 22
        faiss::MetricType metric,
        float metricArg,
J
JinHai-CN 已提交
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
        /// We do not own this reference
        FlatIndex* quantizer,
        int numSubQuantizers,
        int bitsPerSubQuantizer,
        float* pqCentroidData,
        IndicesOptions indicesOptions,
        bool useFloat16LookupTables,
        MemorySpace space);

  /// Returns true if we support PQ codes of this size
  static bool isSupportedPQCodeLength(int size);

  /// For no precomputed codes, is this a supported sub-dimension
  /// size?
  /// FIXME: get MM implementation working again
  static bool isSupportedNoPrecomputedSubDimSize(int dims);

  ~IVFPQ() override;

  /// Enable or disable pre-computed codes
  void setPrecomputedCodes(bool enable);

  /// Adds a set of codes and indices to a list; the data can be
  /// resident on either the host or the device
  /// NOTE(review): the method name says "FromCpu" while the comment above
  /// allows device-resident data; confirm which is actually supported.
  void addCodeVectorsFromCpu(int listId,
                             const void* codes,
                             const long* indices,
                             size_t numVecs);

  /// Calculates the residual and quantizes the vectors, adding them to
  /// this index
  /// The input data must be on our current device.
  /// Returns the number of vectors successfully added. Vectors may
  /// not be able to be added because they contain NaNs.
  int classifyAndAddVectors(Tensor<float, 2, true>& vecs,
X
Xiaohai Xu 已提交
58
                            Tensor<long, 1, true>& indices);
J
JinHai-CN 已提交
59 60 61 62

  /// Find the approximate k nearest neighbors for `queries` against
  /// our database; results are written to `outDistances` and `outIndices`.
  /// NOTE(review): `bitset` is presumably a packed per-vector filter mask and
  /// `nprobe` the number of inverted lists scanned per query; neither is
  /// established by this header, so confirm against the implementation.
  void query(Tensor<float, 2, true>& queries,
X
Xiaohai Xu 已提交
63
             Tensor<uint8_t, 1, true>& bitset,
J
JinHai-CN 已提交
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
             int nprobe,
             int k,
             Tensor<float, 2, true>& outDistances,
             Tensor<long, 2, true>& outIndices);

  /// Return the list codes of a particular list back to the CPU
  std::vector<unsigned char> getListCodes(int listId) const;

  /// Returns our set of sub-quantizers of the form
  /// (sub q)(code id)(sub dim)
  Tensor<float, 3, true> getPQCentroids();

 private:
  /// Sets the current product quantizer centroids; the data can be
  /// resident on either the host or the device. It will be transposed
  /// into our preferred data layout
  /// Data must be a row-major, 3-d array of size
  /// (numSubQuantizers, numSubQuantizerCodes, dim / numSubQuantizers)
  void setPQCentroids_(float* data);

  /// Calculate precomputed residual distance information
  void precomputeCodes_();

C
Cai Yudong 已提交
87 88 89 90 91
  /// Calculate precomputed residual distance information (for different coarse
  /// centroid type)
  template <typename CentroidT>
  void precomputeCodesT_();

J
JinHai-CN 已提交
92 93
  /// Runs kernels for scanning inverted lists with precomputed codes
  /// NOTE(review): `coarseDistances` / `coarseIndices` presumably hold the
  /// per-query coarse quantizer results that select which lists to scan;
  /// confirm against the caller.
  void runPQPrecomputedCodes_(Tensor<float, 2, true>& queries,
94
                              Tensor<uint8_t, 1, true>& bitset,
J
JinHai-CN 已提交
95 96 97 98 99 100 101 102
                              DeviceTensor<float, 2, true>& coarseDistances,
                              DeviceTensor<int, 2, true>& coarseIndices,
                              int k,
                              Tensor<float, 2, true>& outDistances,
                              Tensor<long, 2, true>& outIndices);

  /// Runs kernels for scanning inverted lists without precomputed codes
  /// Takes the same argument list as runPQPrecomputedCodes_.
  void runPQNoPrecomputedCodes_(Tensor<float, 2, true>& queries,
103
                                Tensor<uint8_t, 1, true>& bitset,
J
JinHai-CN 已提交
104 105 106 107 108 109
                                DeviceTensor<float, 2, true>& coarseDistances,
                                DeviceTensor<int, 2, true>& coarseIndices,
                                int k,
                                Tensor<float, 2, true>& outDistances,
                                Tensor<long, 2, true>& outIndices);

C
Cai Yudong 已提交
110 111 112 113 114 115 116 117 118 119 120
  /// Runs kernels for scanning inverted lists without precomputed codes (for
  /// different coarse centroid type)
  /// Takes the same argument list as runPQNoPrecomputedCodes_, with the
  /// coarse centroid element type supplied as `CentroidT`.
  template <typename CentroidT>
  void runPQNoPrecomputedCodesT_(Tensor<float, 2, true>& queries,
                                 Tensor<uint8_t, 1, true>& bitset,
                                 DeviceTensor<float, 2, true>& coarseDistances,
                                 DeviceTensor<int, 2, true>& coarseIndices,
                                 int k,
                                 Tensor<float, 2, true>& outDistances,
                                 Tensor<long, 2, true>& outIndices);

J
JinHai-CN 已提交
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
 private:
  // Data members. The first five are const, so they are fixed for the
  // lifetime of the object.

  /// Number of sub-quantizers per vector
  const int numSubQuantizers_;

  /// Number of bits per sub-quantizer
  const int bitsPerSubQuantizer_;

  /// Number of per sub-quantizer codes (2^bits)
  const int numSubQuantizerCodes_;

  /// Number of dimensions per each sub-quantizer
  const int dimPerSubQuantizer_;

  /// Do we maintain precomputed terms and lookup tables in float16
  /// form?
  const bool useFloat16LookupTables_;

  /// On the GPU, we prefer different PQ centroid data layouts for
  /// different purposes.
  ///
  /// (sub q)(sub dim)(code id)
  DeviceTensor<float, 3, true> pqCentroidsInnermostCode_;

  /// (sub q)(code id)(sub dim)
  DeviceTensor<float, 3, true> pqCentroidsMiddleCode_;

  /// Are precomputed codes enabled? (additional factoring and
  /// precomputation of the residual distance, to reduce query-time work)
  bool precomputedCodes_;

  /// Precomputed term 2 in float form
  /// (centroid id)(sub q)(code id)
  DeviceTensor<float, 3, true> precomputedCode_;

  /// Precomputed term 2 in half form
  /// This member is only declared when FAISS_USE_FLOAT16 is defined.
S
shengjun.li 已提交
156
#ifdef FAISS_USE_FLOAT16
J
JinHai-CN 已提交
157
  DeviceTensor<half, 3, true> precomputedCodeHalf_;
S
shengjun.li 已提交
158
#endif
J
JinHai-CN 已提交
159 160 161
};

} } // namespace