未验证 提交 c3bf2704 编写于 作者: C Cai Yudong 提交者: GitHub

support avx512 (#1195)

* #1122 support AVX-512 for IVFFLAT distance calculation
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 support AVX-512 for IVFSQ8
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix avx512 build option
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 remove global faiss compile option -march=skylake-avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add InstructionSet and unittest
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 update InstructionSet unittest
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add FaissHook
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add distances_simd_avx512.cpp
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix build issue
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add ScalarQuantizer_avx512.cpp and IndexScalarQuantizer_avx512.cpp
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix build issue
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix clang format
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 move test_instructionset.cpp to index/unittest
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix clang format
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 set hook default to AVX
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add hidden config use_avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix faiss clone issue
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix faiss build issue
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix faiss ScalarQuantizer_avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix faiss ScalarQuantizer_avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 clean code
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 remove ScalarQuantizer_avx512 and IndexScalarQuantizer_avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 change back index_factory.cpp/index_read.cpp/index_write.cpp
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 split ScalarQuantizer to ScalarQuantizerCodec and Similarity
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 split ScalarQuantizer to ScalarQuantizerCodec
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 split ScalarQuantizerCodec.cpp to ScalarQuantizerOp.cpp
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 ScalarQuantizer support avx512
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add hook sse for ScalarQuantizer
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 correct header file
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 add changelog
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix CodeFactor
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>

* #1122 fix unittest
Signed-off-by: Nyudong.cai <yudong.cai@zilliz.com>
上级 79ac5443
......@@ -41,7 +41,9 @@ Please mark all change in change log and use the issue from GitHub
- \#823 - Support binary vector tanimoto/jaccard/hamming metric
- \#853 - Support HNSW
- \#910 - Change Milvus c++ standard to c++17
- \#1122 - Support AVX-512 in FAISS
- \#1204 - Add api to get table data information
- \#1250 - Support CPU profiling
- \#1302 - Get all record IDs in a segment given a segment id
## Improvement
......
// -*- c++ -*-
#include <iostream>
#include <mutex>
#include <faiss/FaissHook.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ScalarQuantizerDC.h>
#include <faiss/impl/ScalarQuantizerDC_avx512.h>
#include <faiss/utils/distances.h>
#include <faiss/utils/distances_avx512.h>
#include <faiss/utils/instruction_set.h>
namespace faiss {
/* Switch read by support_avx512(): when false, the AVX-512 path is never
 * reported as available, regardless of what the CPU supports. */
bool faiss_use_avx512 = true;
/* Every hook starts out pointing at the AVX implementation; hook_init()
 * re-points them according to the instruction sets detected at run time. */
/* Float-vector distance kernels. */
fvec_func_ptr fvec_inner_product = fvec_inner_product_avx;
fvec_func_ptr fvec_L2sqr = fvec_L2sqr_avx;
fvec_func_ptr fvec_L1 = fvec_L1_avx;
fvec_func_ptr fvec_Linf = fvec_Linf_avx;
/* Scalar-quantizer factories: distance computers (L2 / IP) and quantizer selection. */
sq_get_func_ptr sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
sq_get_func_ptr sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
sq_sel_func_ptr sq_sel_quantizer = sq_select_quantizer_avx;
/*****************************************************************************/
bool support_avx512() {
if (!faiss_use_avx512) return false;
InstructionSet& instruction_set_inst = InstructionSet::GetInstance();
return (instruction_set_inst.AVX512F() &&
instruction_set_inst.AVX512DQ() &&
instruction_set_inst.AVX512BW());
}
bool support_avx() {
InstructionSet& instruction_set_inst = InstructionSet::GetInstance();
return (instruction_set_inst.AVX2());
}
bool support_sse() {
InstructionSet& instruction_set_inst = InstructionSet::GetInstance();
return (instruction_set_inst.SSE());
}
/* Points every FAISS hook at the widest implementation the CPU supports.
 * Preference order: AVX-512, then AVX, then SSE. If none is available the
 * FAISS assertion fires. The whole selection runs under a function-local
 * mutex so concurrent callers do not interleave their assignments. */
void hook_init() {
    static std::mutex hook_mutex;
    std::lock_guard<std::mutex> guard(hook_mutex);

    if (support_avx512()) {
        /* for IVFFLAT */
        fvec_inner_product = fvec_inner_product_avx512;
        fvec_L2sqr = fvec_L2sqr_avx512;
        fvec_L1 = fvec_L1_avx512;
        fvec_Linf = fvec_Linf_avx512;
        /* for IVFSQ */
        sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx512;
        sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx512;
        sq_sel_quantizer = sq_select_quantizer_avx512;
        std::cout << "FAISS hook AVX512" << std::endl;
        return;
    }

    if (support_avx()) {
        /* for IVFFLAT */
        fvec_inner_product = fvec_inner_product_avx;
        fvec_L2sqr = fvec_L2sqr_avx;
        fvec_L1 = fvec_L1_avx;
        fvec_Linf = fvec_Linf_avx;
        /* for IVFSQ */
        sq_get_distance_computer_L2 = sq_get_distance_computer_L2_avx;
        sq_get_distance_computer_IP = sq_get_distance_computer_IP_avx;
        sq_sel_quantizer = sq_select_quantizer_avx;
        std::cout << "FAISS hook AVX" << std::endl;
        return;
    }

    if (support_sse()) {
        /* for IVFFLAT */
        fvec_inner_product = fvec_inner_product_sse;
        fvec_L2sqr = fvec_L2sqr_sse;
        fvec_L1 = fvec_L1_sse;
        fvec_Linf = fvec_Linf_sse;
        /* for IVFSQ */
        sq_get_distance_computer_L2 = sq_get_distance_computer_L2_sse;
        sq_get_distance_computer_IP = sq_get_distance_computer_IP_sse;
        sq_sel_quantizer = sq_select_quantizer_sse;
        std::cout << "FAISS hook SSE" << std::endl;
        return;
    }

    /* No usable instruction set was detected. */
    FAISS_ASSERT_MSG(false, "CPU not supported!");
}
} // namespace faiss
// -*- c++ -*-
#pragma once
#include <vector>
#include <stddef.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
/* Signature of a float-vector distance kernel: two input vectors and a length. */
using fvec_func_ptr = float (*)(const float*, const float*, size_t);
/* Signature of a factory returning a scalar-quantizer distance computer. */
using sq_get_func_ptr = SQDistanceComputer* (*)(QuantizerType, size_t, const std::vector<float>&);
/* Signature of a factory returning a scalar quantizer. */
using sq_sel_func_ptr = Quantizer* (*)(QuantizerType, size_t, const std::vector<float>&);

/* Switch consulted by support_avx512(). */
extern bool faiss_use_avx512;

/* Distance-kernel hooks. */
extern fvec_func_ptr fvec_inner_product;
extern fvec_func_ptr fvec_L2sqr;
extern fvec_func_ptr fvec_L1;
extern fvec_func_ptr fvec_Linf;

/* Scalar-quantizer hooks. */
extern sq_get_func_ptr sq_get_distance_computer_L2;
extern sq_get_func_ptr sq_get_distance_computer_IP;
extern sq_sel_func_ptr sq_sel_quantizer;

/* Capability query and hook (re)initialisation. */
bool support_avx512();
void hook_init();
} // namespace faiss
......@@ -12,6 +12,7 @@
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/distances.h>
#include <faiss/FaissHook.h>
#include <cstring>
......
......@@ -27,7 +27,7 @@
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/IndexFlat.h>
#include <faiss/utils/distances.h>
#include <faiss/FaissHook.h>
/*
#include <faiss/utils/Heap.h>
......
......@@ -16,7 +16,7 @@
#include <faiss/utils/Heap.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/FaissHook.h>
namespace faiss {
......
......@@ -37,7 +37,7 @@
#include <faiss/IndexIVFPQ.h>
#include <faiss/Index2Layer.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/FaissHook.h>
extern "C" {
......@@ -860,7 +860,7 @@ void IndexHNSWPQ::train(idx_t n, const float* x)
**************************************************************/
IndexHNSWSQ::IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M):
IndexHNSWSQ::IndexHNSWSQ(int d, QuantizerType qtype, int M):
IndexHNSW (new IndexScalarQuantizer (d, qtype), M)
{
is_trained = false;
......
......@@ -149,7 +149,7 @@ struct IndexHNSWPQ : IndexHNSW {
*/
struct IndexHNSWSQ : IndexHNSW {
IndexHNSWSQ();
IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
IndexHNSWSQ(int d, QuantizerType qtype, int M);
};
/** 2-level code structure with fast random access
......
......@@ -17,7 +17,7 @@
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/FaissHook.h>
namespace faiss {
......
......@@ -22,6 +22,7 @@
#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>
#include <faiss/FaissHook.h>
#include <faiss/utils/hamming.h>
......
......@@ -14,7 +14,7 @@
#include <faiss/utils/distances.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/FaissHook.h>
namespace faiss {
......
......@@ -26,7 +26,7 @@ namespace faiss {
IndexIVFSQHybrid::IndexIVFSQHybrid (
Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
QuantizerType qtype,
MetricType metric, bool encode_residual)
: IndexIVF(quantizer, d, nlist, 0, metric),
sq(d, qtype),
......@@ -54,7 +54,7 @@ void IndexIVFSQHybrid::encode_vectors(idx_t n, const float* x,
uint8_t * codes,
bool include_listnos) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
std::unique_ptr<Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = include_listnos ? coarse_code_size () : 0;
memset(codes, 0, (code_size + coarse_size) * n);
......@@ -85,7 +85,7 @@ void IndexIVFSQHybrid::encode_vectors(idx_t n, const float* x,
void IndexIVFSQHybrid::sa_decode (idx_t n, const uint8_t *codes,
float *x) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
std::unique_ptr<Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = coarse_code_size ();
#pragma omp parallel if(n > 1)
......@@ -117,7 +117,7 @@ void IndexIVFSQHybrid::add_with_ids
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0;
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
std::unique_ptr<Quantizer> squant(sq.select_quantizer ());
#pragma omp parallel reduction(+: nadd)
{
......
......@@ -15,6 +15,7 @@
#include <faiss/IndexIVF.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
......@@ -30,7 +31,7 @@ struct IndexIVFSQHybrid: IndexIVF {
bool by_residual;
IndexIVFSQHybrid(Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
QuantizerType qtype,
MetricType metric = METRIC_L2,
bool encode_residual = true);
......
......@@ -18,6 +18,7 @@
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
......@@ -28,20 +29,20 @@ namespace faiss {
********************************************************************/
IndexScalarQuantizer::IndexScalarQuantizer
(int d, ScalarQuantizer::QuantizerType qtype,
(int d, QuantizerType qtype,
MetricType metric):
Index(d, metric),
sq (d, qtype)
{
is_trained =
qtype == ScalarQuantizer::QT_fp16 ||
qtype == ScalarQuantizer::QT_8bit_direct;
qtype == QuantizerType::QT_fp16 ||
qtype == QuantizerType::QT_8bit_direct;
code_size = sq.code_size;
}
IndexScalarQuantizer::IndexScalarQuantizer ():
IndexScalarQuantizer(0, ScalarQuantizer::QT_8bit)
IndexScalarQuantizer(0, QuantizerType::QT_8bit)
{}
void IndexScalarQuantizer::train(idx_t n, const float* x)
......@@ -105,8 +106,7 @@ void IndexScalarQuantizer::search(
DistanceComputer *IndexScalarQuantizer::get_distance_computer () const
{
ScalarQuantizer::SQDistanceComputer *dc =
sq.get_distance_computer (metric_type);
SQDistanceComputer *dc = sq.get_distance_computer (metric_type);
dc->code_size = sq.code_size;
dc->codes = codes.data();
return dc;
......@@ -122,7 +122,7 @@ void IndexScalarQuantizer::reset()
void IndexScalarQuantizer::reconstruct_n(
idx_t i0, idx_t ni, float* recons) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
std::unique_ptr<Quantizer> squant(sq.select_quantizer ());
for (size_t i = 0; i < ni; i++) {
squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
}
......@@ -161,7 +161,7 @@ void IndexScalarQuantizer::sa_decode (idx_t n, const uint8_t *bytes,
IndexIVFScalarQuantizer::IndexIVFScalarQuantizer (
Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
QuantizerType qtype,
MetricType metric, bool encode_residual)
: IndexIVF(quantizer, d, nlist, 0, metric),
sq(d, qtype),
......@@ -189,7 +189,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
uint8_t * codes,
bool include_listnos) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
std::unique_ptr<Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = include_listnos ? coarse_code_size () : 0;
memset(codes, 0, (code_size + coarse_size) * n);
......@@ -220,7 +220,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
float *x) const
{
std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
std::unique_ptr<Quantizer> squant (sq.select_quantizer ());
size_t coarse_size = coarse_code_size ();
#pragma omp parallel if(n > 1)
......@@ -252,7 +252,7 @@ void IndexIVFScalarQuantizer::add_with_ids
std::unique_ptr<int64_t []> idx (new int64_t [n]);
quantizer->assign (n, x, idx.get());
size_t nadd = 0;
std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer ());
std::unique_ptr<Quantizer> squant(sq.select_quantizer ());
#pragma omp parallel reduction(+: nadd)
{
......
......@@ -15,6 +15,7 @@
#include <faiss/IndexIVF.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
......@@ -44,7 +45,7 @@ struct IndexScalarQuantizer: Index {
* @param nbits number of bit per subvector index
*/
IndexScalarQuantizer (int d,
ScalarQuantizer::QuantizerType qtype,
QuantizerType qtype,
MetricType metric = METRIC_L2);
IndexScalarQuantizer ();
......@@ -93,7 +94,7 @@ struct IndexIVFScalarQuantizer: IndexIVF {
bool by_residual;
IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
ScalarQuantizer::QuantizerType qtype,
QuantizerType qtype,
MetricType metric = METRIC_L2,
bool encode_residual = true);
......
......@@ -7,6 +7,7 @@
HEADERS = $(wildcard *.h impl/*.h utils/*.h)
SRC = $(wildcard *.cpp impl/*.cpp utils/*.cpp)
AVX512_SRC = $(wildcard *avx512.cpp impl/*avx512.cpp utils/*avx512.cpp)
OBJ = $(SRC:.cpp=.o)
INSTALLDIRS = $(DESTDIR)$(libdir) $(DESTDIR)$(includedir)/faiss
......@@ -41,6 +42,10 @@ libfaiss.$(SHAREDEXT): $(OBJ)
%.o: %.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -c $< -o $@
# support avx512
%avx512.o: %avx512.cpp
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $(CPUFLAGS) -mavx512f -mavx512dq -mavx512bw -c $< -o $@
%.o: %.cu
$(NVCC) $(NVCCFLAGS) -c $< -o $@
......
......@@ -19,6 +19,7 @@
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/IndexPQ.h>
#include <faiss/FaissHook.h>
using namespace faiss;
......
......@@ -43,7 +43,7 @@ GpuIndexIVFSQHybrid::GpuIndexIVFSQHybrid(
GpuResources* resources,
int dims,
int nlist,
faiss::ScalarQuantizer::QuantizerType qtype,
faiss::QuantizerType qtype,
faiss::MetricType metric,
bool encodeResidual,
GpuIndexIVFSQHybridConfig config) :
......
......@@ -37,7 +37,7 @@ class GpuIndexIVFSQHybrid : public GpuIndexIVF {
GpuResources* resources,
int dims,
int nlist,
faiss::ScalarQuantizer::QuantizerType qtype,
faiss::QuantizerType qtype,
faiss::MetricType metric = MetricType::METRIC_L2,
bool encodeResidual = true,
GpuIndexIVFSQHybridConfig config =
......
......@@ -41,7 +41,7 @@ GpuIndexIVFScalarQuantizer::GpuIndexIVFScalarQuantizer(
GpuResources* resources,
int dims,
int nlist,
faiss::ScalarQuantizer::QuantizerType qtype,
faiss::QuantizerType qtype,
faiss::MetricType metric,
bool encodeResidual,
GpuIndexIVFScalarQuantizerConfig config) :
......
......@@ -37,7 +37,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
GpuResources* resources,
int dims,
int nlist,
faiss::ScalarQuantizer::QuantizerType qtype,
faiss::QuantizerType qtype,
faiss::MetricType metric = MetricType::METRIC_L2,
bool encodeResidual = true,
GpuIndexIVFScalarQuantizerConfig config =
......
......@@ -14,14 +14,14 @@
namespace faiss { namespace gpu {
inline bool isSQSupported(ScalarQuantizer::QuantizerType qtype) {
inline bool isSQSupported(QuantizerType qtype) {
switch (qtype) {
case ScalarQuantizer::QuantizerType::QT_8bit:
case ScalarQuantizer::QuantizerType::QT_8bit_uniform:
case ScalarQuantizer::QuantizerType::QT_8bit_direct:
case ScalarQuantizer::QuantizerType::QT_4bit:
case ScalarQuantizer::QuantizerType::QT_4bit_uniform:
case ScalarQuantizer::QuantizerType::QT_fp16:
case QuantizerType::QT_8bit:
case QuantizerType::QT_8bit_uniform:
case QuantizerType::QT_8bit_direct:
case QuantizerType::QT_4bit:
case QuantizerType::QT_4bit_uniform:
case QuantizerType::QT_fp16:
return true;
default:
return false;
......@@ -107,7 +107,7 @@ struct CodecFloat {
// Arbitrary dimension fp16
template <>
struct Codec<ScalarQuantizer::QuantizerType::QT_fp16, 1> {
struct Codec<(int)QuantizerType::QT_fp16, 1> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = 1;
......@@ -145,7 +145,7 @@ struct Codec<ScalarQuantizer::QuantizerType::QT_fp16, 1> {
// dim % 2 == 0, ensures uint32 alignment
template <>
struct Codec<ScalarQuantizer::QuantizerType::QT_fp16, 2> {
struct Codec<(int)QuantizerType::QT_fp16, 2> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = 2;
......@@ -213,7 +213,7 @@ struct Get8BitType<4> { using T = uint32_t; };
// Uniform quantization across all dimensions
template <int DimMultiple>
struct Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, DimMultiple> {
struct Codec<(int)QuantizerType::QT_8bit_uniform, DimMultiple> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = DimMultiple;
using MemT = typename Get8BitType<DimMultiple>::T;
......@@ -307,7 +307,7 @@ struct Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, DimMultiple> {
// Uniform quantization per each dimension
template <int DimMultiple>
struct Codec<ScalarQuantizer::QuantizerType::QT_8bit, DimMultiple> {
struct Codec<(int)QuantizerType::QT_8bit, DimMultiple> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = DimMultiple;
using MemT = typename Get8BitType<DimMultiple>::T;
......@@ -421,7 +421,7 @@ struct Codec<ScalarQuantizer::QuantizerType::QT_8bit, DimMultiple> {
};
template <>
struct Codec<ScalarQuantizer::QuantizerType::QT_8bit_direct, 1> {
struct Codec<(int)QuantizerType::QT_8bit_direct, 1> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = 1;
......@@ -465,7 +465,7 @@ struct Codec<ScalarQuantizer::QuantizerType::QT_8bit_direct, 1> {
// Uniform quantization across all dimensions
template <>
struct Codec<ScalarQuantizer::QuantizerType::QT_4bit_uniform, 1> {
struct Codec<(int)QuantizerType::QT_4bit_uniform, 1> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = 2;
......@@ -525,7 +525,7 @@ struct Codec<ScalarQuantizer::QuantizerType::QT_4bit_uniform, 1> {
};
template <>
struct Codec<ScalarQuantizer::QuantizerType::QT_4bit, 1> {
struct Codec<(int)QuantizerType::QT_4bit, 1> {
/// How many dimensions per iteration we are handling for encoding or decoding
static constexpr int kDimPerIter = 2;
......
......@@ -286,17 +286,17 @@ runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
RUN_APPEND;
} else {
switch (scalarQ->qtype) {
case ScalarQuantizer::QuantizerType::QT_8bit:
case QuantizerType::QT_8bit:
{
if (false) {
// if (dim % 4 == 0) {
Codec<ScalarQuantizer::QuantizerType::QT_8bit, 4>
Codec<(int)QuantizerType::QT_8bit, 4>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
RUN_APPEND;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_8bit, 1>
Codec<(int)QuantizerType::QT_8bit, 1>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
......@@ -304,53 +304,53 @@ runIVFFlatInvertedListAppend(Tensor<int, 1, true>& listIds,
}
}
break;
case ScalarQuantizer::QuantizerType::QT_8bit_uniform:
case QuantizerType::QT_8bit_uniform:
{
// if (dim % 4 == 0) {
if (false) {
Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, 4>
Codec<(int)QuantizerType::QT_8bit_uniform, 4>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
RUN_APPEND;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, 1>
Codec<(int)QuantizerType::QT_8bit_uniform, 1>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
RUN_APPEND;
}
}
break;
case ScalarQuantizer::QuantizerType::QT_fp16:
case QuantizerType::QT_fp16:
{
// if (dim % 2 == 0) {
if (false) {
Codec<ScalarQuantizer::QuantizerType::QT_fp16, 2>
Codec<(int)QuantizerType::QT_fp16, 2>
codec(scalarQ->code_size);
RUN_APPEND;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_fp16, 1>
Codec<(int)QuantizerType::QT_fp16, 1>
codec(scalarQ->code_size);
RUN_APPEND;
}
}
break;
case ScalarQuantizer::QuantizerType::QT_8bit_direct:
case QuantizerType::QT_8bit_direct:
{
Codec<ScalarQuantizer::QuantizerType::QT_8bit_direct, 1>
Codec<(int)QuantizerType::QT_8bit_direct, 1>
codec(scalarQ->code_size);
RUN_APPEND;
}
break;
case ScalarQuantizer::QuantizerType::QT_4bit:
case QuantizerType::QT_4bit:
{
Codec<ScalarQuantizer::QuantizerType::QT_4bit, 1>
Codec<(int)QuantizerType::QT_4bit, 1>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
RUN_APPEND;
}
break;
case ScalarQuantizer::QuantizerType::QT_4bit_uniform:
case QuantizerType::QT_4bit_uniform:
{
Codec<ScalarQuantizer::QuantizerType::QT_4bit_uniform, 1>
Codec<(int)QuantizerType::QT_4bit_uniform, 1>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
RUN_APPEND;
}
......
......@@ -20,6 +20,7 @@
#include <faiss/gpu/utils/PtxUtils.cuh>
#include <faiss/gpu/utils/Reductions.cuh>
#include <faiss/gpu/utils/StaticUtils.h>
#include <faiss/impl/ScalarQuantizerOp.h>
#include <thrust/host_vector.h>
namespace faiss { namespace gpu {
......@@ -181,8 +182,8 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
// Check the amount of shared memory per block available based on our type is
// sufficient
if (scalarQ &&
(scalarQ->qtype == ScalarQuantizer::QuantizerType::QT_8bit ||
scalarQ->qtype == ScalarQuantizer::QuantizerType::QT_4bit)) {
(scalarQ->qtype == QuantizerType::QT_8bit ||
scalarQ->qtype == QuantizerType::QT_4bit)) {
int maxDim = getMaxSharedMemPerBlockCurrentDevice() /
(sizeof(float) * 2);
......@@ -230,18 +231,18 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
HANDLE_METRICS;
} else {
switch (scalarQ->qtype) {
case ScalarQuantizer::QuantizerType::QT_8bit:
case QuantizerType::QT_8bit:
{
// FIXME: investigate 32 bit load perf issues
// if (dim % 4 == 0) {
if (false) {
Codec<ScalarQuantizer::QuantizerType::QT_8bit, 4>
Codec<(int)QuantizerType::QT_8bit, 4>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
HANDLE_METRICS;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_8bit, 1>
Codec<(int)QuantizerType::QT_8bit, 1>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
......@@ -249,55 +250,55 @@ runIVFFlatScanTile(Tensor<float, 2, true>& queries,
}
}
break;
case ScalarQuantizer::QuantizerType::QT_8bit_uniform:
case QuantizerType::QT_8bit_uniform:
{
// FIXME: investigate 32 bit load perf issues
if (false) {
// if (dim % 4 == 0) {
Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, 4>
Codec<(int)QuantizerType::QT_8bit_uniform, 4>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
HANDLE_METRICS;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_8bit_uniform, 1>
Codec<(int)QuantizerType::QT_8bit_uniform, 1>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
HANDLE_METRICS;
}
}
break;
case ScalarQuantizer::QuantizerType::QT_fp16:
case QuantizerType::QT_fp16:
{
if (false) {
// FIXME: investigate 32 bit load perf issues
// if (dim % 2 == 0) {
Codec<ScalarQuantizer::QuantizerType::QT_fp16, 2>
Codec<(int)QuantizerType::QT_fp16, 2>
codec(scalarQ->code_size);
HANDLE_METRICS;
} else {
Codec<ScalarQuantizer::QuantizerType::QT_fp16, 1>
Codec<(int)QuantizerType::QT_fp16, 1>
codec(scalarQ->code_size);
HANDLE_METRICS;
}
}
break;
case ScalarQuantizer::QuantizerType::QT_8bit_direct:
case QuantizerType::QT_8bit_direct:
{
Codec<ScalarQuantizer::QuantizerType::QT_8bit_direct, 1>
Codec<(int)QuantizerType::QT_8bit_direct, 1>
codec(scalarQ->code_size);
HANDLE_METRICS;
}
break;
case ScalarQuantizer::QuantizerType::QT_4bit:
case QuantizerType::QT_4bit:
{
Codec<ScalarQuantizer::QuantizerType::QT_4bit, 1>
Codec<(int)QuantizerType::QT_4bit, 1>
codec(scalarQ->code_size,
scalarQ->gpuTrained.data(),
scalarQ->gpuTrained.data() + dim);
HANDLE_METRICS;
}
break;
case ScalarQuantizer::QuantizerType::QT_4bit_uniform:
case QuantizerType::QT_4bit_uniform:
{
Codec<ScalarQuantizer::QuantizerType::QT_4bit_uniform, 1>
Codec<(int)QuantizerType::QT_4bit_uniform, 1>
codec(scalarQ->code_size, scalarQ->trained[0], scalarQ->trained[1]);
HANDLE_METRICS;
}
......
......@@ -22,6 +22,7 @@
#include <faiss/utils/hamming.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/FaissHook.h>
/*****************************************
* Mixed PQ / Hamming
......
......@@ -10,7 +10,7 @@
#pragma once
#include <faiss/IndexIVF.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
......@@ -23,30 +23,12 @@ namespace faiss {
struct ScalarQuantizer {
enum QuantizerType {
QT_8bit, ///< 8 bits per component
QT_4bit, ///< 4 bits per component
QT_8bit_uniform, ///< same, shared range for all dimensions
QT_4bit_uniform,
QT_fp16,
QT_8bit_direct, /// fast indexing of uint8s
QT_6bit, ///< 6 bits per component
};
QuantizerType qtype;
/** The uniform encoder can estimate the range of representable
* values of the unform encoder using different statistics. Here
* rs = rangestat_arg */
// rangestat_arg.
enum RangeStat {
RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)]
RS_meanstd, ///< [mean - std * rs, mean + std * rs]
RS_quantiles, ///< [Q(rs), Q(1-rs)]
RS_optim, ///< alternate optimization of reconstruction error
};
RangeStat rangestat;
float rangestat_arg;
......@@ -85,27 +67,8 @@ struct ScalarQuantizer {
* computation and inverted list scanning
*****************************************************/
struct Quantizer {
// encodes one vector. Assumes code is filled with 0s on input!
virtual void encode_vector(const float *x, uint8_t *code) const = 0;
virtual void decode_vector(const uint8_t *code, float *x) const = 0;
virtual ~Quantizer() {}
};
Quantizer * select_quantizer() const;
struct SQDistanceComputer: DistanceComputer {
const float *q;
const uint8_t *codes;
size_t code_size;
SQDistanceComputer (): q(nullptr), codes (nullptr), code_size (0)
{}
};
SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
const;
......
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/impl/ScalarQuantizerDC.h>
#include <faiss/impl/ScalarQuantizerCodec.h>
namespace faiss {
#ifdef __AVX__
#define USE_AVX
#endif
/*******************************************************************
* ScalarQuantizer Distance Computer
********************************************************************/
/* AVX */
/* Builds an L2 distance computer for the AVX hook.
 * When compiled with AVX and dim is a multiple of 8, the 8-wide similarity
 * is used; otherwise the width-1 similarity is used. */
SQDistanceComputer *
sq_get_distance_computer_L2_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX
    const bool fits_8_lanes = (dim % 8 == 0);
    if (fits_8_lanes) {
        return select_distance_computer<SimilarityL2<8>> (qtype, dim, trained);
    }
#endif
    return select_distance_computer<SimilarityL2<1>> (qtype, dim, trained);
}
/* Builds an inner-product distance computer for the AVX hook.
 * When compiled with AVX and dim is a multiple of 8, the 8-wide similarity
 * is used; otherwise the width-1 similarity is used. */
SQDistanceComputer *
sq_get_distance_computer_IP_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX
    const bool fits_8_lanes = (dim % 8 == 0);
    if (fits_8_lanes) {
        return select_distance_computer<SimilarityIP<8>> (qtype, dim, trained);
    }
#endif
    return select_distance_computer<SimilarityIP<1>> (qtype, dim, trained);
}
/* Selects a quantizer for the AVX hook.
 * When compiled with AVX and dim is a multiple of 8, the 8-wide variant
 * is chosen; otherwise the width-1 variant is chosen. */
Quantizer *
sq_select_quantizer_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX
    const bool fits_8_lanes = (dim % 8 == 0);
    if (fits_8_lanes) {
        return select_quantizer_1<8> (qtype, dim, trained);
    }
#endif
    return select_quantizer_1<1> (qtype, dim, trained);
}
/* SSE */
/* SSE hook for L2: always builds the width-1 distance computer. */
SQDistanceComputer *
sq_get_distance_computer_L2_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    SQDistanceComputer *computer =
        select_distance_computer<SimilarityL2<1>> (qtype, dim, trained);
    return computer;
}
/* SSE hook for inner product: always builds the width-1 distance computer. */
SQDistanceComputer *
sq_get_distance_computer_IP_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    SQDistanceComputer *computer =
        select_distance_computer<SimilarityIP<1>> (qtype, dim, trained);
    return computer;
}
/* SSE hook for quantizer selection: always picks the width-1 variant. */
Quantizer *
sq_select_quantizer_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
    Quantizer *chosen = select_quantizer_1<1> (qtype, dim, trained);
    return chosen;
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
/* AVX variants: factory functions taking the quantizer type, the vector
 * dimension and the trained parameters. */

/* Returns an L2 distance computer for the AVX hook. */
SQDistanceComputer *
sq_get_distance_computer_L2_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
/* Returns an inner-product distance computer for the AVX hook. */
SQDistanceComputer *
sq_get_distance_computer_IP_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
/* Returns a quantizer for the AVX hook. */
Quantizer *
sq_select_quantizer_avx (QuantizerType qtype, size_t dim, const std::vector<float>& trained);

/* SSE variants: same parameters as the AVX variants above. */

/* Returns an L2 distance computer for the SSE hook. */
SQDistanceComputer *
sq_get_distance_computer_L2_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
/* Returns an inner-product distance computer for the SSE hook. */
SQDistanceComputer *
sq_get_distance_computer_IP_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
/* Returns a quantizer for the SSE hook. */
Quantizer *
sq_select_quantizer_sse (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <faiss/impl/ScalarQuantizerDC_avx512.h>
#include <faiss/impl/ScalarQuantizerCodec_avx512.h>
namespace faiss {
#ifdef __AVX__
#define USE_AVX
#endif
#if (defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__))
#define USE_AVX_512
#endif
/*******************************************************************
* ScalarQuantizer Distance Computer
********************************************************************/
/* Builds an L2 distance computer for the AVX-512 hook.
 * Width selection, widest first:
 *   - 16 lanes when compiled with AVX-512 (F/DQ/BW) and dim % 16 == 0
 *   - 8 lanes when compiled with AVX and dim % 8 == 0
 *   - 1 lane otherwise */
SQDistanceComputer *
sq_get_distance_computer_L2_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX_512
    if (dim % 16 == 0) {
        return select_distance_computer_avx512<SimilarityL2_avx512<16>> (qtype, dim, trained);
    }
#endif
#ifdef USE_AVX
    if (dim % 8 == 0) {
        return select_distance_computer_avx512<SimilarityL2_avx512<8>> (qtype, dim, trained);
    }
#endif
    return select_distance_computer_avx512<SimilarityL2_avx512<1>> (qtype, dim, trained);
}
/* Builds an inner-product distance computer for the AVX-512 hook.
 * Width selection, widest first:
 *   - 16 lanes when compiled with AVX-512 (F/DQ/BW) and dim % 16 == 0
 *   - 8 lanes when compiled with AVX and dim % 8 == 0
 *   - 1 lane otherwise
 * Every branch must instantiate the inner-product similarity. The 16-lane
 * branch previously instantiated SimilarityL2_avx512, which made IP searches
 * on dimensions divisible by 16 return L2 distances. */
SQDistanceComputer *
sq_get_distance_computer_IP_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX_512
    if (dim % 16 == 0) {
        return select_distance_computer_avx512<SimilarityIP_avx512<16>> (qtype, dim, trained);
    } else
#endif
#ifdef USE_AVX
    if (dim % 8 == 0) {
        return select_distance_computer_avx512<SimilarityIP_avx512<8>> (qtype, dim, trained);
    } else
#endif
    {
        return select_distance_computer_avx512<SimilarityIP_avx512<1>> (qtype, dim, trained);
    }
}
/// Create the quantizer (encoder/decoder) for the given quantizer type.
///
/// The SIMD width template argument is the widest one that both the build
/// supports and that divides `dim` evenly: 16 (USE_AVX_512), then 8
/// (USE_AVX), and finally 1.
Quantizer *
sq_select_quantizer_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained) {
#ifdef USE_AVX_512
    // widest path: dimension is a multiple of 16
    if (dim % 16 == 0)
        return select_quantizer_1_avx512<16> (qtype, dim, trained);
#endif

#ifdef USE_AVX
    // next best: dimension is a multiple of 8
    if (dim % 8 == 0)
        return select_quantizer_1_avx512<8> (qtype, dim, trained);
#endif

    // fallback that works for any dimension
    return select_quantizer_1_avx512<1> (qtype, dim, trained);
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <vector>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
// AVX-512 scalar-quantizer factories. Each one takes the quantizer type, the
// vector dimension and the trained quantizer parameters, and dispatches on
// `dim` to a 16-, 8- or 1-wide implementation depending on build flags.

// Factory for the L2 distance computer.
SQDistanceComputer *
sq_get_distance_computer_L2_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
// Factory for the inner-product (IP) distance computer.
SQDistanceComputer *
sq_get_distance_computer_IP_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
// Factory for the quantizer (encode_vector / decode_vector implementation).
Quantizer *
sq_select_quantizer_avx512 (QuantizerType qtype, size_t dim, const std::vector<float>& trained);
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <omp.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/ScalarQuantizerOp.h>
namespace faiss {
// USE_AVX is set when the compiler targets AVX; it gates the intrinsic-based
// fp16 conversion helpers that follow.
#ifdef __AVX__
#define USE_AVX
#endif
// The intrinsic implementation relies on the F16C conversion instructions
// (_mm_cvtps_ph / _mm_cvtph_ps). F16C is a CPU feature distinct from AVX, so
// both must be enabled at compile time; otherwise the portable code below is
// used instead of failing to build.
#if defined(USE_AVX) && defined(__F16C__)

/// Convert a float to its 16-bit half-precision bit pattern
/// (round to nearest, floating-point exceptions suppressed).
uint16_t encode_fp16 (float x) {
    __m128 xf = _mm_set1_ps (x);
    __m128i xi = _mm_cvtps_ph (
        xf, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);
    // only the lowest 16-bit lane is needed
    return _mm_cvtsi128_si32 (xi) & 0xffff;
}

/// Convert a 16-bit half-precision bit pattern back to float.
float decode_fp16 (uint16_t x) {
    __m128i xi = _mm_set1_epi16 (x);
    __m128 xf = _mm_cvtph_ps (xi);
    return _mm_cvtss_f32 (xf);
}

#else

// non-intrinsic FP16 <-> FP32 code adapted from
// https://github.com/ispc/ispc/blob/master/stdlib.ispc

/// Reinterpret the 32 bits of `x` as a float.
/// Uses memcpy rather than a pointer cast so the conversion does not violate
/// the strict-aliasing rules.
float floatbits (uint32_t x) {
    static_assert (sizeof(float) == sizeof(uint32_t),
                   "float must be 32 bits wide");
    float f;
    memcpy (&f, &x, sizeof(f));
    return f;
}

/// Reinterpret the 32 bits of `f` as an unsigned integer (inverse of floatbits).
uint32_t intbits (float f) {
    static_assert (sizeof(float) == sizeof(uint32_t),
                   "float must be 32 bits wide");
    uint32_t x;
    memcpy (&x, &f, sizeof(x));
    return x;
}

/// Convert a float to its 16-bit half-precision bit pattern.
uint16_t encode_fp16 (float f) {
    // via Fabian "ryg" Giesen.
    // https://gist.github.com/2156668
    uint32_t sign_mask = 0x80000000u;
    int32_t o;

    uint32_t fint = intbits(f);
    uint32_t sign = fint & sign_mask;
    fint ^= sign;   // work on the magnitude only; sign is re-attached at the end

    // NOTE all the integer compares in this function can be safely
    // compiled into signed compares since all operands are below
    // 0x80000000. Important if you want fast straight SSE2 code (since
    // there's no unsigned PCMPGTD).

    // Inf or NaN (all exponent bits set)
    // NaN->qNaN and Inf->Inf
    // unconditional assignment here, will override with right value for
    // the regular case below.
    uint32_t f32infty = 255u << 23;
    o = (fint > f32infty) ? 0x7e00u : 0x7c00u;

    // (De)normalized number or zero
    // update fint unconditionally to save the blending; we don't need it
    // anymore for the Inf/NaN case anyway.
    const uint32_t round_mask = ~0xfffu;
    const uint32_t magic = 15u << 23;

    // Shift exponent down, denormalize if necessary.
    // NOTE This represents half-float denormals using single
    // precision denormals. The main reason to do this is that
    // there's no shift with per-lane variable shifts in SSE*, which
    // we'd otherwise need. It has some funky side effects though:
    // - This conversion will actually respect the FTZ (Flush To Zero)
    // flag in MXCSR - if it's set, no half-float denormals will be
    // generated. I'm honestly not sure whether this is good or
    // bad. It's definitely interesting.
    // - If the underlying HW doesn't support denormals (not an issue
    // with Intel CPUs, but might be a problem on GPUs or PS3 SPUs),
    // you will always get flush-to-zero behavior. This is bad,
    // unless you're on a CPU where you don't care.
    // - Denormals tend to be slow. FP32 denormals are rare in
    // practice outside of things like recursive filters in DSP -
    // not a typical half-float application. Whether FP16 denormals
    // are rare in practice, I don't know. Whatever slow path your
    // HW may or may not have for denormals, this may well hit it.
    float fscale = floatbits(fint & round_mask) * floatbits(magic);
    fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u));
    int32_t fint2 = intbits(fscale) - round_mask;

    if (fint < f32infty)
        o = fint2 >> 13; // Take the bits!

    return (o | (sign >> 16));
}

/// Convert a 16-bit half-precision bit pattern back to float.
float decode_fp16 (uint16_t h) {
    // https://gist.github.com/2144712
    // Fabian "ryg" Giesen.
    const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift

    int32_t o = ((int32_t)(h & 0x7fffu)) << 13; // exponent/mantissa bits
    int32_t exp = shifted_exp & o; // just the exponent
    o += (int32_t)(127 - 15) << 23; // exponent adjust

    // candidate results for the three exponent classes; the right one is
    // selected below
    int32_t infnan_val = o + ((int32_t)(128 - 16) << 23);
    int32_t zerodenorm_val = intbits(
        floatbits(o + (1u<<23)) - floatbits(113u << 23));
    int32_t reg_val = (exp == 0) ? zerodenorm_val : o;

    int32_t sign_bit = ((int32_t)(h & 0x8000u)) << 16;
    return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
}

#endif
/*******************************************************************
* Quantizer range training
*/
/// Square of a float, computed in single precision.
static float sqr (float x) {
    const float squared = x * x;
    return squared;
}
/// Estimate a single quantization range shared by all `n` values in `x`.
///
/// On return `trained` has two entries: trained[0] is the lower bound of
/// the range and trained[1] is its width (upper bound minus lower bound).
///
/// @param rs      strategy used to derive the range
/// @param rs_arg  strategy-specific argument (margin, number of standard
///                deviations, or quantile, depending on `rs`)
/// @param n       number of values in `x`
/// @param k       number of quantization levels (used by RS_optim only)
/// @param x       input values
/// @param trained output, resized to 2
/// Throws via FAISS_THROW_MSG when `rs` is not a known strategy.
void train_Uniform(RangeStat rs, float rs_arg,
                   idx_t n, int k, const float *x,
                   std::vector<float> & trained)
{
    trained.resize (2);
    float & lo = trained[0];
    float & hi = trained[1];

    switch (rs) {
    case RangeStat::RS_minmax: {
        // observed [min, max], widened on both sides by rs_arg * (max - min)
        lo = HUGE_VAL;
        hi = -HUGE_VAL;
        for (size_t idx = 0; idx < n; idx++) {
            if (x[idx] < lo) lo = x[idx];
            if (x[idx] > hi) hi = x[idx];
        }
        float margin = (hi - lo) * rs_arg;
        lo -= margin;
        hi += margin;
        break;
    }
    case RangeStat::RS_meanstd: {
        // mean +/- rs_arg standard deviations; sums are kept in double
        double total = 0, total_sq = 0;
        for (size_t idx = 0; idx < n; idx++) {
            total += x[idx];
            total_sq += x[idx] * x[idx];
        }
        float mean = total / n;
        float var = total_sq / n - mean * mean;
        // a non-positive variance falls back to a unit deviation
        float stddev = var <= 0 ? 1.0 : sqrt(var);
        lo = mean - stddev * rs_arg ;
        hi = mean + stddev * rs_arg ;
        break;
    }
    case RangeStat::RS_quantiles: {
        std::vector<float> sorted_x(n);
        memcpy(sorted_x.data(), x, n * sizeof(*x));
        // TODO just do a quickselect
        std::sort(sorted_x.begin(), sorted_x.end());
        // number of samples cut off at each end of the sorted data
        int trim = int(rs_arg * n);
        if (trim < 0) trim = 0;
        if (trim > n - trim) trim = n / 2;
        lo = sorted_x[trim];
        hi = sorted_x[n - 1 - trim];
        break;
    }
    case RangeStat::RS_optim: {
        // Alternating optimisation of the reconstruction error: values are
        // modelled as level * a + b with level in [0, k - 1].
        float a, b;
        float sx = 0;
        {
            // start from the plain min/max range
            lo = HUGE_VAL;
            hi = -HUGE_VAL;
            for (size_t idx = 0; idx < n; idx++) {
                if (x[idx] < lo) lo = x[idx];
                if (x[idx] > hi) hi = x[idx];
                sx += x[idx];
            }
            b = lo;
            a = (hi - lo) / (k - 1);
        }

        const bool verbose = false;
        const int niter = 2000;
        float last_err = -1;
        int iter_last_err = 0;

        for (int it = 0; it < niter; it++) {
            float sn = 0, sn2 = 0, sxn = 0, err1 = 0;

            // assign every value to its nearest level under the current (a, b)
            for (idx_t i = 0; i < n; i++) {
                float xi = x[i];
                float ni = floor ((xi - b) / a + 0.5);
                if (ni < 0) ni = 0;
                if (ni >= k) ni = k - 1;
                err1 += sqr (xi - (ni * a + b));
                sn += ni;
                sn2 += ni * ni;
                sxn += ni * xi;
            }

            // stop once the error has been unchanged for 16 iterations in a row
            if (err1 != last_err) {
                last_err = err1;
                iter_last_err = 0;
            } else {
                iter_last_err ++;
                if (iter_last_err == 16) break;
            }

            // least-squares refit of (a, b) for the fixed assignment
            float det = sqr (sn) - sn2 * n;
            b = (sn * sxn - sn2 * sx) / det;
            a = (sn * sx - n * sxn) / det;

            if (verbose) {
                printf ("it %d, err1=%g \r", it, err1);
                fflush(stdout);
            }
        }
        if (verbose) printf("\n");

        lo = b;
        hi = b + a * (k - 1);
        break;
    }
    default:
        FAISS_THROW_MSG ("Invalid qtype");
    }

    // the second entry stores the width of the range, not its upper bound
    hi -= lo;
}
/// Estimate one quantization range per dimension.
///
/// `x` holds `n` vectors of dimension `d`, stored row-major. On return
/// `trained` has 2 * d entries: the first d are the per-dimension lower
/// bounds, the last d are the per-dimension range widths (upper - lower).
///
/// @param rs      strategy used to derive the ranges
/// @param rs_arg  strategy-specific argument, see train_Uniform
/// @param n       number of training vectors
/// @param d       vector dimension
/// @param k       number of quantization levels, forwarded to train_Uniform
/// @param x       training data, n * d floats
/// @param trained output, resized to 2 * d
void train_NonUniform(RangeStat rs, float rs_arg,
                      idx_t n, int d, int k, const float *x,
                      std::vector<float> & trained)
{
    trained.resize (2 * d);
    float * vmin = trained.data();
    float * vmax = trained.data() + d;

    if (rs == RangeStat::RS_minmax) {
        // seed min/max with the first vector, then scan the remaining ones
        memcpy (vmin, x, sizeof(*x) * d);
        memcpy (vmax, x, sizeof(*x) * d);
        for (size_t i = 1; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                if (xi[j] < vmin[j]) vmin[j] = xi[j];
                if (xi[j] > vmax[j]) vmax[j] = xi[j];
            }
        }
        // widen each range by rs_arg * (max - min) and store the width in
        // the slot that held the maximum
        float *vdiff = vmax;
        for (size_t j = 0; j < d; j++) {
            float vexp = (vmax[j] - vmin[j]) * rs_arg;
            vmin[j] -= vexp;
            vmax[j] += vexp;
            vdiff [j] = vmax[j] - vmin[j];
        }
    } else {
        // transpose so that every dimension is contiguous; all n vectors
        // must be copied, starting with row 0, otherwise the first training
        // vector would be replaced by zeros
        std::vector<float> xt(n * d);
        for (size_t i = 0; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                xt[j * n + i] = xi[j];
            }
        }
#pragma omp parallel for
        for (size_t j = 0; j < d; j++) {
            // scratch output is local to the iteration: train_Uniform
            // resizes and writes it, so sharing one vector between threads
            // would be a data race
            std::vector<float> trained_d(2);
            train_Uniform(rs, rs_arg,
                          n, k, xt.data() + j * n,
                          trained_d);
            vmin[j] = trained_d[0];
            vmax[j] = trained_d[1];
        }
    }
}
} // namespace faiss
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#pragma once
#include <cstdio>
#include <algorithm>
#include <omp.h>
#ifdef __SSE__
#include <immintrin.h>
#endif
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
namespace faiss {
// Shorthand for the index/count type of Index, used by the training helpers
// declared in this header.
typedef Index::idx_t idx_t;
/// Encoding used by the scalar quantizer for each vector component.
/// The numeric values are spelled out explicitly; they match the implicit
/// numbering of the declaration order.
enum class QuantizerType {
    QT_8bit         = 0,   ///< 8 bits per component
    QT_4bit         = 1,   ///< 4 bits per component
    QT_8bit_uniform = 2,   ///< same, shared range for all dimensions
    QT_4bit_uniform = 3,
    QT_fp16         = 4,
    QT_8bit_direct  = 5,   ///< fast indexing of uint8s
    QT_6bit         = 6,   ///< 6 bits per component
};
/// Strategy for estimating the quantization range during training.
/// In the descriptions below, `rs` stands for the numeric argument that
/// accompanies the strategy (rangestat_arg).
enum class RangeStat {
    RS_minmax    = 0,   ///< [min - rs*(max-min), max + rs*(max-min)]
    RS_meanstd   = 1,   ///< [mean - std * rs, mean + std * rs]
    RS_quantiles = 2,   ///< [Q(rs), Q(1-rs)]
    RS_optim     = 3,   ///< alternate optimization of reconstruction error
};
/// Abstract interface for converting one vector between its float
/// representation and its quantized byte code.
struct Quantizer {
    /// Encode the vector `x` into `code`.
    /// The caller must fill `code` with zeros before the call.
    virtual void encode_vector(const float *x, uint8_t *code) const = 0;

    /// Decode `code` back into the float vector `x`.
    virtual void decode_vector(const uint8_t *code, float *x) const = 0;

    virtual ~Quantizer() = default;
};
/// DistanceComputer that additionally carries a query pointer and a view of
/// the stored codes. All three members start out empty (null / zero).
struct SQDistanceComputer: DistanceComputer {
    const float *q = nullptr;        ///< query vector
    const uint8_t *codes = nullptr;  ///< start of the stored codes
    size_t code_size = 0;            ///< size of one code

    SQDistanceComputer () = default;
};
// Conversion between float and the 16-bit half-precision bit pattern.
extern uint16_t encode_fp16 (float x);
extern float decode_fp16 (uint16_t x);
// Range training over n scalar values: `trained` is resized to 2 entries,
// the lower bound followed by the width of the range.
extern void train_Uniform(RangeStat rs, float rs_arg,
idx_t n, int k, const float *x,
std::vector<float> & trained);
// Per-dimension range training over n vectors of dimension d: `trained` is
// resized to 2 * d entries, d lower bounds followed by d range widths.
extern void train_NonUniform(RangeStat rs, float rs_arg,
idx_t n, int d, int k, const float *x,
std::vector<float> & trained);
} // namespace faiss
......@@ -20,6 +20,7 @@
#include <algorithm>
#include <faiss/utils/distances.h>
#include <faiss/FaissHook.h>
namespace faiss {
......
......@@ -195,12 +195,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
}
} else if (!index && (stok == "SQ8" || stok == "SQ4" || stok == "SQ6" ||
stok == "SQfp16")) {
ScalarQuantizer::QuantizerType qt =
stok == "SQ8" ? ScalarQuantizer::QT_8bit :
stok == "SQ6" ? ScalarQuantizer::QT_6bit :
stok == "SQ4" ? ScalarQuantizer::QT_4bit :
stok == "SQfp16" ? ScalarQuantizer::QT_fp16 :
ScalarQuantizer::QT_4bit;
QuantizerType qt =
stok == "SQ8" ? QuantizerType::QT_8bit :
stok == "SQ6" ? QuantizerType::QT_6bit :
stok == "SQ4" ? QuantizerType::QT_4bit :
stok == "SQfp16" ? QuantizerType::QT_fp16 :
QuantizerType::QT_4bit;
if (coarse_quantizer) {
FAISS_THROW_IF_NOT (!use_2layer);
IndexIVFScalarQuantizer *index_ivf =
......@@ -216,12 +216,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
}
} else if (!index && (stok == "SQ8Hybrid" || stok == "SQ4Hybrid" || stok == "SQ6Hybrid" ||
stok == "SQfp16Hybrid")) {
ScalarQuantizer::QuantizerType qt =
stok == "SQ8Hybrid" ? ScalarQuantizer::QT_8bit :
stok == "SQ6Hybrid" ? ScalarQuantizer::QT_6bit :
stok == "SQ4Hybrid" ? ScalarQuantizer::QT_4bit :
stok == "SQfp16Hybrid" ? ScalarQuantizer::QT_fp16 :
ScalarQuantizer::QT_4bit;
QuantizerType qt =
stok == "SQ8Hybrid" ? QuantizerType::QT_8bit :
stok == "SQ6Hybrid" ? QuantizerType::QT_6bit :
stok == "SQ4Hybrid" ? QuantizerType::QT_4bit :
stok == "SQfp16Hybrid" ? QuantizerType::QT_fp16 :
QuantizerType::QT_4bit;
FAISS_THROW_IF_NOT_MSG(coarse_quantizer,
"SQ Hybrid only with an IVF");
FAISS_THROW_IF_NOT (!use_2layer);
......@@ -299,7 +299,7 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
} else if (!index &&
sscanf (tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 &&
pq_m == 8) {
index_1 = new IndexHNSWSQ (d, ScalarQuantizer::QT_8bit, M);
index_1 = new IndexHNSWSQ (d, QuantizerType::QT_8bit, M);
} else if (!index && (stok == "LSH" || stok == "LSHr" ||
stok == "LSHrt" || stok == "LSHt")) {
bool rotate_data = strstr(tok, "r") != nullptr;
......
......@@ -16,6 +16,7 @@
#include <omp.h>
#include <faiss/FaissHook.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/ConcurrentBitset.h>
......
......@@ -24,29 +24,52 @@ namespace faiss {
* Optimized distance/norm/inner prod computations
*********************************************************/
#ifdef __AVX__
/// Squared L2 distance between two vectors
float fvec_L2sqr (
float fvec_L2sqr_avx (
const float * x,
const float * y,
size_t d);
/// inner product
float fvec_inner_product (
float fvec_inner_product_avx (
const float * x,
const float * y,
size_t d);
/// L1 distance
float fvec_L1 (
float fvec_L1_avx (
const float * x,
const float * y,
size_t d);
float fvec_Linf_avx (
const float * x,
const float * y,
size_t d);
#endif
#ifdef __SSE__
float fvec_L2sqr_sse (
const float * x,
const float * y,
size_t d);
float fvec_inner_product_sse (
const float * x,
const float * y,
size_t d);
float fvec_L1_sse (
const float * x,
const float * y,
size_t d);
float fvec_Linf (
float fvec_Linf_sse (
const float * x,
const float * y,
size_t d);
#endif
float fvec_jaccard (
const float * x,
......
......@@ -12,7 +12,7 @@
#include <cmath>
#include <omp.h>
#include <faiss/FaissHook.h>
#include <faiss/utils/utils.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/AuxIndexStructures.h>
......
此差异已折叠。
......@@ -117,6 +117,12 @@ if (KNOWHERE_GPU_VERSION)
target_link_libraries(test_customized_index ${depend_libs} ${unittest_libs} ${basic_libs})
endif ()
#<INSTRUCTIONSET-TEST>
if (NOT TARGET test_instructionset)
add_executable(test_instructionset test_instructionset.cpp)
endif ()
target_link_libraries(test_instructionset ${depend_libs} ${unittest_libs})
if (NOT TARGET test_knowhere_common)
add_executable(test_knowhere_common test_common.cpp ${util_srcs})
endif ()
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册