faiss_benchmark_test.cpp 20.5 KB
Newer Older
Y
yudong.cai 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/IndexIVF.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexFlat.h>
#ifdef CUSTOMIZATION
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
#endif
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/index_io.h>
#include <faiss/utils.h>

#include <hdf5.h>
Y
yudong.cai 已提交
45 46 47 48 49 50

/*****************************************************
 * To run this test, download HDF5 from
 *   https://support.hdfgroup.org/ftp/HDF5/releases/
 * and install it to /usr/local/hdf5.
 *****************************************************/
Y
yudong.cai 已提交
51 52
#define DEBUG_VERBOSE 0

Y
yudong.cai 已提交
53 54 55 56 57 58 59
// Suffix appended to an ANN test name (e.g. "sift-128-euclidean") to form the HDF5 file name.
const char HDF5_POSTFIX[] = ".hdf5";
// Dataset read as the base (training / indexed) vectors.
const char HDF5_DATASET_TRAIN[] = "train";
// Dataset read as the query vectors.
const char HDF5_DATASET_TEST[] = "test";
// Dataset read as the ground-truth neighbor ids of each query.
const char HDF5_DATASET_NEIGHBORS[] = "neighbors";
// Dataset read as the ground-truth neighbor distances (only used when DEBUG_VERBOSE is enabled).
const char HDF5_DATASET_DISTANCES[] = "distances";

// Where the search runs:
//   MODE_CPU - the CPU index is searched.
//   MODE_MIX - the CPU index is searched after it was cloned to the GPU with the "copy quantizer"
//              composition mode; test_ann_hdf5 only accepts it for SQ8Hybrid index keys.
//   MODE_GPU - the GPU index is searched.
enum QueryMode { MODE_CPU = 0, MODE_MIX, MODE_GPU };
Y
yudong.cai 已提交
60

Y
yudong.cai 已提交
61 62
/**
 * Returns a timestamp in seconds suitable for measuring intervals.
 *
 * Only the difference between two return values is meaningful; the epoch is
 * unspecified. A steady (monotonic) clock is used so that measured durations
 * are not distorted when the wall clock is adjusted during a benchmark run.
 *
 * @return seconds since the steady clock's epoch, as a double
 */
double
elapsed() {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration<double>(since_epoch).count();
}

Y
yudong.cai 已提交
68 69
/**
 * Scales every row of a row-major matrix to unit Euclidean length, in place.
 *
 * @param arr  matrix of nq rows with dim floats each, stored contiguously
 * @param nq   number of rows
 * @param dim  number of elements per row
 *
 * Rows whose squared norm is zero are left untouched: dividing by a zero
 * length would otherwise fill the row with NaN/inf values.
 */
void
normalize(float* arr, size_t nq, size_t dim) {
    for (size_t i = 0; i < nq; i++) {
        float* row = arr + i * dim;

        // Accumulate in double to limit rounding error on long vectors.
        double sq_norm = 0.0;
        for (size_t j = 0; j < dim; j++) {
            const double val = row[j];
            sq_norm += val * val;
        }

        if (sq_norm <= 0.0) {
            continue;  // zero vector: nothing to normalize
        }

        const double inv_norm = 1.0 / std::sqrt(sq_norm);
        for (size_t j = 0; j < dim; j++) {
            row[j] = static_cast<float>(row[j] * inv_norm);
        }
    }
}

Y
yudong.cai 已提交
83
void*
Y
yudong.cai 已提交
84 85
hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, size_t& d_out,
          size_t& n_out) {
Y
yudong.cai 已提交
86 87 88 89 90 91 92 93
    hid_t file, dataset, datatype, dataspace, memspace;
    H5T_class_t t_class;   /* data type class */
    hsize_t dimsm[3];      /* memory space dimensions */
    hsize_t dims_out[2];   /* dataset dimensions */
    hsize_t count[2];      /* size of the hyperslab in the file */
    hsize_t offset[2];     /* hyperslab offset in the file */
    hsize_t count_out[3];  /* size of the hyperslab in memory */
    hsize_t offset_out[3]; /* hyperslab offset in memory */
Y
yudong.cai 已提交
94
    void* data_out;        /* output buffer */
Y
yudong.cai 已提交
95 96

    /* Open the file and the dataset. */
Y
yudong.cai 已提交
97 98
    file = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
    dataset = H5Dopen2(file, dataset_name.c_str(), H5P_DEFAULT);
Y
yudong.cai 已提交
99

Y
yudong.cai 已提交
100 101
    /* Get datatype and dataspace handles and then query
     * dataset class, order, size, rank and dimensions. */
Y
yudong.cai 已提交
102
    datatype = H5Dget_type(dataset); /* datatype handle */
Y
yudong.cai 已提交
103 104 105
    t_class = H5Tget_class(datatype);
    assert(t_class == dataset_class || !"Illegal dataset class type");

Y
yudong.cai 已提交
106
    dataspace = H5Dget_space(dataset); /* dataspace handle */
Y
yudong.cai 已提交
107 108 109 110 111 112
    H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
    n_out = dims_out[0];
    d_out = dims_out[1];

    /* Define hyperslab in the dataset. */
    offset[0] = offset[1] = 0;
Y
yudong.cai 已提交
113 114
    count[0] = dims_out[0];
    count[1] = dims_out[1];
Y
yudong.cai 已提交
115 116 117 118 119 120 121 122 123 124
    H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

    /* Define the memory dataspace. */
    dimsm[0] = dims_out[0];
    dimsm[1] = dims_out[1];
    dimsm[2] = 1;
    memspace = H5Screate_simple(3, dimsm, NULL);

    /* Define memory hyperslab. */
    offset_out[0] = offset_out[1] = offset_out[2] = 0;
Y
yudong.cai 已提交
125 126 127
    count_out[0] = dims_out[0];
    count_out[1] = dims_out[1];
    count_out[2] = 1;
Y
yudong.cai 已提交
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
    H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL);

    /* Read data from hyperslab in the file into the hyperslab in memory and display. */
    switch (t_class) {
        case H5T_INTEGER:
            data_out = new int[dims_out[0] * dims_out[1]];
            H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out);
            break;
        case H5T_FLOAT:
            data_out = new float[dims_out[0] * dims_out[1]];
            H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT, data_out);
            break;
        default:
            printf("Illegal dataset class type\n");
            break;
    }

    /* Close/release resources. */
    H5Tclose(datatype);
    H5Dclose(dataset);
    H5Sclose(dataspace);
    H5Sclose(memspace);
    H5Fclose(file);

    return data_out;
}

Y
yudong.cai 已提交
155 156
/**
 * Builds the name of the cached index file for a data set / index key / insert-loop combination.
 *
 * The result is "<ann_test_name>_<index_key with ',' replaced by '_'>_<data_loops>.index",
 * e.g. ("sift-128-euclidean", "IVF16384,Flat", 2) -> "sift-128-euclidean_IVF16384_Flat_2.index".
 *
 * Every comma of the key is replaced, so keys without a comma are no longer duplicated in the
 * name and keys with several commas do not leave a comma in the file name.
 *
 * @param ann_test_name  data set name
 * @param index_key      faiss index factory key
 * @param data_loops     number of times the base data is inserted into the index
 * @return the index file name
 */
std::string
get_index_file_name(const std::string& ann_test_name, const std::string& index_key, int32_t data_loops) {
    std::string key_part = index_key;
    std::replace(key_part.begin(), key_part.end(), ',', '_');
    return ann_test_name + "_" + key_part + "_" + std::to_string(data_loops) + ".index";
}

Y
yudong.cai 已提交
164 165
bool
parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::MetricType& metric_type) {
Y
yudong.cai 已提交
166 167
    size_t pos1, pos2;

Y
yudong.cai 已提交
168 169
    if (ann_test_name.empty())
        return false;
Y
yudong.cai 已提交
170 171

    pos1 = ann_test_name.find_first_of('-', 0);
Y
yudong.cai 已提交
172 173
    if (pos1 == std::string::npos)
        return false;
Y
yudong.cai 已提交
174
    pos2 = ann_test_name.find_first_of('-', pos1 + 1);
Y
yudong.cai 已提交
175 176
    if (pos2 == std::string::npos)
        return false;
Y
yudong.cai 已提交
177

Y
yudong.cai 已提交
178 179
    dim = std::stoi(ann_test_name.substr(pos1 + 1, pos2 - pos1 - 1));
    std::string metric_str = ann_test_name.substr(pos2 + 1);
Y
yudong.cai 已提交
180 181 182 183 184 185 186 187 188 189 190
    if (metric_str == "angular") {
        metric_type = faiss::METRIC_INNER_PRODUCT;
    } else if (metric_str == "euclidean") {
        metric_type = faiss::METRIC_L2;
    } else {
        return false;
    }

    return true;
}

191 192 193 194 195 196 197 198 199 200
/**
 * Counts how many returned ids appear in the ground-truth result set.
 *
 * For each of the nq queries, the first ground_k returned ids are compared against the first
 * ground_k / index_add_loops ground-truth ids of that query; every returned id found there
 * counts as one hit.
 *
 * @param ground_index     ground-truth ids, nq rows of ground_k entries
 * @param index            returned ids, nq rows of k entries
 * @param ground_k         number of ground-truth entries per query (must be <= k)
 * @param k                number of returned entries per query
 * @param nq               number of queries
 * @param index_add_loops  number of times the base data was added to the index
 * @return total number of hits; 0 if index_add_loops is not positive
 */
int32_t
GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::idx_t* index, size_t ground_k, size_t k,
                  size_t nq, int32_t index_add_loops) {
    assert(ground_k <= k);
    if (index_add_loops <= 0) {
        return 0;  // avoids a division by zero below
    }

    // each result replicates INDEX_ADD_LOOPS times, so only this many ground-truth entries are compared
    const size_t ground_span = ground_k / static_cast<size_t>(index_add_loops);

    int32_t hit = 0;
    for (size_t i = 0; i < nq; i++) {
        const faiss::Index::idx_t* result_row = index + i * k;
        const faiss::Index::idx_t* ground_row = ground_index + i * ground_k;

        for (size_t j_c = 0; j_c < ground_k; j_c++) {
            // keep the full id width; narrowing to int would corrupt large ids
            const faiss::Index::idx_t candidate = result_row[j_c];
            for (size_t j_g = 0; j_g < ground_span; j_g++) {
                if (ground_row[j_g] == candidate) {
                    hit++;
                    break;  // a returned id is counted at most once
                }
            }
        }
    }
    return hit;
}

Y
yudong.cai 已提交
212
#if DEBUG_VERBOSE
Y
yudong.cai 已提交
213
/**
 * Prints the top-left corner (at most 10 x 10) of a row-major nq x k matrix to stdout.
 *
 * @param header      title line printed above the matrix
 * @param is_integer  true if arr holds int64_t values, false if it holds float values
 * @param arr         matrix data, nq rows of k elements
 * @param nq          number of rows in arr
 * @param k           number of columns in arr
 *
 * Matrices smaller than 10 x 10 are printed in full instead of aborting.
 */
void
print_array(const char* header, bool is_integer, const void* arr, size_t nq, size_t k) {
    const size_t MAX_ROWS = 10;
    const size_t MAX_COLS = 10;
    const size_t rows = nq < MAX_ROWS ? nq : MAX_ROWS;
    const size_t cols = k < MAX_COLS ? k : MAX_COLS;

    printf("%s\n", header);
    printf("==============================================\n");
    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            if (is_integer) {
                // cast to long long so the format matches on every platform
                printf("%7lld ", static_cast<long long>(static_cast<const int64_t*>(arr)[i * k + j]));
            } else {
                printf("%.6f ", static_cast<const float*>(arr)[i * k + j]);
            }
        }
        printf("\n");
    }
    printf("\n");
}
#endif
Y
yudong.cai 已提交
234

Y
yudong.cai 已提交
235
void
Y
yudong.cai 已提交
236
load_base_data(faiss::Index*& index, const std::string& ann_test_name, const std::string& index_key,
Y
yudong.cai 已提交
237 238 239
               faiss::gpu::StandardGpuResources& res, const faiss::MetricType metric_type, const size_t dim,
               int32_t index_add_loops, QueryMode mode = MODE_CPU) {
    double t0 = elapsed();
Y
yudong.cai 已提交
240

Y
yudong.cai 已提交
241 242
    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
    const int GPU_DEVICE_IDX = 0;
Y
yudong.cai 已提交
243

Y
yudong.cai 已提交
244 245
    faiss::Index *cpu_index = nullptr, *gpu_index = nullptr;
    faiss::distance_compute_blas_threshold = 800;
Y
yudong.cai 已提交
246 247

    std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops);
Y
yudong.cai 已提交
248

Y
yudong.cai 已提交
249
    try {
Y
yudong.cai 已提交
250 251
        printf("[%.3f s] Reading index file: %s\n", elapsed() - t0, index_file_name.c_str());
        cpu_index = faiss::read_index(index_file_name.c_str());
Y
yudong.cai 已提交
252

Y
yudong.cai 已提交
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
        if (mode != MODE_CPU) {
            faiss::gpu::GpuClonerOptions option;
            option.allInGpu = true;

            faiss::IndexComposition index_composition;
            index_composition.index = cpu_index;
            index_composition.quantizer = nullptr;

            switch (mode) {
                case MODE_CPU:
                    assert(false);
                    break;
                case MODE_MIX:
                    index_composition.mode = 1;  // 0: all data, 1: copy quantizer, 2: copy data
                    break;
                case MODE_GPU:
                    index_composition.mode = 0;  // 0: all data, 1: copy quantizer, 2: copy data
                    break;
            }
Y
yudong.cai 已提交
272

Y
yudong.cai 已提交
273 274 275 276 277 278 279
            printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0);
            gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option);
        }
    } catch (...) {
        size_t nb, d;
        printf("[%.3f s] Loading HDF5 file: %s\n", elapsed() - t0, ann_file_name.c_str());
        float* xb = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, d, nb);
Y
yudong.cai 已提交
280 281
        assert(d == dim || !"dataset does not have correct dimension");

Y
yudong.cai 已提交
282
        if (metric_type == faiss::METRIC_INNER_PRODUCT) {
Y
yudong.cai 已提交
283
            printf("[%.3f s] Normalizing base data set \n", elapsed() - t0);
Y
yudong.cai 已提交
284 285 286
            normalize(xb, nb, d);
        }

Y
yudong.cai 已提交
287 288
        printf("[%.3f s] Creating CPU index \"%s\" d=%ld\n", elapsed() - t0, index_key.c_str(), d);
        cpu_index = faiss::index_factory(d, index_key.c_str(), metric_type);
Y
yudong.cai 已提交
289

Y
yudong.cai 已提交
290 291
        printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0);
        gpu_index = faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index);
Y
yudong.cai 已提交
292

Y
yudong.cai 已提交
293
        printf("[%.3f s] Training on %ld vectors\n", elapsed() - t0, nb);
Y
yudong.cai 已提交
294
        gpu_index->train(nb, xb);
Y
yudong.cai 已提交
295 296 297

        // add index multiple times to get ~1G data set
        for (int i = 0; i < index_add_loops; i++) {
Y
yudong.cai 已提交
298 299
            printf("[%.3f s] No.%d Indexing database, size %ld*%ld\n", elapsed() - t0, i, nb, d);
            gpu_index->add(nb, xb);
Y
yudong.cai 已提交
300 301
        }

Y
yudong.cai 已提交
302 303 304 305
        printf("[%.3f s] Coping GPU index to CPU\n", elapsed() - t0);
        delete cpu_index;
        cpu_index = faiss::gpu::index_gpu_to_cpu(gpu_index);

Y
yudong.cai 已提交
306
        faiss::IndexIVF* cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index);
Y
yudong.cai 已提交
307 308 309 310 311 312
        if (cpu_ivf_index != nullptr) {
            cpu_ivf_index->to_readonly();
        }

        printf("[%.3f s] Writing index file: %s\n", elapsed() - t0, index_file_name.c_str());
        faiss::write_index(cpu_index, index_file_name.c_str());
Y
yudong.cai 已提交
313

Y
yudong.cai 已提交
314
        delete[] xb;
Y
yudong.cai 已提交
315 316
    }

Y
yudong.cai 已提交
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
    switch (mode) {
        case MODE_CPU:
        case MODE_MIX:
            index = cpu_index;
            if (gpu_index) {
                delete gpu_index;
            }
            break;
        case MODE_GPU:
            index = gpu_index;
            if (cpu_index) {
                delete cpu_index;
            }
            break;
    }
}
Y
yudong.cai 已提交
333

Y
yudong.cai 已提交
334
void
Y
yudong.cai 已提交
335
load_query_data(faiss::Index::distance_t*& xq, size_t& nq, const std::string& ann_test_name,
Y
yudong.cai 已提交
336 337 338 339 340 341
                const faiss::MetricType metric_type, const size_t dim) {
    double t0 = elapsed();
    size_t d;

    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;

Y
yudong.cai 已提交
342
    xq = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq);
Y
yudong.cai 已提交
343 344 345 346 347
    assert(d == dim || !"query does not have same dimension as train set");

    if (metric_type == faiss::METRIC_INNER_PRODUCT) {
        printf("[%.3f s] Normalizing query data \n", elapsed() - t0);
        normalize(xq, nq, d);
Y
yudong.cai 已提交
348
    }
Y
yudong.cai 已提交
349
}
Y
yudong.cai 已提交
350

Y
yudong.cai 已提交
351
void
Y
yudong.cai 已提交
352
load_ground_truth(faiss::Index::idx_t*& gt, size_t& k, const std::string& ann_test_name, const size_t nq) {
Y
yudong.cai 已提交
353
    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
Y
yudong.cai 已提交
354

Y
yudong.cai 已提交
355 356
    // load ground-truth and convert int to long
    size_t nq2;
Y
yudong.cai 已提交
357
    int* gt_int = (int*)hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2);
Y
yudong.cai 已提交
358
    assert(nq2 == nq || !"incorrect nb of ground truth index");
Y
yudong.cai 已提交
359

Y
yudong.cai 已提交
360 361 362
    gt = new faiss::Index::idx_t[k * nq];
    for (int i = 0; i < k * nq; i++) {
        gt[i] = gt_int[i];
Y
yudong.cai 已提交
363
    }
Y
yudong.cai 已提交
364
    delete[] gt_int;
Y
yudong.cai 已提交
365

Y
yudong.cai 已提交
366 367 368 369
#if DEBUG_VERBOSE
    faiss::Index::distance_t* gt_dist;  // nq * k matrix of ground-truth nearest-neighbors distances
    gt_dist = (float*)hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, k, nq2);
    assert(nq2 == nq || !"incorrect nb of ground truth distance");
Y
yudong.cai 已提交
370

Y
yudong.cai 已提交
371 372 373 374 375 376 377 378 379 380 381 382
    std::string str;
    str = ann_test_name + " ground truth index";
    print_array(str.c_str(), true, gt, nq, k);
    str = ann_test_name + " ground truth distance";
    print_array(str.c_str(), false, gt_dist, nq, k);

    delete gt_dist;
#endif
}

void
test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* index,
Y
yudong.cai 已提交
383 384 385
                  faiss::gpu::StandardGpuResources& res, const QueryMode query_mode, const faiss::Index::distance_t* xq,
                  const faiss::Index::idx_t* gt, const std::vector<size_t> nprobes, const int32_t index_add_loops,
                  const int32_t search_loops) {
Y
yudong.cai 已提交
386 387 388 389
    const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10;
    const size_t K = 1000, K_START = 100, K_STEP = 10;
    const size_t GK = 100;  // topk of ground truth

Y
yudong.cai 已提交
390 391
    std::unordered_map<size_t, std::string> mode_str_map = {
        {MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}};
Y
yudong.cai 已提交
392 393 394 395 396 397 398 399 400 401 402

    for (auto nprobe : nprobes) {
        switch (query_mode) {
            case MODE_CPU:
            case MODE_MIX: {
                faiss::ParameterSpace params;
                std::string nprobe_str = "nprobe=" + std::to_string(nprobe);
                params.set_index_parameters(index, nprobe_str.c_str());
                break;
            }
            case MODE_GPU: {
Y
yudong.cai 已提交
403
                faiss::gpu::GpuIndexIVF* gpu_index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index);
Y
yudong.cai 已提交
404 405 406
                gpu_index_ivf->setNumProbes(nprobe);
            }
        }
Y
yudong.cai 已提交
407 408

        // output buffers
Y
yudong.cai 已提交
409 410
        faiss::Index::idx_t* I = new faiss::Index::idx_t[NQ * K];
        faiss::Index::distance_t* D = new faiss::Index::distance_t[NQ * K];
Y
yudong.cai 已提交
411

Y
yudong.cai 已提交
412
        printf("\n%s | %s - %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(),
Y
yudong.cai 已提交
413
               mode_str_map[query_mode].c_str(), nprobe);
414
        printf("======================================================================================\n");
Y
yudong.cai 已提交
415 416
        for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) {  // nq = {10, 100, 1000}
            for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) {    //  k = {100, 1000}
Y
yudong.cai 已提交
417 418 419
                faiss::indexIVF_stats.quantization_time = 0.0;
                faiss::indexIVF_stats.search_time = 0.0;

Y
yudong.cai 已提交
420
                double t_start = elapsed(), t_end;
421 422 423
                for (int i = 0; i < search_loops; i++) {
                    index->search(t_nq, xq, t_k, D, I);
                }
Y
yudong.cai 已提交
424 425
                t_end = elapsed();

Y
yudong.cai 已提交
426 427 428 429 430 431 432 433
#if DEBUG_VERBOSE
                std::string str;
                str = "I (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
                print_array(str.c_str(), true, I, t_nq, t_k);
                str = "D (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
                print_array(str.c_str(), false, D, t_nq, t_k);
#endif

Y
yudong.cai 已提交
434
                // k = 100 for ground truth
Y
yudong.cai 已提交
435
                int32_t hit = GetResultHitCount(gt, I, GK, t_k, t_nq, index_add_loops);
436 437 438 439

                printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k,
                       (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops,
                       faiss::indexIVF_stats.search_time / 1000 / search_loops,
Y
yudong.cai 已提交
440
                       (hit / float(t_nq * GK / index_add_loops)));
Y
yudong.cai 已提交
441 442
            }
        }
443
        printf("======================================================================================\n");
Y
yudong.cai 已提交
444

Y
yudong.cai 已提交
445 446
        delete[] I;
        delete[] D;
Y
yudong.cai 已提交
447 448 449
    }
}

Y
yudong.cai 已提交
450
void
Y
yudong.cai 已提交
451 452
test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, const QueryMode query_mode,
              int32_t index_add_loops, const std::vector<size_t>& nprobes, int32_t search_loops) {
Y
yudong.cai 已提交
453 454
    double t0 = elapsed();

Y
yudong.cai 已提交
455
    faiss::gpu::StandardGpuResources res;
Y
yudong.cai 已提交
456 457 458 459

    faiss::MetricType metric_type;
    size_t dim;

Y
yudong.cai 已提交
460 461
    if (query_mode == MODE_MIX && index_key.find("SQ8Hybrid") == std::string::npos) {
        printf("Only SQ8Hybrid support MODE_MIX\n");
Y
yudong.cai 已提交
462 463 464
        return;
    }

Y
yudong.cai 已提交
465 466 467
    if (!parse_ann_test_name(ann_test_name, dim, metric_type)) {
        printf("Invalid ann test name: %s\n", ann_test_name.c_str());
        return;
Y
yudong.cai 已提交
468 469
    }

Y
yudong.cai 已提交
470
    size_t nq, k;
H
Heisenberg 已提交
471
    faiss::Index* index;
Y
yudong.cai 已提交
472 473
    faiss::Index::distance_t* xq;
    faiss::Index::idx_t* gt;  // ground-truth index
H
Heisenberg 已提交
474

Y
yudong.cai 已提交
475 476
    printf("[%.3f s] Loading base data\n", elapsed() - t0);
    load_base_data(index, ann_test_name, index_key, res, metric_type, dim, index_add_loops, query_mode);
H
Heisenberg 已提交
477

Y
yudong.cai 已提交
478 479
    printf("[%.3f s] Loading queries\n", elapsed() - t0);
    load_query_data(xq, nq, ann_test_name, metric_type, dim);
H
Heisenberg 已提交
480

Y
yudong.cai 已提交
481 482
    printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
    load_ground_truth(gt, k, ann_test_name, nq);
H
Heisenberg 已提交
483

Y
yudong.cai 已提交
484 485
    test_with_nprobes(ann_test_name, index_key, index, res, query_mode, xq, gt, nprobes, index_add_loops, search_loops);
    printf("[%.3f s] Search test done\n\n", elapsed() - t0);
Y
yudong.cai 已提交
486

Y
yudong.cai 已提交
487 488
    delete[] xq;
    delete[] gt;
Y
yudong.cai 已提交
489
    delete index;
Y
yudong.cai 已提交
490 491 492 493 494 495 496 497 498 499 500 501 502 503
}

/************************************************************************************
 * https://github.com/erikbern/ann-benchmarks
 *
 * Dataset        Dimensions  Train_size  Test_size  Neighbors  Distance   Download
 * Fashion-MNIST  784         60,000      10,000     100        Euclidean  HDF5 (217MB)
 * GIST           960         1,000,000   1,000      100        Euclidean  HDF5 (3.6GB)
 * GloVe          100         1,183,514   10,000     100        Angular    HDF5 (463MB)
 * GloVe          200         1,183,514   10,000     100        Angular    HDF5 (918MB)
 * MNIST          784         60,000      10,000     100        Euclidean  HDF5 (217MB)
 * NYTimes        256         290,000     10,000     100        Angular    HDF5 (301MB)
 * SIFT           128         1,000,000   10,000     100        Euclidean  HDF5 (501MB)
 *************************************************************************************/
Y
yudong.cai 已提交
505

Y
yudong.cai 已提交
506
// Benchmarks every index key on the SIFT and GloVe data sets, first on the CPU and then on the GPU.
TEST(FAISSTEST, BENCHMARK) {
    const std::vector<size_t> param_nprobes = {8, 128};
    const int32_t SEARCH_LOOPS = 5;
    const int32_t SIFT_INSERT_LOOPS = 2;  // insert twice to get ~1G data set
    const int32_t GLOVE_INSERT_LOOPS = 1;

    struct DataSet {
        const char* name;
        int32_t insert_loops;
    };
    const DataSet data_sets[] = {
        {"sift-128-euclidean", SIFT_INSERT_LOOPS},
        {"glove-200-angular", GLOVE_INSERT_LOOPS},
    };

    const char* const index_keys[] = {
        "IVF16384,Flat",
        "IVF16384,SQ8",
#ifdef CUSTOMIZATION
        "IVF16384,SQ8Hybrid",
#endif
    };

    // MODE_MIX (SQ8Hybrid only) is intentionally not part of the run.
    const QueryMode query_modes[] = {MODE_CPU, MODE_GPU};

    for (const DataSet& data_set : data_sets) {
        for (const char* index_key : index_keys) {
            for (const QueryMode query_mode : query_modes) {
                test_ann_hdf5(data_set.name, index_key, query_mode, data_set.insert_loops, param_nprobes,
                              SEARCH_LOOPS);
            }
        }
    }
}