faiss_benchmark_test.cpp 20.6 KB
Newer Older
Y
yudong.cai 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <faiss/AutoTune.h>
#include <faiss/Index.h>
#include <faiss/IndexIVF.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexFlat.h>
#ifdef CUSTOMIZATION
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
#endif
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/index_io.h>
#include <faiss/utils.h>

#include <hdf5.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
Y
yudong.cai 已提交
45 46 47 48 49 50

/*****************************************************
 * To run this test, please download the HDF5 from
 *  https://support.hdfgroup.org/ftp/HDF5/releases/
 * and install it to /usr/local/hdf5 .
 *****************************************************/
Y
yudong.cai 已提交
51 52 53 54 55 56 57 58 59 60 61 62 63
// Set to 1 to compile in the verbose array dumps guarded by DEBUG_VERBOSE.
#define DEBUG_VERBOSE 0

// File-name suffix appended to the test name, and the dataset names looked up inside that file.
const std::string HDF5_POSTFIX{".hdf5"};
const std::string HDF5_DATASET_TRAIN{"train"};
const std::string HDF5_DATASET_TEST{"test"};
const std::string HDF5_DATASET_NEIGHBORS{"neighbors"};
const std::string HDF5_DATASET_DISTANCES{"distances"};

// Selects which index (CPU, mixed, or GPU) a benchmark run searches.
enum QueryMode {
    MODE_CPU = 0,
    MODE_MIX = 1,
    MODE_GPU = 2
};
Y
yudong.cai 已提交
64

Y
yudong.cai 已提交
65 66
/*
 * Returns a timestamp in seconds, as a double.
 *
 * The value comes from a monotonic clock, so it is only meaningful as a
 * difference between two calls (which is how this file uses it). A monotonic
 * clock is used instead of the wall clock so that system time adjustments
 * during a benchmark run cannot distort the measured intervals.
 */
double
elapsed() {
    const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
    return std::chrono::duration<double>(since_epoch).count();
}

Y
yudong.cai 已提交
72 73
/*
 * Scales each row of a row-major nq x dim matrix to unit L2 length, in place.
 *
 * arr : pointer to nq * dim floats
 * nq  : number of rows (vectors)
 * dim : number of components per row
 *
 * Rows whose squared norm is not strictly positive (all-zero rows) are left
 * untouched; dividing by a zero norm would otherwise fill the row with NaN.
 */
void
normalize(float* arr, size_t nq, size_t dim) {
    for (size_t i = 0; i < nq; i++) {
        float* vec = arr + i * dim;

        // Accumulate in double to limit rounding error on long vectors.
        double sq_sum = 0.0;
        for (size_t j = 0; j < dim; j++) {
            const double val = vec[j];
            sq_sum += val * val;
        }

        if (!(sq_sum > 0.0)) {
            continue;  // nothing sensible to scale by
        }

        const double inv_len = 1.0 / std::sqrt(sq_sum);
        for (size_t j = 0; j < dim; j++) {
            vec[j] = static_cast<float>(vec[j] * inv_len);
        }
    }
}

Y
yudong.cai 已提交
87
void*
Y
yudong.cai 已提交
88 89
hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class,
          size_t& d_out, size_t& n_out) {
Y
yudong.cai 已提交
90 91 92 93 94 95 96 97 98
    hid_t file, dataset, datatype, dataspace, memspace;
    H5T_class_t t_class;   /* data type class */
    hsize_t dimsm[3];      /* memory space dimensions */
    hsize_t dims_out[2];   /* dataset dimensions */
    hsize_t count[2];      /* size of the hyperslab in the file */
    hsize_t offset[2];     /* hyperslab offset in the file */
    hsize_t count_out[3];  /* size of the hyperslab in memory */
    hsize_t offset_out[3]; /* hyperslab offset in memory */
    void* data_out; /* output buffer */
Y
yudong.cai 已提交
99 100

    /* Open the file and the dataset. */
Y
yudong.cai 已提交
101 102
    file = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
    dataset = H5Dopen2(file, dataset_name.c_str(), H5P_DEFAULT);
Y
yudong.cai 已提交
103

Y
yudong.cai 已提交
104 105
    /* Get datatype and dataspace handles and then query
     * dataset class, order, size, rank and dimensions. */
Y
yudong.cai 已提交
106
    datatype = H5Dget_type(dataset); /* datatype handle */
Y
yudong.cai 已提交
107 108 109
    t_class = H5Tget_class(datatype);
    assert(t_class == dataset_class || !"Illegal dataset class type");

Y
yudong.cai 已提交
110
    dataspace = H5Dget_space(dataset); /* dataspace handle */
Y
yudong.cai 已提交
111 112 113 114 115 116
    H5Sget_simple_extent_dims(dataspace, dims_out, NULL);
    n_out = dims_out[0];
    d_out = dims_out[1];

    /* Define hyperslab in the dataset. */
    offset[0] = offset[1] = 0;
Y
yudong.cai 已提交
117 118
    count[0] = dims_out[0];
    count[1] = dims_out[1];
Y
yudong.cai 已提交
119 120 121 122 123 124 125 126 127 128
    H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);

    /* Define the memory dataspace. */
    dimsm[0] = dims_out[0];
    dimsm[1] = dims_out[1];
    dimsm[2] = 1;
    memspace = H5Screate_simple(3, dimsm, NULL);

    /* Define memory hyperslab. */
    offset_out[0] = offset_out[1] = offset_out[2] = 0;
Y
yudong.cai 已提交
129 130 131
    count_out[0] = dims_out[0];
    count_out[1] = dims_out[1];
    count_out[2] = 1;
Y
yudong.cai 已提交
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
    H5Sselect_hyperslab(memspace, H5S_SELECT_SET, offset_out, NULL, count_out, NULL);

    /* Read data from hyperslab in the file into the hyperslab in memory and display. */
    switch (t_class) {
        case H5T_INTEGER:
            data_out = new int[dims_out[0] * dims_out[1]];
            H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out);
            break;
        case H5T_FLOAT:
            data_out = new float[dims_out[0] * dims_out[1]];
            H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace, H5P_DEFAULT, data_out);
            break;
        default:
            printf("Illegal dataset class type\n");
            break;
    }

    /* Close/release resources. */
    H5Tclose(datatype);
    H5Dclose(dataset);
    H5Sclose(dataspace);
    H5Sclose(memspace);
    H5Fclose(file);

    return data_out;
}

Y
yudong.cai 已提交
159 160
/*
 * Builds the file name used to cache a built index:
 *   <ann_test_name>_<index_key with ',' replaced by '_'>_<data_loops>.index
 *
 * Every comma in index_key is replaced, so keys with no comma or with several
 * commas produce a well-formed name as well (a key without a comma is emitted
 * once rather than duplicated).
 */
std::string
get_index_file_name(const std::string& ann_test_name, const std::string& index_key, int32_t data_loops) {
    std::string file_name = ann_test_name + "_";
    for (const char c : index_key) {
        file_name += (c == ',') ? '_' : c;
    }
    file_name += "_" + std::to_string(data_loops) + ".index";
    return file_name;
}

Y
yudong.cai 已提交
168 169
bool
parse_ann_test_name(const std::string& ann_test_name, size_t& dim, faiss::MetricType& metric_type) {
Y
yudong.cai 已提交
170 171
    size_t pos1, pos2;

Y
yudong.cai 已提交
172 173
    if (ann_test_name.empty())
        return false;
Y
yudong.cai 已提交
174 175

    pos1 = ann_test_name.find_first_of('-', 0);
Y
yudong.cai 已提交
176 177
    if (pos1 == std::string::npos)
        return false;
Y
yudong.cai 已提交
178
    pos2 = ann_test_name.find_first_of('-', pos1 + 1);
Y
yudong.cai 已提交
179 180
    if (pos2 == std::string::npos)
        return false;
Y
yudong.cai 已提交
181

Y
yudong.cai 已提交
182 183
    dim = std::stoi(ann_test_name.substr(pos1 + 1, pos2 - pos1 - 1));
    std::string metric_str = ann_test_name.substr(pos2 + 1);
Y
yudong.cai 已提交
184 185 186 187 188 189 190 191 192 193 194
    if (metric_str == "angular") {
        metric_type = faiss::METRIC_INNER_PRODUCT;
    } else if (metric_str == "euclidean") {
        metric_type = faiss::METRIC_L2;
    } else {
        return false;
    }

    return true;
}

195 196 197 198 199 200 201 202 203 204
/*
 * Counts how many returned ids appear in the ground truth.
 *
 * ground_index    : nq rows of ground_k ground-truth ids
 * index           : nq rows of k result ids
 * ground_k        : ids per ground-truth row; must not exceed k
 * k               : ids per result row
 * nq              : number of rows (queries)
 * index_add_loops : number of times each result is replicated; only the first
 *                   ground_k / index_add_loops ground-truth ids of a row are
 *                   compared against
 *
 * For every query, the first ground_k result ids are each looked up among those
 * leading ground-truth ids, and each result id contributes at most one hit.
 * Returns 0 when index_add_loops is not positive.
 */
int32_t
GetResultHitCount(const faiss::Index::idx_t* ground_index, const faiss::Index::idx_t* index, size_t ground_k, size_t k,
                  size_t nq, int32_t index_add_loops) {
    assert(ground_k <= k);
    if (index_add_loops <= 0) {
        return 0;  // avoids dividing by zero below
    }

    const size_t truth_count = ground_k / static_cast<size_t>(index_add_loops);
    int32_t hit = 0;
    for (size_t i = 0; i < nq; i++) {
        const faiss::Index::idx_t* truth_row = ground_index + i * ground_k;
        const faiss::Index::idx_t* result_row = index + i * k;

        for (size_t j_c = 0; j_c < ground_k; j_c++) {
            // Keep the full id width; narrowing to int would corrupt large ids.
            const faiss::Index::idx_t candidate = result_row[j_c];
            for (size_t j_g = 0; j_g < truth_count; j_g++) {
                if (truth_row[j_g] == candidate) {
                    hit++;
                    break;  // one hit per result id
                }
            }
        }
    }
    return hit;
}

Y
yudong.cai 已提交
216
#if DEBUG_VERBOSE
/*
 * Debug helper: prints the top-left corner (at most 10 x 10) of a row-major
 * nq x k matrix to stdout, preceded by a header line.
 *
 * header     : title printed before the matrix
 * is_integer : true if arr holds int64_t values, false if it holds floats
 * arr        : pointer to the nq * k elements
 * nq, k      : number of rows and columns of the matrix
 *
 * The printed window is clamped to the matrix size, so matrices smaller than
 * 10 x 10 are printed in full instead of being read out of bounds.
 */
void
print_array(const char* header, bool is_integer, const void* arr, size_t nq, size_t k) {
    const size_t MAX_ROW = 10;
    const size_t MAX_COL = 10;
    const size_t rows = (nq < MAX_ROW) ? nq : MAX_ROW;
    const size_t cols = (k < MAX_COL) ? k : MAX_COL;

    printf("%s\n", header);
    printf("==============================================\n");
    for (size_t i = 0; i < rows; i++) {
        for (size_t j = 0; j < cols; j++) {
            if (is_integer) {
                // Cast to long long so the format matches on every platform.
                printf("%7lld ", static_cast<long long>(static_cast<const int64_t*>(arr)[i * k + j]));
            } else {
                printf("%.6f ", static_cast<const float*>(arr)[i * k + j]);
            }
        }
        printf("\n");
    }
    printf("\n");
}
#endif
Y
yudong.cai 已提交
238

Y
yudong.cai 已提交
239 240 241 242 243
/*
 * Produces the index to benchmark, either by loading a cached index file or by
 * building it from the HDF5 train set.
 *
 * index           : receives the resulting index; the caller owns it
 * ann_test_name   : test name; the HDF5 file is <ann_test_name> + HDF5_POSTFIX
 * index_key       : index factory string, also used in the cache file name
 * res             : GPU resources used for CPU<->GPU cloning
 * metric_type     : metric of the index; inner-product data is normalized first
 * dim             : expected vector dimension (asserted against the data)
 * index_add_loops : number of times the train set is added to the index
 * mode            : MODE_CPU / MODE_MIX hand back the CPU index, MODE_GPU the GPU one
 *
 * Flow: the cached index file is read first (and cloned to GPU unless mode is
 * MODE_CPU). If anything in that step throws, the index is rebuilt: the train
 * set is loaded, the index is trained and filled on GPU, copied back to CPU and
 * written to the cache file.
 *
 * Throws std::runtime_error if the train set yields a null buffer; exceptions
 * raised while rebuilding propagate to the caller. Intermediate indexes and the
 * train buffer are held by smart pointers so they are released on every path.
 */
void
load_base_data(faiss::Index* &index, const std::string& ann_test_name, const std::string& index_key,
               faiss::gpu::StandardGpuResources& res, const faiss::MetricType metric_type, const size_t dim,
               int32_t index_add_loops, QueryMode mode = MODE_CPU) {
    const double t0 = elapsed();

    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
    const int GPU_DEVICE_IDX = 0;

    std::unique_ptr<faiss::Index> cpu_index;
    std::unique_ptr<faiss::Index> gpu_index;
    faiss::distance_compute_blas_threshold = 800;

    const std::string index_file_name = get_index_file_name(ann_test_name, index_key, index_add_loops);

    try {
        printf("[%.3f s] Reading index file: %s\n", elapsed() - t0, index_file_name.c_str());
        cpu_index.reset(faiss::read_index(index_file_name.c_str()));

        if (mode != MODE_CPU) {
            faiss::gpu::GpuClonerOptions option;
            option.allInGpu = true;

            faiss::IndexComposition index_composition;
            index_composition.index = cpu_index.get();
            index_composition.quantizer = nullptr;

            switch (mode) {
                case MODE_CPU:
                    assert(false);
                    break;
                case MODE_MIX:
                    index_composition.mode = 1;  // 0: all data, 1: copy quantizer, 2: copy data
                    break;
                case MODE_GPU:
                    index_composition.mode = 0;  // 0: all data, 1: copy quantizer, 2: copy data
                    break;
            }

            printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0);
            gpu_index.reset(faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, &index_composition, &option));
        }
    } catch (...) {
        // Loading the cached index (or cloning it) failed: drop whatever was
        // loaded so far and rebuild everything from the train set.
        gpu_index.reset();
        cpu_index.reset();

        size_t nb = 0, d = 0;
        printf("[%.3f s] Loading HDF5 file: %s\n", elapsed() - t0, ann_file_name.c_str());
        const std::unique_ptr<float[]> xb(
            static_cast<float*>(hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, d, nb)));
        if (!xb) {
            throw std::runtime_error("Failed to load train data from " + ann_file_name);
        }
        assert(d == dim || !"dataset does not have correct dimension");

        if (metric_type == faiss::METRIC_INNER_PRODUCT) {
            printf("[%.3f s] Normalizing base data set \n", elapsed() - t0);
            normalize(xb.get(), nb, d);
        }

        printf("[%.3f s] Creating CPU index \"%s\" d=%zu\n", elapsed() - t0, index_key.c_str(), d);
        cpu_index.reset(faiss::index_factory(static_cast<int>(d), index_key.c_str(), metric_type));

        printf("[%.3f s] Cloning CPU index to GPU\n", elapsed() - t0);
        gpu_index.reset(faiss::gpu::index_cpu_to_gpu(&res, GPU_DEVICE_IDX, cpu_index.get()));

        printf("[%.3f s] Training on %zu vectors\n", elapsed() - t0, nb);
        gpu_index->train(nb, xb.get());

        // add index multiple times to get ~1G data set
        for (int i = 0; i < index_add_loops; i++) {
            printf("[%.3f s] No.%d Indexing database, size %zu*%zu\n", elapsed() - t0, i, nb, d);
            gpu_index->add(nb, xb.get());
        }

        // Replace the empty CPU index with a copy of the trained and filled GPU index.
        printf("[%.3f s] Coping GPU index to CPU\n", elapsed() - t0);
        cpu_index.reset(faiss::gpu::index_gpu_to_cpu(gpu_index.get()));

        faiss::IndexIVF* cpu_ivf_index = dynamic_cast<faiss::IndexIVF*>(cpu_index.get());
        if (cpu_ivf_index != nullptr) {
            cpu_ivf_index->to_readonly();
        }

        printf("[%.3f s] Writing index file: %s\n", elapsed() - t0, index_file_name.c_str());
        faiss::write_index(cpu_index.get(), index_file_name.c_str());
    }

    // Hand the requested index to the caller; the other one is released on return.
    switch (mode) {
        case MODE_CPU:
        case MODE_MIX:
            index = cpu_index.release();
            break;
        case MODE_GPU:
            index = gpu_index.release();
            break;
    }
}
Y
yudong.cai 已提交
337

Y
yudong.cai 已提交
338 339 340 341 342 343 344 345 346 347 348 349 350 351
/*
 * Loads the query vectors of a test from its HDF5 file.
 *
 * xq            : receives a new[]-allocated buffer of nq * dim floats; the
 *                 caller releases it with delete[]
 * nq            : receives the number of query vectors
 * ann_test_name : test name; the HDF5 file is <ann_test_name> + HDF5_POSTFIX
 * metric_type   : for METRIC_INNER_PRODUCT the queries are normalized in place
 * dim           : expected vector dimension (asserted against the data)
 *
 * Throws std::runtime_error if the read yields a null buffer; xq is nullptr and
 * nq is 0 in that case.
 */
void
load_query_data(faiss::Index::distance_t* &xq, size_t& nq, const std::string& ann_test_name,
                const faiss::MetricType metric_type, const size_t dim) {
    const double t0 = elapsed();
    size_t d = 0;
    nq = 0;

    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;

    xq = static_cast<float*>(hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, d, nq));
    if (xq == nullptr) {
        nq = 0;
        throw std::runtime_error("Failed to load query data from " + ann_file_name);
    }
    assert(d == dim || !"query does not have same dimension as train set");

    if (metric_type == faiss::METRIC_INNER_PRODUCT) {
        printf("[%.3f s] Normalizing query data \n", elapsed() - t0);
        normalize(xq, nq, d);
    }
}
Y
yudong.cai 已提交
354

Y
yudong.cai 已提交
355 356 357
/*
 * Loads the ground-truth neighbor ids of a test and widens them to idx_t.
 *
 * gt            : receives a new[]-allocated buffer of nq * k ids; the caller
 *                 releases it with delete[]
 * k             : receives the number of neighbors stored per query
 * ann_test_name : test name; the HDF5 file is <ann_test_name> + HDF5_POSTFIX
 * nq            : expected number of queries (asserted against the data)
 *
 * Throws std::runtime_error if the read yields a null buffer; gt is nullptr in
 * that case. With DEBUG_VERBOSE enabled, the ids and the ground-truth distances
 * are also printed.
 */
void
load_ground_truth(faiss::Index::idx_t* &gt, size_t& k, const std::string& ann_test_name, const size_t nq) {
    const std::string ann_file_name = ann_test_name + HDF5_POSTFIX;
    gt = nullptr;

    // load ground-truth and convert int to long
    size_t nq2 = 0;
    const std::unique_ptr<int[]> gt_int(
        static_cast<int*>(hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, k, nq2)));
    if (!gt_int) {
        throw std::runtime_error("Failed to load ground truth from " + ann_file_name);
    }
    assert(nq2 == nq || !"incorrect nb of ground truth index");

    const size_t total = k * nq;
    gt = new faiss::Index::idx_t[total];
    for (size_t i = 0; i < total; i++) {
        gt[i] = gt_int[i];
    }

#if DEBUG_VERBOSE
    // nq * k matrix of ground-truth nearest-neighbors distances
    const std::unique_ptr<faiss::Index::distance_t[]> gt_dist(
        static_cast<float*>(hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, k, nq2)));
    assert(nq2 == nq || !"incorrect nb of ground truth distance");

    std::string str;
    str = ann_test_name + " ground truth index";
    print_array(str.c_str(), true, gt, nq, k);
    if (gt_dist) {
        str = ann_test_name + " ground truth distance";
        print_array(str.c_str(), false, gt_dist.get(), nq, k);
    }
#endif
}

void
test_with_nprobes(const std::string& ann_test_name, const std::string& index_key, faiss::Index* index,
                  faiss::gpu::StandardGpuResources& res, const QueryMode query_mode,
                  const faiss::Index::distance_t *xq, const faiss::Index::idx_t *gt, const std::vector<size_t> nprobes,
                  const int32_t index_add_loops, const int32_t search_loops) {
    const size_t NQ = 1000, NQ_START = 10, NQ_STEP = 10;
    const size_t K = 1000, K_START = 100, K_STEP = 10;
    const size_t GK = 100;  // topk of ground truth

    std::unordered_map<size_t, std::string> mode_str_map =
            {{MODE_CPU, "MODE_CPU"}, {MODE_MIX, "MODE_MIX"}, {MODE_GPU, "MODE_GPU"}};

    for (auto nprobe : nprobes) {
        switch (query_mode) {
            case MODE_CPU:
            case MODE_MIX: {
                faiss::ParameterSpace params;
                std::string nprobe_str = "nprobe=" + std::to_string(nprobe);
                params.set_index_parameters(index, nprobe_str.c_str());
                break;
            }
            case MODE_GPU: {
                faiss::gpu::GpuIndexIVF *gpu_index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index);
                gpu_index_ivf->setNumProbes(nprobe);
            }
        }
Y
yudong.cai 已提交
411 412

        // output buffers
Y
yudong.cai 已提交
413 414
        faiss::Index::idx_t *I = new faiss::Index::idx_t[NQ * K];
        faiss::Index::distance_t *D = new faiss::Index::distance_t[NQ * K];
Y
yudong.cai 已提交
415

Y
yudong.cai 已提交
416 417
        printf("\n%s | %s - %s | nprobe=%lu\n", ann_test_name.c_str(), index_key.c_str(),
                mode_str_map[query_mode].c_str(), nprobe);
418
        printf("======================================================================================\n");
Y
yudong.cai 已提交
419 420
        for (size_t t_nq = NQ_START; t_nq <= NQ; t_nq *= NQ_STEP) {   // nq = {10, 100, 1000}
            for (size_t t_k = K_START; t_k <= K; t_k *= K_STEP) {  //  k = {100, 1000}
Y
yudong.cai 已提交
421 422 423
                faiss::indexIVF_stats.quantization_time = 0.0;
                faiss::indexIVF_stats.search_time = 0.0;

Y
yudong.cai 已提交
424
                double t_start = elapsed(), t_end;
425 426 427
                for (int i = 0; i < search_loops; i++) {
                    index->search(t_nq, xq, t_k, D, I);
                }
Y
yudong.cai 已提交
428 429
                t_end = elapsed();

Y
yudong.cai 已提交
430 431 432 433 434 435 436 437
#if DEBUG_VERBOSE
                std::string str;
                str = "I (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
                print_array(str.c_str(), true, I, t_nq, t_k);
                str = "D (" + index_key + ", nq=" + std::to_string(t_nq) + ", k=" + std::to_string(t_k) + ")";
                print_array(str.c_str(), false, D, t_nq, t_k);
#endif

Y
yudong.cai 已提交
438
                // k = 100 for ground truth
Y
yudong.cai 已提交
439
                int32_t hit = GetResultHitCount(gt, I, GK, t_k, t_nq, index_add_loops);
440 441 442 443

                printf("nq = %4ld, k = %4ld, elapse = %.4fs (quant = %.4fs, search = %.4fs), R@ = %.4f\n", t_nq, t_k,
                       (t_end - t_start) / search_loops, faiss::indexIVF_stats.quantization_time / 1000 / search_loops,
                       faiss::indexIVF_stats.search_time / 1000 / search_loops,
Y
yudong.cai 已提交
444
                       (hit / float(t_nq * GK / index_add_loops)));
Y
yudong.cai 已提交
445 446
            }
        }
447
        printf("======================================================================================\n");
Y
yudong.cai 已提交
448

Y
yudong.cai 已提交
449 450
        delete[] I;
        delete[] D;
Y
yudong.cai 已提交
451 452 453
    }
}

Y
yudong.cai 已提交
454
void
Y
yudong.cai 已提交
455 456
test_ann_hdf5(const std::string& ann_test_name, const std::string& index_key, const QueryMode query_mode,
              int32_t index_add_loops, const std::vector<size_t>& nprobes, int32_t search_loops) {
Y
yudong.cai 已提交
457 458
    double t0 = elapsed();

Y
yudong.cai 已提交
459
    faiss::gpu::StandardGpuResources res;
Y
yudong.cai 已提交
460 461 462 463

    faiss::MetricType metric_type;
    size_t dim;

Y
yudong.cai 已提交
464 465
    if (query_mode == MODE_MIX && index_key.find("SQ8Hybrid") == std::string::npos) {
        printf("Only SQ8Hybrid support MODE_MIX\n");
Y
yudong.cai 已提交
466 467 468
        return;
    }

Y
yudong.cai 已提交
469 470 471
    if (!parse_ann_test_name(ann_test_name, dim, metric_type)) {
        printf("Invalid ann test name: %s\n", ann_test_name.c_str());
        return;
Y
yudong.cai 已提交
472 473
    }

Y
yudong.cai 已提交
474
    size_t nq, k;
H
Heisenberg 已提交
475
    faiss::Index* index;
Y
yudong.cai 已提交
476 477
    faiss::Index::distance_t* xq;
    faiss::Index::idx_t* gt;  // ground-truth index
H
Heisenberg 已提交
478

Y
yudong.cai 已提交
479 480
    printf("[%.3f s] Loading base data\n", elapsed() - t0);
    load_base_data(index, ann_test_name, index_key, res, metric_type, dim, index_add_loops, query_mode);
H
Heisenberg 已提交
481

Y
yudong.cai 已提交
482 483
    printf("[%.3f s] Loading queries\n", elapsed() - t0);
    load_query_data(xq, nq, ann_test_name, metric_type, dim);
H
Heisenberg 已提交
484

Y
yudong.cai 已提交
485 486
    printf("[%.3f s] Loading ground truth for %ld queries\n", elapsed() - t0, nq);
    load_ground_truth(gt, k, ann_test_name, nq);
H
Heisenberg 已提交
487

Y
yudong.cai 已提交
488 489
    test_with_nprobes(ann_test_name, index_key, index, res, query_mode, xq, gt, nprobes, index_add_loops, search_loops);
    printf("[%.3f s] Search test done\n\n", elapsed() - t0);
Y
yudong.cai 已提交
490

Y
yudong.cai 已提交
491 492
    delete[] xq;
    delete[] gt;
Y
yudong.cai 已提交
493
    delete index;
Y
yudong.cai 已提交
494 495 496 497 498 499 500 501 502 503 504 505 506 507
}

/************************************************************************************
 * https://github.com/erikbern/ann-benchmarks
 *
 * Dataset        Dimensions  Train_size  Test_size  Neighbors  Distance   Download
 * Fashion-MNIST  784         60,000      10,000     100        Euclidean  HDF5 (217MB)
 * GIST           960         1,000,000   1,000      100        Euclidean  HDF5 (3.6GB)
 * GloVe          100         1,183,514   10,000     100        Angular    HDF5 (463MB)
 * GloVe          200         1,183,514   10,000     100        Angular    HDF5 (918MB)
 * MNIST          784         60,000      10,000     100        Euclidean  HDF5 (217MB)
 * NYTimes        256         290,000     10,000     100        Angular    HDF5 (301MB)
 * SIFT           128         1,000,000   10,000     100        Euclidean  HDF5 (501MB)
 *************************************************************************************/
Y
yudong.cai 已提交
509

Y
yudong.cai 已提交
510
// Benchmark sweep: for each data set, every index type is searched on CPU and
// then on GPU, over the same set of nprobe values.
TEST(FAISSTEST, BENCHMARK) {
    const std::vector<size_t> param_nprobes = {8, 128};
    const int32_t SEARCH_LOOPS = 5;
    const int32_t SIFT_INSERT_LOOPS = 2;  // insert twice to get ~1G data set
    const int32_t GLOVE_INSERT_LOOPS = 1;

    struct DatasetCase {
        const char* name;
        int32_t insert_loops;
    };

    const DatasetCase datasets[] = {
        {"sift-128-euclidean", SIFT_INSERT_LOOPS},
        {"glove-200-angular", GLOVE_INSERT_LOOPS},
    };

    // SQ8Hybrid is only available in customized builds.
    const char* const index_keys[] = {
        "IVF16384,Flat",
        "IVF16384,SQ8",
#ifdef CUSTOMIZATION
        "IVF16384,SQ8Hybrid",
#endif
    };

    // MODE_MIX (SQ8Hybrid only) is currently left out of the sweep.
    const QueryMode modes[] = {MODE_CPU, MODE_GPU};

    for (const DatasetCase& dataset : datasets) {
        for (const char* index_key : index_keys) {
            for (const QueryMode mode : modes) {
                test_ann_hdf5(dataset.name, index_key, mode, dataset.insert_loops, param_nprobes, SEARCH_LOOPS);
            }
        }
    }
}