test_ivf.cpp 26.4 KB
Newer Older
J
jinhai 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

X
xj.lin 已提交
18 19 20
#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <thread>
#include <tuple>
#include <vector>

#include <faiss/AutoTune.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>

#include "knowhere/common/Exception.h"
#include "knowhere/common/Timer.h"
#include "knowhere/index/vector_index/IndexGPUIVF.h"
#include "knowhere/index/vector_index/IndexGPUIVFPQ.h"
#include "knowhere/index/vector_index/IndexGPUIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVF.h"
#include "knowhere/index/vector_index/IndexIVFPQ.h"
#include "knowhere/index/vector_index/IndexIVFSQ.h"
#include "knowhere/index/vector_index/IndexIVFSQHybrid.h"
#include "knowhere/index/vector_index/helpers/Cloner.h"

#include "unittest/utils.h"
X
xj.lin 已提交
39

40
using ::testing::Combine;
X
xj.lin 已提交
41 42 43
using ::testing::TestWithParam;
using ::testing::Values;

X
xiaojun.lin 已提交
44 45
// GPU device ordinal passed to every GPU index / resource-manager call in this file.
constexpr int device_id = 0;
// Arguments handed to Generate() by the IVFTest fixture (in this order: DIM, NB, NQ).
constexpr int64_t DIM = 128;
constexpr int64_t NB = 1000000 / 100;  // 10,000
constexpr int64_t NQ = 10;
// Value stored in the `k` field of every config produced by ParamGenerator.
constexpr int64_t K = 10;

S
starlord 已提交
50
// Creates an empty index object for the given type name.
//
// Recognized names: "IVF", "IVFPQ", "IVFSQ" (constructed without arguments) and
// "GPUIVF", "GPUIVFPQ", "GPUIVFSQ", "IVFSQHybrid" (constructed with device_id).
//
// Returns nullptr for any other name. Previously control fell off the end of
// this non-void function in that case, which is undefined behavior.
knowhere::IVFIndexPtr
IndexFactory(const std::string& type) {
    if (type == "IVF") {
        return std::make_shared<knowhere::IVF>();
    } else if (type == "IVFPQ") {
        return std::make_shared<knowhere::IVFPQ>();
    } else if (type == "GPUIVF") {
        return std::make_shared<knowhere::GPUIVF>(device_id);
    } else if (type == "GPUIVFPQ") {
        return std::make_shared<knowhere::GPUIVFPQ>(device_id);
    } else if (type == "IVFSQ") {
        return std::make_shared<knowhere::IVFSQ>();
    } else if (type == "GPUIVFSQ") {
        return std::make_shared<knowhere::GPUIVFSQ>(device_id);
    } else if (type == "IVFSQHybrid") {
        return std::make_shared<knowhere::IVFSQHybrid>(device_id);
    }

    // Unknown type name: hand back a null index instead of invoking UB.
    return nullptr;
}

X
xiaojun.lin 已提交
69 70 71 72 73 74 75 76 77
// Selects which search/build configuration ParamGenerator::Gen() should produce.
// Gen() has dedicated branches for ivf, ivfpq and ivfsq; nsg has no branch in this file.
enum class ParameterType {
    ivf,
    ivfpq,
    ivfsq,
    nsg,
};

class ParamGenerator {
 public:
S
starlord 已提交
78 79
    static ParamGenerator&
    GetInstance() {
X
xiaojun.lin 已提交
80 81 82 83
        static ParamGenerator instance;
        return instance;
    }

S
starlord 已提交
84
    knowhere::Config
S
starlord 已提交
85
    Gen(const ParameterType& type) {
X
xiaojun.lin 已提交
86
        if (type == ParameterType::ivf) {
S
starlord 已提交
87
            auto tempconf = std::make_shared<knowhere::IVFCfg>();
X
xiaojun.lin 已提交
88 89 90 91 92
            tempconf->d = DIM;
            tempconf->gpu_id = device_id;
            tempconf->nlist = 100;
            tempconf->nprobe = 16;
            tempconf->k = K;
S
starlord 已提交
93
            tempconf->metric_type = knowhere::METRICTYPE::L2;
X
xiaojun.lin 已提交
94
            return tempconf;
S
starlord 已提交
95
        } else if (type == ParameterType::ivfpq) {
S
starlord 已提交
96
            auto tempconf = std::make_shared<knowhere::IVFPQCfg>();
X
xiaojun.lin 已提交
97 98 99 100 101 102 103
            tempconf->d = DIM;
            tempconf->gpu_id = device_id;
            tempconf->nlist = 100;
            tempconf->nprobe = 16;
            tempconf->k = K;
            tempconf->m = 8;
            tempconf->nbits = 8;
S
starlord 已提交
104
            tempconf->metric_type = knowhere::METRICTYPE::L2;
X
xiaojun.lin 已提交
105
            return tempconf;
X
xiaojun.lin 已提交
106
        } else if (type == ParameterType::ivfsq) {
S
starlord 已提交
107
            auto tempconf = std::make_shared<knowhere::IVFSQCfg>();
X
xiaojun.lin 已提交
108 109 110 111 112 113
            tempconf->d = DIM;
            tempconf->gpu_id = device_id;
            tempconf->nlist = 100;
            tempconf->nprobe = 16;
            tempconf->k = K;
            tempconf->nbits = 8;
S
starlord 已提交
114
            tempconf->metric_type = knowhere::METRICTYPE::L2;
X
xiaojun.lin 已提交
115 116 117 118 119
            return tempconf;
        }
    }
};

S
starlord 已提交
120
// Value-parameterized fixture for the IVF family of indexes.
// The test parameter is a (index type name, ParameterType) tuple; the data
// members used by the tests (xb, nb, nq, base_dataset, ...) come from DataGen,
// which is declared outside this file.
class IVFTest : public DataGen, public TestWithParam<::std::tuple<std::string, ParameterType>> {
 protected:
    // Generates the data set, creates the index under test and its config,
    // then initializes the GPU resource manager for device_id.
    void
    SetUp() override {
        ParameterType parameter_type;
        std::tie(index_type, parameter_type) = GetParam();
        // Init_with_default();
        Generate(DIM, NB, NQ);
        index_ = IndexFactory(index_type);
        conf = ParamGenerator::GetInstance().Gen(parameter_type);
        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 600, 2);
    }

    // Releases what SetUp() acquired from the GPU resource manager.
    void
    TearDown() override {
        knowhere::FaissGpuResourceMgr::GetInstance().Free();
    }

    // Picks the index to search with: for "GPUIVFSQ" the result of running
    // index_ through CopyCpuToGpu, for every other type index_ itself.
    knowhere::VectorIndexPtr
    ChooseTodo() {
        if (index_type == "GPUIVFSQ") {
            return knowhere::cloner::CopyCpuToGpu(index_, device_id, knowhere::Config());
        }
        return index_;
    }

 protected:
    std::string index_type;                  // type name from the test parameter
    knowhere::Config conf;                   // config produced by ParamGenerator
    knowhere::IVFIndexPtr index_ = nullptr;  // index under test
};

// Instantiates IVFTest once per (index type name, parameter set) pair.
// The IVFSQHybrid case is only compiled in when CUSTOMIZATION is defined.
INSTANTIATE_TEST_CASE_P(IVFParameters, IVFTest,
                        Values(std::make_tuple("IVF", ParameterType::ivf),
                               std::make_tuple("GPUIVF", ParameterType::ivf),
                               std::make_tuple("IVFPQ", ParameterType::ivfpq),
                               std::make_tuple("GPUIVFPQ", ParameterType::ivfpq),
                               std::make_tuple("IVFSQ", ParameterType::ivfsq),
#ifdef CUSTOMIZATION
                               std::make_tuple("IVFSQHybrid", ParameterType::ivfsq),
#endif
                               std::make_tuple("GPUIVFSQ", ParameterType::ivfsq)));
S
starlord 已提交
164 165

void
S
starlord 已提交
166
AssertAnns(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
X
xj.lin 已提交
167 168 169 170 171 172
    auto ids = result->array()[0];
    for (auto i = 0; i < nq; i++) {
        EXPECT_EQ(i, *(ids->data()->GetValues<int64_t>(1, i * k)));
    }
}

S
starlord 已提交
173
void
S
starlord 已提交
174
PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) {
X
xj.lin 已提交
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
    auto ids = result->array()[0];
    auto dists = result->array()[1];

    std::stringstream ss_id;
    std::stringstream ss_dist;
    for (auto i = 0; i < 10; i++) {
        for (auto j = 0; j < k; ++j) {
            ss_id << *(ids->data()->GetValues<int64_t>(1, i * k + j)) << " ";
            ss_dist << *(dists->data()->GetValues<float>(1, i * k + j)) << " ";
        }
        ss_id << std::endl;
        ss_dist << std::endl;
    }
    std::cout << "id\n" << ss_id.str() << std::endl;
    std::cout << "dist\n" << ss_dist.str() << std::endl;
}

// Train + add + search round trip for every parameterized index type.
TEST_P(IVFTest, ivf_basic) {
    // gtest assertion instead of assert(): still checked under NDEBUG and
    // reported as a test failure rather than aborting the whole binary.
    ASSERT_FALSE(xb.empty());

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);

    auto model = index_->Train(base_dataset, conf);
    index_->set_index_model(model);
    index_->Add(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dimension(), dim);

    auto new_idx = ChooseTodo();
    auto result = new_idx->Search(query_dataset, conf);
    AssertAnns(result, nq, conf->k);
    // PrintResult(result, nq, k);
}

X
xiaojun.lin 已提交
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
// Exercises the IVFSQHybrid-only quantizer API; a no-op for every other index type.
TEST_P(IVFTest, hybrid) {
    if (index_type != "IVFSQHybrid") {
        return;
    }
    // gtest assertion instead of assert(): still checked under NDEBUG and
    // reported as a test failure rather than aborting the whole binary.
    ASSERT_FALSE(xb.empty());

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);

    auto model = index_->Train(base_dataset, conf);
    index_->set_index_model(model);
    index_->Add(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dimension(), dim);

    //    auto new_idx = ChooseTodo();
    //    auto result = new_idx->Search(query_dataset, conf);
    //    AssertAnns(result, nq, conf->k);

    {
        // Load a serialized copy, attach a quantizer loaded with mode 1, search, detach.
        auto hybrid_1_idx = std::make_shared<knowhere::IVFSQHybrid>(device_id);

        auto binaryset = index_->Serialize();
        hybrid_1_idx->Load(binaryset);

        auto quantizer_conf = std::make_shared<knowhere::QuantizerCfg>();
        quantizer_conf->mode = 1;
        quantizer_conf->gpu_id = device_id;
        auto q = hybrid_1_idx->LoadQuantizer(quantizer_conf);
        hybrid_1_idx->SetQuantizer(q);
        auto result = hybrid_1_idx->Search(query_dataset, conf);
        AssertAnns(result, nq, conf->k);
        // Use the k the search was run with so the print stride matches the result layout.
        PrintResult(result, nq, conf->k);
        hybrid_1_idx->UnsetQuantizer();
    }

    {
        // Load a serialized copy, load the quantizer (mode 1), then LoadData with
        // mode 2 and search on the index that LoadData returns.
        auto hybrid_2_idx = std::make_shared<knowhere::IVFSQHybrid>(device_id);

        auto binaryset = index_->Serialize();
        hybrid_2_idx->Load(binaryset);

        auto quantizer_conf = std::make_shared<knowhere::QuantizerCfg>();
        quantizer_conf->mode = 1;
        quantizer_conf->gpu_id = device_id;
        auto q = hybrid_2_idx->LoadQuantizer(quantizer_conf);
        quantizer_conf->mode = 2;
        auto gpu_idx = hybrid_2_idx->LoadData(q, quantizer_conf);

        auto result = gpu_idx->Search(query_dataset, conf);
        AssertAnns(result, nq, conf->k);
        PrintResult(result, nq, conf->k);
    }
}
X
xiaojun.lin 已提交
264

S
starlord 已提交
265
// TEST_P(IVFTest, gpu_to_cpu) {
X
xj.lin 已提交
266 267 268 269 270
//    if (index_type.find("GPU") == std::string::npos) { return; }
//
//    // else
//    assert(!xb.empty());
//
X
xiaojun.lin 已提交
271
//    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
X
xj.lin 已提交
272 273
//    index_->set_preprocessor(preprocessor);
//
X
xiaojun.lin 已提交
274
//    auto model = index_->Train(base_dataset, conf);
X
xj.lin 已提交
275
//    index_->set_index_model(model);
X
xiaojun.lin 已提交
276
//    index_->Add(base_dataset, conf);
X
xj.lin 已提交
277 278
//    EXPECT_EQ(index_->Count(), nb);
//    EXPECT_EQ(index_->Dimension(), dim);
X
xiaojun.lin 已提交
279
//    auto result = index_->Search(query_dataset, conf);
X
xj.lin 已提交
280 281 282 283
//    AssertAnns(result, nq, k);
//
//    if (auto device_index = std::dynamic_pointer_cast<GPUIVF>(index_)) {
//        auto host_index = device_index->Copy_index_gpu_to_cpu();
X
xiaojun.lin 已提交
284
//        auto result = host_index->Search(query_dataset, conf);
X
xj.lin 已提交
285 286 287 288 289
//        AssertAnns(result, nq, k);
//    }
//}

// Round-trips the trained model and then the full index through a file on disk
// and checks that searching still returns the expected neighbours.
TEST_P(IVFTest, ivf_serialize) {
    // Writes the binary's payload to `filename`, then reads bin->size bytes of
    // that file back into `ret` (which must hold at least bin->size bytes).
    auto serialize = [](const std::string& filename, knowhere::BinaryPtr& bin, uint8_t* ret) {
        FileIOWriter writer(filename);
        writer(static_cast<void*>(bin->data.get()), bin->size);

        FileIOReader reader(filename);
        reader(ret, bin->size);
    };

    {
        // serialize index-model
        auto model = index_->Train(base_dataset, conf);
        auto binaryset = model->Serialize();
        auto bin = binaryset.GetByName("IVF");

        std::string filename = "/tmp/ivf_test_model_serialize.bin";
        // The buffer is allocated with new[], so it needs an array deleter;
        // a plain shared_ptr<uint8_t> would release it with scalar delete (UB).
        std::shared_ptr<uint8_t> data(new uint8_t[bin->size], std::default_delete<uint8_t[]>());
        serialize(filename, bin, data.get());

        binaryset.clear();
        binaryset.Append("IVF", data, bin->size);

        model->Load(binaryset);

        index_->set_index_model(model);
        index_->Add(base_dataset, conf);
        auto new_idx = ChooseTodo();
        auto result = new_idx->Search(query_dataset, conf);
        AssertAnns(result, nq, conf->k);
    }

    {
        // serialize index
        auto model = index_->Train(base_dataset, conf);
        index_->set_index_model(model);
        index_->Add(base_dataset, conf);
        auto binaryset = index_->Serialize();
        auto bin = binaryset.GetByName("IVF");

        std::string filename = "/tmp/ivf_test_serialize.bin";
        // Same ownership rule as above: new[] storage paired with delete[].
        std::shared_ptr<uint8_t> data(new uint8_t[bin->size], std::default_delete<uint8_t[]>());
        serialize(filename, bin, data.get());

        binaryset.clear();
        binaryset.Append("IVF", data, bin->size);

        index_->Load(binaryset);
        EXPECT_EQ(index_->Count(), nb);
        EXPECT_EQ(index_->Dimension(), dim);
        auto new_idx = ChooseTodo();
        auto result = new_idx->Search(query_dataset, conf);
        AssertAnns(result, nq, conf->k);
    }
}

// Builds the index, records a reference search result, then checks that
// CPU<->GPU copies either return the same ids or throw KnowhereException,
// depending on the index type.
TEST_P(IVFTest, clone_test) {
    // gtest assertion instead of assert(): still checked under NDEBUG and
    // reported as a test failure rather than aborting the whole binary.
    ASSERT_FALSE(xb.empty());

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);

    auto model = index_->Train(base_dataset, conf);
    index_->set_index_model(model);
    index_->Add(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dimension(), dim);
    auto new_idx = ChooseTodo();
    auto result = new_idx->Search(query_dataset, conf);
    AssertAnns(result, nq, conf->k);
    // PrintResult(result, nq, k);

    // Compares the id arrays of two results element by element. The bound uses
    // conf->k, the k both searches were run with, so it cannot run past the
    // result buffers if the fixture's own k differs.
    auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) {
        auto ids_p1 = p1->array()[0];
        auto ids_p2 = p2->array()[0];

        const int64_t total = static_cast<int64_t>(nq) * conf->k;
        for (int64_t i = 0; i < total; ++i) {
            EXPECT_EQ(*(ids_p2->data()->GetValues<int64_t>(1, i)), *(ids_p1->data()->GetValues<int64_t>(1, i)));
        }
    };

    //    {
    //        // clone in place
    //        std::vector<std::string> support_idx_vec{"IVF", "GPUIVF", "IVFPQ", "IVFSQ", "GPUIVFSQ"};
    //        auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
    //        if (finder != support_idx_vec.cend()) {
    //            EXPECT_NO_THROW({
    //                                auto clone_index = index_->Clone();
    //                                auto clone_result = clone_index->Search(query_dataset, conf);
    //                                //AssertAnns(result, nq, conf->k);
    //                                AssertEqual(result, clone_result);
    //                                std::cout << "inplace clone [" << index_type << "] success" << std::endl;
    //                            });
    //        } else {
    //            EXPECT_THROW({
    //                             std::cout << "inplace clone [" << index_type << "] failed" << std::endl;
    //                             auto clone_index = index_->Clone();
    //                         }, KnowhereException);
    //        }
    //    }

    // The copy checks below are skipped for the hybrid index.
    if (index_type == "IVFSQHybrid") {
        return;
    }

    {
        // copy from gpu to cpu
        std::vector<std::string> support_idx_vec{"GPUIVF", "GPUIVFSQ", "IVFSQHybrid"};
        auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
        if (finder != support_idx_vec.cend()) {
            EXPECT_NO_THROW({
                auto clone_index = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
                auto clone_result = clone_index->Search(query_dataset, conf);
                AssertEqual(result, clone_result);
                std::cout << "clone G <=> C [" << index_type << "] success" << std::endl;
            });
        } else {
            EXPECT_THROW(
                {
                    std::cout << "clone G <=> C [" << index_type << "] failed" << std::endl;
                    auto clone_index = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
                },
                knowhere::KnowhereException);
        }
    }

    {
        // copy to gpu
        std::vector<std::string> support_idx_vec{"IVF", "GPUIVF", "IVFSQ", "GPUIVFSQ"};
        auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
        if (finder != support_idx_vec.cend()) {
            EXPECT_NO_THROW({
                auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, device_id, knowhere::Config());
                auto clone_result = clone_index->Search(query_dataset, conf);
                AssertEqual(result, clone_result);
                std::cout << "clone C <=> G [" << index_type << "] success" << std::endl;
            });
        } else {
            EXPECT_THROW(
                {
                    std::cout << "clone C <=> G [" << index_type << "] failed" << std::endl;
                    auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, device_id, knowhere::Config());
                },
                knowhere::KnowhereException);
        }
    }
}

X
xiaojun.lin 已提交
442
#ifdef CUSTOMIZATION
X
xj.lin 已提交
443
// For GPU-capable index types: times CopyCpuToGpu before and after Seal() on
// the CPU copy and requires the sealed copy not to be slower.
TEST_P(IVFTest, seal_test) {
    // FaissGpuResourceMgr::GetInstance().InitDevice(device_id);

    std::vector<std::string> support_idx_vec{"GPUIVF", "GPUIVFSQ", "IVFSQHybrid"};
    auto finder = std::find(support_idx_vec.cbegin(), support_idx_vec.cend(), index_type);
    if (finder == support_idx_vec.cend()) {
        return;
    }

    // gtest assertion instead of assert(): still checked under NDEBUG and
    // reported as a test failure rather than aborting the whole binary.
    ASSERT_FALSE(xb.empty());

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);

    auto model = index_->Train(base_dataset, conf);
    index_->set_index_model(model);
    index_->Add(base_dataset, conf);
    EXPECT_EQ(index_->Count(), nb);
    EXPECT_EQ(index_->Dimension(), dim);
    auto new_idx = ChooseTodo();
    auto result = new_idx->Search(query_dataset, conf);
    AssertAnns(result, nq, conf->k);

    auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());

    // Section 1: copy to GPU before sealing.
    knowhere::TimeRecorder tc("CopyToGpu");
    knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
    auto without_seal = tc.RecordSection("Without seal");
    // Section 2: the Seal() call itself (recorded, not compared).
    cpu_idx->Seal();
    tc.RecordSection("seal cost");
    // Section 3: copy to GPU after sealing.
    knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
    auto with_seal = tc.RecordSection("With seal");
    ASSERT_GE(without_seal, with_seal);
}
X
xiaojun.lin 已提交
477
#endif
X
xj.lin 已提交
478

S
starlord 已提交
479
class GPURESTEST : public DataGen, public ::testing::Test {
480
 protected:
S
starlord 已提交
481 482
    void
    SetUp() override {
483
        Generate(128, 1000000, 1000);
S
starlord 已提交
484
        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(device_id, 1024 * 1024 * 200, 1024 * 1024 * 300, 2);
485

X
xiaojun.lin 已提交
486
        k = 100;
487
        elems = nq * k;
S
starlord 已提交
488 489
        ids = (int64_t*)malloc(sizeof(int64_t) * elems);
        dis = (float*)malloc(sizeof(float) * elems);
490 491
    }

S
starlord 已提交
492 493
    void
    TearDown() override {
494 495
        delete ids;
        delete dis;
S
starlord 已提交
496
        knowhere::FaissGpuResourceMgr::GetInstance().Free();
497 498 499 500
    }

 protected:
    std::string index_type;
S
starlord 已提交
501
    knowhere::IVFIndexPtr index_ = nullptr;
502

S
starlord 已提交
503 504
    int64_t* ids = nullptr;
    float* dis = nullptr;
505 506 507
    int64_t elems = 0;
};

X
xj.lin 已提交
508
// Loop bounds used by the GPURESTEST tests below: number of repeated searches
// and number of repeated CPU->GPU copies.
const int search_count = 18;
const int load_count = 3;
510 511 512 513 514

// Runs the same repeated-search loop on a knowhere GPUIVF index and on a raw
// faiss GpuIndexIVFFlat, recording timings for both.
TEST_F(GPURESTEST, gpu_ivf_resource_test) {
    // gtest assertion instead of assert(): still checked under NDEBUG and
    // reported as a test failure rather than aborting the whole binary.
    ASSERT_FALSE(xb.empty());

    {
        // Keep a typed handle so the device getter/setter can be called without
        // repeated, unchecked dynamic_pointer_cast dereferences.
        auto gpu_index = std::make_shared<knowhere::GPUIVF>(-1);
        index_ = gpu_index;
        ASSERT_EQ(gpu_index->GetGpuDevice(), -1);
        gpu_index->SetGpuDevice(device_id);
        ASSERT_EQ(gpu_index->GetGpuDevice(), device_id);

        auto conf = std::make_shared<knowhere::IVFCfg>();
        conf->nlist = 1638;
        conf->d = dim;
        conf->gpu_id = device_id;
        conf->metric_type = knowhere::METRICTYPE::L2;
        conf->k = k;
        conf->nprobe = 1;

        auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
        index_->set_preprocessor(preprocessor);
        auto model = index_->Train(base_dataset, conf);
        index_->set_index_model(model);
        index_->Add(base_dataset, conf);
        EXPECT_EQ(index_->Count(), nb);
        EXPECT_EQ(index_->Dimension(), dim);

        knowhere::TimeRecorder tc("knowere GPUIVF");
        for (int i = 0; i < search_count; ++i) {
            index_->Search(query_dataset, conf);
            // Only the first five and last five iterations are recorded individually.
            if (i > search_count - 6 || i < 5)
                tc.RecordSection("search once");
        }
        tc.ElapseFromBegin("search all");
    }
    knowhere::FaissGpuResourceMgr::GetInstance().Dump();

    {
        // IVF-Search
        faiss::gpu::StandardGpuResources res;
        faiss::gpu::GpuIndexIVFFlatConfig idx_config;
        idx_config.device = device_id;
        faiss::gpu::GpuIndexIVFFlat device_index(&res, dim, 1638, faiss::METRIC_L2, idx_config);
        device_index.train(nb, xb.data());
        device_index.add(nb, xb.data());

        knowhere::TimeRecorder tc("ori IVF");
        for (int i = 0; i < search_count; ++i) {
            device_index.search(nq, xq.data(), k, dis, ids);
            if (i > search_count - 6 || i < 5)
                tc.RecordSection("search once");
        }
        tc.ElapseFromBegin("search all");
    }
}

X
xiaojun.lin 已提交
565
#ifdef CUSTOMIZATION
566 567 568 569 570
// Times copy-to-GPU plus repeated searches for an IVF/SQ8 index, first through
// knowhere and then through the raw faiss API.
TEST_F(GPURESTEST, gpuivfsq) {
    {
        // knowhere gpu ivfsq
        index_type = "GPUIVFSQ";
        index_ = IndexFactory(index_type);

        auto conf = std::make_shared<knowhere::IVFSQCfg>();
        conf->nlist = 1638;
        conf->d = dim;
        conf->gpu_id = device_id;
        conf->metric_type = knowhere::METRICTYPE::L2;
        conf->k = k;
        conf->nbits = 8;
        conf->nprobe = 1;

        auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
        index_->set_preprocessor(preprocessor);
        auto model = index_->Train(base_dataset, conf);
        index_->set_index_model(model);
        index_->Add(base_dataset, conf);
        //        auto result = index_->Search(query_dataset, conf);
        //        AssertAnns(result, nq, k);

        auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
        cpu_idx->Seal();

        knowhere::TimeRecorder tc("knowhere GPUSQ8");
        auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
        tc.RecordSection("Copy to gpu");
        for (int i = 0; i < search_count; ++i) {
            search_idx->Search(query_dataset, conf);
            // Only the first five and last five iterations are recorded individually.
            if (i > search_count - 6 || i < 5)
                tc.RecordSection("search once");
        }
        tc.ElapseFromBegin("search all");
    }

    {
        // Ori gpuivfsq Test
        // Every faiss index below is held in a unique_ptr so that nothing leaks
        // if train/add/search throws; the explicit reset() calls keep the
        // original release points.
        const char* index_description = "IVF1638,SQ8";
        std::unique_ptr<faiss::Index> ori_index(faiss::index_factory(dim, index_description, faiss::METRIC_L2));

        // Declared before the GPU indexes so that it is destroyed after them.
        faiss::gpu::StandardGpuResources res;
        std::unique_ptr<faiss::Index> device_index(faiss::gpu::index_cpu_to_gpu(&res, device_id, ori_index.get()));
        device_index->train(nb, xb.data());
        device_index->add(nb, xb.data());

        std::unique_ptr<faiss::Index> cpu_index(faiss::gpu::index_gpu_to_cpu(device_index.get()));
        auto idx = dynamic_cast<faiss::IndexIVF*>(cpu_index.get());
        if (idx != nullptr) {
            idx->to_readonly();
        }
        device_index.reset();
        ori_index.reset();

        faiss::gpu::GpuClonerOptions option;
        option.allInGpu = true;

        knowhere::TimeRecorder tc("ori GPUSQ8");
        std::unique_ptr<faiss::Index> search_idx(
            faiss::gpu::index_cpu_to_gpu(&res, device_id, cpu_index.get(), &option));
        tc.RecordSection("Copy to gpu");
        for (int i = 0; i < search_count; ++i) {
            search_idx->search(nq, xq.data(), k, dis, ids);
            if (i > search_count - 6 || i < 5)
                tc.RecordSection("search once");
        }
        tc.ElapseFromBegin("search all");
        cpu_index.reset();
        search_idx.reset();
    }
}
X
xiaojun.lin 已提交
637
#endif
638 639

// Runs repeated searches and repeated CPU->GPU copies, first one after the
// other and then on two threads at once, recording the time of each phase.
TEST_F(GPURESTEST, copyandsearch) {
    // search and copy at the same time
    printf("==================\n");

    index_type = "GPUIVF";
    index_ = IndexFactory(index_type);

    auto conf = std::make_shared<knowhere::IVFSQCfg>();
    conf->nlist = 1638;
    conf->d = dim;
    conf->gpu_id = device_id;
    conf->metric_type = knowhere::METRICTYPE::L2;
    conf->k = k;
    conf->nbits = 8;
    conf->nprobe = 1;

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);
    auto model = index_->Train(base_dataset, conf);
    index_->set_index_model(model);
    index_->Add(base_dataset, conf);
    //    auto result = index_->Search(query_dataset, conf);
    //    AssertAnns(result, nq, k);

    auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
    cpu_idx->Seal();

    auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());

    // Both lambdas capture by reference; that is safe here because the threads
    // running them are joined before this function returns.
    auto search_func = [&] {
        // TimeRecorder tc("search&load");
        for (int i = 0; i < search_count; ++i) {
            search_idx->Search(query_dataset, conf);
            // if (i > search_count - 6 || i == 0)
            //    tc.RecordSection("search once");
        }
        // tc.ElapseFromBegin("search finish");
    };
    auto load_func = [&] {
        // TimeRecorder tc("search&load");
        for (int i = 0; i < load_count; ++i) {
            knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
            // if (i > load_count -5 || i < 5)
            // tc.RecordSection("Copy to gpu");
        }
        // tc.ElapseFromBegin("load finish");
    };

    // Sequential baseline: one copy, one search, then each loop on its own.
    knowhere::TimeRecorder tc("basic");
    knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
    tc.RecordSection("Copy to gpu once");
    search_idx->Search(query_dataset, conf);
    tc.RecordSection("search once");
    search_func();
    tc.RecordSection("only search total");
    load_func();
    tc.RecordSection("only copy total");

    // Concurrent phase: the same two loops on separate threads.
    std::thread search_thread(search_func);
    std::thread load_thread(load_func);
    search_thread.join();
    load_thread.join();
    tc.RecordSection("Copy&search total");
}

704
// Runs index building and searching concurrently in three combinations:
// search + build, build + build, search + search.
TEST_F(GPURESTEST, TrainAndSearch) {
    index_type = "GPUIVF";
    index_ = IndexFactory(index_type);

    auto conf = std::make_shared<knowhere::IVFSQCfg>();
    conf->nlist = 1638;
    conf->d = dim;
    conf->gpu_id = device_id;
    conf->metric_type = knowhere::METRICTYPE::L2;
    conf->k = k;
    conf->nbits = 8;
    conf->nprobe = 1;

    auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
    index_->set_preprocessor(preprocessor);
    auto model = index_->Train(base_dataset, conf);
    auto new_index = IndexFactory(index_type);
    new_index->set_index_model(model);
    new_index->Add(base_dataset, conf);
    auto cpu_idx = knowhere::cloner::CopyGpuToCpu(new_index, knowhere::Config());
    cpu_idx->Seal();
    auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());

    // Named so they do not shadow the file-level search_count constant.
    constexpr int train_rounds = 1;
    constexpr int search_rounds = 5000;

    // Trains a model on index_ and populates a fresh index with it.
    auto train_stage = [&] {
        for (int i = 0; i < train_rounds; ++i) {
            auto model = index_->Train(base_dataset, conf);
            auto test_idx = IndexFactory(index_type);
            test_idx->set_index_model(model);
            test_idx->Add(base_dataset, conf);
        }
    };
    // Searches the given index repeatedly and checks the top-1 hit every time.
    auto search_stage = [&](knowhere::VectorIndexPtr& search_idx) {
        for (int i = 0; i < search_rounds; ++i) {
            auto result = search_idx->Search(query_dataset, conf);
            AssertAnns(result, nq, k);
        }
    };

    // TimeRecorder tc("record");
    // train_stage();
    // tc.RecordSection("train cost");
    // search_stage(search_idx);
    // tc.RecordSection("search cost");

    {
        // search and build parallel
        std::thread search_thread(search_stage, std::ref(search_idx));
        std::thread train_thread(train_stage);
        train_thread.join();
        search_thread.join();
    }
    {
        // build parallel
        std::thread train_1(train_stage);
        std::thread train_2(train_stage);
        train_1.join();
        train_2.join();
    }
    {
        // search parallel
        auto search_idx_2 = knowhere::cloner::CopyCpuToGpu(cpu_idx, device_id, knowhere::Config());
        std::thread search_1(search_stage, std::ref(search_idx));
        std::thread search_2(search_stage, std::ref(search_idx_2));
        search_1.join();
        search_2.join();
    }
}

S
starlord 已提交
774
// TODO(lxj): Add exception test