db_tests.cpp 15.2 KB
Newer Older
G
groot 已提交
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
X
Xu Peng 已提交
7 8
#include <thread>
#include <easylogging++.h>
G
groot 已提交
9
#include <boost/filesystem.hpp>
G
groot 已提交
10

X
Xu Peng 已提交
11
#include "utils.h"
G
groot 已提交
12
#include "db/DB.h"
X
Xu Peng 已提交
13
#include "db/DBImpl.h"
X
Xu Peng 已提交
14
#include "db/MetaConsts.h"
Z
zhiru 已提交
15
#include "db/Factories.h"
G
groot 已提交
16

J
jinhai 已提交
17
using namespace zilliz::milvus;
G
groot 已提交
18

G
groot 已提交
19 20
namespace {

Z
zhiru 已提交
21 22
    static const std::string TABLE_NAME = "test_group";
    static constexpr int64_t TABLE_DIM = 256;
G
groot 已提交
23

Z
zhiru 已提交
24 25 26 27 28 29 30
    engine::meta::TableSchema BuildTableSchema() {
        engine::meta::TableSchema table_info;
        table_info.dimension_ = TABLE_DIM;
        table_info.table_id_ = TABLE_NAME;
        table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP;
        return table_info;
    }
G
groot 已提交
31

Z
zhiru 已提交
32 33 34 35 36 37 38 39
    void BuildVectors(int64_t n, std::vector<float>& vectors) {
        vectors.clear();
        vectors.resize(n*TABLE_DIM);
        float* data = vectors.data();
        for(int i = 0; i < n; i++) {
            for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48();
            data[TABLE_DIM * i] += i / 2000.;
        }
G
groot 已提交
40 41 42 43
    }

}

X
Xu Peng 已提交
44 45
// Exercises engine::ArchiveConf construction: rejected inputs must throw, and
// accepted inputs must expose the expected type and criteria map.
TEST_F(DBTest, CONFIG_TEST) {
    // An unrecognised archive type is rejected.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf bad_conf("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }

    // Both known types, built without explicit criteria, report a single
    // "disk" criterion equal to 512.
    for (const std::string type : {"delete", "swap"}) {
        engine::ArchiveConf archive_conf(type);
        ASSERT_EQ(archive_conf.GetType(), type);
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 512);
    }

    // Explicit "disk" criterion: empty or non-numeric values throw.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf empty_value("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "disk:a"));
        engine::ArchiveConf archive_conf("swap", "disk:1024");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 1024);
    }

    // Explicit "days" criterion: empty or non-numeric values throw.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf empty_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["days"] == 100);
    }

    // Two criteria separated by ';' are both recorded.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf empty_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100;disk:200");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 2);
        ASSERT_TRUE(criteria["days"] == 100);
        ASSERT_TRUE(criteria["disk"] == 200);
    }
}

X
Xu Peng 已提交
90

91
// Inserts vectors on the main thread while a background thread repeatedly
// queries the table, checking that each query vector's top hit is the id it
// was inserted under and that the reported store size never decreases.
TEST_F(DBTest, DB_TEST) {
    static const std::string table_name = "test_group";
    static const int table_dim = 256;

    engine::meta::TableSchema table_info;
    table_info.dimension_ = table_dim;
    table_info.table_id_ = table_name;
    table_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = table_name;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, table_dim);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Bulk payload inserted on every iteration except the 41st.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; they are also inserted once so they can be found again.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Give the inserting thread a head start. NOTE(review): reading
        // target_ids below relies on this delay rather than on explicit
        // synchronization with the insert at i == 40.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (int j = 0; j < 10; ++j) {
            ss.str("");
            // Remember the previous size BEFORE refreshing it; otherwise the
            // monotonicity check at the end of the iteration is always true.
            prev_count = count;
            db_->Size(count);

            START_TIMER;
            stat = db_->Query(table_name, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // Guard the indexing below against short result / id lists.
            ASSERT_GE(results.size(), static_cast<size_t>(qb));
            ASSERT_GE(target_ids.size(), static_cast<size_t>(qb));
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_GE(count, prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = 100000;

    for (int i = 0; i < loop; ++i) {
        if (i==40) {
            db_->InsertVectors(table_name, qb, qxb.data(), target_ids);
            // Non-fatal on purpose: a fatal ASSERT_* would return before
            // search.join() and destroying a joinable std::thread terminates
            // the process.
            EXPECT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(table_name, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
X
xj.lin 已提交
167

168
// Inserts 250000 random vectors in batches of 100, waits, then runs a top-k
// query for 10 random vectors and checks that the query status is OK.
TEST_F(DBTest, SEARCH_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    const size_t nb = 250000;
    const size_t nq = 10;
    const size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (size_t i = 0; i < nb*TABLE_DIM; i++) {
        xb[i] = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; i++) {
        ids[i] = static_cast<long>(i);
    }
    for (size_t i = 0; i < nq*TABLE_DIM; i++) {
        xq[i] = dis_xt(gen);
    }

    // insert data
    const int batch_size = 100;
    const size_t batch_count = nb / batch_size;
    for (size_t j = 0; j < batch_count; ++j) {
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids);
        ASSERT_STATS(stat);
        // Brief pause part-way through the load, as in the original test.
        if (j == 200) { std::this_thread::sleep_for(std::chrono::seconds(1)); }
    }

    std::this_thread::sleep_for(std::chrono::seconds(2)); // wait until build index finish

    engine::QueryResults results;
    stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
221

G
groot 已提交
222
// Inserts many small batches and then checks that the size reported by the
// database stays at or below 1 G.
// NOTE(review): presumably the DBTest2 fixture configures a disk-based archive
// limit; that setup is not visible here - confirm against the fixture.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;

    // Initialised so the value logged and asserted below is well defined even
    // if Size() leaves its output untouched.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);
}
Z
zhiru 已提交
255

G
groot 已提交
256
// Creates a table, fills it with vectors, deletes the table and checks that
// the table's on-disk location no longer exists afterwards.
TEST_F(DBTest2, DELETE_TEST) {

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);

    // The table location must exist once the table has been created.
    ASSERT_TRUE(boost::filesystem::exists(table_info_get.location_));

    engine::IDNumbers vector_ids;

    // Initialised so the variable never holds an indeterminate value.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 100000;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 20;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // An empty date list is passed to DeleteTable.
    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);
    // Wait before checking the filesystem; the location is expected to be
    // gone by then.
    std::this_thread::sleep_for(std::chrono::seconds(2));
    ASSERT_TRUE(stat.ok());
    ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_));

    stat = db_->DropAll();
    ASSERT_TRUE(stat.ok());
}
Z
zhiru 已提交
293 294 295 296 297 298

// Same scenario as the DBTest DB_TEST case, run against a database built from
// the MySQLDBTest fixture's options: concurrent inserts and queries, then
// DropAll.
TEST_F(MySQLDBTest, DB_TEST) {

    auto options = GetOptions();
    auto db_ = engine::DBFactory::Build(options);

    static const std::string table_name = "test_group";
    static const int table_dim = 256;

    engine::meta::TableSchema table_info;
    table_info.dimension_ = table_dim;
    table_info.table_id_ = table_name;
    table_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = table_name;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, table_dim);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Bulk payload inserted on every iteration except the 41st.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; they are also inserted once so they can be found again.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Give the inserting thread a head start. NOTE(review): reading
        // target_ids below relies on this delay rather than on explicit
        // synchronization with the insert at i == 40.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (int j = 0; j < 10; ++j) {
            ss.str("");
            // Remember the previous size BEFORE refreshing it; otherwise the
            // monotonicity check at the end of the iteration is always true.
            prev_count = count;
            db_->Size(count);

            START_TIMER;
            stat = db_->Query(table_name, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // Guard the indexing below against short result / id lists.
            ASSERT_GE(results.size(), static_cast<size_t>(qb));
            ASSERT_GE(target_ids.size(), static_cast<size_t>(qb));
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_GE(count, prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = 100000;

    for (int i = 0; i < loop; ++i) {
        if (i==40) {
            db_->InsertVectors(table_name, qb, qxb.data(), target_ids);
            // Non-fatal on purpose: a fatal ASSERT_* would return before
            // search.join() and destroying a joinable std::thread terminates
            // the process.
            EXPECT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(table_name, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    stat = db_->DropAll();
    ASSERT_TRUE(stat.ok());
}

// Same scenario as the DBTest SEARCH_TEST case, run against a database built
// from the MySQLDBTest fixture's options: bulk insert, query, then DropAll.
TEST_F(MySQLDBTest, SEARCH_TEST) {
    auto options = GetOptions();
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    const size_t nb = 250000;
    const size_t nq = 10;
    const size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (size_t i = 0; i < nb*TABLE_DIM; i++) {
        xb[i] = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; i++) {
        ids[i] = static_cast<long>(i);
    }
    for (size_t i = 0; i < nq*TABLE_DIM; i++) {
        xq[i] = dis_xt(gen);
    }

    // insert data
    const int batch_size = 100;
    const size_t batch_count = nb / batch_size;
    for (size_t j = 0; j < batch_count; ++j) {
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids);
        ASSERT_STATS(stat);
        // Brief pause part-way through the load, as in the original test.
        if (j == 200) { std::this_thread::sleep_for(std::chrono::seconds(1)); }
    }

    std::this_thread::sleep_for(std::chrono::seconds(2)); // wait until build index finish

    engine::QueryResults results;
    stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    stat = db_->DropAll();
    ASSERT_TRUE(stat.ok());

    // TODO(linxj): add groundTruth assert
}

// Builds a database with the archive configuration ("delete", "disk:1"),
// inserts many small batches and checks that the reported size stays at or
// below 1 G.
TEST_F(MySQLDBTest, ARHIVE_DISK_CHECK) {

    auto options = GetOptions();
    options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1");
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;

    // Initialised so the value logged and asserted below is well defined even
    // if Size() leaves its output untouched.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    std::this_thread::sleep_for(std::chrono::seconds(10)); // 10 s to make sure files are discarded

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);

    stat = db_->DropAll();
    ASSERT_TRUE(stat.ok());
}

// Builds a database with the archive configuration ("delete", "disk:1"),
// fills a table, deletes it and checks that the table's on-disk location no
// longer exists afterwards.
TEST_F(MySQLDBTest, DELETE_TEST) {

    auto options = GetOptions();
    options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1");
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);

    // The table location must exist once the table has been created.
    ASSERT_TRUE(boost::filesystem::exists(table_info_get.location_));

    engine::IDNumbers vector_ids;

    // Initialised so the variable never holds an indeterminate value.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 100000;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 20;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // An empty date list is passed to DeleteTable.
    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);

    std::this_thread::sleep_for(std::chrono::seconds(10)); // 10 s to make sure files are discarded

    ASSERT_TRUE(stat.ok());
    ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_));

    stat = db_->DropAll();
    ASSERT_TRUE(stat.ok());
}