db_tests.cpp 13.8 KB
Newer Older
G
groot 已提交
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
X
Xu Peng 已提交
7 8
#include <thread>
#include <easylogging++.h>
G
groot 已提交
9

X
Xu Peng 已提交
10
#include "utils.h"
G
groot 已提交
11
#include "db/DB.h"
X
Xu Peng 已提交
12
#include "db/DBImpl.h"
X
Xu Peng 已提交
13
#include "db/MetaConsts.h"
Z
zhiru 已提交
14 15
#include "db/Factories.h"
#include "db/Options.h"
G
groot 已提交
16

J
jinhai 已提交
17
using namespace zilliz::milvus;
G
groot 已提交
18

X
Xu Peng 已提交
19 20
// Exercises engine::ArchiveConf construction: an unknown archive type must
// throw, each known type used without explicit criteria is expected to expose
// a single "disk" criterion of 512, and explicit "key:value" criteria strings
// must be parsed (or rejected when the value is missing / non-numeric).
TEST_F(DBTest, CONFIG_TEST) {
    // Unknown archive type.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf archive("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }

    // "delete" type, no explicit criteria.
    {
        engine::ArchiveConf archive("delete");
        ASSERT_EQ(archive.GetType(), "delete");
        auto criteria = archive.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 512);
    }

    // "swap" type, no explicit criteria.
    {
        engine::ArchiveConf archive("swap");
        ASSERT_EQ(archive.GetType(), "swap");
        auto criteria = archive.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 512);
    }

    // Explicit "disk" criterion: empty and non-numeric values are rejected.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "disk:a"));
        engine::ArchiveConf archive("swap", "disk:1024");
        auto criteria = archive.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 1024);
    }

    // Explicit "days" criterion: empty and non-numeric values are rejected.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive("swap", "days:100");
        auto criteria = archive.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["days"] == 100);
    }

    // Two criteria separated by ';' are both kept.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive("swap", "days:100;disk:200");
        auto criteria = archive.GetCriterias();
        ASSERT_TRUE(criteria.size() == 2);
        ASSERT_TRUE(criteria["days"] == 100);
        ASSERT_TRUE(criteria["disk"] == 200);
    }
}

X
Xu Peng 已提交
65 66 67 68
// Creates a table, inserts the same batch of random vectors 100000 times and
// then asserts that the size reported by the DB is below 1 * engine::meta::G.
// NOTE(review): presumably the DBTest2 fixture configures an archive policy
// that keeps the size bounded -- confirm against the fixture definition.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {
    static const std::string group_name = "test_group";
    static const int group_dim = 256;
    uint64_t size = 0;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    // The creation status is not checked directly; the table is read back and
    // the describe status plus the stored dimension are verified instead.
    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    engine::IDNumbers vector_ids;

    // The value obtained here is overwritten below; the call is kept so the
    // sequence of DB operations stays the same.
    db_->Size(size);

    // Random payload owned by a vector so it is released even when a fatal
    // assertion returns from the test early.
    const int d = group_dim;
    const int nb = 20;
    std::vector<float> xb(d * nb);
    for (int i = 0; i < nb; i++) {
        for (int j = 0; j < d; j++) {
            xb[d * i + j] = drand48();
        }
        xb[d * i] += i / 2000.;
    }

    const int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(group_name, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Pause before measuring (presumably to let background work settle).
    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_TRUE(size < 1 * engine::meta::G);
}


112 113
// Runs inserts and queries concurrently against one table.
// The main thread inserts batches of random vectors in a loop; on iteration 40
// it inserts the query vectors themselves and records their ids in target_ids.
// A second thread waits 2 seconds, then runs 10 queries (one per second) and
// expects the top hit of every query vector to be the matching target id.
TEST_F(DBTest, DB_TEST) {
    static const std::string group_name = "test_group";
    static const int group_dim = 256;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Buffers are vectors declared before the thread so they outlive it and
    // are released on every exit path.
    const int d = group_dim;
    const int nb = 50;
    std::vector<float> xb(d * nb);
    for (int i = 0; i < nb; i++) {
        for (int j = 0; j < d; j++) {
            xb[d * i + j] = drand48();
        }
        xb[d * i] += i / 2000.;
    }

    const int qb = 5;
    std::vector<float> qxb(d * qb);
    for (int i = 0; i < qb; i++) {
        for (int j = 0; j < d; j++) {
            qxb[d * i + j] = drand48();
        }
        qxb[d * i] += i / 2000.;
    }

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Start late so that the main thread has already inserted the query
        // vectors (iteration 40). NOTE(review): target_ids is shared with the
        // main thread without synchronization; this relies on timing only.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (int j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is taken after Size() updated count, so
            // the `count >= prev_count` assertion below can never fail.
            prev_count = count;

            START_TIMER;
            // Thread-local status: avoids writing to the main thread's `stat`.
            engine::Status query_stat = db_->Query(group_name, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(query_stat);
            // Guard the indexing below so a short result set fails the test
            // instead of reading out of bounds.
            ASSERT_TRUE(results.size() >= static_cast<size_t>(qb));
            ASSERT_TRUE(target_ids.size() >= static_cast<size_t>(qb));
            for (int q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // Joins the search thread on every exit path: a fatal assertion below
    // returns from the test body, and destroying a joinable std::thread would
    // terminate the process.
    struct ThreadJoiner {
        std::thread& thread;
        ~ThreadJoiner() {
            if (thread.joinable()) {
                thread.join();
            }
        }
    } join_guard{search};

    const int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        if (i == 40) {
            db_->InsertVectors(group_name, qb, qxb.data(), target_ids);
            ASSERT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(group_name, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
X
xj.lin 已提交
201

202
// Inserts 250000 random vectors in batches of 100, waits, and then checks that
// a query for 10 random vectors (top 5 each) returns a successful status.
// The returned neighbours themselves are not validated yet (see TODO below).
TEST_F(DBTest, SEARCH_TEST) {
    static const std::string group_name = "test_group";
    static const int group_dim = 256;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    // prepare raw data
    const size_t nb = 250000;
    const size_t nq = 10;
    const size_t k = 5;
    std::vector<float> xb(nb * group_dim);
    std::vector<float> xq(nq * group_dim);
    std::vector<long> ids(nb);

    // Base vectors are drawn first, then query vectors, all uniformly from
    // [-1, 1); the generator is seeded from std::random_device.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (auto& value : xb) {
        value = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; ++i) {
        ids[i] = static_cast<long>(i);
    }
    for (auto& value : xq) {
        value = dis_xt(gen);
    }

    // insert data
    // NOTE(review): the same `ids` container is handed to every InsertVectors
    // call; whether the DB reads or overwrites it is not visible from here.
    const size_t batch_size = 100;
    for (size_t j = 0; j < nb / batch_size; ++j) {
        stat = db_->InsertVectors(group_name, batch_size, xb.data() + batch_size * j * group_dim, ids);
        if (j == 200) {
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        ASSERT_STATS(stat);
    }

    // wait until build index finish
    std::this_thread::sleep_for(std::chrono::seconds(2));

    engine::QueryResults results;
    stat = db_->Query(group_name, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
261

Z
zhiru 已提交
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
// Builds a DB whose archive configuration is ("delete", "disk:1"), inserts the
// same batch of random vectors 100000 times and then asserts that the size
// reported by the DB is below 1 * engine::meta::G. All data is dropped at the
// end of the test.
TEST_F(MySQLDBTest, ARHIVE_DISK_CHECK) {
    auto options = GetOptions();
    options.meta.archive_conf = engine::ArchiveConf("delete", "disk:1");
    auto db_ = engine::DBFactory::Build(options);

    static const std::string group_name = "test_group";
    static const int group_dim = 256;
    uint64_t size = 0;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    // The creation status is not checked directly; the table is read back and
    // the describe status plus the stored dimension are verified instead.
    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    engine::IDNumbers vector_ids;

    // The value obtained here is overwritten below; the call is kept so the
    // sequence of DB operations stays the same.
    db_->Size(size);

    // Random payload owned by a vector so it is released even when a fatal
    // assertion returns from the test early.
    const int d = group_dim;
    const int nb = 20;
    std::vector<float> xb(d * nb);
    for (int i = 0; i < nb; i++) {
        for (int j = 0; j < d; j++) {
            xb[d * i + j] = drand48();
        }
        xb[d * i] += i / 2000.;
    }

    const int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(group_name, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Pause before measuring (presumably to let background archiving run).
    std::this_thread::sleep_for(std::chrono::seconds(10));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
//    LOG(DEBUG) << "1 * engine::meta::G=" << 1 * engine::meta::G;
    ASSERT_TRUE(size < 1 * engine::meta::G);

    // NOTE(review): not reached when a fatal assertion above fails, so a
    // failed run leaves its data behind.
    db_->DropAll();
}

// Runs inserts and queries concurrently against one table of a DB built from
// GetOptions().
// The main thread inserts batches of random vectors in a loop; on iteration 40
// it inserts the query vectors themselves and records their ids in target_ids.
// A second thread waits 2 seconds, then runs 10 queries (one per second) and
// expects the top hit of every query vector to be the matching target id.
// All data is dropped at the end of the test.
TEST_F(MySQLDBTest, DB_TEST) {
    auto options = GetOptions();
    auto db_ = engine::DBFactory::Build(options);

    static const std::string group_name = "test_group";
    static const int group_dim = 256;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Buffers are vectors declared before the thread so they outlive it and
    // are released on every exit path.
    const int d = group_dim;
    const int nb = 50;
    std::vector<float> xb(d * nb);
    for (int i = 0; i < nb; i++) {
        for (int j = 0; j < d; j++) {
            xb[d * i + j] = drand48();
        }
        xb[d * i] += i / 2000.;
    }

    const int qb = 5;
    std::vector<float> qxb(d * qb);
    for (int i = 0; i < qb; i++) {
        for (int j = 0; j < d; j++) {
            qxb[d * i + j] = drand48();
        }
        qxb[d * i] += i / 2000.;
    }

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Start late so that the main thread has already inserted the query
        // vectors (iteration 40). NOTE(review): target_ids is shared with the
        // main thread without synchronization; this relies on timing only.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (int j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is taken after Size() updated count, so
            // the `count >= prev_count` assertion below can never fail.
            prev_count = count;

            START_TIMER;
            // Thread-local status: avoids writing to the main thread's `stat`.
            engine::Status query_stat = db_->Query(group_name, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(query_stat);
            // Guard the indexing below so a short result set fails the test
            // instead of reading out of bounds.
            ASSERT_TRUE(results.size() >= static_cast<size_t>(qb));
            ASSERT_TRUE(target_ids.size() >= static_cast<size_t>(qb));
            for (int q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // Joins the search thread on every exit path: a fatal assertion below
    // returns from the test body, and destroying a joinable std::thread would
    // terminate the process.
    struct ThreadJoiner {
        std::thread& thread;
        ~ThreadJoiner() {
            if (thread.joinable()) {
                thread.join();
            }
        }
    } join_guard{search};

    const int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        if (i == 40) {
            db_->InsertVectors(group_name, qb, qxb.data(), target_ids);
            ASSERT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(group_name, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    db_->DropAll();
}

// Builds a DB from GetOptions(), inserts 250000 random vectors in batches of
// 100, waits, and then checks that a query for 10 random vectors (top 5 each)
// returns a successful status. All data is dropped afterwards. The returned
// neighbours themselves are not validated yet (see TODO below).
TEST_F(MySQLDBTest, SEARCH_TEST) {
    auto options = GetOptions();
    auto db_ = engine::DBFactory::Build(options);

    static const std::string group_name = "test_group";
    static const int group_dim = 256;

    engine::meta::TableSchema group_info;
    group_info.dimension_ = group_dim;
    group_info.table_id_ = group_name;
    group_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IVFFLAT);
    engine::Status stat = db_->CreateTable(group_info);

    engine::meta::TableSchema group_info_get;
    group_info_get.table_id_ = group_name;
    stat = db_->DescribeTable(group_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(group_info_get.dimension_, group_dim);

    // prepare raw data
    const size_t nb = 250000;
    const size_t nq = 10;
    const size_t k = 5;
    std::vector<float> xb(nb * group_dim);
    std::vector<float> xq(nq * group_dim);
    std::vector<long> ids(nb);

    // Base vectors are drawn first, then query vectors, all uniformly from
    // [-1, 1); the generator is seeded from std::random_device.
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (auto& value : xb) {
        value = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; ++i) {
        ids[i] = static_cast<long>(i);
    }
    for (auto& value : xq) {
        value = dis_xt(gen);
    }

    // insert data
    // NOTE(review): the same `ids` container is handed to every InsertVectors
    // call; whether the DB reads or overwrites it is not visible from here.
    const size_t batch_size = 100;
    for (size_t j = 0; j < nb / batch_size; ++j) {
        stat = db_->InsertVectors(group_name, batch_size, xb.data() + batch_size * j * group_dim, ids);
        if (j == 200) {
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        ASSERT_STATS(stat);
    }

    // wait until build index finish
    std::this_thread::sleep_for(std::chrono::seconds(2));

    engine::QueryResults results;
    stat = db_->Query(group_name, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    db_->DropAll();

    // TODO(linxj): add groundTruth assert
}