db_tests.cpp 13.2 KB
Newer Older
G
groot 已提交
1 2 3 4 5
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
X
Xu Peng 已提交
6
#include "utils.h"
G
groot 已提交
7
#include "db/DB.h"
X
Xu Peng 已提交
8
#include "db/DBImpl.h"
S
starlord 已提交
9
#include "db/meta/MetaConsts.h"
Z
zhiru 已提交
10
#include "db/Factories.h"
Y
Yu Kun 已提交
11
#include "cache/CpuCacheMgr.h"
12
#include "utils/CommonUtil.h"
G
groot 已提交
13

J
jinhai 已提交
14 15 16 17 18 19 20 21
#include <gtest/gtest.h>
#include <easylogging++.h>

#include <boost/filesystem.hpp>

#include <thread>
#include <random>

J
jinhai 已提交
22
using namespace zilliz::milvus;
G
groot 已提交
23

G
groot 已提交
24 25
namespace {

J
jinhai 已提交
26
    // Table id shared by every test in this file.
    const char* TABLE_NAME = "test_group";

    // Number of floats per vector; also used as the table dimension.
    constexpr int64_t TABLE_DIM = 256;

    // Number of base vectors generated by SEARCH_TEST.
    constexpr int64_t VECTOR_COUNT = 250000;

    // Iteration count for the insert loops (the delete tests also reuse it
    // as a per-batch vector count).
    constexpr int64_t INSERT_LOOP = 10000;

    // Time unit conversions used by the date helpers below.
    constexpr int64_t SECONDS_EACH_HOUR = 3600;
    constexpr int64_t DAY_SECONDS = 24 * 60 * 60;
G
groot 已提交
32

Z
zhiru 已提交
33 34 35 36 37 38 39
    /**
     * Creates the table schema used by the tests in this file: id TABLE_NAME,
     * dimension TABLE_DIM and the FAISS_IDMAP engine type.
     *
     * @return the populated schema (all other fields keep their defaults)
     */
    engine::meta::TableSchema BuildTableSchema() {
        engine::meta::TableSchema schema;
        schema.table_id_ = TABLE_NAME;
        schema.dimension_ = TABLE_DIM;
        schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
        return schema;
    }
G
groot 已提交
40

Z
zhiru 已提交
41 42 43 44 45 46 47 48
    /**
     * Fills `vectors` with n rows of TABLE_DIM pseudo-random floats.
     *
     * Every component comes from drand48(); the first component of row i is
     * additionally shifted by i / 2000.
     *
     * @param n       number of rows to generate
     * @param vectors output buffer; previous content is discarded and the
     *                buffer is resized to n * TABLE_DIM elements
     */
    void BuildVectors(int64_t n, std::vector<float>& vectors) {
        vectors.clear();
        vectors.resize(n*TABLE_DIM);
        for (int row = 0; row < n; row++) {
            float* row_data = vectors.data() + TABLE_DIM * row;
            for (int col = 0; col < TABLE_DIM; col++) {
                row_data[col] = drand48();
            }
            row_data[0] += row / 2000.;
        }
    }

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
    /**
     * Formats a calendar date as "YYYY-M-D" (month and day are not
     * zero-padded).
     *
     * The date is derived from the current time plus a fixed 8 hours plus
     * `offset_day` whole days, broken down as UTC.
     *
     * @param offset_day number of days to add; negative values move into the past
     * @return the formatted date, or an empty string if the time value cannot
     *         be broken down into calendar fields
     */
    std::string CurrentTmDate(int64_t offset_day = 0) {
        time_t tt = time(nullptr);
        tt = tt + 8*SECONDS_EACH_HOUR;
        tt = tt + 24*SECONDS_EACH_HOUR*offset_day;

        // gmtime() returns a pointer into shared static storage which any other
        // thread calling gmtime()/localtime() may overwrite; gmtime_r() writes
        // into a caller-owned struct instead.
        tm broken_down;
        if (gmtime_r(&tt, &broken_down) == nullptr) {
            return std::string();
        }

        std::string str = std::to_string(broken_down.tm_year + 1900) + "-"
                          + std::to_string(broken_down.tm_mon + 1)
                          + "-" + std::to_string(broken_down.tm_mday);

        return str;
    }

    /**
     * Expands a time range into one DB date value per whole day in the range.
     *
     * Both strings are parsed with CommonUtil::TimeStrToTime. One entry is
     * produced for every full DAY_SECONDS step between the two timestamps,
     * starting at the earlier one; the later bound itself is not included.
     * The two arguments may be given in either order.
     *
     * @param start_value one end of the range, as a time string
     * @param end_value   the other end of the range, as a time string
     * @param dates       output; always cleared first, and left empty when
     *                    either string fails to parse or the range is shorter
     *                    than one day
     */
    void
    ConvertTimeRangeToDBDates(const std::string &start_value,
                              const std::string &end_value,
                              std::vector<engine::meta::DateT > &dates) {
        dates.clear();

        time_t tt_start, tt_end;
        tm tm_start, tm_end;
        if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(start_value, tt_start, tm_start)) {
            return;
        }

        if (!zilliz::milvus::server::CommonUtil::TimeStrToTime(end_value, tt_end, tm_end)) {
            return;
        }

        // Normalise the bounds so a reversed range is walked from its earlier
        // end. Previously the day count was the absolute distance but the walk
        // still began at tt_start, producing dates beyond the requested range.
        const time_t tt_first = (tt_start < tt_end) ? tt_start : tt_end;
        const time_t tt_last = (tt_start < tt_end) ? tt_end : tt_start;

        const long days = (tt_last - tt_first) / DAY_SECONDS;
        for (long i = 0; i < days; i++) {
            time_t tt_day = tt_first + DAY_SECONDS * i;
            tm tm_day;
            zilliz::milvus::server::CommonUtil::ConvertTime(tt_day, tm_day);

            long date = tm_day.tm_year * 10000 + tm_day.tm_mon * 100 +
                        tm_day.tm_mday;//according to db logic
            dates.push_back(date);
        }
    }

G
groot 已提交
97 98
}

X
Xu Peng 已提交
99 100
// Exercises engine::ArchiveConf construction: which type/criteria strings are
// expected to throw and which criteria a successfully built conf reports.
TEST_F(DBTest, CONFIG_TEST) {
    // The type "wrong" is expected to throw.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }

    // "delete" without a criteria string: type is echoed, no criteria.
    {
        engine::ArchiveConf archive_conf("delete");
        ASSERT_EQ(archive_conf.GetType(), "delete");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 0);
    }

    // "swap" without a criteria string: type is echoed, no criteria.
    {
        engine::ArchiveConf archive_conf("swap");
        ASSERT_EQ(archive_conf.GetType(), "swap");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 0);
    }

    // "disk" criterion: missing or non-numeric values throw, "disk:1024" parses.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a"));
        engine::ArchiveConf archive_conf("swap", "disk:1024");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 1024);
    }

    // "days" criterion: missing or non-numeric values throw, "days:100" parses.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["days"] == 100);
    }

    // Two criteria separated by ';' are both reported.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100;disk:200");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 2);
        ASSERT_TRUE(criteria["days"] == 100);
        ASSERT_TRUE(criteria["disk"] == 200);
    }
}

X
Xu Peng 已提交
143

144
// Inserts vectors on the main thread while a second thread repeatedly queries
// the table. A small batch of "target" vectors is inserted once (iteration 40);
// the search thread expects each query vector's top hit to be the id assigned
// to the matching target vector, and the reported DB size never to shrink
// between polls.
TEST_F(DBTest, DB_TEST) {
    db_->Open(GetOptions(), &db_);
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Filler vectors inserted on every iteration except the target one.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; the same data is inserted once as the search targets.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Give the insert loop a head start before the first query.
        // NOTE(review): the target ids are only guaranteed to exist if the
        // main thread has passed iteration 40 by now - timing based.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j=0; j<10; ++j) {
            ss.str("");
            // Remember the previous reading BEFORE refreshing it; assigning
            // afterwards made the monotonicity check below a tautology.
            prev_count = count;
            db_->Size(count);

            START_TIMER;
            stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // Index renamed so it no longer shadows the outer `k`.
            for (auto q=0; q<qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (auto result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = INSERT_LOOP;

    for (auto i=0; i<loop; ++i) {
        if (i==40) {
            db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
            // Non-fatal on purpose: a fatal ASSERT would return from the test
            // body while `search` is still joinable, and destroying a joinable
            // std::thread calls std::terminate().
            EXPECT_EQ(target_ids.size(), qb);
        } else {
            db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    uint64_t count;
    stat = db_->GetTableRowCount(TABLE_NAME, count);
    ASSERT_STATS(stat);
    ASSERT_TRUE(count > 0);
}
X
xj.lin 已提交
220

221
// Inserts VECTOR_COUNT random vectors in batches, requests an index build and
// then checks that both Query overloads (whole table, and restricted to a
// list of file ids) return an OK status. Result contents are not verified.
TEST_F(DBTest, SEARCH_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    size_t nb = VECTOR_COUNT;
    size_t nq = 10;
    size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (auto& value : xb) {
        value = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; i++) {
        ids[i] = i;
    }
    for (auto& value : xq) {
        value = dis_xt(gen);
    }

    // insert data
    const int batch_size = 100;
    const size_t batch_count = nb / batch_size;
    for (size_t j = 0; j < batch_count; ++j) {
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids);
        // Pause once part-way through the inserts; std::this_thread is used
        // instead of POSIX sleep() to match the rest of the file and avoid
        // depending on <unistd.h>.
        if (j == 200) {
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        ASSERT_STATS(stat);
    }

    db_->BuildIndex(TABLE_NAME); // wait until build index finish

    {
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, nq, 10, xq.data(), results);
        ASSERT_STATS(stat);
    }

    {//search by specify index file
        engine::meta::DatesT dates;
        std::vector<std::string> file_ids = {"4", "5", "6"};
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, file_ids, k, nq, 10, xq.data(), dates, results);
        ASSERT_STATS(stat);
    }

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
284

Y
Yu Kun 已提交
285 286 287 288 289 290 291 292 293 294 295 296 297
// Inserts several large batches, requests an index build and then checks that
// PreloadTable() succeeds and increases the CPU cache usage.
TEST_F(DBTest, PRELOADTABLE_TEST) {
    engine::meta::TableSchema schema = BuildTableSchema();
    engine::Status stat = db_->CreateTable(schema);

    engine::meta::TableSchema described;
    described.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(described);
    ASSERT_STATS(stat);
    ASSERT_EQ(described.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    int64_t nb = 100000;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Insert the same batch `loop` times; each call must hand back nb ids.
    int loop = 5;
    for (int round = 0; round < loop; ++round) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), target_ids);
        ASSERT_EQ(target_ids.size(), nb);
    }

    db_->BuildIndex(TABLE_NAME);

    // Cache usage is sampled immediately before and after the preload.
    int64_t usage_before = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    stat = db_->PreloadTable(TABLE_NAME);
    ASSERT_STATS(stat);
    int64_t usage_after = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    ASSERT_LT(usage_before, usage_after);
}

G
groot 已提交
318
// Creates the test table, confirms it is listed by AllTables(), inserts many
// small batches and finally checks that the reported DB size does not exceed
// 1 * engine::meta::G.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {
    engine::meta::TableSchema schema = BuildTableSchema();
    engine::Status stat = db_->CreateTable(schema);

    // The freshly created table must show up in the table listing.
    std::vector<engine::meta::TableSchema> all_tables;
    stat = db_->AllTables(all_tables);
    ASSERT_STATS(stat);
    bool table_listed = false;
    for (const auto& entry : all_tables) {
        table_listed = (entry.table_id_ == TABLE_NAME);
        if (table_listed) {
            break;
        }
    }
    ASSERT_TRUE(table_listed);

    engine::meta::TableSchema described;
    described.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(described);
    ASSERT_STATS(stat);
    ASSERT_EQ(described.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    uint64_t size;
    db_->Size(size);

    int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = INSERT_LOOP;
    for (int round = 0; round < loop; ++round) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Wait a moment before sampling the size again.
    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);
}
Z
zhiru 已提交
363

G
groot 已提交
364 365 366 367 368 369 370 371 372
// Creates the test table, fills it with data, deletes it with an empty date
// list and checks that HasTable() flips from true to false.
TEST_F(DBTest2, DELETE_TEST) {
    engine::meta::TableSchema schema = BuildTableSchema();
    engine::Status stat = db_->CreateTable(schema);

    engine::meta::TableSchema described;
    described.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(described);
    ASSERT_STATS(stat);

    bool has_table = false;
    db_->HasTable(TABLE_NAME, has_table);
    ASSERT_TRUE(has_table);

    engine::IDNumbers vector_ids;

    uint64_t size;
    db_->Size(size);

    int64_t nb = INSERT_LOOP;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 20;
    for (int round = 0; round < loop; ++round) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // An empty date list is passed to DeleteTable; the test then waits two
    // seconds before checking the outcome.
    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);
    std::this_thread::sleep_for(std::chrono::seconds(2));
    ASSERT_TRUE(stat.ok());

    db_->HasTable(TABLE_NAME, has_table);
    ASSERT_FALSE(has_table);
}

// Builds a separate DB instance (sqlite meta backend under /tmp/milvus_test),
// fills the test table and calls DeleteTable() with the dates covering the
// range [today-3, today-2). Only the calls themselves are exercised; nothing
// is asserted after the delete.
TEST_F(DBTest2, DELETE_BY_RANGE_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    // Deliberately not the fixture's db_: this test works on its own instance.
    // NOTE(review): the object returned by Build() is never released here -
    // confirm whether the factory returns an owning smart pointer.
    auto range_db = engine::DBFactory::Build(options);

    engine::meta::TableSchema schema = BuildTableSchema();
    engine::Status stat = range_db->CreateTable(schema);

    engine::meta::TableSchema described;
    described.table_id_ = TABLE_NAME;
    stat = range_db->DescribeTable(described);
    ASSERT_STATS(stat);

    bool has_table = false;
    range_db->HasTable(TABLE_NAME, has_table);
    ASSERT_TRUE(has_table);

    engine::IDNumbers vector_ids;

    uint64_t size;
    range_db->Size(size);

    int64_t nb = INSERT_LOOP;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 20;
    for (int round = 0; round < loop; ++round) {
        range_db->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Date range from three days ago up to (not including) two days ago.
    std::vector<engine::meta::DateT> dates;
    std::string range_begin = CurrentTmDate(-3);
    std::string range_end = CurrentTmDate(-2);
    ConvertTimeRangeToDBDates(range_begin, range_end, dates);

    range_db->DeleteTable(TABLE_NAME, dates);
}