db_tests.cpp 10.1 KB
Newer Older
G
groot 已提交
1 2 3 4 5
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
X
Xu Peng 已提交
6
#include "utils.h"
G
groot 已提交
7
#include "db/DB.h"
X
Xu Peng 已提交
8
#include "db/DBImpl.h"
S
starlord 已提交
9
#include "db/meta/MetaConsts.h"
Z
zhiru 已提交
10
#include "db/Factories.h"
G
groot 已提交
11

J
jinhai 已提交
12 13 14 15 16 17 18 19
#include <gtest/gtest.h>
#include <easylogging++.h>

#include <boost/filesystem.hpp>

#include <thread>
#include <random>

J
jinhai 已提交
20
using namespace zilliz::milvus;
G
groot 已提交
21

G
groot 已提交
22 23
namespace {

J
jinhai 已提交
24
    // Table id used by every test in this file (see BuildTableSchema).
    static constexpr const char* TABLE_NAME = "test_group";
    // Number of float components per vector.
    static constexpr int64_t TABLE_DIM = 256;
    // Number of base vectors generated by SEARCH_TEST.
    static constexpr int64_t VECTOR_COUNT = 250000;
    // Iteration count of the insert loops; DELETE_TEST also uses it as a per-batch vector count.
    static constexpr int64_t INSERT_LOOP = 10000;
G
groot 已提交
28

Z
zhiru 已提交
29 30 31 32 33 34 35
    // Returns the schema shared by the tests: table id TABLE_NAME, TABLE_DIM
    // dimensions, and the FAISS_IDMAP engine type stored as an int.
    engine::meta::TableSchema BuildTableSchema() {
        engine::meta::TableSchema schema;
        schema.table_id_ = TABLE_NAME;
        schema.dimension_ = TABLE_DIM;
        schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
        return schema;
    }
G
groot 已提交
36

Z
zhiru 已提交
37 38 39 40 41 42 43 44
    // Fills `vectors` with `n` vectors of TABLE_DIM floats each, stored back to back.
    //
    // Every component is drawn from drand48(); the first component of vector i is
    // then shifted by i / 2000 so that consecutive vectors drift apart.
    //
    // @param n        number of vectors to generate
    // @param vectors  output buffer; previous contents are discarded and the
    //                 buffer is resized to n * TABLE_DIM elements
    void BuildVectors(int64_t n, std::vector<float>& vectors) {
        vectors.clear();
        vectors.resize(n * TABLE_DIM);
        float* data = vectors.data();
        // The index has the same type as the count so the comparison cannot truncate.
        for (int64_t i = 0; i < n; ++i) {
            float* row = data + TABLE_DIM * i;
            for (int64_t j = 0; j < TABLE_DIM; ++j) {
                row[j] = static_cast<float>(drand48());
            }
            row[0] += i / 2000.;
        }
    }

}

X
Xu Peng 已提交
49 50
// Exercises engine::ArchiveConf construction: rejected inputs must throw, and
// accepted inputs must expose the expected type and criteria.
TEST_F(DBTest, CONFIG_TEST) {
    {
        // An unrecognised archive type is rejected.
        ASSERT_ANY_THROW(engine::ArchiveConf conf("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }
    {
        // "delete" without a criteria string yields no criteria.
        engine::ArchiveConf conf("delete");
        ASSERT_EQ(conf.GetType(), "delete");
        auto criterias = conf.GetCriterias();
        ASSERT_TRUE(criterias.size() == 0);
    }
    {
        // "swap" without a criteria string yields no criteria.
        engine::ArchiveConf conf("swap");
        ASSERT_EQ(conf.GetType(), "swap");
        auto criterias = conf.GetCriterias();
        ASSERT_TRUE(criterias.size() == 0);
    }
    {
        // "disk" criterion: a missing or non-numeric value throws, a number is stored.
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a"));
        engine::ArchiveConf conf("swap", "disk:1024");
        auto criterias = conf.GetCriterias();
        ASSERT_TRUE(criterias.size() == 1);
        ASSERT_TRUE(criterias["disk"] == 1024);
    }
    {
        // "days" criterion: a missing or non-numeric value throws, a number is stored.
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a"));
        engine::ArchiveConf conf("swap", "days:100");
        auto criterias = conf.GetCriterias();
        ASSERT_TRUE(criterias.size() == 1);
        ASSERT_TRUE(criterias["days"] == 100);
    }
    {
        // Two criteria separated by ';' are both stored.
        // NOTE(review): the two throw checks below repeat the "days" checks of the
        // previous scope verbatim; they do not exercise a malformed combined string.
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a"));
        engine::ArchiveConf conf("swap", "days:100;disk:200");
        auto criterias = conf.GetCriterias();
        ASSERT_TRUE(criterias.size() == 2);
        ASSERT_TRUE(criterias["days"] == 100);
        ASSERT_TRUE(criterias["disk"] == 200);
    }
}

X
Xu Peng 已提交
93

94
// Inserts batches from the main thread while a second thread repeatedly queries
// the table and checks that each query vector's top hit is the id it was
// inserted under.
TEST_F(DBTest, DB_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Bulk data inserted on every iteration except the 41st.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; they are also inserted once so they can be found again.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // NOTE(review): target_ids is written by the main thread (at i == 40) and
        // read below without synchronization; this sleep is the only ordering.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is copied from count right after Size(), so
            // the count >= prev_count check at the end of this iteration cannot fail.
            prev_count = count;

            START_TIMER;
            // The status is kept local to this thread instead of sharing the outer `stat`.
            engine::Status query_stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(query_stat);
            // `q` indexes the query vectors; it no longer shadows the top-k `k` above.
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // A fatal ASSERT_* below returns from this function early. Destroying a
    // joinable std::thread calls std::terminate, so join on every exit path.
    // Declared after `search` and everything the lambda captures, so the join
    // happens before any captured variable is destroyed.
    struct ThreadJoiner {
        std::thread& thread_;
        ~ThreadJoiner() {
            if (thread_.joinable()) {
                thread_.join();
            }
        }
    };
    ThreadJoiner search_joiner{search};

    int loop = INSERT_LOOP;

    for (auto i = 0; i < loop; ++i) {
        if (i == 40) {
            // Insert the query vectors once and remember their ids for the search thread.
            db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
            ASSERT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
X
xj.lin 已提交
164

165
// Inserts VECTOR_COUNT random vectors in batches, builds the index, then runs a
// plain query and a query restricted to a list of file ids. Only the returned
// statuses are checked.
TEST_F(DBTest, SEARCH_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    size_t nb = VECTOR_COUNT;
    size_t nq = 10;
    size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (float& value : xb) {
        value = static_cast<float>(dis_xt(gen));
    }
    for (size_t i = 0; i < nb; ++i) {
        ids[i] = static_cast<long>(i);
    }
    for (float& value : xq) {
        value = static_cast<float>(dis_xt(gen));
    }

    // insert data, batch_size vectors at a time
    const int64_t batch_size = 100;
    const int64_t batch_count = static_cast<int64_t>(nb) / batch_size;
    for (int64_t j = 0; j < batch_count; ++j) {
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data() + batch_size * j * TABLE_DIM, ids);
        if (j == 200) {
            // One-second pause after the 201st batch, as in the original test.
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        ASSERT_STATS(stat);
    }

    db_->BuildIndex(TABLE_NAME); // wait until the index build finishes

    {
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
        ASSERT_STATS(stat);
    }

    {   // search restricted to specific index files
        engine::meta::DatesT dates;
        std::vector<std::string> file_ids = {"4", "5", "6"};
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, file_ids, k, nq, xq.data(), dates, results);
        ASSERT_STATS(stat);
    }

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
228

Y
Yu Kun 已提交
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
// Inserts data, then checks that PreloadTable succeeds and increases the CPU
// cache usage reported by cache::CpuCacheMgr.
TEST_F(DBTest, PRELOADTABLE_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers target_ids;

    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    const int64_t loop = INSERT_LOOP;

    // The original loop referenced `qb`/`qxb`, which are not declared in this
    // test; insert the vectors that were actually built above.
    for (int64_t i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), target_ids);
        // Assumes InsertVectors replaces (rather than appends to) the id list on
        // each call, as the per-iteration check implies -- TODO confirm.
        ASSERT_EQ(target_ids.size(), static_cast<size_t>(nb));
    }

    int64_t prev_cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();

    stat = db_->PreloadTable(TABLE_NAME);
    ASSERT_STATS(stat);
    int64_t cur_cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    ASSERT_LT(prev_cache_usage, cur_cache_usage);
}

G
groot 已提交
262
// Creates the table, inserts INSERT_LOOP small batches, and checks that the
// size reported by the DB afterwards does not exceed 1 * engine::meta::G.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    // The new table must show up in the table listing.
    std::vector<engine::meta::TableSchema> table_schema_array;
    stat = db_->AllTables(table_schema_array);
    ASSERT_STATS(stat);
    bool bfound = false;
    for (const auto& schema : table_schema_array) {
        if (schema.table_id_ == TABLE_NAME) {
            bfound = true;
            break;
        }
    }
    ASSERT_TRUE(bfound);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;

    // Initialised because the status returned by Size() is not checked.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = INSERT_LOOP;
    for (auto i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // One-second pause before measuring the size.
    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);
}
Z
zhiru 已提交
307

G
groot 已提交
308
// Creates the table, inserts data, deletes the table, and checks that
// HasTable reports it as present before and absent after the deletion.
TEST_F(DBTest2, DELETE_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);

    bool has_table = false;
    db_->HasTable(TABLE_NAME, has_table);
    ASSERT_TRUE(has_table);

    engine::IDNumbers vector_ids;

    // Initialised because the status returned by Size() is not checked.
    uint64_t size = 0;
    db_->Size(size);

    // INSERT_LOOP is used here as the number of vectors per batch.
    int64_t nb = INSERT_LOOP;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int loop = 20;
    for (auto i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // An empty date list is passed to DeleteTable.
    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);
    // Two-second pause before checking the status and the table's presence.
    std::this_thread::sleep_for(std::chrono::seconds(2));
    ASSERT_TRUE(stat.ok());

    db_->HasTable(TABLE_NAME, has_table);
    ASSERT_FALSE(has_table);
}