db_tests.cpp 8.9 KB
Newer Older
G
groot 已提交
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
X
Xu Peng 已提交
7 8
#include <thread>
#include <easylogging++.h>
G
groot 已提交
9
#include <boost/filesystem.hpp>
G
groot 已提交
10

X
Xu Peng 已提交
11
#include "utils.h"
G
groot 已提交
12
#include "db/DB.h"
X
Xu Peng 已提交
13
#include "db/DBImpl.h"
X
Xu Peng 已提交
14
#include "db/MetaConsts.h"
Z
zhiru 已提交
15
#include "db/Factories.h"
G
groot 已提交
16

J
jinhai 已提交
17
using namespace zilliz::milvus;
G
groot 已提交
18

G
groot 已提交
19 20
namespace {

Z
zhiru 已提交
21 22
    static const std::string TABLE_NAME = "test_group";
    static constexpr int64_t TABLE_DIM = 256;
S
starlord 已提交
23
    static constexpr int64_t VECTOR_COUNT = 250000;
S
starlord 已提交
24
    static constexpr int64_t INSERT_LOOP = 10000;
G
groot 已提交
25

Z
zhiru 已提交
26 27 28 29 30 31 32
    engine::meta::TableSchema BuildTableSchema() {
        engine::meta::TableSchema table_info;
        table_info.dimension_ = TABLE_DIM;
        table_info.table_id_ = TABLE_NAME;
        table_info.engine_type_ = (int)engine::EngineType::FAISS_IDMAP;
        return table_info;
    }
G
groot 已提交
33

Z
zhiru 已提交
34 35 36 37 38 39 40 41
    void BuildVectors(int64_t n, std::vector<float>& vectors) {
        vectors.clear();
        vectors.resize(n*TABLE_DIM);
        float* data = vectors.data();
        for(int i = 0; i < n; i++) {
            for(int j = 0; j < TABLE_DIM; j++) data[TABLE_DIM * i + j] = drand48();
            data[TABLE_DIM * i] += i / 2000.;
        }
G
groot 已提交
42 43 44 45
    }

}

X
Xu Peng 已提交
46 47
// Checks engine::ArchiveConf construction: rejected inputs must throw, and
// accepted inputs must expose the expected type and criteria values.
TEST_F(DBTest, CONFIG_TEST) {
    // An unknown archive type is expected to throw on construction.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf rejected("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }

    // "delete" without an explicit criteria string: one "disk" entry of 512.
    {
        engine::ArchiveConf archive_conf("delete");
        ASSERT_EQ(archive_conf.GetType(), "delete");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 512);
    }

    // "swap" without an explicit criteria string: same expectation.
    {
        engine::ArchiveConf archive_conf("swap");
        ASSERT_EQ(archive_conf.GetType(), "swap");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 512);
    }

    // Explicit "disk" criterion: empty or non-numeric values throw,
    // a numeric value is stored as given.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "disk:a"));
        engine::ArchiveConf archive_conf("swap", "disk:1024");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["disk"] == 1024);
    }

    // Explicit "days" criterion: same rules as for "disk".
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 1);
        ASSERT_TRUE(criteria["days"] == 100);
    }

    // Two criteria separated by ';' are both expected to be present.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf missing_value("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf non_numeric("swap", "days:a"));
        engine::ArchiveConf archive_conf("swap", "days:100;disk:200");
        auto criteria = archive_conf.GetCriterias();
        ASSERT_TRUE(criteria.size() == 2);
        ASSERT_TRUE(criteria["days"] == 100);
        ASSERT_TRUE(criteria["disk"] == 200);
    }
}

X
Xu Peng 已提交
92

93
// Inserts vectors on the test thread while a second thread repeatedly queries
// the table. Each query vector is expected to come back with, as its first
// hit, the id it was given when it was inserted.
TEST_F(DBTest, DB_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Bulk data inserted on almost every iteration.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; inserted once (iteration 40) so they can be found again.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Head start for the inserter. The checks below read target_ids,
        // which the test thread fills at iteration 40 of its insert loop.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j=0; j<10; ++j) {
            ss.str("");
            // Remember the size seen in the previous round *before* refreshing
            // it, so the monotonicity check below compares two different
            // samples instead of a value with itself.
            prev_count = count;
            db_->Size(count);

            START_TIMER;
            // Thread-local status: the outer `stat` belongs to the test thread.
            engine::Status query_stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(query_stat);
            // `q` indexes the query vectors; it no longer shadows the top-k `k`.
            for (int64_t q=0; q<qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto& result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // Joins the search thread on every exit path. A fatal assertion below
    // returns from the test body, and destroying a joinable std::thread
    // calls std::terminate. Declared after everything the thread captures,
    // so it runs before those objects are destroyed.
    struct JoinOnExit {
        std::thread& worker;
        ~JoinOnExit() {
            if (worker.joinable()) {
                worker.join();
            }
        }
    } join_guard{search};

    for (int64_t i=0; i<INSERT_LOOP; ++i) {
        if (i==40) {
            db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
            ASSERT_EQ(target_ids.size(), static_cast<size_t>(qb));
        } else {
            db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
X
xj.lin 已提交
163

164
// Inserts VECTOR_COUNT random vectors in batches of 100 and then runs a
// single top-k query for 10 random query vectors, checking only that every
// call reports success.
TEST_F(DBTest, SEARCH_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    size_t nb = VECTOR_COUNT;
    size_t nq = 10;
    size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    // Base vectors are drawn first, then query vectors, all uniformly
    // from [-1, 1).
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (auto& component : xb) {
        component = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; i++) {
        ids[i] = static_cast<long>(i);
    }
    for (auto& component : xq) {
        component = dis_xt(gen);
    }

    // insert data
    // NOTE(review): the same nb-sized `ids` vector is handed to every
    // 100-vector batch; confirm whether InsertVectors treats it as input
    // or as output.
    const int batch_size = 100;
    const size_t batch_count = nb / batch_size;
    for (size_t j = 0; j < batch_count; ++j) {
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids);
        if (j == 200) {
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
        ASSERT_STATS(stat);
    }

    std::this_thread::sleep_for(std::chrono::seconds(2)); // wait until build index finish

    engine::QueryResults results;
    stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
217

G
groot 已提交
218
// Creates the test table, checks that it is listed by AllTables and described
// with the expected dimension, inserts INSERT_LOOP batches of 10 vectors and
// finally expects the size reported by the database to be at most
// 1 * engine::meta::G.
// NOTE(review): presumably this exercises a disk-based archive limit
// configured by the DBTest2 fixture - confirm against the fixture setup.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    // The freshly created table must show up in the table listing.
    std::vector<engine::meta::TableSchema> table_schema_array;
    stat = db_->AllTables(table_schema_array);
    ASSERT_STATS(stat);
    bool bfound = false;
    for(const auto& schema : table_schema_array) {
        if(schema.table_id_ == TABLE_NAME) {
            bfound = true;
            break;
        }
    }
    ASSERT_TRUE(bfound);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;

    // Initialised so the final comparison never reads an indeterminate value
    // should Size() return without writing its output.
    uint64_t size = 0;
    db_->Size(size);

    int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    for (int64_t i=0; i<INSERT_LOOP; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Pause before sampling the size again.
    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);
}
Z
zhiru 已提交
263

G
groot 已提交
264
// Creates the test table, fills it with data, deletes it and then expects the
// table's on-disk location to be gone.
TEST_F(DBTest2, DELETE_TEST) {

    engine::meta::TableSchema schema = BuildTableSchema();
    engine::Status stat = db_->CreateTable(schema);

    // Look the table up again to learn where it lives on disk.
    engine::meta::TableSchema described;
    described.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(described);
    ASSERT_STATS(stat);

    ASSERT_TRUE(boost::filesystem::exists(described.location_));

    engine::IDNumbers inserted_ids;

    uint64_t db_size;
    db_->Size(db_size);

    // One batch of INSERT_LOOP vectors, inserted 20 times over.
    int64_t batch_vectors = INSERT_LOOP;
    std::vector<float> batch;
    BuildVectors(batch_vectors, batch);

    int rounds = 20;
    for (int round = 0; round < rounds; ++round) {
        db_->InsertVectors(TABLE_NAME, batch_vectors, batch.data(), inserted_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // An empty date list is passed to DeleteTable.
    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);
    // Wait two seconds before checking the outcome on disk.
    std::this_thread::sleep_for(std::chrono::seconds(2));
    ASSERT_TRUE(stat.ok());
    ASSERT_FALSE(boost::filesystem::exists(described.location_));
}