db_tests.cpp 8.6 KB
Newer Older
G
groot 已提交
1 2 3 4 5 6
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
X
Xu Peng 已提交
7 8
#include <thread>
#include <easylogging++.h>
G
groot 已提交
9
#include <boost/filesystem.hpp>
G
groot 已提交
10

X
Xu Peng 已提交
11
#include "utils.h"
G
groot 已提交
12
#include "db/DB.h"
X
Xu Peng 已提交
13
#include "db/DBImpl.h"
X
Xu Peng 已提交
14
#include "db/MetaConsts.h"
G
groot 已提交
15

J
jinhai 已提交
16
using namespace zilliz::milvus;
G
groot 已提交
17

G
groot 已提交
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
namespace {

// Table id used by the tests that rely on BuildTableSchema().
const std::string TABLE_NAME = "test_group";
// Number of float components per test vector.
constexpr int64_t TABLE_DIM = 256;

// Returns a schema with table id TABLE_NAME, dimension TABLE_DIM and the
// FAISS_IDMAP engine type.
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema table_info;
    table_info.dimension_ = TABLE_DIM;
    table_info.table_id_ = TABLE_NAME;
    table_info.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return table_info;
}

// Replaces the content of `vectors` with n rows of TABLE_DIM floats.
// Every component is drawn from drand48(); the first component of row i is
// then shifted by i / 2000.
// Indices are int64_t so they match the type of `n` and TABLE_DIM.
void BuildVectors(int64_t n, std::vector<float>& vectors) {
    vectors.clear();
    vectors.resize(n*TABLE_DIM);
    float* data = vectors.data();
    for (int64_t i = 0; i < n; ++i) {
        float* row = data + TABLE_DIM * i;
        for (int64_t j = 0; j < TABLE_DIM; ++j) {
            row[j] = static_cast<float>(drand48());
        }
        row[0] += i / 2000.;
    }
}

}

X
Xu Peng 已提交
43 44
// Checks construction of engine::ArchiveConf from a type string and an
// optional "key:value[;key:value]" criteria string.
TEST_F(DBTest, CONFIG_TEST) {
    // An unknown archive type is expected to throw.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf("wrong"));
        /* EXPECT_DEATH(engine::ArchiveConf conf("wrong"), ""); */
    }
    // With no criteria string, "delete" is expected to yield a single
    // "disk" criterion equal to 512.
    {
        engine::ArchiveConf conf("delete");
        ASSERT_EQ(conf.GetType(), "delete");
        auto criterias = conf.GetCriterias();
        ASSERT_EQ(criterias.size(), 1u);
        ASSERT_TRUE(criterias["disk"] == 512);
    }
    // Same expectation for "swap".
    {
        engine::ArchiveConf conf("swap");
        ASSERT_EQ(conf.GetType(), "swap");
        auto criterias = conf.GetCriterias();
        ASSERT_EQ(criterias.size(), 1u);
        ASSERT_TRUE(criterias["disk"] == 512);
    }
    // "disk" criterion: a missing or non-numeric value throws, a numeric one is stored.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "disk:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "disk:a"));
        engine::ArchiveConf conf("swap", "disk:1024");
        auto criterias = conf.GetCriterias();
        ASSERT_EQ(criterias.size(), 1u);
        ASSERT_TRUE(criterias["disk"] == 1024);
    }
    // "days" criterion: a missing or non-numeric value throws, a numeric one is stored.
    {
        ASSERT_ANY_THROW(engine::ArchiveConf conf1("swap", "days:"));
        ASSERT_ANY_THROW(engine::ArchiveConf conf2("swap", "days:a"));
        engine::ArchiveConf conf("swap", "days:100");
        auto criterias = conf.GetCriterias();
        ASSERT_EQ(criterias.size(), 1u);
        ASSERT_TRUE(criterias["days"] == 100);
    }
    // Two criteria separated by ';' are both stored.
    // (The invalid "days:" inputs are already covered by the scope above.)
    {
        engine::ArchiveConf conf("swap", "days:100;disk:200");
        auto criterias = conf.GetCriterias();
        ASSERT_EQ(criterias.size(), 2u);
        ASSERT_TRUE(criterias["days"] == 100);
        ASSERT_TRUE(criterias["disk"] == 200);
    }
}

X
Xu Peng 已提交
89

90
// Inserts vectors on the main thread while a second thread repeatedly queries
// the table, and checks that each query vector's top hit is the id that was
// assigned to that same vector when it was inserted.
TEST_F(DBTest, DB_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    // Filler batch that is inserted over and over.
    int64_t nb = 50;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    // Query vectors; they are also inserted once so that they can be found.
    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    const int loop = 100000;
    const int target_batch = 40;  // iteration at which the query vectors are inserted

    // Insert the first filler batches and then the query vectors BEFORE the
    // search thread exists. target_ids is therefore fully written before any
    // other thread reads it, and an ASSERT failure here cannot leave a
    // joinable std::thread behind (which would call std::terminate).
    for (int i = 0; i < target_batch; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }
    db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
    ASSERT_EQ(target_ids.size(), static_cast<size_t>(qb));
    std::this_thread::sleep_for(std::chrono::microseconds(1));

    // NOTE: a failing ASSERT_* inside the lambda only returns from the lambda,
    // i.e. it stops the search thread; the failure is still recorded by gtest.
    std::thread search([&]() {
        engine::QueryResults results;
        const int k = 10;
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (int j = 0; j < 10; ++j) {
            ss.str("");
            // Remember the previous size BEFORE refreshing it; otherwise the
            // monotonic-growth check below compares a value with itself.
            prev_count = count;
            db_->Size(count);

            START_TIMER;
            // Thread-local status: the outer `stat` is not shared across threads.
            engine::Status query_stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count/engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(query_stat);
            // Guard the indexing below against short or empty result sets.
            ASSERT_GE(results.size(), static_cast<size_t>(qb));
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // Keep inserting filler batches while the search thread is querying.
    for (int i = target_batch + 1; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
X
xj.lin 已提交
166

167
// Inserts 250000 random vectors in batches of 100 and then runs one top-k
// query for 10 random vectors, checking only the returned status.
TEST_F(DBTest, SEARCH_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // prepare raw data
    const size_t nb = 250000;
    const size_t nq = 10;
    const size_t k = 5;
    std::vector<float> xb(nb*TABLE_DIM);
    std::vector<float> xq(nq*TABLE_DIM);
    std::vector<long> ids(nb);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis_xt(-1.0, 1.0);
    for (float& value : xb) {
        value = dis_xt(gen);
    }
    for (size_t i = 0; i < nb; ++i) {
        ids[i] = static_cast<long>(i);
    }
    for (float& value : xq) {
        value = dis_xt(gen);
    }

    // insert data
    const size_t batch_size = 100;
    const size_t batch_count = nb / batch_size;
    for (size_t j = 0; j < batch_count; ++j) {
        // NOTE(review): the same `ids` vector is handed to every call, as in
        // the original test — confirm how InsertVectors treats this argument.
        stat = db_->InsertVectors(TABLE_NAME, batch_size, xb.data()+batch_size*j*TABLE_DIM, ids);
        ASSERT_STATS(stat);
        // One pause part-way through the inserts.
        if (j == 200) {
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    }

    std::this_thread::sleep_for(std::chrono::seconds(2)); // wait until build index finish

    engine::QueryResults results;
    stat = db_->Query(TABLE_NAME, k, nq, xq.data(), results);
    ASSERT_STATS(stat);

    // TODO(linxj): add groundTruth assert
}
Y
c  
yu yunfeng 已提交
220

G
groot 已提交
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
// Inserts a small batch 100000 times and then checks that the size reported
// by the DB does not exceed 1 * engine::meta::G.
// NOTE(review): presumably the DBTest2 fixture configures a disk-based
// archive policy that keeps the size bounded — confirm in the fixture.
TEST_F(DBTest2, ARHIVE_DISK_CHECK) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;

    // Initialised so the value logged and asserted below is never
    // indeterminate, even if Size() leaves it untouched.
    uint64_t size = 0;
    db_->Size(size);

    const int64_t nb = 10;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    const int loop = 100000;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Pause before sampling the size.
    std::this_thread::sleep_for(std::chrono::seconds(1));

    db_->Size(size);
    LOG(DEBUG) << "size=" << size;
    ASSERT_LE(size, 1 * engine::meta::G);
}

// Creates a table, inserts 20 batches of 100000 vectors, deletes the table and
// checks that the table location no longer exists on disk afterwards.
TEST_F(DBTest2, DELETE_TEST) {
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);

    // The table location must exist before the delete.
    ASSERT_TRUE(boost::filesystem::exists(table_info_get.location_));

    engine::IDNumbers vector_ids;

    // The reported size is not inspected by this test; the variable is
    // initialised so it never holds an indeterminate value.
    uint64_t size = 0;
    db_->Size(size);

    const int64_t nb = 100000;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    const int loop = 20;
    for (int i = 0; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    std::vector<engine::meta::DateT> dates;
    stat = db_->DeleteTable(TABLE_NAME, dates);
    // Fail immediately on a bad status instead of sleeping first.
    ASSERT_TRUE(stat.ok());

    // Pause before checking the filesystem.
    std::this_thread::sleep_for(std::chrono::seconds(2));
    ASSERT_FALSE(boost::filesystem::exists(table_info_get.location_));
}