mem_test.cpp 13.8 KB
Newer Older
Z
zhiru 已提交
1 2
#include "gtest/gtest.h"

S
starlord 已提交
3 4 5
#include "db/insert/VectorSource.h"
#include "db/insert/MemTableFile.h"
#include "db/insert/MemTable.h"
Z
zhiru 已提交
6 7 8
#include "utils.h"
#include "db/Factories.h"
#include "db/Constants.h"
S
starlord 已提交
9
#include "db/engine/EngineFactory.h"
Z
zhiru 已提交
10
#include "metrics/Metrics.h"
S
starlord 已提交
11
#include "db/meta/MetaConsts.h"
Z
zhiru 已提交
12

S
starlord 已提交
13
#include <boost/filesystem.hpp>
Z
zhiru 已提交
14 15 16
#include <thread>
#include <fstream>
#include <iostream>
J
jinhai 已提交
17
#include <cmath>
J
jinhai 已提交
18
#include <random>
Z
update  
zhiru 已提交
19

Z
zhiru 已提交
20 21 22 23
using namespace zilliz::milvus;

namespace {

J
jinhai 已提交
24
// Table id used by BuildTableSchema() and by every test in this file.
static const char* TABLE_NAME = "test_group";
Z
update  
zhiru 已提交
25 26 27 28 29 30 31 32 33 34 35
// Number of float components per vector; also the dimension of the test table.
constexpr int64_t TABLE_DIM{256};
// Bulk-size constants (not referenced by the tests visible in this file).
constexpr int64_t VECTOR_COUNT{250000};
constexpr int64_t INSERT_LOOP{10000};

// Returns the table schema shared by the tests: TABLE_NAME as the table id,
// TABLE_DIM as the dimension, and FAISS_IDMAP as the engine type.
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema schema;
    schema.table_id_ = TABLE_NAME;
    schema.dimension_ = TABLE_DIM;
    schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return schema;
}
Z
zhiru 已提交
36

Z
update  
zhiru 已提交
37 38 39 40 41 42 43
// Fills `vectors` with `n` pseudo-random vectors of TABLE_DIM floats each,
// laid out back to back (vector i starts at offset i * TABLE_DIM).
// Any previous contents of `vectors` are discarded.
//
// @param n        number of vectors to generate
// @param vectors  output buffer; resized to n * TABLE_DIM elements
void BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.clear();
    vectors.resize(n * TABLE_DIM);
    float *data = vectors.data();
    // 64-bit counters: `n` is int64_t, so an `int` counter would be compared
    // against a wider type and could overflow for large counts.
    for (int64_t i = 0; i < n; ++i) {
        float *row = data + TABLE_DIM * i;
        for (int64_t j = 0; j < TABLE_DIM; ++j) {
            row[j] = drand48();
        }
        // Shift the first component by an amount that grows with the vector index.
        row[0] += i / 2000.;
    }
}
Z
update  
zhiru 已提交
46
}
Z
zhiru 已提交
47

Z
zhiru 已提交
48
// Drives engine::VectorSource::Add directly. A source holding 100 vectors is
// asked for 50 and then for 60; the second request is expected to report only
// the 50 that remain, leaving 100 ids recorded on the source.
TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) {
    std::shared_ptr<engine::meta::SqliteMetaImpl> meta = engine::DBMetaImplFactory::Build();

    // Register the table and one table file for the source to be added to.
    engine::meta::TableSchema schema = BuildTableSchema();
    auto status = meta->CreateTable(schema);
    ASSERT_TRUE(status.ok());

    engine::meta::TableFileSchema file_schema;
    file_schema.table_id_ = TABLE_NAME;
    status = meta->CreateTableFile(file_schema);
    ASSERT_TRUE(status.ok());

    int64_t total = 100;
    std::vector<float> raw;
    BuildVectors(total, raw);

    engine::VectorSource source(total, raw.data());

    size_t added;
    engine::ExecutionEnginePtr exec_engine =
        engine::EngineFactory::Build(file_schema.dimension_,
                                     file_schema.location_,
                                     static_cast<engine::EngineType>(file_schema.engine_type_));

    // First request: 50 of the 100 vectors.
    engine::IDNumbers ids;
    status = source.Add(exec_engine, file_schema, 50, added, ids);
    ASSERT_TRUE(status.ok());
    ids = source.GetVectorIds();
    ASSERT_EQ(ids.size(), 50);
    ASSERT_EQ(added, 50);

    // Second request asks for 60, but only 50 vectors are left in the source.
    ids.clear();
    status = source.Add(exec_engine, file_schema, 60, added, ids);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(added, 50);

    ids = source.GetVectorIds();
    ASSERT_EQ(ids.size(), 100);

    status = meta->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
91
// Fills a single engine::MemTableFile: 100 vectors first, then a source sized
// to the whole per-file budget (MAX_TABLE_FILE_MEM). Only the room that is
// left is expected to be taken from the second source, after which the file
// must report itself full.
TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) {

    std::shared_ptr<engine::meta::SqliteMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    engine::MemTableFile mem_table_file(TABLE_NAME, impl_, options);

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    engine::VectorSource::Ptr source = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());

    engine::IDNumbers vector_ids;
    status = mem_table_file.Add(source, vector_ids);
    ASSERT_TRUE(status.ok());

//    std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl;

    vector_ids = source->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    // Memory accounting is expected to be exactly the raw float payload.
    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem);

    // A source as large as the whole per-file budget.
    int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
    vector_ids.clear();
    status = mem_table_file.Add(source_128M, vector_ids);
    // Previously unchecked: a failed Add would only surface indirectly through
    // the id-count assertion below.
    ASSERT_TRUE(status.ok());

    // The file already held n_100 vectors, so only n_max - n_100 fit.
    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max - n_100);

    ASSERT_TRUE(mem_table_file.IsFull());

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
137
// Exercises engine::MemTable with three sources of growing size and checks
// how the vectors are spread over table files, then serializes the table.
TEST_F(NewMemManagerTest, MEM_TABLE_TEST) {

    std::shared_ptr<engine::meta::SqliteMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    engine::VectorSource::Ptr source_100 = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());

    engine::MemTable mem_table(TABLE_NAME, impl_, options);

    // Step 1: 100 vectors land in the first file.
    engine::IDNumbers vector_ids;
    status = mem_table.Add(source_100, vector_ids);
    ASSERT_TRUE(status.ok());
    vector_ids = source_100->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    engine::MemTableFile::Ptr mem_table_file;
    mem_table.GetCurrentMemTableFile(mem_table_file);
    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    // Step 2: a source as large as one full file. It tops up the first file
    // and spills exactly n_100 vectors into a second one.
    int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    vector_ids.clear();
    engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
    status = mem_table.Add(source_128M, vector_ids);
    ASSERT_TRUE(status.ok());

    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max);

    mem_table.GetCurrentMemTableFile(mem_table_file);
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    ASSERT_EQ(mem_table.GetTableFileCount(), 2);

    // Step 3: a much larger source that has to span several files.
    int64_t n_1G = 1024000;
    std::vector<float> vectors_1G;
    BuildVectors(n_1G, vectors_1G);

    engine::VectorSource::Ptr source_1G = std::make_shared<engine::VectorSource>(n_1G, vectors_1G.data());

    vector_ids.clear();
    status = mem_table.Add(source_1G, vector_ids);
    ASSERT_TRUE(status.ok());

    vector_ids = source_1G->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_1G);

    // The current (second) file already holds n_100 vectors, so it absorbs
    // n_max - n_100 of the new ones; every further group of up to n_max
    // vectors needs a file of its own. Integer ceiling division is used
    // because std::ceil applied to an integer quotient never rounds up.
    const int64_t overflow = n_1G - (n_max - n_100);
    const int64_t extra_files = overflow > 0 ? (overflow + n_max - 1) / n_max : 0;
    int expectedTableFileCount = 2 + static_cast<int>(extra_files);
    ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount);

    status = mem_table.Serialize();
    ASSERT_TRUE(status.ok());

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
205
// Inserts a large batch of vectors, then queries a random sample of them and
// expects each sampled vector to be returned as its own best match.
TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // Maps the id of each sampled vector to a copy of its components.
    std::map<int64_t, std::vector<float>> search_vectors;
    {
        engine::IDNumbers vector_ids;
        int64_t nb = 1024000;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());

        // NOTE(review): presumably gives the database time to make the
        // inserted data searchable -- confirm the intent of this delay.
        std::this_thread::sleep_for(std::chrono::seconds(3));

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int64_t> dis(0, nb - 1);

        int64_t num_query = 20;
        for (int64_t i = 0; i < num_query; ++i) {
            int64_t index = dis(gen);
            // Copy the TABLE_DIM components of the sampled vector.
            std::vector<float> search(xb.begin() + index * TABLE_DIM,
                                      xb.begin() + (index + 1) * TABLE_DIM);
            // A repeated index maps to the same id and is ignored by insert().
            search_vectors.insert(std::make_pair(vector_ids[index], search));
        }
    }

    int k = 10;
    for (auto &pair : search_vectors) {
        auto &search = pair.second;
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, 1, 10, search.data(), results);
        // Check the query before indexing into its output: a failed or empty
        // result would otherwise be an out-of-bounds access below.
        ASSERT_STATS(stat);
        ASSERT_FALSE(results.empty());
        ASSERT_FALSE(results[0].empty());
        // The top hit must be the queried vector itself, with a near-zero score.
        ASSERT_EQ(results[0][0].first, pair.first);
        ASSERT_LT(results[0][0].second, 0.00001);
    }

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}

Z
zhiru 已提交
261
// Inserts 20 batches of 40960 vectors and logs the total elapsed time.
TEST_F(NewMemManagerTest, INSERT_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        int64_t nb = 40960;
        // Fresh random data for every batch.
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::IDNumbers vector_ids;
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
    }
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so the unit is microseconds
    // (the message used to say "ms").
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}
Z
zhiru 已提交
296

Z
update  
zhiru 已提交
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
// Runs queries on a background thread while the main thread keeps inserting.
// The first insert stores the `qb` query vectors themselves, so every query
// is expected to return those vectors' ids as the top hits.
TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) {

    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    int64_t nb = 40960;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        // Wait before the first query; the main thread fills target_ids in
        // its first loop iteration, which this delay is relied on to cover.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            prev_count = count;

            START_TIMER;
            stat = db_->Query(TABLE_NAME, k, qb, 10, qxb.data(), results);
            ss << "Search " << j << " With Size " << count / engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // `q` instead of a second `k`, which used to shadow the top-k above.
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto &result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            // NOTE(review): prev_count is assigned from count just above, so
            // this check can never fail; kept as-is pending a decision on
            // whether the size is meant to be compared across iterations.
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = 20;

    for (auto i = 0; i < loop; ++i) {
        if (i == 0) {
            db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
            // Non-fatal on purpose: a fatal ASSERT_* would return from the
            // test while `search` is still joinable, and destroying a
            // joinable std::thread calls std::terminate.
            EXPECT_EQ(target_ids.size(), qb);
        } else {
            db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    delete db_;
    boost::filesystem::remove_all(options.meta.path);

}
Z
zhiru 已提交
377

Y
Yu Kun 已提交
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
// Inserts several batches with caller-supplied vector ids (and one batch with
// no ids) and checks that the supplied ids come back unchanged.
TEST_F(DBTest, VECTOR_IDS_TEST)
{
    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    std::vector<float> xb;

    // Replaces vector_ids with `count` consecutive ids starting at `first`.
    // 64-bit counter: the counts here are int64_t.
    auto fill_ids = [&vector_ids](int64_t count, int64_t first) {
        vector_ids.clear();
        vector_ids.resize(count);
        for (int64_t i = 0; i < count; ++i) {
            vector_ids[i] = first + i;
        }
    };

    int64_t nb = 100000;
    BuildVectors(nb, xb);
    fill_ids(nb, 0);
    stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
    ASSERT_EQ(vector_ids[0], 0);
    ASSERT_STATS(stat);

    nb = 25000;
    BuildVectors(nb, xb);
    fill_ids(nb, nb);
    stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
    ASSERT_EQ(vector_ids[0], nb);
    ASSERT_STATS(stat);

    nb = 262144; // 256 MiB of float data at TABLE_DIM = 256
    BuildVectors(nb, xb);
    fill_ids(nb, nb / 2);
    stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
    ASSERT_EQ(vector_ids[0], nb/2);
    ASSERT_STATS(stat);

    nb = 65536; // 64 MiB of float data at TABLE_DIM = 256
    BuildVectors(nb, xb);
    // No ids supplied for this batch.
    vector_ids.clear();
    stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
    ASSERT_STATS(stat);

    nb = 100;
    BuildVectors(nb, xb);
    fill_ids(nb, nb);
    stat = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
    // Previously unchecked: the id comparison below is only meaningful if the
    // insert itself succeeded.
    ASSERT_STATS(stat);
    for (int64_t i = 0; i < nb; ++i) {
        ASSERT_EQ(vector_ids[i], i + nb);
    }
}