// mem_test.cpp
#include "gtest/gtest.h"

#include "db/VectorSource.h"
#include "db/MemTableFile.h"
#include "db/MemTable.h"
#include "utils.h"
#include "db/Factories.h"
#include "db/Constants.h"
#include "db/EngineFactory.h"
#include "metrics/Metrics.h"
#include "db/MetaConsts.h"
#include "boost/filesystem.hpp"

#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
using namespace zilliz::milvus;

namespace {

// Fixture values shared by the helpers and tests in this file.
static const std::string TABLE_NAME{"test_group"};  // table id used by every test
static constexpr int64_t TABLE_DIM{256};            // number of floats per vector
// The two constants below are not referenced in the part of the file reviewed here.
static constexpr int64_t VECTOR_COUNT{250000};
static constexpr int64_t INSERT_LOOP{10000};

// Builds the table schema used by every test: table id TABLE_NAME, dimension
// TABLE_DIM, and the FAISS_IDMAP engine type stored as its integer value.
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema schema;
    schema.table_id_ = TABLE_NAME;
    schema.dimension_ = TABLE_DIM;
    schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return schema;
}
/**
 * Fills `vectors` with `n` rows of TABLE_DIM floats each.
 *
 * Every component is drawn from drand48(); the first component of row `i` is
 * then shifted by i / 2000 so rows differ in a way that depends on their index.
 *
 * @param n        number of rows to generate; a non-positive value leaves
 *                 `vectors` empty instead of requesting a huge allocation.
 * @param vectors  output buffer; previous contents are discarded and the buffer
 *                 is resized to n * TABLE_DIM elements, row-major.
 */
void BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.clear();
    if (n <= 0) {
        return;
    }
    vectors.resize(n * TABLE_DIM);
    float *data = vectors.data();
    // 64-bit indices: `n` is int64_t, so an int counter could overflow before reaching it.
    for (int64_t i = 0; i < n; ++i) {
        float *row = data + TABLE_DIM * i;
        for (int64_t j = 0; j < TABLE_DIM; ++j) {
            row[j] = drand48();
        }
        row[0] += i / 2000.;
    }
}
}
// Drives engine::VectorSource::Add directly. A source holding 100 vectors is
// consumed by two requests: the first asks for 50 and gets 50; the second asks
// for 60 but is expected to report only the 50 that remain, for 100 ids in total.
TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    engine::meta::TableFileSchema table_file_schema;
    table_file_schema.table_id_ = TABLE_NAME;
    status = impl_->CreateTableFile(table_file_schema);
    ASSERT_TRUE(status.ok());

    int64_t n = 100;
    std::vector<float> vectors;
    BuildVectors(n, vectors);

    engine::VectorSource source(n, vectors.data());

    // Out-parameter of Add(); initialised so it is never read while indeterminate.
    size_t num_vectors_added = 0;
    engine::ExecutionEnginePtr execution_engine_ =
        engine::EngineFactory::Build(table_file_schema.dimension_,
                                     table_file_schema.location_,
                                     static_cast<engine::EngineType>(table_file_schema.engine_type_));

    // First request: 50 of the 100 vectors.
    status = source.Add(execution_engine_, table_file_schema, 50, num_vectors_added);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(num_vectors_added, 50);

    engine::IDNumbers vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 50);

    // Second request asks for 60, but only 50 vectors are left in the source.
    status = source.Add(execution_engine_, table_file_schema, 60, num_vectors_added);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(num_vectors_added, 50);

    vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

// Exercises a single engine::MemTableFile. After 100 vectors are added the
// file must report 100 * sizeof(float) * TABLE_DIM bytes in use. A second source
// sized to MAX_TABLE_FILE_MEM is then offered; only n_max - 100 of its vectors
// are expected to be taken, after which the file must report IsFull().
TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    engine::MemTableFile mem_table_file(TABLE_NAME, impl_, options);

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    engine::VectorSource::Ptr source = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());

    status = mem_table_file.Add(source);
    ASSERT_TRUE(status.ok());

    engine::IDNumbers vector_ids = source->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem);

    // Number of vectors that fit in one completely empty table file.
    int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
    status = mem_table_file.Add(source_128M);
    // Previously stored and ignored; checked like every other Add() in this file.
    // NOTE(review): assumes a partially consumed source still yields an OK status - confirm.
    ASSERT_TRUE(status.ok());

    // The file already held n_100 vectors, so it can only take the remainder.
    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max - n_100);

    ASSERT_TRUE(mem_table_file.IsFull());

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

// Exercises engine::MemTable across several table files:
//   1. 100 vectors go into the first file;
//   2. a source sized to one full file is added, which must leave two files with
//      the current one holding exactly 100 vectors;
//   3. 1,024,000 more vectors are added and the resulting file count is checked;
//   4. the table is serialized.
TEST_F(NewMemManagerTest, MEM_TABLE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    engine::VectorSource::Ptr source_100 = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());

    engine::MemTable mem_table(TABLE_NAME, impl_, options);

    status = mem_table.Add(source_100);
    ASSERT_TRUE(status.ok());
    engine::IDNumbers vector_ids = source_100->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    engine::MemTableFile::Ptr mem_table_file;
    mem_table.GetCurrentMemTableFile(mem_table_file);
    // Guard the dereference below: the out-parameter starts out empty.
    ASSERT_TRUE(mem_table_file != nullptr);
    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    // Number of vectors that fit in one completely empty table file.
    int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
    status = mem_table.Add(source_128M);
    ASSERT_TRUE(status.ok());

    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max);

    // The first file was topped up, so the second file holds the 100 vectors that spilled.
    mem_table.GetCurrentMemTableFile(mem_table_file);
    ASSERT_TRUE(mem_table_file != nullptr);
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    ASSERT_EQ(mem_table.GetTableFileCount(), 2);

    int64_t n_1G = 1024000;
    std::vector<float> vectors_1G;
    BuildVectors(n_1G, vectors_1G);

    engine::VectorSource::Ptr source_1G = std::make_shared<engine::VectorSource>(n_1G, vectors_1G.data());

    status = mem_table.Add(source_1G);
    ASSERT_TRUE(status.ok());

    vector_ids = source_1G->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_1G);

    // Expected file count. The previous expression wrapped an integer division in
    // std::ceil, so nothing was rounded up and the value matched only by coincidence.
    // Here the count is derived from the fill-then-spill behaviour asserted above:
    // the current file (holding n_100 vectors) absorbs n_max - n_100 more, and each
    // further n_max vectors, or part thereof, needs one additional file.
    // NOTE(review): assumes every spilled file is filled to capacity before the next
    // one is opened, as the two-file step above suggests - confirm against MemTable.
    const int64_t room_in_current_file = n_max - n_100;
    const int64_t spilled = n_1G - room_in_current_file;
    const int64_t extra_files = (spilled + n_max - 1) / n_max;  // integer ceiling
    int expectedTableFileCount = 2 + static_cast<int>(extra_files);
    ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount);

    status = mem_table.Serialize();
    ASSERT_TRUE(status.ok());

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

// Inserts 1,024,000 vectors through the DB interface, waits three seconds, then
// queries 20 randomly chosen inserted vectors one at a time. Each query must
// return the vector's own id as the top hit with a distance below 1e-5.
TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // Maps the id of each sampled vector to a copy of its data.
    std::map<int64_t, std::vector<float>> search_vectors;
    {
        engine::IDNumbers vector_ids;
        int64_t nb = 1024000;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
        // vector_ids is indexed with values up to nb - 1 below, so make sure it is that long.
        ASSERT_EQ(vector_ids.size(), nb);

        std::this_thread::sleep_for(std::chrono::seconds(3));

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int64_t> dis(0, nb - 1);

        int64_t num_query = 20;
        for (int64_t i = 0; i < num_query; ++i) {
            int64_t index = dis(gen);
            // Copy row `index` of xb in one step instead of element by element.
            const auto row_begin = xb.begin() + index * TABLE_DIM;
            std::vector<float> search(row_begin, row_begin + TABLE_DIM);
            // insert() keeps the first entry if the same index is drawn twice.
            search_vectors.insert(std::make_pair(vector_ids[index], search));
        }
    }

    int k = 10;
    for (auto &pair : search_vectors) {
        auto &search = pair.second;
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, 1, search.data(), results);
        // Check the status before touching results[0][0]; a failed query must
        // fail the test rather than index into an unpopulated result set.
        ASSERT_STATS(stat);
        ASSERT_EQ(results[0][0].first, pair.first);
        ASSERT_LT(results[0][0].second, 0.00001);
    }

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}

// Inserts 20 batches of 409,600 vectors through the DB interface and logs the
// total elapsed time. Every insert must report an OK status.
TEST_F(NewMemManagerTest, INSERT_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        int64_t nb = 409600;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::IDNumbers vector_ids;
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
    }
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so label it "us"; the label used to say "ms".
    // NOTE(review): unit inferred from the macro name; its definition is in metrics/Metrics.h.
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}
// Runs bulk inserts on the main thread while a second thread repeatedly queries
// five target vectors. After a two-second delay the search thread issues ten
// queries one second apart; each must return the matching target id as the top
// hit for every query vector.
TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    int64_t nb = 409600;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    // Insert the target vectors before the search thread is created. This used to
    // be the first iteration of the insert loop below, which had two problems:
    //  - target_ids was written here and read by the search thread with only a
    //    sleep separating the two accesses;
    //  - a failing fatal assertion returned from the test while `search` was still
    //    joinable, which makes the std::thread destructor call std::terminate.
    db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
    ASSERT_EQ(target_ids.size(), qb);

    // Fatal assertions inside the lambda only end the search thread early.
    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is overwritten with the value just read, so the
            // count >= prev_count check at the end of this iteration is always true.
            prev_count = count;

            START_TIMER;
            stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count / engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // `q` indexes the query vectors; it used to be named `k`, shadowing the top-k above.
            for (auto q = 0; q < qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto &result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    int loop = 20;

    // Iteration 0 was the target insert above; the remaining loop - 1 are bulk inserts.
    for (auto i = 1; i < loop; ++i) {
        db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}