mem_test.cpp 11.7 KB
Newer Older
Z
zhiru 已提交
1 2 3 4 5 6 7 8
#include "gtest/gtest.h"

#include "db/VectorSource.h"
#include "db/MemTableFile.h"
#include "db/MemTable.h"
#include "utils.h"
#include "db/Factories.h"
#include "db/Constants.h"
Z
update  
zhiru 已提交
9
#include "db/EngineFactory.h"
Z
zhiru 已提交
10
#include "metrics/Metrics.h"
Z
zhiru 已提交
11 12
#include "db/MetaConsts.h"
#include "boost/filesystem.hpp"
Z
zhiru 已提交
13 14 15 16

#include <thread>
#include <fstream>
#include <iostream>
Z
zhiru 已提交
17 18 19 20 21

using namespace zilliz::milvus;

namespace {

Z
update  
zhiru 已提交
22 23 24 25 26 27 28 29 30 31 32 33
// Identifier of the table that every test in this file creates and queries.
const std::string TABLE_NAME{"test_group"};
// Number of float components in each test vector.
constexpr int64_t TABLE_DIM{256};
// The two constants below are not referenced in the visible part of this file.
constexpr int64_t VECTOR_COUNT{250000};
constexpr int64_t INSERT_LOOP{10000};

// Returns the schema used by every test in this file: table id TABLE_NAME,
// dimension TABLE_DIM, and engine type FAISS_IDMAP (stored as its int value).
engine::meta::TableSchema BuildTableSchema() {
    engine::meta::TableSchema schema;
    schema.dimension_ = TABLE_DIM;
    schema.table_id_ = TABLE_NAME;
    schema.engine_type_ = static_cast<int>(engine::EngineType::FAISS_IDMAP);
    return schema;
}
Z
zhiru 已提交
34

Z
update  
zhiru 已提交
35 36 37 38 39 40 41
// Fills `vectors` with `n` test vectors of TABLE_DIM floats each, laid out
// row-major: vector i occupies elements [i * TABLE_DIM, (i + 1) * TABLE_DIM).
//
// Every component is drawn from drand48(); the first component of vector i is
// then increased by i / 2000.  Previous contents of `vectors` are discarded.
void BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.clear();
    vectors.resize(n * TABLE_DIM);
    float *data = vectors.data();
    // 64-bit counters: `n` is int64_t, so an `int` counter could overflow
    // before reaching it.
    for (int64_t i = 0; i < n; ++i) {
        float *row = data + TABLE_DIM * i;
        for (int64_t j = 0; j < TABLE_DIM; ++j) {
            row[j] = static_cast<float>(drand48());
        }
        row[0] += i / 2000.;
    }
}
Z
update  
zhiru 已提交
44
}
Z
zhiru 已提交
45

Z
zhiru 已提交
46
// Checks that a VectorSource hands its vectors out in chunks: with 100 vectors
// in the source, a request for 50 adds 50, and a following request for 60 adds
// only 50 more, for 100 vector ids in total.
TEST_F(NewMemManagerTest, VECTOR_SOURCE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> meta = engine::DBMetaImplFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = meta->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    engine::meta::TableFileSchema table_file_schema;
    table_file_schema.table_id_ = TABLE_NAME;
    status = meta->CreateTableFile(table_file_schema);
    ASSERT_TRUE(status.ok());

    int64_t n = 100;
    std::vector<float> vectors;
    BuildVectors(n, vectors);

    engine::VectorSource source(n, vectors.data());

    // Out-parameter of VectorSource::Add; initialised so it never holds an
    // indeterminate value.
    size_t num_vectors_added = 0;
    engine::ExecutionEnginePtr execution_engine = engine::EngineFactory::Build(
        table_file_schema.dimension_,
        table_file_schema.location_,
        static_cast<engine::EngineType>(table_file_schema.engine_type_));

    // First chunk: ask for 50 of the 100 vectors.
    status = source.Add(execution_engine, table_file_schema, 50, num_vectors_added);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(num_vectors_added, 50);

    engine::IDNumbers vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 50);

    // Second chunk: ask for 60, expect only 50 to be added.
    status = source.Add(execution_engine, table_file_schema, 60, num_vectors_added);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(num_vectors_added, 50);

    vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    status = meta->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
89
// Exercises MemTableFile's memory accounting.  After adding 100 vectors the
// file must report 100 * sizeof(float) * TABLE_DIM bytes.  A second source of
// MAX_TABLE_FILE_MEM / bytes-per-vector vectors is then offered; the test
// expects only (that count - 100) ids to be produced and the file to be full.
TEST_F(NewMemManagerTest, MEM_TABLE_FILE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> meta = engine::DBMetaImplFactory::Build();
    auto db_options = engine::OptionsFactory::Build();

    engine::meta::TableSchema schema = BuildTableSchema();
    auto status = meta->CreateTable(schema);
    ASSERT_TRUE(status.ok());

    engine::MemTableFile file_under_test(TABLE_NAME, meta, db_options);

    // Small source: 100 vectors.
    int64_t small_count = 100;
    std::vector<float> small_data;
    BuildVectors(small_count, small_data);

    engine::VectorSource::Ptr small_source =
        std::make_shared<engine::VectorSource>(small_count, small_data.data());

    status = file_under_test.Add(small_source);
    ASSERT_TRUE(status.ok());

    engine::IDNumbers ids = small_source->GetVectorIds();
    ASSERT_EQ(ids.size(), 100);

    size_t bytes_per_vector = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(file_under_test.GetCurrentMem(), small_count * bytes_per_vector);

    // Large source: as many vectors as MAX_TABLE_FILE_MEM bytes can hold.
    int64_t max_count = engine::MAX_TABLE_FILE_MEM / bytes_per_vector;
    std::vector<float> large_data;
    BuildVectors(max_count, large_data);

    engine::VectorSource::Ptr large_source =
        std::make_shared<engine::VectorSource>(max_count, large_data.data());
    // NOTE(review): the status of this Add is not asserted — confirm whether a
    // partially consumed source is expected to report ok here.
    status = file_under_test.Add(large_source);

    ids = large_source->GetVectorIds();
    ASSERT_EQ(ids.size(), max_count - small_count);

    ASSERT_TRUE(file_under_test.IsFull());

    status = meta->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
133
// Exercises MemTable across several table files:
//   1. add 100 vectors and check the current file's memory usage;
//   2. add MAX_TABLE_FILE_MEM worth of vectors and expect a second file whose
//      usage equals 100 vectors;
//   3. add 1024000 more vectors and check the resulting file count;
//   4. serialize the table.
TEST_F(NewMemManagerTest, MEM_TABLE_TEST) {
    std::shared_ptr<engine::meta::DBMetaImpl> impl_ = engine::DBMetaImplFactory::Build();
    auto options = engine::OptionsFactory::Build();

    engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    engine::VectorSource::Ptr source_100 = std::make_shared<engine::VectorSource>(n_100, vectors_100.data());

    // The table under test was used below without ever being declared.
    // NOTE(review): the constructor arguments mirror those passed to
    // engine::MemTableFile in MEM_TABLE_FILE_TEST — confirm against db/MemTable.h.
    engine::MemTable mem_table(TABLE_NAME, impl_, options);

    // The assertions that follow (100 ids, 100 vectors of memory in the current
    // file) only hold once source_100 has actually been added.
    status = mem_table.Add(source_100);
    ASSERT_TRUE(status.ok());
    engine::IDNumbers vector_ids = source_100->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    engine::MemTableFile::Ptr mem_table_file;
    mem_table.GetCurrentMemTableFile(mem_table_file);
    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    int64_t n_max = engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    engine::VectorSource::Ptr source_128M = std::make_shared<engine::VectorSource>(n_max, vectors_128M.data());
    status = mem_table.Add(source_128M);
    ASSERT_TRUE(status.ok());

    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max);

    // The current file is now expected to hold exactly n_100 vectors of data.
    mem_table.GetCurrentMemTableFile(mem_table_file);
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    ASSERT_EQ(mem_table.GetTableFileCount(), 2);

    int64_t n_1G = 1024000;
    std::vector<float> vectors_1G;
    BuildVectors(n_1G, vectors_1G);

    engine::VectorSource::Ptr source_1G = std::make_shared<engine::VectorSource>(n_1G, vectors_1G.data());

    status = mem_table.Add(source_1G);
    ASSERT_TRUE(status.ok());

    vector_ids = source_1G->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_1G);

    // NOTE(review): if MAX_TABLE_FILE_MEM is an integer type, the division
    // truncates before std::ceil sees it, making std::ceil a no-op.  The
    // expression is kept unchanged on purpose — confirm the intended count
    // before altering it.
    int expectedTableFileCount = 2 + std::ceil((n_1G - n_100) * singleVectorMem / engine::MAX_TABLE_FILE_MEM);
    ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount);

    // Serialized once; the second call on the undeclared name `memTable` was a
    // leftover duplicate and has been removed.
    status = mem_table.Serialize();
    ASSERT_TRUE(status.ok());

    status = impl_->DropAll();
    ASSERT_TRUE(status.ok());
}

Z
zhiru 已提交
198
// Inserts 1024000 random vectors, then queries up to 20 randomly chosen ones
// and expects each to come back as its own nearest neighbour (matching id,
// distance below 1e-5).
TEST_F(NewMemManagerTest, SERIAL_INSERT_SEARCH_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    // This status is overwritten by DescribeTable below, which is what the
    // test actually asserts on.
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    // Maps the id of each sampled vector to a copy of its data.  Sampling the
    // same index twice keeps a single entry, so fewer than num_query queries
    // may be issued.
    std::map<int64_t, std::vector<float>> search_vectors;
    {
        engine::IDNumbers vector_ids;
        int64_t nb = 1024000;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());

        std::this_thread::sleep_for(std::chrono::seconds(3));

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int64_t> dis(0, nb - 1);

        int64_t num_query = 20;
        for (int64_t i = 0; i < num_query; ++i) {
            int64_t index = dis(gen);
            // Copy row `index` out of the row-major buffer.
            const auto first = xb.begin() + index * TABLE_DIM;
            std::vector<float> search(first, first + TABLE_DIM);
            search_vectors.insert(std::make_pair(vector_ids[index], std::move(search)));
        }
    }

    int k = 10;
    for (auto &pair : search_vectors) {
        auto &search = pair.second;
        engine::QueryResults results;
        stat = db_->Query(TABLE_NAME, k, 1, search.data(), results);
        // Check the query status before indexing into `results`.
        ASSERT_STATS(stat);
        ASSERT_EQ(results[0][0].first, pair.first);
        ASSERT_LT(results[0][0].second, 0.00001);
    }

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}

Z
zhiru 已提交
254
// Inserts 20 batches of 409600 random vectors into a database rooted at
// /tmp/milvus_test and logs the elapsed time computed by METRICS_MICROSECONDS.
TEST_F(NewMemManagerTest, INSERT_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    // This status is overwritten by DescribeTable below, which is what the
    // test actually asserts on.
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        // A new random batch is generated inside the timed region on every
        // iteration.
        int64_t nb = 409600;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        engine::IDNumbers vector_ids;
        engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
        ASSERT_TRUE(status.ok());
    }
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so it is labelled "us" rather
    // than "ms".
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}
Z
zhiru 已提交
289

Z
zhiru 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
// Runs inserts on the main thread while a second thread repeatedly queries.
// The first insert stores `qb` query vectors and records their ids in
// `target_ids`; every search is then expected to return those ids as the top
// hit for the same query vectors.
TEST_F(NewMemManagerTest, CONCURRENT_INSERT_SEARCH_TEST) {
    auto options = engine::OptionsFactory::Build();
    options.meta.path = "/tmp/milvus_test";
    options.meta.backend_uri = "sqlite://:@:/";
    auto db_ = engine::DBFactory::Build(options);

    engine::meta::TableSchema table_info = BuildTableSchema();
    // This status is overwritten by DescribeTable below, which is what the
    // test actually asserts on.
    engine::Status stat = db_->CreateTable(table_info);

    engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = TABLE_NAME;
    stat = db_->DescribeTable(table_info_get);
    ASSERT_STATS(stat);
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    engine::IDNumbers vector_ids;
    engine::IDNumbers target_ids;

    int64_t nb = 409600;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    // NOTE(review): the search thread reads `target_ids` after a 2 s sleep and
    // relies on that delay, not on synchronisation, for the first insert to
    // have completed.
    std::thread search([&]() {
        engine::QueryResults results;
        int k = 10;
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            prev_count = count;

            START_TIMER;
            stat = db_->Query(TABLE_NAME, k, qb, qxb.data(), results);
            ss << "Search " << j << " With Size " << count / engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_STATS(stat);
            // `q` walks the query vectors; a separate name avoids shadowing
            // the top-k value `k` above.
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto &result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            // NOTE(review): prev_count was assigned from count just above, so
            // this check always holds — confirm whether a comparison against
            // the previous iteration's size was intended.
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // A fatal assertion below returns from this function early.  Destroying a
    // joinable std::thread calls std::terminate, so make sure the search
    // thread is joined on every exit path.  Declared after `search`, this
    // guard is destroyed first, while everything the thread captured is alive.
    struct JoinOnExit {
        std::thread &thread;
        ~JoinOnExit() {
            if (thread.joinable()) {
                thread.join();
            }
        }
    } join_search{search};

    int loop = 20;

    for (auto i = 0; i < loop; ++i) {
        if (i == 0) {
            // First pass stores the query vectors themselves so the search
            // thread has known ids to look for.
            engine::Status status = db_->InsertVectors(TABLE_NAME, qb, qxb.data(), target_ids);
            ASSERT_TRUE(status.ok());
            ASSERT_EQ(target_ids.size(), qb);
        } else {
            engine::Status status = db_->InsertVectors(TABLE_NAME, nb, xb.data(), vector_ids);
            ASSERT_TRUE(status.ok());
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    // Must finish before the database is deleted.
    search.join();

    delete db_;
    boost::filesystem::remove_all(options.meta.path);
}