test_mem.cpp 13.8 KB
Newer Older
J
jinhai 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.


Z
zhiru 已提交
19 20
#include "gtest/gtest.h"

S
starlord 已提交
21 22 23
#include "db/insert/VectorSource.h"
#include "db/insert/MemTableFile.h"
#include "db/insert/MemTable.h"
Z
zhiru 已提交
24
#include "db/Constants.h"
S
starlord 已提交
25 26
#include "db/engine/EngineFactory.h"
#include "db/meta/MetaConsts.h"
S
starlord 已提交
27
#include "metrics/Metrics.h"
S
starlord 已提交
28
#include "db/utils.h"
Z
zhiru 已提交
29

S
starlord 已提交
30
#include <boost/filesystem.hpp>
Z
zhiru 已提交
31 32 33
#include <thread>
#include <fstream>
#include <iostream>
J
jinhai 已提交
34
#include <cmath>
J
jinhai 已提交
35
#include <random>
S
starlord 已提交
36
#include <chrono>
Z
update  
zhiru 已提交
37

Z
zhiru 已提交
38 39
namespace {

S
starlord 已提交
40
// Short alias for the milvus namespace; every project type below is spelled ms::...
namespace ms = milvus;
S
starlord 已提交
41

Z
update  
zhiru 已提交
42 43
// Number of float components per vector; used both for the table schema's
// dimension and for the layout of the buffers produced by BuildVectors().
static constexpr int64_t TABLE_DIM = 256;

S
starlord 已提交
44 45
// Returns the table name shared by every test in this file.
//
// The name is the system-clock time since epoch, in microseconds, formatted
// as a decimal string. It is computed on the first call and cached in a
// function-local static, so every later call returns the same string.
std::string
GetTableName() {
    // The initializer runs exactly once: the clock is sampled only on the
    // first call, never on calls whose reading would be discarded.
    static const std::string table_name = [] {
        const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
        const auto micros = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
        return std::to_string(micros);
    }();
    return table_name;
}

S
starlord 已提交
53 54 55
// Creates the schema used by the tests in this file: table id taken from
// GetTableName(), TABLE_DIM-dimensional vectors, FAISS_IDMAP engine type.
// No other field of the schema is assigned here.
ms::engine::meta::TableSchema
BuildTableSchema() {
    ms::engine::meta::TableSchema schema;
    schema.table_id_ = GetTableName();
    schema.dimension_ = TABLE_DIM;
    schema.engine_type_ = static_cast<int>(ms::engine::EngineType::FAISS_IDMAP);
    return schema;
}
Z
zhiru 已提交
61

S
starlord 已提交
62 63
// Fills `vectors` with n random vectors of TABLE_DIM floats each.
//
// @param n        number of vectors to generate; n <= 0 yields an empty buffer
// @param vectors  output buffer; its previous contents are discarded and it
//                 ends up holding n * TABLE_DIM values, with vector i stored
//                 at indices [i * TABLE_DIM, (i + 1) * TABLE_DIM)
//
// Every component is one drand48() draw, narrowed to float.
void
BuildVectors(int64_t n, std::vector<float> &vectors) {
    vectors.clear();
    if (n <= 0) {
        // A negative count would otherwise wrap to a huge size_t in resize().
        return;
    }

    vectors.resize(static_cast<size_t>(n) * TABLE_DIM);
    // Walk the buffer front to back instead of indexing with int counters,
    // which could overflow when n does not fit in an int.
    for (float &component : vectors) {
        component = static_cast<float>(drand48());
    }
}
S
starlord 已提交
72
} // namespace
Z
zhiru 已提交
73

S
starlord 已提交
74
// Drives a VectorSource holding 100 vectors through two Add() calls and
// expects it to hand them out in bounded batches: a request for 50 adds 50,
// and a following request for 60 adds only the 50 that are left.
TEST_F(MemManagerTest, VECTOR_SOURCE_TEST) {
    ms::engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    ms::engine::meta::TableFileSchema table_file_schema;
    table_file_schema.table_id_ = GetTableName();
    status = impl_->CreateTableFile(table_file_schema);
    ASSERT_TRUE(status.ok());

    int64_t n = 100;
    std::vector<float> vectors;
    BuildVectors(n, vectors);

    ms::engine::VectorSource source(n, vectors.data());

    // The engine is built from the table file schema as it stands after
    // CreateTableFile(); only nlist comes from the table schema.
    ms::engine::ExecutionEnginePtr execution_engine =
        ms::engine::EngineFactory::Build(table_file_schema.dimension_,
                                         table_file_schema.location_,
                                         static_cast<ms::engine::EngineType>(table_file_schema.engine_type_),
                                         static_cast<ms::engine::MetricType>(table_file_schema.metric_type_),
                                         table_schema.nlist_);

    // Zero-initialised so the assertions below never read an indeterminate
    // value should Add() return without writing its out-parameter.
    size_t num_vectors_added = 0;
    ms::engine::IDNumbers vector_ids;

    // First batch: 50 requested, 50 available.
    status = source.Add(execution_engine, table_file_schema, 50, num_vectors_added, vector_ids);
    ASSERT_TRUE(status.ok());
    vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 50);
    ASSERT_EQ(num_vectors_added, 50);

    // Second batch: 60 requested, but only 50 vectors remain in the source.
    vector_ids.clear();
    num_vectors_added = 0;
    status = source.Add(execution_engine, table_file_schema, 60, num_vectors_added, vector_ids);
    ASSERT_TRUE(status.ok());

    ASSERT_EQ(num_vectors_added, 50);

    // GetVectorIds() is expected to report the ids of both batches.
    vector_ids = source.GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);
}

S
starlord 已提交
115
// Exercises a single MemTableFile: after adding 100 vectors its reported
// memory must equal 100 * sizeof(one vector); after offering it a further
// n_max vectors (one whole file's worth) it must be full, having accepted
// only n_max - 100 of them.
TEST_F(MemManagerTest, MEM_TABLE_FILE_TEST) {
    auto options = GetOptions();

    ms::engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    ms::engine::MemTableFile mem_table_file(GetTableName(), impl_, options);

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    ms::engine::VectorSourcePtr source = std::make_shared<ms::engine::VectorSource>(n_100, vectors_100.data());

    ms::engine::IDNumbers vector_ids;
    status = mem_table_file.Add(source, vector_ids);
    ASSERT_TRUE(status.ok());

//    std::cout << mem_table_file.GetCurrentMem() << " " << mem_table_file.GetMemLeft() << std::endl;

    vector_ids = source->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file.GetCurrentMem(), n_100 * singleVectorMem);

    // Number of vectors whose raw data adds up to MAX_TABLE_FILE_MEM.
    int64_t n_max = ms::engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    ms::engine::VectorSourcePtr source_128M = std::make_shared<ms::engine::VectorSource>(n_max, vectors_128M.data());
    vector_ids.clear();
    status = mem_table_file.Add(source_128M, vector_ids);
    // Check the status before looking at the ids so a failed Add() is
    // reported as such rather than as a size mismatch.
    ASSERT_TRUE(status.ok());

    // The file already holds n_100 vectors, so only the remainder fits.
    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max - n_100);

    ASSERT_TRUE(mem_table_file.IsFull());
}

S
starlord 已提交
156
// Exercises MemTable, which spreads inserted vectors over several
// MemTableFiles: checks the memory of the current file, the number of files
// after each of three inserts (100, n_max and 1024000 vectors), and finally
// that Serialize() succeeds.
TEST_F(MemManagerTest, MEM_TABLE_TEST) {
    auto options = GetOptions();

    ms::engine::meta::TableSchema table_schema = BuildTableSchema();
    auto status = impl_->CreateTable(table_schema);
    ASSERT_TRUE(status.ok());

    int64_t n_100 = 100;
    std::vector<float> vectors_100;
    BuildVectors(n_100, vectors_100);

    ms::engine::VectorSourcePtr source_100 = std::make_shared<ms::engine::VectorSource>(n_100, vectors_100.data());

    ms::engine::MemTable mem_table(GetTableName(), impl_, options);

    ms::engine::IDNumbers vector_ids;
    status = mem_table.Add(source_100, vector_ids);
    ASSERT_TRUE(status.ok());
    vector_ids = source_100->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), 100);

    ms::engine::MemTableFilePtr mem_table_file;
    mem_table.GetCurrentMemTableFile(mem_table_file);
    size_t singleVectorMem = sizeof(float) * TABLE_DIM;
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    // Number of vectors whose raw data adds up to MAX_TABLE_FILE_MEM.
    int64_t n_max = ms::engine::MAX_TABLE_FILE_MEM / singleVectorMem;
    ASSERT_GT(n_max, 0);
    std::vector<float> vectors_128M;
    BuildVectors(n_max, vectors_128M);

    vector_ids.clear();
    ms::engine::VectorSourcePtr source_128M = std::make_shared<ms::engine::VectorSource>(n_max, vectors_128M.data());
    status = mem_table.Add(source_128M, vector_ids);
    ASSERT_TRUE(status.ok());

    vector_ids = source_128M->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_max);

    // The first file already held n_100 vectors, so the last n_100 vectors of
    // this insert are expected to have spilled into a second file.
    mem_table.GetCurrentMemTableFile(mem_table_file);
    ASSERT_EQ(mem_table_file->GetCurrentMem(), n_100 * singleVectorMem);

    ASSERT_EQ(mem_table.GetTableFileCount(), 2);

    int64_t n_1G = 1024000;
    std::vector<float> vectors_1G;
    BuildVectors(n_1G, vectors_1G);

    ms::engine::VectorSourcePtr source_1G = std::make_shared<ms::engine::VectorSource>(n_1G, vectors_1G.data());

    vector_ids.clear();
    status = mem_table.Add(source_1G, vector_ids);
    ASSERT_TRUE(status.ok());

    vector_ids = source_1G->GetVectorIds();
    ASSERT_EQ(vector_ids.size(), n_1G);

    // Expected file count: the current (second) file holds n_100 vectors and
    // can take n_max - n_100 more; the vectors that do not fit need one new
    // file per n_max vectors, rounded up. Integer ceiling division is used
    // because std::ceil applied to an integer quotient is a no-op.
    // NOTE(review): assumes MAX_TABLE_FILE_MEM is a multiple of the size of
    // one vector, so that a file holds exactly n_max vectors - confirm.
    const int64_t room_in_current_file = n_max - n_100;
    const int64_t overflow = n_1G > room_in_current_file ? n_1G - room_in_current_file : 0;
    int expectedTableFileCount = static_cast<int>(2 + (overflow + n_max - 1) / n_max);
    ASSERT_EQ(mem_table.GetTableFileCount(), expectedTableFileCount);

    status = mem_table.Serialize();
    ASSERT_TRUE(status.ok());
}

S
starlord 已提交
219
// Inserts 100000 vectors with caller-chosen ids (vector i gets id i), waits,
// then queries the table with 10 randomly picked inserted vectors and
// expects each query's best hit to be the picked vector itself: same id and
// a distance below 1e-4.
TEST_F(MemManagerTest2, SERIAL_INSERT_SEARCH_TEST) {
    ms::engine::meta::TableSchema table_info = BuildTableSchema();
    auto stat = db_->CreateTable(table_info);

    ms::engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = GetTableName();
    stat = db_->DescribeTable(table_info_get);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    int64_t nb = 100000;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    ms::engine::IDNumbers vector_ids;
    vector_ids.resize(nb);
    for (int64_t i = 0; i < nb; i++) {
        vector_ids[i] = i;
    }

    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());

    std::this_thread::sleep_for(std::chrono::seconds(3));//ensure raw data write to disk

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<int64_t> dis(0, nb - 1);

    // id -> copy of the inserted vector carrying that id. Drawing the same
    // index twice simply leaves the existing entry in place.
    int64_t num_query = 10;
    std::map<int64_t, std::vector<float>> search_vectors;
    for (int64_t i = 0; i < num_query; ++i) {
        int64_t index = dis(gen);
        const auto first = xb.begin() + index * TABLE_DIM;
        search_vectors.insert(std::make_pair(vector_ids[index], std::vector<float>(first, first + TABLE_DIM)));
    }

    int topk = 10, nprobe = 10;
    for (auto &pair : search_vectors) {
        auto &search = pair.second;
        ms::engine::QueryResults results;
        stat = db_->Query(GetTableName(), topk, 1, nprobe, search.data(), results);
        // Fail cleanly instead of indexing into an empty result set when the
        // query did not succeed or returned nothing.
        ASSERT_TRUE(stat.ok());
        ASSERT_FALSE(results.empty());
        ASSERT_FALSE(results[0].empty());
        ASSERT_EQ(results[0][0].first, pair.first);
        ASSERT_LT(results[0][0].second, 1e-4);
    }
}

S
starlord 已提交
268
// Inserts 20 batches of 40960 freshly generated vectors, expecting every
// insert to succeed, and logs the elapsed time of the whole loop (vector
// generation included).
TEST_F(MemManagerTest2, INSERT_TEST) {
    ms::engine::meta::TableSchema table_info = BuildTableSchema();
    auto stat = db_->CreateTable(table_info);

    ms::engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = GetTableName();
    stat = db_->DescribeTable(table_info_get);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    auto start_time = METRICS_NOW_TIME;

    int insert_loop = 20;
    for (int i = 0; i < insert_loop; ++i) {
        int64_t nb = 40960;
        std::vector<float> xb;
        BuildVectors(nb, xb);
        // Passed in empty on every iteration; presumably the database then
        // assigns the ids itself - confirm against InsertVectors().
        ms::engine::IDNumbers vector_ids;
        stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
        ASSERT_TRUE(stat.ok());
    }
    auto end_time = METRICS_NOW_TIME;
    auto total_time = METRICS_MICROSECONDS(start_time, end_time);
    // The value comes from METRICS_MICROSECONDS, so it is labelled in
    // microseconds rather than milliseconds.
    LOG(DEBUG) << "total_time spent in INSERT_TEST (us) : " << total_time;
}
Z
zhiru 已提交
293

S
starlord 已提交
294
// Runs inserts and searches at the same time. The main thread first inserts
// qb "target" vectors and then 19 batches of nb vectors; a second thread
// waits two seconds and then queries the table ten times (one second apart)
// with the target vectors, expecting each one to come back as its own best
// hit.
TEST_F(MemManagerTest2, CONCURRENT_INSERT_SEARCH_TEST) {
    ms::engine::meta::TableSchema table_info = BuildTableSchema();
    auto stat = db_->CreateTable(table_info);

    ms::engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = GetTableName();
    stat = db_->DescribeTable(table_info_get);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    ms::engine::IDNumbers vector_ids;
    ms::engine::IDNumbers target_ids;

    int64_t nb = 40960;
    std::vector<float> xb;
    BuildVectors(nb, xb);

    int64_t qb = 5;
    std::vector<float> qxb;
    BuildVectors(qb, qxb);

    std::thread search([&]() {
        ms::engine::QueryResults results;
        int k = 10;
        // target_ids is filled by the first insert on the main thread; this
        // delay is the only thing ordering that write before the reads below.
        std::this_thread::sleep_for(std::chrono::seconds(2));

        INIT_TIMER;
        std::stringstream ss;
        uint64_t count = 0;
        uint64_t prev_count = 0;

        for (auto j = 0; j < 10; ++j) {
            ss.str("");
            db_->Size(count);
            // NOTE(review): prev_count is overwritten with the value that was
            // just read, so the `count >= prev_count` check at the end of the
            // iteration always passes. Saving prev_count before Size() would
            // compare consecutive iterations - confirm the reported size is
            // meant to be monotonic before tightening it.
            prev_count = count;

            START_TIMER;
            // Thread-local status: the main thread's `stat` is not touched
            // from this thread.
            auto query_status = db_->Query(GetTableName(), k, qb, 10, qxb.data(), results);
            ss << "Search " << j << " With Size " << count / ms::engine::meta::M << " M";
            STOP_TIMER(ss.str());

            ASSERT_TRUE(query_status.ok());
            // Guard the indexing below against short result or id lists.
            ASSERT_GE(results.size(), static_cast<size_t>(qb));
            ASSERT_GE(target_ids.size(), static_cast<size_t>(qb));
            for (int64_t q = 0; q < qb; ++q) {
                ASSERT_FALSE(results[q].empty());
                ASSERT_EQ(results[q][0].first, target_ids[q]);
                ss.str("");
                ss << "Result [" << q << "]:";
                for (const auto &result : results[q]) {
                    ss << result.first << " ";
                }
                /* LOG(DEBUG) << ss.str(); */
            }
            ASSERT_TRUE(count >= prev_count);
            std::this_thread::sleep_for(std::chrono::seconds(1));
        }
    });

    // Joins the search thread on every exit path: a fatal assertion below
    // returns from the test body early, and destroying a std::thread that is
    // still joinable calls std::terminate().
    struct ThreadJoiner {
        explicit ThreadJoiner(std::thread &t) : thread(t) {
        }
        ~ThreadJoiner() {
            if (thread.joinable()) {
                thread.join();
            }
        }
        std::thread &thread;
    } search_joiner(search);

    int loop = 20;

    for (auto i = 0; i < loop; ++i) {
        if (i == 0) {
            // The vectors the search thread looks for go in first.
            auto insert_status = db_->InsertVectors(GetTableName(), qb, qxb.data(), target_ids);
            ASSERT_TRUE(insert_status.ok());
            ASSERT_EQ(target_ids.size(), qb);
        } else {
            auto insert_status = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
            ASSERT_TRUE(insert_status.ok());
        }
        std::this_thread::sleep_for(std::chrono::microseconds(1));
    }

    search.join();
}
Z
zhiru 已提交
364

S
starlord 已提交
365
// Inserts several batches with caller-supplied ids and checks that
// InsertVectors() succeeds and leaves those ids as supplied. One batch is
// inserted with an empty id list instead.
TEST_F(MemManagerTest2, VECTOR_IDS_TEST) {
    ms::engine::meta::TableSchema table_info = BuildTableSchema();
    auto stat = db_->CreateTable(table_info);

    ms::engine::meta::TableSchema table_info_get;
    table_info_get.table_id_ = GetTableName();
    stat = db_->DescribeTable(table_info_get);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(table_info_get.dimension_, TABLE_DIM);

    ms::engine::IDNumbers vector_ids;
    std::vector<float> xb;

    // Regenerates xb with `count` random vectors and sets vector_ids to
    // first_id, first_id + 1, ..., first_id + count - 1.
    auto prepare_batch = [&](int64_t count, int64_t first_id) {
        BuildVectors(count, xb);
        vector_ids.clear();
        vector_ids.resize(count);
        for (int64_t i = 0; i < count; i++) {
            vector_ids[i] = first_id + i;
        }
    };

    int64_t nb = 100000;
    prepare_batch(nb, 0);
    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(vector_ids[0], 0);

    nb = 25000;
    prepare_batch(nb, nb);
    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(vector_ids[0], nb);

    // 262144 * TABLE_DIM floats: 256 MB of raw vector data with 4-byte floats.
    nb = 262144;
    prepare_batch(nb, nb / 2);
    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());
    ASSERT_EQ(vector_ids[0], nb / 2);

    // 65536 * TABLE_DIM floats: 64 MB of raw vector data with 4-byte floats.
    // The id list is passed in empty for this batch; presumably the database
    // then assigns the ids itself - confirm against InsertVectors().
    nb = 65536;
    BuildVectors(nb, xb);
    vector_ids.clear();
    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());

    nb = 100;
    prepare_batch(nb, nb);
    stat = db_->InsertVectors(GetTableName(), nb, xb.data(), vector_ids);
    ASSERT_TRUE(stat.ok());
    // Every supplied id must come back unchanged, not just the first one.
    for (int64_t i = 0; i < nb; i++) {
        ASSERT_EQ(vector_ids[i], i + nb);
    }
}
S
starlord 已提交
434