DBImpl.cpp 32.2 KB
Newer Older
J
jinhai 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

S
starlord 已提交
18
#include "db/DBImpl.h"
S
starlord 已提交
19
#include "Utils.h"
S
starlord 已提交
20 21
#include "cache/CpuCacheMgr.h"
#include "cache/GpuCacheMgr.h"
S
starlord 已提交
22
#include "engine/EngineFactory.h"
S
starlord 已提交
23
#include "insert/MemMenagerFactory.h"
S
starlord 已提交
24
#include "meta/MetaConsts.h"
S
starlord 已提交
25 26
#include "meta/MetaFactory.h"
#include "meta/SqliteMetaImpl.h"
G
groot 已提交
27
#include "metrics/Metrics.h"
S
starlord 已提交
28
#include "scheduler/SchedInst.h"
Y
Yu Kun 已提交
29
#include "scheduler/job/BuildIndexJob.h"
S
starlord 已提交
30 31
#include "scheduler/job/DeleteJob.h"
#include "scheduler/job/SearchJob.h"
S
starlord 已提交
32
#include "utils/Log.h"
G
groot 已提交
33
#include "utils/StringHelpFunctions.h"
S
starlord 已提交
34
#include "utils/TimeRecorder.h"
X
Xu Peng 已提交
35

X
Xu Peng 已提交
36
#include <assert.h>
S
starlord 已提交
37
#include <algorithm>
G
groot 已提交
38
#include <boost/filesystem.hpp>
S
starlord 已提交
39 40 41
#include <chrono>
#include <cstring>
#include <iostream>
G
groot 已提交
42
#include <set>
S
starlord 已提交
43
#include <thread>
X
Xu Peng 已提交
44

J
jinhai 已提交
45
namespace milvus {
X
Xu Peng 已提交
46
namespace engine {
X
Xu Peng 已提交
47

G
groot 已提交
48 49
namespace {

J
jinhai 已提交
50 51 52
constexpr uint64_t METRIC_ACTION_INTERVAL = 1;
constexpr uint64_t COMPACT_ACTION_INTERVAL = 1;
constexpr uint64_t INDEX_ACTION_INTERVAL = 1;
G
groot 已提交
53

G
groot 已提交
54 55 56 57 58 59 60 61 62 63 64
static const Status SHUTDOWN_ERROR = Status(DB_ERROR, "Milsvus server is shutdown!");

// Appends every file of every date bucket in `date_files` to `files_array`,
// in the container's iteration order. Existing entries of `files_array` are kept.
void
TraverseFiles(const meta::DatePartionedTableFilesSchema& date_files, meta::TableFilesSchema& files_array) {
    for (auto bucket = date_files.begin(); bucket != date_files.end(); ++bucket) {
        for (const auto& entry : bucket->second) {
            files_array.push_back(entry);
        }
    }
}

S
starlord 已提交
65
}  // namespace
G
groot 已提交
66

Y
Yu Kun 已提交
67
// Builds the meta backend and the insert-buffer manager from `options`, then starts
// the service. shutting_down_ is initialised to true so that the Start() call below
// does not take its "already running" early return.
DBImpl::DBImpl(const DBOptions& options)
    : options_(options),
      shutting_down_(true),
      compact_thread_pool_(1, 1),
      index_thread_pool_(1, 1) {
    meta_ptr_ = MetaFactory::Build(options.meta_, options.mode_);
    mem_mgr_ = MemManagerFactory::Build(meta_ptr_, options_);

    Start();
}

// Stops the service on destruction; Stop() returns immediately if already stopped.
DBImpl::~DBImpl() {
    Stop();
}

S
starlord 已提交
78
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
S
starlord 已提交
79
// external api
S
starlord 已提交
80
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
S
starlord 已提交
81 82 83
// Starts the DB service.
// Returns OK immediately when the service is already running. Otherwise clears
// shutting_down_ and, unless this node is CLUSTER_READONLY, launches the
// background timer thread.
Status
DBImpl::Start() {
    const bool already_running = !shutting_down_.load(std::memory_order_acquire);
    if (already_running) {
        return Status::OK();
    }

    ENGINE_LOG_TRACE << "DB service start";
    shutting_down_.store(false, std::memory_order_release);

    // in the distributed version some nodes are read only; they get no timer thread
    const bool read_only_node = (options_.mode_ == DBOptions::MODE::CLUSTER_READONLY);
    if (!read_only_node) {
        ENGINE_LOG_TRACE << "StartTimerTasks";
        bg_timer_thread_ = std::thread(&DBImpl::BackgroundTimerTask, this);
    }

    return Status::OK();
}

S
starlord 已提交
99 100 101
// Stops the DB service.
// Returns OK immediately when the service is already stopped. Otherwise sets
// shutting_down_, flushes the insert buffer, waits for the background timer thread
// (which itself waits for pending merge / build-index work before exiting) and,
// on nodes that are not CLUSTER_READONLY, asks the meta backend to clean up.
Status
DBImpl::Stop() {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return Status::OK();
    }

    shutting_down_.store(true, std::memory_order_release);

    // make sure all in-memory data is serialized
    MemSerialize();

    // wait for compaction / build-index to finish.
    // Start() does not launch the timer thread on CLUSTER_READONLY nodes, and
    // std::thread::join() throws std::system_error on a non-joinable thread, so
    // only join when there is a thread to wait for.
    if (bg_timer_thread_.joinable()) {
        bg_timer_thread_.join();
    }

    if (options_.mode_ != DBOptions::MODE::CLUSTER_READONLY) {
        meta_ptr_->CleanUp();
    }

    ENGINE_LOG_TRACE << "DB service stop";
    return Status::OK();
}

S
starlord 已提交
121 122
// Forwards to the meta backend's DropAll() and returns its status.
// Unlike most public entry points, this one does not check shutting_down_.
Status
DBImpl::DropAll() {
    auto drop_status = meta_ptr_->DropAll();
    return drop_status;
}

S
starlord 已提交
126
// Creates a table in the meta backend.
// The caller's schema is left untouched: a copy is made, its index_file_size_ is
// multiplied by ONE_MB (DescribeTable applies the inverse division), and the copy
// is handed to the meta layer.
// Returns SHUTDOWN_ERROR while the service is stopped, else the meta status.
Status
DBImpl::CreateTable(meta::TableSchema& table_schema) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    meta::TableSchema scaled_schema(table_schema);
    scaled_schema.index_file_size_ = scaled_schema.index_file_size_ * ONE_MB;
    return meta_ptr_->CreateTable(scaled_schema);
}

S
starlord 已提交
137
// Drops `table_id` for the given dates by delegating to DropTableRecursively().
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::DropTable(const std::string& table_id, const meta::DatesT& dates) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto drop_status = DropTableRecursively(table_id, dates);
    return drop_status;
}

S
starlord 已提交
146
// Fills `table_schema` from the meta backend and returns the meta status.
// index_file_size_ is divided by ONE_MB before returning (the inverse of the
// scaling done in CreateTable); the division is applied whatever the status is.
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::DescribeTable(meta::TableSchema& table_schema) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto describe_status = meta_ptr_->DescribeTable(table_schema);
    table_schema.index_file_size_ = table_schema.index_file_size_ / ONE_MB;
    return describe_status;
}

S
starlord 已提交
157
// Asks the meta backend whether `table_id` exists; the answer is written to
// `has_or_not`. Returns SHUTDOWN_ERROR (leaving `has_or_not` untouched) while the
// service is stopped.
Status
DBImpl::HasTable(const std::string& table_id, bool& has_or_not) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto lookup_status = meta_ptr_->HasTable(table_id, has_or_not);
    return lookup_status;
}

S
starlord 已提交
166
// Lets the meta backend fill `table_schema_array` with the known table schemas.
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::AllTables(std::vector<meta::TableSchema>& table_schema_array) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto list_status = meta_ptr_->AllTables(table_schema_array);
    return list_status;
}

S
starlord 已提交
175
// Loads the index files of `table_id` and of all its partitions through the engine.
//
// The running total of the engines' PhysicalSize() is compared against the CPU
// cache headroom (capacity minus usage, sampled once before loading); as soon as
// the total exceeds it the function returns SERVER_CACHE_FULL. Files loaded before
// that point stay loaded.
//
// Returns SHUTDOWN_ERROR while stopped, the GetFilesToSearch status if the parent
// table lookup fails, DB_ERROR for an unbuildable engine or a load exception,
// and OK otherwise.
Status
DBImpl::PreloadTable(const std::string& table_id) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    // files of the parent table
    std::vector<size_t> file_ids;
    meta::TableFilesSchema files_to_load;
    auto status = GetFilesToSearch(table_id, file_ids, files_to_load);
    if (!status.ok()) {
        return status;
    }

    // files of the partition tables; failures of these lookups are not propagated
    std::vector<meta::TableSchema> partitions;
    status = meta_ptr_->ShowPartitions(table_id, partitions);
    for (auto& partition : partitions) {
        status = GetFilesToSearch(partition.table_id_, file_ids, files_to_load);
    }

    const int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
    const int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    const int64_t cache_headroom = cache_total - cache_usage;
    int64_t accumulated_size = 0;

    for (auto& file : files_to_load) {
        ExecutionEnginePtr engine =
            EngineFactory::Build(file.dimension_, file.location_, static_cast<EngineType>(file.engine_type_),
                                 static_cast<MetricType>(file.metric_type_), file.nlist_);
        if (engine == nullptr) {
            ENGINE_LOG_ERROR << "Invalid engine type";
            return Status(DB_ERROR, "Invalid engine type");
        }

        accumulated_size += engine->PhysicalSize();
        if (accumulated_size > cache_headroom) {
            return Status(SERVER_CACHE_FULL, "Cache is full");
        }

        try {
            // step 1: load index
            engine->Load(true);
        } catch (std::exception& ex) {
            std::string msg = "Pre-load table encounter exception: " + std::string(ex.what());
            ENGINE_LOG_ERROR << msg;
            return Status(DB_ERROR, msg);
        }
    }

    return Status::OK();
}

S
starlord 已提交
227
// Forwards the new `flag` for `table_id` to the meta backend.
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::UpdateTableFlag(const std::string& table_id, int64_t flag) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto update_status = meta_ptr_->UpdateTableFlag(table_id, flag);
    return update_status;
}

S
starlord 已提交
236
// Writes the row count of `table_id` to `row_count` by delegating to
// GetTableRowCountRecursively(). Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::GetTableRowCount(const std::string& table_id, uint64_t& row_count) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto count_status = GetTableRowCountRecursively(table_id, row_count);
    return count_status;
}

// Registers a partition (`partition_name`, `partition_tag`) of `table_id` in the
// meta backend. Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::CreatePartition(const std::string& table_id, const std::string& partition_name,
                        const std::string& partition_tag) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto create_status = meta_ptr_->CreatePartition(table_id, partition_name, partition_tag);
    return create_status;
}

// Drops the partition table `partition_name`.
//
// Steps: discard its buffered vectors, soft-delete it in the meta backend, then
// submit a DeleteJob to the scheduler and wait for it.
//
// Returns SHUTDOWN_ERROR while the service is stopped, the meta status when the
// soft delete fails (no delete job is scheduled in that case), and OK otherwise.
Status
DBImpl::DropPartition(const std::string& partition_name) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    // not allow insert: drop buffered data first. The result is intentionally not
    // treated as fatal, matching the previous behaviour.
    mem_mgr_->EraseMemVector(partition_name);

    // soft delete table; if this fails the partition still exists in meta, so its
    // files must not be scheduled for deletion
    auto status = meta_ptr_->DropPartition(partition_name);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to drop partition " << partition_name << ": " << status.ToString();
        return status;
    }

    // scheduler will determine when to delete table files
    auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource();
    scheduler::DeleteJobPtr job = std::make_shared<scheduler::DeleteJob>(partition_name, meta_ptr_, nres);
    scheduler::JobMgrInst::GetInstance()->Put(job);
    job->WaitAndDelete();

    return Status::OK();
}

S
starlord 已提交
273
// Resolves (`table_id`, `partition_tag`) to a partition name via the meta backend
// and drops that partition.
//
// Returns SHUTDOWN_ERROR while the service is stopped, the lookup status when the
// tag cannot be resolved (nothing is dropped), else the result of DropPartition().
Status
DBImpl::DropPartitionByTag(const std::string& table_id, const std::string& partition_tag) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    std::string partition_name;
    auto status = meta_ptr_->GetPartitionName(table_id, partition_tag, partition_name);
    if (!status.ok()) {
        // do not call DropPartition() with a name that was never resolved
        ENGINE_LOG_ERROR << "Failed to resolve partition tag " << partition_tag << " of table " << table_id << ": "
                         << status.ToString();
        return status;
    }

    return DropPartition(partition_name);
}

// Lets the meta backend fill `partiton_schema_array` with the partitions of
// `table_id`. Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::ShowPartitions(const std::string& table_id, std::vector<meta::TableSchema>& partiton_schema_array) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto show_status = meta_ptr_->ShowPartitions(table_id, partiton_schema_array);
    return show_status;
}

// Inserts `n` vectors into `table_id`, or into one of its partitions when
// `partition_tag` is non-empty. The ids are exchanged through `vector_ids`.
//
// Returns SHUTDOWN_ERROR while the service is stopped, the lookup status when the
// partition tag cannot be resolved (nothing is inserted), else the status of the
// insert-buffer manager.
Status
DBImpl::InsertVectors(const std::string& table_id, const std::string& partition_tag, uint64_t n, const float* vectors,
                      IDNumbers& vector_ids) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    // if partition is specified, use partition as target table
    Status status;
    std::string target_table_name = table_id;
    if (!partition_tag.empty()) {
        status = meta_ptr_->GetPartitionName(table_id, partition_tag, target_table_name);
        if (!status.ok()) {
            // an unresolved tag must not silently redirect the insert elsewhere
            ENGINE_LOG_ERROR << "Failed to resolve partition tag " << partition_tag << " of table " << table_id
                             << ": " << status.ToString();
            return status;
        }
    }

    // insert vectors into target table.
    // NOTE(review): the metrics collector is handed `status` before the insert runs;
    // presumably it holds a reference and reads it on destruction. Confirm.
    milvus::server::CollectInsertMetrics metrics(n, status);
    status = mem_mgr_->InsertVectors(target_table_name, n, vectors, vector_ids);

    return status;
}

S
starlord 已提交
316
// Creates (or rebuilds) the index of `table_id`.
//
// Under build_index_mutex_ the stored index description is compared with the
// requested one (the metric type is always taken from the stored description) and
// updated recursively when they differ. After releasing the lock the function
// waits for pending merge work and then builds the index recursively using the
// caller's `index` argument.
//
// Returns SHUTDOWN_ERROR while stopped, or the first failing status.
Status
DBImpl::CreateIndex(const std::string& table_id, const TableIndex& index) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    Status status;
    {
        std::unique_lock<std::mutex> build_lock(build_index_mutex_);

        // step 1: read the index currently recorded for the table
        TableIndex current_index;
        status = DescribeIndex(table_id, current_index);
        if (!status.ok()) {
            ENGINE_LOG_ERROR << "Failed to get table index info for table: " << table_id;
            return status;
        }

        // step 2: update index info if the request differs.
        // the metric type was defined by CreateTable and is never changed here
        TableIndex requested_index = index;
        requested_index.metric_type_ = current_index.metric_type_;
        const bool index_changed = !utils::IsSameIndex(current_index, requested_index);
        if (index_changed) {
            status = UpdateTableIndexRecursively(table_id, requested_index);
            if (!status.ok()) {
                return status;
            }
        }
    }

    // step 3: let merge file thread finish
    // to avoid duplicate data bug
    WaitMergeFileFinish();

    // step 4: wait and build index
    status = BuildTableIndexRecursively(table_id, index);
    return status;
}

S
starlord 已提交
355
// Reads the index description of `table_id` from the meta backend into `index`.
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::DescribeIndex(const std::string& table_id, TableIndex& index) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto describe_status = meta_ptr_->DescribeTableIndex(table_id, index);
    return describe_status;
}

S
starlord 已提交
364
// Drops the index of `table_id` by delegating to DropTableIndexRecursively().
// Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::DropIndex(const std::string& table_id) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    ENGINE_LOG_DEBUG << "Drop index for table: " << table_id;
    auto drop_status = DropTableIndexRecursively(table_id);
    return drop_status;
}

S
starlord 已提交
374
// Convenience overload: runs the date-aware Query() with a single date obtained
// from utils::GetDate(). Returns SHUTDOWN_ERROR while the service is stopped.
Status
DBImpl::Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
              uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    meta::DatesT single_date = {utils::GetDate()};
    return Query(table_id, partition_tags, k, nq, nprobe, vectors, single_date, result_ids, result_distances);
}

S
starlord 已提交
386
// Searches `table_id` for the `nq` query vectors and writes the top-`k` results to
// `result_ids` / `result_distances`.
//
// With no partition tags, the files of the parent table and of every partition are
// searched; otherwise only the files of the partitions matched by the tags.
// `dates` is only used for the debug log line in this function.
//
// Returns SHUTDOWN_ERROR while stopped, the GetFilesToSearch status if the parent
// table lookup fails, else the QueryAsync status. Failures while collecting
// partition files are not propagated.
Status
DBImpl::Query(const std::string& table_id, const std::vector<std::string>& partition_tags, uint64_t k, uint64_t nq,
              uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
              ResultDistances& result_distances) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    ENGINE_LOG_DEBUG << "Query by dates for table: " << table_id << " date range count: " << dates.size();

    Status status;
    std::vector<size_t> ids;
    meta::TableFilesSchema files_array;

    const bool search_whole_table = partition_tags.empty();
    if (search_whole_table) {
        // no partition tag specified: parent table first ...
        status = GetFilesToSearch(table_id, ids, files_array);
        if (!status.ok()) {
            return status;
        }

        // ... then every partition table
        std::vector<meta::TableSchema> partitions;
        status = meta_ptr_->ShowPartitions(table_id, partitions);
        for (auto& partition : partitions) {
            status = GetFilesToSearch(partition.table_id_, ids, files_array);
        }
    } else {
        // only the partitions selected by the tags
        std::set<std::string> selected_partitions;
        GetPartitionsByTags(table_id, partition_tags, selected_partitions);

        for (auto& selected : selected_partitions) {
            status = GetFilesToSearch(selected, ids, files_array);
        }
    }

    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info before query
    status = QueryAsync(table_id, files_array, k, nq, nprobe, vectors, result_ids, result_distances);
    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info after query
    return status;
}
X
Xu Peng 已提交
428

S
starlord 已提交
429
// Searches only the files of `table_id` whose ids are listed in `file_ids`.
//
// Each entry of `file_ids` is parsed with std::stoul. `dates` is only used for the
// debug log line in this function.
//
// Returns SHUTDOWN_ERROR while stopped, DB_ERROR when an id cannot be parsed or no
// file matches, the GetFilesToSearch status on lookup failure, else the
// QueryAsync status.
Status
DBImpl::QueryByFileID(const std::string& table_id, const std::vector<std::string>& file_ids, uint64_t k, uint64_t nq,
                      uint64_t nprobe, const float* vectors, const meta::DatesT& dates, ResultIds& result_ids,
                      ResultDistances& result_distances) {
    if (shutting_down_.load(std::memory_order_acquire)) {
        return SHUTDOWN_ERROR;
    }

    ENGINE_LOG_DEBUG << "Query by file ids for table: " << table_id << " date range count: " << dates.size();

    // get specified files
    std::vector<size_t> ids;
    ids.reserve(file_ids.size());
    for (const auto& id : file_ids) {
        try {
            ids.push_back(std::stoul(id));
        } catch (std::exception& ex) {
            // std::stoul throws std::invalid_argument / std::out_of_range; report it
            // through the Status channel instead of letting it escape this API
            std::string msg = "Invalid file id: " + id;
            ENGINE_LOG_ERROR << msg;
            return Status(DB_ERROR, msg);
        }
    }

    meta::TableFilesSchema files_array;
    auto status = GetFilesToSearch(table_id, ids, files_array);
    if (!status.ok()) {
        return status;
    }

    if (files_array.empty()) {
        return Status(DB_ERROR, "Invalid file id");
    }

    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info before query
    status = QueryAsync(table_id, files_array, k, nq, nprobe, vectors, result_ids, result_distances);
    cache::CpuCacheMgr::GetInstance()->PrintInfo();  // print cache info after query
    return status;
}

S
starlord 已提交
464
// Asks the meta backend for the total size and writes it to `result`.
// While the service is stopped, returns SHUTDOWN_ERROR and leaves `result` untouched.
Status
DBImpl::Size(uint64_t& result) {
    const bool stopped = shutting_down_.load(std::memory_order_acquire);
    if (stopped) {
        return SHUTDOWN_ERROR;
    }

    auto size_status = meta_ptr_->Size(result);
    return size_status;
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
S
starlord 已提交
474
// internal methods
S
starlord 已提交
475
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
S
starlord 已提交
476
// Runs one search over `files` through the scheduler and blocks until it is done.
//
// A SearchJob is created for (k, nq, nprobe, vectors), every file is attached to
// it, and the job is handed to the job manager. On success the job's ids and
// distances are copied to `result_ids` / `result_distances`.
// `table_id` is not used by this function.
//
// Returns the job's status when it failed, OK otherwise.
Status
DBImpl::QueryAsync(const std::string& table_id, const meta::TableFilesSchema& files, uint64_t k, uint64_t nq,
                   uint64_t nprobe, const float* vectors, ResultIds& result_ids, ResultDistances& result_distances) {
    server::CollectQueryMetrics metrics(nq);

    TimeRecorder rc("");

    // step 1: attach the files to search to a new job
    ENGINE_LOG_DEBUG << "Engine query begin, index file count: " << files.size();
    scheduler::SearchJobPtr search_job = std::make_shared<scheduler::SearchJob>(k, nq, nprobe, vectors);
    for (auto& index_file : files) {
        scheduler::TableFileSchemaPtr index_file_ptr = std::make_shared<meta::TableFileSchema>(index_file);
        search_job->AddIndexFile(index_file_ptr);
    }

    // step 2: put search task to scheduler and wait for it
    scheduler::JobMgrInst::GetInstance()->Put(search_job);
    search_job->WaitResult();
    if (!search_job->GetStatus().ok()) {
        return search_job->GetStatus();
    }

    // step 3: construct results
    result_ids = search_job->GetResultIds();
    result_distances = search_job->GetResultDistances();
    rc.ElapseFromBegin("Engine query totally cost");

    return Status::OK();
}

S
starlord 已提交
506 507
void
DBImpl::BackgroundTimerTask() {
X
Xu Peng 已提交
508
    Status status;
Y
yu yunfeng 已提交
509
    server::SystemInfo::GetInstance().Init();
X
Xu Peng 已提交
510
    while (true) {
S
starlord 已提交
511
        if (shutting_down_.load(std::memory_order_acquire)) {
512 513
            WaitMergeFileFinish();
            WaitBuildIndexFinish();
S
starlord 已提交
514 515

            ENGINE_LOG_DEBUG << "DB background thread exit";
G
groot 已提交
516 517
            break;
        }
X
Xu Peng 已提交
518

G
groot 已提交
519
        std::this_thread::sleep_for(std::chrono::seconds(1));
X
Xu Peng 已提交
520

G
groot 已提交
521
        StartMetricTask();
G
groot 已提交
522 523 524
        StartCompactionTask();
        StartBuildIndexTask();
    }
X
Xu Peng 已提交
525 526
}

S
starlord 已提交
527 528
void
DBImpl::WaitMergeFileFinish() {
529
    std::lock_guard<std::mutex> lck(compact_result_mutex_);
Y
Yu Kun 已提交
530
    for (auto& iter : compact_thread_results_) {
531 532 533 534
        iter.wait();
    }
}

S
starlord 已提交
535 536
void
DBImpl::WaitBuildIndexFinish() {
537
    std::lock_guard<std::mutex> lck(index_result_mutex_);
Y
Yu Kun 已提交
538
    for (auto& iter : index_thread_results_) {
539 540 541 542
        iter.wait();
    }
}

S
starlord 已提交
543 544
void
DBImpl::StartMetricTask() {
G
groot 已提交
545 546
    static uint64_t metric_clock_tick = 0;
    metric_clock_tick++;
S
starlord 已提交
547
    if (metric_clock_tick % METRIC_ACTION_INTERVAL != 0) {
G
groot 已提交
548 549 550
        return;
    }

551
    ENGINE_LOG_TRACE << "Start metric task";
S
starlord 已提交
552

G
groot 已提交
553 554 555
    server::Metrics::GetInstance().KeepingAliveCounterIncrement(METRIC_ACTION_INTERVAL);
    int64_t cache_usage = cache::CpuCacheMgr::GetInstance()->CacheUsage();
    int64_t cache_total = cache::CpuCacheMgr::GetInstance()->CacheCapacity();
J
JinHai-CN 已提交
556 557 558 559 560 561 562
    if (cache_total > 0) {
        double cache_usage_double = cache_usage;
        server::Metrics::GetInstance().CpuCacheUsageGaugeSet(cache_usage_double * 100 / cache_total);
    } else {
        server::Metrics::GetInstance().CpuCacheUsageGaugeSet(0);
    }

Y
Yu Kun 已提交
563
    server::Metrics::GetInstance().GpuCacheUsageGaugeSet();
G
groot 已提交
564 565 566 567 568 569 570 571
    uint64_t size;
    Size(size);
    server::Metrics::GetInstance().DataFileSizeGaugeSet(size);
    server::Metrics::GetInstance().CPUUsagePercentSet();
    server::Metrics::GetInstance().RAMUsagePercentSet();
    server::Metrics::GetInstance().GPUPercentGaugeSet();
    server::Metrics::GetInstance().GPUMemoryUsageGaugeSet();
    server::Metrics::GetInstance().OctetsSet();
S
starlord 已提交
572

K
kun yu 已提交
573
    server::Metrics::GetInstance().CPUCoreUsagePercentSet();
K
kun yu 已提交
574 575
    server::Metrics::GetInstance().GPUTemperature();
    server::Metrics::GetInstance().CPUTemperature();
K
kun yu 已提交
576

577
    ENGINE_LOG_TRACE << "Metric task finished";
G
groot 已提交
578 579
}

S
starlord 已提交
580 581
// Serializes the insert buffer under mem_serialize_mutex_.
// The table ids reported by the memory manager are added to compact_table_ids_,
// which StartCompactionTask() later hands to the compaction thread.
// Always returns OK.
Status
DBImpl::MemSerialize() {
    std::lock_guard<std::mutex> guard(mem_serialize_mutex_);

    std::set<std::string> serialized_table_ids;
    mem_mgr_->Serialize(serialized_table_ids);
    compact_table_ids_.insert(serialized_table_ids.begin(), serialized_table_ids.end());

    const bool anything_serialized = !serialized_table_ids.empty();
    if (anything_serialized) {
        SERVER_LOG_DEBUG << "Insert cache serialized";
    }

    return Status::OK();
}

S
starlord 已提交
596 597
void
DBImpl::StartCompactionTask() {
598 599
    static uint64_t compact_clock_tick = 0;
    compact_clock_tick++;
S
starlord 已提交
600
    if (compact_clock_tick % COMPACT_ACTION_INTERVAL != 0) {
601 602 603
        return;
    }

S
starlord 已提交
604
    // serialize memory data
605 606
    MemSerialize();

S
starlord 已提交
607
    // compactiong has been finished?
608 609 610 611 612 613 614
    {
        std::lock_guard<std::mutex> lck(compact_result_mutex_);
        if (!compact_thread_results_.empty()) {
            std::chrono::milliseconds span(10);
            if (compact_thread_results_.back().wait_for(span) == std::future_status::ready) {
                compact_thread_results_.pop_back();
            }
G
groot 已提交
615 616
        }
    }
X
Xu Peng 已提交
617

S
starlord 已提交
618
    // add new compaction task
619 620 621 622
    {
        std::lock_guard<std::mutex> lck(compact_result_mutex_);
        if (compact_thread_results_.empty()) {
            compact_thread_results_.push_back(
G
groot 已提交
623
                compact_thread_pool_.enqueue(&DBImpl::BackgroundCompaction, this, compact_table_ids_));
624 625
            compact_table_ids_.clear();
        }
G
groot 已提交
626
    }
X
Xu Peng 已提交
627 628
}

S
starlord 已提交
629
// Merges `files` of (`table_id`, `date`) into one new file.
//
// 1. A NEW_MERGE table file is created in the meta backend.
// 2. Source files are merged into an engine built for that file, each one being
//    queued for a TO_DELETE update, until the engine's Size() reaches the source
//    file's index_file_size_ (remaining files are left for a later round).
// 3. The merged index is serialized; on an exception the new file is marked
//    TO_DELETE and DB_ERROR is returned.
// 4. The new file becomes TO_INDEX when the engine is not FAISS_IDMAP and its
//    physical size reached index_file_size_, RAW otherwise, and all queued updates
//    are written to meta in one call.
//
// Returns the CreateTableFile status on failure, DB_ERROR on a serialize
// exception, else the UpdateTableFiles status.
Status
DBImpl::MergeFiles(const std::string& table_id, const meta::DateT& date, const meta::TableFilesSchema& files) {
    ENGINE_LOG_DEBUG << "Merge files for table: " << table_id;

    // step 1: create table file
    meta::TableFileSchema merged_file;
    merged_file.table_id_ = table_id;
    merged_file.date_ = date;
    merged_file.file_type_ = meta::TableFileSchema::NEW_MERGE;
    Status status = meta_ptr_->CreateTableFile(merged_file);

    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to create table: " << status.ToString();
        return status;
    }

    // step 2: merge files
    ExecutionEnginePtr merged_index =
        EngineFactory::Build(merged_file.dimension_, merged_file.location_,
                             static_cast<EngineType>(merged_file.engine_type_),
                             static_cast<MetricType>(merged_file.metric_type_), merged_file.nlist_);

    meta::TableFilesSchema pending_updates;

    for (auto& source : files) {
        server::CollectMergeFilesMetrics metrics;

        merged_index->Merge(source.location_);

        auto consumed = source;
        consumed.file_type_ = meta::TableFileSchema::TO_DELETE;
        pending_updates.push_back(consumed);
        ENGINE_LOG_DEBUG << "Merging file " << consumed.file_id_;

        // stop once the merged index is large enough
        const int64_t merged_size = merged_index->Size();
        if (merged_size >= consumed.index_file_size_) {
            break;
        }
    }

    // step 3: serialize to disk
    try {
        merged_index->Serialize();
    } catch (std::exception& ex) {
        // typical error: out of disk space or permission denied
        std::string msg = "Serialize merged index encounter exception: " + std::string(ex.what());
        ENGINE_LOG_ERROR << msg;

        merged_file.file_type_ = meta::TableFileSchema::TO_DELETE;
        status = meta_ptr_->UpdateTableFile(merged_file);
        ENGINE_LOG_DEBUG << "Failed to update file to index, mark file: " << merged_file.file_id_ << " to to_delete";

        std::cout << "ERROR: failed to persist merged index file: " << merged_file.location_
                  << ", possible out of disk space" << std::endl;

        return Status(DB_ERROR, msg);
    }

    // step 4: update table files state
    // a non-IDMAP file that reached index_file_size_ is queued for index building
    // (TO_INDEX); everything else stays RAW, no need to build index
    const bool is_idmap = (merged_file.engine_type_ == static_cast<int>(EngineType::FAISS_IDMAP));
    if (!is_idmap && merged_index->PhysicalSize() >= merged_file.index_file_size_) {
        merged_file.file_type_ = meta::TableFileSchema::TO_INDEX;
    } else {
        merged_file.file_type_ = meta::TableFileSchema::RAW;
    }
    merged_file.file_size_ = merged_index->PhysicalSize();
    merged_file.row_count_ = merged_index->Count();
    pending_updates.push_back(merged_file);
    status = meta_ptr_->UpdateTableFiles(pending_updates);
    ENGINE_LOG_DEBUG << "New merged file " << merged_file.file_id_ << " of size " << merged_index->PhysicalSize()
                     << " bytes";

    if (options_.insert_cache_immediately_) {
        merged_index->Cache();
    }

    return status;
}

S
starlord 已提交
708
// Merges the mergeable files of `table_id`, one date bucket at a time.
//
// Buckets with fewer files than options_.merge_trigger_number_ are skipped. The
// loop stops early once shutdown is observed after a merge. The status of the
// individual MergeFiles() calls is not propagated.
//
// Returns the FilesToMerge status on failure, OK otherwise.
Status
DBImpl::BackgroundMergeFiles(const std::string& table_id) {
    meta::DatePartionedTableFilesSchema raw_files;
    auto status = meta_ptr_->FilesToMerge(table_id, raw_files);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to get merge files for table: " << table_id;
        return status;
    }

    for (auto& kv : raw_files) {
        // bind by reference: the list is only inspected and forwarded, no copy needed
        const auto& files = kv.second;
        if (files.size() < options_.merge_trigger_number_) {
            ENGINE_LOG_DEBUG << "Files number not greater equal than merge trigger number, skip merge action";
            continue;
        }

        MergeFiles(table_id, kv.first, files);

        if (shutting_down_.load(std::memory_order_acquire)) {
            ENGINE_LOG_DEBUG << "Server will shutdown, skip merge action for table: " << table_id;
            break;
        }
    }

    return Status::OK();
}
734

S
starlord 已提交
735 736
void
DBImpl::BackgroundCompaction(std::set<std::string> table_ids) {
737
    ENGINE_LOG_TRACE << " Background compaction thread start";
S
starlord 已提交
738

G
groot 已提交
739
    Status status;
Y
Yu Kun 已提交
740
    for (auto& table_id : table_ids) {
G
groot 已提交
741 742
        status = BackgroundMergeFiles(table_id);
        if (!status.ok()) {
S
starlord 已提交
743
            ENGINE_LOG_ERROR << "Merge files for table " << table_id << " failed: " << status.ToString();
G
groot 已提交
744
        }
S
starlord 已提交
745

S
starlord 已提交
746
        if (shutting_down_.load(std::memory_order_acquire)) {
S
starlord 已提交
747 748 749
            ENGINE_LOG_DEBUG << "Server will shutdown, skip merge action";
            break;
        }
G
groot 已提交
750
    }
X
Xu Peng 已提交
751

G
groot 已提交
752
    meta_ptr_->Archive();
Z
update  
zhiru 已提交
753

S
starlord 已提交
754
    int ttl = 5 * meta::M_SEC;  // default: file will be deleted after 5 minutes
Y
yudong.cai 已提交
755
    if (options_.mode_ == DBOptions::MODE::CLUSTER_WRITABLE) {
Z
update  
zhiru 已提交
756 757 758
        ttl = meta::D_SEC;
    }
    meta_ptr_->CleanUpFilesWithTTL(ttl);
S
starlord 已提交
759

760
    ENGINE_LOG_TRACE << " Background compaction thread exit";
G
groot 已提交
761
}
X
Xu Peng 已提交
762

S
starlord 已提交
763 764
void
DBImpl::StartBuildIndexTask(bool force) {
G
groot 已提交
765 766
    static uint64_t index_clock_tick = 0;
    index_clock_tick++;
S
starlord 已提交
767
    if (!force && (index_clock_tick % INDEX_ACTION_INTERVAL != 0)) {
G
groot 已提交
768 769 770
        return;
    }

S
starlord 已提交
771
    // build index has been finished?
772 773 774 775 776 777 778
    {
        std::lock_guard<std::mutex> lck(index_result_mutex_);
        if (!index_thread_results_.empty()) {
            std::chrono::milliseconds span(10);
            if (index_thread_results_.back().wait_for(span) == std::future_status::ready) {
                index_thread_results_.pop_back();
            }
G
groot 已提交
779 780 781
        }
    }

S
starlord 已提交
782
    // add new build index task
783 784 785
    {
        std::lock_guard<std::mutex> lck(index_result_mutex_);
        if (index_thread_results_.empty()) {
S
starlord 已提交
786
            index_thread_results_.push_back(index_thread_pool_.enqueue(&DBImpl::BackgroundBuildIndex, this));
787
        }
G
groot 已提交
788
    }
X
Xu Peng 已提交
789 790
}

S
starlord 已提交
791 792
void
DBImpl::BackgroundBuildIndex() {
S
starlord 已提交
793
    ENGINE_LOG_TRACE << "Background build index thread start";
S
starlord 已提交
794

P
peng.xu 已提交
795
    std::unique_lock<std::mutex> lock(build_index_mutex_);
796
    meta::TableFilesSchema to_index_files;
G
groot 已提交
797
    meta_ptr_->FilesToIndex(to_index_files);
X
Xu Peng 已提交
798
    Status status;
799

800
    if (!to_index_files.empty()) {
W
wxyu 已提交
801
        scheduler::BuildIndexJobPtr job = std::make_shared<scheduler::BuildIndexJob>(meta_ptr_, options_);
Y
Yu Kun 已提交
802

803 804 805 806 807 808 809 810 811 812 813
        // step 2: put build index task to scheduler
        for (auto& file : to_index_files) {
            scheduler::TableFileSchemaPtr file_ptr = std::make_shared<meta::TableFileSchema>(file);
            job->AddToIndexFiles(file_ptr);
        }
        scheduler::JobMgrInst::GetInstance()->Put(job);
        job->WaitBuildIndexFinish();
        if (!job->GetStatus().ok()) {
            Status status = job->GetStatus();
            ENGINE_LOG_ERROR << "Building index failed: " << status.ToString();
        }
Y
Yu Kun 已提交
814
    }
Y
Yu Kun 已提交
815

S
starlord 已提交
816
    ENGINE_LOG_TRACE << "Background build index thread exit";
X
Xu Peng 已提交
817 818
}

G
groot 已提交
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
// Collects the searchable files of a table into `files`.
//
// Queries the meta store with FilesToSearch() using an empty date list, then flattens the returned
// per-date grouping into `files` through TraverseFiles().
//
// @param table_id  table to look up
// @param file_ids  file ids forwarded to FilesToSearch()
// @param files     output container filled by TraverseFiles()
// @return the FilesToSearch() error on failure, Status::OK() otherwise
Status
DBImpl::GetFilesToSearch(const std::string& table_id, const std::vector<size_t>& file_ids,
                         meta::TableFilesSchema& files) {
    meta::DatesT no_dates;  // intentionally left empty
    meta::DatePartionedTableFilesSchema files_by_date;

    auto query_status = meta_ptr_->FilesToSearch(table_id, file_ids, no_dates, files_by_date);
    if (query_status.ok()) {
        TraverseFiles(files_by_date, files);
        return Status::OK();
    }

    return query_status;
}

// Resolves partition tags to partition table ids.
//
// Lists the partitions of `table_id` and inserts into `partition_name_array` the table_id_ of every
// partition whose partition_tag_ matches any entry of `partition_tags` according to
// StringHelpFunctions::IsRegexMatch().
//
// @param table_id              owner table
// @param partition_tags        tags to match against each partition's tag
// @param partition_name_array  output set; matching partition ids are added, existing entries are kept
// @return the ShowPartitions() error if the partitions cannot be listed, Status::OK() otherwise
Status
DBImpl::GetPartitionsByTags(const std::string& table_id, const std::vector<std::string>& partition_tags,
                            std::set<std::string>& partition_name_array) {
    std::vector<meta::TableSchema> partition_array;
    auto status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        // Do not report success with an empty result when the listing itself failed.
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& tag : partition_tags) {
        for (auto& schema : partition_array) {
            if (server::StringHelpFunctions::IsRegexMatch(schema.partition_tag_, tag)) {
                partition_name_array.insert(schema.table_id_);
            }
        }
    }

    return Status::OK();
}

// Drops a table, or only the data of the given dates, then applies the same operation to every partition
// reported by ShowPartitions().
//
// With empty `dates`: erases the table's in-memory vectors, soft-deletes the table in the meta store, then
// submits a DeleteJob to the scheduler and calls WaitAndDelete() on it.
// With non-empty `dates`: calls DropDataByDate().
//
// @param table_id  table (or partition) to drop
// @param dates     dates whose data should be dropped; empty means drop the whole table
// @return the first error from DropTable(), DropDataByDate(), ShowPartitions() or a nested call;
//         Status::OK() otherwise. An EraseMemVector() failure is logged only.
Status
DBImpl::DropTableRecursively(const std::string& table_id, const meta::DatesT& dates) {
    // dates partly delete files of the table but currently we don't support
    ENGINE_LOG_DEBUG << "Prepare to delete table " << table_id;

    Status status;
    if (dates.empty()) {
        status = mem_mgr_->EraseMemVector(table_id);  // not allow insert
        if (!status.ok()) {
            // Keep going: the meta-store drop below is what actually removes the table.
            ENGINE_LOG_ERROR << "Failed to erase memory vectors of table " << table_id << ": " << status.ToString();
        }

        status = meta_ptr_->DropTable(table_id);  // soft delete table
        if (!status.ok()) {
            ENGINE_LOG_ERROR << "Failed to drop table " << table_id << ": " << status.ToString();
            return status;
        }

        // scheduler will determine when to delete table files
        auto nres = scheduler::ResMgrInst::GetInstance()->GetNumOfComputeResource();
        scheduler::DeleteJobPtr job = std::make_shared<scheduler::DeleteJob>(table_id, meta_ptr_, nres);
        scheduler::JobMgrInst::GetInstance()->Put(job);
        job->WaitAndDelete();
    } else {
        status = meta_ptr_->DropDataByDate(table_id, dates);
        if (!status.ok()) {
            ENGINE_LOG_ERROR << "Failed to drop data by date for table " << table_id << ": " << status.ToString();
            return status;
        }
    }

    // repeat for every partition of this table
    std::vector<meta::TableSchema> partition_array;
    status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& schema : partition_array) {
        status = DropTableRecursively(schema.table_id_, dates);
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}

// Replaces the index description of a table and of all its partitions.
//
// Drops the current index of `table_id` via DropIndex(), stores `index` with UpdateTableIndex(), then
// repeats the same steps for every partition reported by ShowPartitions().
//
// @param table_id  table (or partition) to update
// @param index     new index description
// @return the first error from DropIndex(), UpdateTableIndex(), ShowPartitions() or a nested call;
//         Status::OK() otherwise
Status
DBImpl::UpdateTableIndexRecursively(const std::string& table_id, const TableIndex& index) {
    auto status = DropIndex(table_id);
    if (!status.ok()) {
        // Do not record new index info on top of an index that could not be dropped.
        ENGINE_LOG_ERROR << "Failed to drop index for table " << table_id << ": " << status.ToString();
        return status;
    }

    status = meta_ptr_->UpdateTableIndex(table_id, index);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to update table index info for table: " << table_id;
        return status;
    }

    // update partition index info
    std::vector<meta::TableSchema> partition_array;
    status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& schema : partition_array) {
        status = UpdateTableIndexRecursively(schema.table_id_, index);
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}

// Blocks until a table and all of its partitions have no files left in the "not yet indexed" states.
//
// The set of file states to wait on depends on the engine type:
//   - FAISS_IDMAP: NEW and NEW_MERGE
//   - otherwise:   RAW, NEW, NEW_MERGE, NEW_INDEX and TO_INDEX
// While FilesByType() still reports such files, the function (for non-IDMAP engines) asks the meta store to
// mark the table's files for indexing, sleeps with a linearly growing delay capped at 10 seconds, and polls
// again. It then recurses into every partition reported by ShowPartitions().
//
// @param table_id  table (or partition) to wait for
// @param index     index description; only engine_type_ is read here
// @return the first error from FilesByType(), ShowPartitions() or a nested call; Status::OK() otherwise.
//         An UpdateTableFilesToIndex() failure is logged and retried on the next poll.
Status
DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex& index) {
    const bool is_idmap = (index.engine_type_ == static_cast<int32_t>(EngineType::FAISS_IDMAP));

    // for IDMAP type, only wait all NEW file converted to RAW file
    // for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files
    std::vector<int> file_types;
    if (is_idmap) {
        file_types = {
            static_cast<int32_t>(meta::TableFileSchema::NEW),
            static_cast<int32_t>(meta::TableFileSchema::NEW_MERGE),
        };
    } else {
        file_types = {
            static_cast<int32_t>(meta::TableFileSchema::RAW),
            static_cast<int32_t>(meta::TableFileSchema::NEW),
            static_cast<int32_t>(meta::TableFileSchema::NEW_MERGE),
            static_cast<int32_t>(meta::TableFileSchema::NEW_INDEX),
            static_cast<int32_t>(meta::TableFileSchema::TO_INDEX),
        };
    }

    // get files to build index
    std::vector<std::string> file_ids;
    auto status = meta_ptr_->FilesByType(table_id, file_types, file_ids);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to get files by type for table " << table_id << ": " << status.ToString();
        return status;
    }

    int times = 1;
    while (!file_ids.empty()) {
        ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times;
        if (!is_idmap) {
            status = meta_ptr_->UpdateTableFilesToIndex(table_id);
            if (!status.ok()) {
                // Retried on the next iteration; make the failure visible instead of dropping it.
                ENGINE_LOG_ERROR << "Failed to update files to index for table " << table_id << ": "
                                 << status.ToString();
            }
        }

        // back off: 100 ms more on every poll, at most 10 seconds
        std::this_thread::sleep_for(std::chrono::milliseconds(std::min(10 * 1000, times * 100)));

        status = meta_ptr_->FilesByType(table_id, file_types, file_ids);
        if (!status.ok()) {
            // file_ids cannot be trusted after a failed query; stop instead of polling on it.
            ENGINE_LOG_ERROR << "Failed to get files by type for table " << table_id << ": " << status.ToString();
            return status;
        }
        times++;
    }

    // build index for partition
    std::vector<meta::TableSchema> partition_array;
    status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& schema : partition_array) {
        status = BuildTableIndexRecursively(schema.table_id_, index);
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}

// Drops the index of a table and of all its partitions.
//
// Calls DropTableIndex() on the meta store for `table_id`, then recurses into every partition reported by
// ShowPartitions().
//
// @param table_id  table (or partition) whose index is dropped
// @return the first error from DropTableIndex(), ShowPartitions() or a nested call; Status::OK() otherwise
Status
DBImpl::DropTableIndexRecursively(const std::string& table_id) {
    ENGINE_LOG_DEBUG << "Drop index for table: " << table_id;
    auto status = meta_ptr_->DropTableIndex(table_id);
    if (!status.ok()) {
        return status;
    }

    // drop partition index
    std::vector<meta::TableSchema> partition_array;
    status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        // Otherwise partitions would silently keep their index while OK is reported.
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& schema : partition_array) {
        status = DropTableIndexRecursively(schema.table_id_);
        if (!status.ok()) {
            return status;
        }
    }

    return Status::OK();
}

// Computes the row count of a table including all of its partitions.
//
// `row_count` is reset to 0, filled with the table's own count from the meta store, and then increased by
// the recursively computed count of every partition reported by ShowPartitions().
//
// @param table_id   table (or partition) to count
// @param row_count  output; on failure it holds whatever had been accumulated up to the failing step
// @return the first error from Count(), ShowPartitions() or a nested call; Status::OK() otherwise
Status
DBImpl::GetTableRowCountRecursively(const std::string& table_id, uint64_t& row_count) {
    row_count = 0;
    auto status = meta_ptr_->Count(table_id, row_count);
    if (!status.ok()) {
        return status;
    }

    // get partition row count
    std::vector<meta::TableSchema> partition_array;
    status = meta_ptr_->ShowPartitions(table_id, partition_array);
    if (!status.ok()) {
        // Otherwise a count that misses every partition would be reported as a success.
        ENGINE_LOG_ERROR << "Failed to get partitions of table " << table_id << ": " << status.ToString();
        return status;
    }

    for (auto& schema : partition_array) {
        uint64_t partition_row_count = 0;
        status = GetTableRowCountRecursively(schema.table_id_, partition_row_count);
        if (!status.ok()) {
            return status;
        }

        row_count += partition_row_count;
    }

    return Status::OK();
}

S
starlord 已提交
997 998
}  // namespace engine
}  // namespace milvus