ServerConfig.cpp 23.4 KB
Newer Older
G
groot 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
////////////////////////////////////////////////////////////////////////////////
// Copyright 上海赜睿信息科技有限公司(Zilliz) - All Rights Reserved
// Unauthorized copying of this file, via any medium is strictly prohibited.
// Proprietary and confidential.
////////////////////////////////////////////////////////////////////////////////
#include "ServerConfig.h"

#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdlib.h>
#include <iostream>
#include <limits>
#include <set>
#include <stdexcept>
#include <string>

#include "config/ConfigMgr.h"
#include "utils/CommonUtil.h"
#include "utils/ValidationUtil.h"
G
groot 已提交
17

18

G
groot 已提交
19
namespace zilliz {
J
jinhai 已提交
20
namespace milvus {
G
groot 已提交
21 22
namespace server {

23 24
// Byte multipliers. GB converts the GB-denominated settings validated in this
// file into bytes; MB only serves as the building block for GB here.
constexpr uint64_t MB = uint64_t{1} << 20;  // 1024 * 1024
constexpr uint64_t GB = MB << 10;           // MB * 1024
25

26
// Accessor for the single ServerConfig object shared by the process.
// The object is a function-local static, so it is constructed on the first
// call and every caller receives a reference to that same instance.
ServerConfig &
ServerConfig::GetInstance() {
    static ServerConfig instance;
    return instance;
}

S
starlord 已提交
32
// Load the server configuration file into the global ConfigMgr.
//
// The process is terminated with exit(1) when no file name is given, when the
// file cannot be stat()'ed, or when ConfigMgr::LoadConfigFile reports an error.
//
// @param config_filename  path of the configuration file
// @return SERVER_SUCCESS when the file was loaded,
//         SERVER_UNEXPECTED_ERROR when loading throws a YAML::Exception
ErrorCode
ServerConfig::LoadConfigFile(const std::string &config_filename) {
    std::string filename = config_filename;
    if (filename.empty()) {
        std::cout << "ERROR: a config file is required" << std::endl;
        exit(1);  // directly exit program if config file not specified
    }

    struct stat file_stat;
    if (stat(filename.c_str(), &file_stat) != 0) {
        std::cout << "ERROR: " << filename << " not found!" << std::endl;
        exit(1);  // directly exit program if config file not found
    }

    try {
        ConfigMgr *mgr = const_cast<ConfigMgr *>(ConfigMgr::GetInstance());
        ErrorCode err = mgr->LoadConfigFile(filename);
        if (err != 0) {
            std::cout << "Server failed to load config file" << std::endl;
            exit(1);  // directly exit program if the config file is illegal
        }
    } catch (const YAML::Exception &e) {
        // Include the parser's diagnostic; without it the message ends in a
        // dangling ": " and gives no hint about what is wrong with the file.
        std::cout << "Server failed to load config file: " << e.what() << std::endl;
        return SERVER_UNEXPECTED_ERROR;
    }

    return SERVER_SUCCESS;
}

Z
zhiru 已提交
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
// Run the validation routine of every configuration section.
//
// All six checks are always executed, in the order listed, even after one of
// them has failed, so that each section gets the chance to report its errors.
//
// @return SERVER_SUCCESS when every section check succeeded,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::ValidateConfig() {
    bool all_valid = true;

    // The call is the left operand on purpose: it must run even when
    // all_valid is already false.
    all_valid = (CheckServerConfig() == SERVER_SUCCESS) && all_valid;
    all_valid = (CheckDBConfig() == SERVER_SUCCESS) && all_valid;
    all_valid = (CheckMetricConfig() == SERVER_SUCCESS) && all_valid;
    all_valid = (CheckCacheConfig() == SERVER_SUCCESS) && all_valid;
    all_valid = (CheckEngineConfig() == SERVER_SUCCESS) && all_valid;
    all_valid = (CheckResourceConfig() == SERVER_SUCCESS) && all_valid;

    if (!all_valid) {
        return SERVER_INVALID_ARGUMENT;
    }
    return SERVER_SUCCESS;
}

// Validate the server_config section: listen address, port, deployment mode
// and time zone. Every problem found is written to std::cerr; checking
// continues after a failure so that all problems are reported.
//
// @return SERVER_SUCCESS when all values are acceptable,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckServerConfig() {
/*
    server_config:
    address: 0.0.0.0            # milvus server ip address
    port: 19530                 # the port milvus listen to, default: 19530, range: 1025 ~ 65534
    gpu_index: 0                # the gpu milvus use, default: 0, range: 0 ~ gpu number - 1
    mode: single                # milvus deployment type: single, cluster, read_only
*/
    bool okay = true;
    ConfigNode server_config = GetConfig(CONFIG_SERVER);

    std::string ip_address = server_config.GetValue(CONFIG_SERVER_ADDRESS, "127.0.0.1");
    if (ValidationUtil::ValidateIpAddress(ip_address) != SERVER_SUCCESS) {
        std::cerr << "ERROR: invalid server IP address: " << ip_address << std::endl;
        okay = false;
    }

    std::string port_str = server_config.GetValue(CONFIG_SERVER_PORT, "19530");
    if (ValidationUtil::ValidateStringIsNumber(port_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: port " << port_str << " is not a number" << std::endl;
        okay = false;
    } else {
        bool port_in_range = false;
        try {
            // Compare at full width. Narrowing to 32 bits before the range
            // check would let a value such as 4294986826 wrap around to 19530
            // and be accepted.
            const long long port = std::stoll(port_str);
            port_in_range = (port >= 1025 && port <= 65534);
        } catch (const std::logic_error &) {
            // std::out_of_range (too many digits) or std::invalid_argument:
            // either way the value is not a usable port.
        }
        if (!port_in_range) {
            std::cerr << "ERROR: port " << port_str << " out of range [1025, 65534]" << std::endl;
            okay = false;
        }
    }

    std::string mode = server_config.GetValue(CONFIG_CLUSTER_MODE, "single");
    if (mode != "single" && mode != "cluster" && mode != "read_only") {
        std::cerr << "ERROR: mode " << mode << " is not one of ['single', 'cluster', 'read_only']" << std::endl;
        okay = false;
    }

    // Accepted form: the literal "UTC", optionally followed by text that
    // std::stoi can parse as an integer offset (e.g. "UTC+8", "UTC-5").
    std::string time_zone = server_config.GetValue(CONFIG_TIME_ZONE, "UTC+8");
    bool time_zone_valid = true;
    if (time_zone.length() < 3 || time_zone.compare(0, 3, "UTC") != 0) {
        time_zone_valid = false;
    } else if (time_zone.length() > 3) {
        try {
            std::stoi(time_zone.substr(3));
        } catch (const std::logic_error &) {
            // Covers std::invalid_argument (no digits) as well as
            // std::out_of_range (offset does not fit in an int).
            time_zone_valid = false;
        }
    }
    if (!time_zone_valid) {
        std::cerr << "ERROR: time_zone " << time_zone << " is not in a right format" << std::endl;
        okay = false;
    }

    return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
}

// Validate the db_config section: storage path, backend URL, archive
// thresholds, insert buffer size and the GPU used for index building.
// Every problem found is written to std::cerr; checking continues after a
// failure so that all problems are reported.
//
// @return SERVER_SUCCESS when all values are acceptable,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckDBConfig() {
/*
    db_config:
    db_path: @MILVUS_DB_PATH@             # milvus data storage path
    db_slave_path:                        # secondary data storage path, split by semicolon
    parallel_reduce: false                # use multi-threads to reduce topk result

    # URI format: dialect://username:password@host:port/database
    # All parts except dialect are optional, but you MUST include the delimiters
    # Currently dialect supports mysql or sqlite
    db_backend_url: sqlite://:@:/

    archive_disk_threshold: 0        # trigger archive action if storage size exceed this value, 0 means no limit, unit: GB
    archive_days_threshold: 0        # files older than x days will be archived, 0 means no limit, unit: day
    insert_buffer_size: 4            # maximum insert buffer size allowed, default: 4, unit: GB, should be at least 1 GB.
    # the sum of insert_buffer_size and cpu_cache_capacity should be less than total memory, unit: GB
*/
    bool okay = true;
    ConfigNode db_config = GetConfig(CONFIG_DB);

    std::string db_path = db_config.GetValue(CONFIG_DB_PATH);
    if (db_path.empty()) {
        std::cerr << "ERROR: db_path is empty" << std::endl;
        okay = false;
    }

    std::string db_backend_url = db_config.GetValue(CONFIG_DB_URL);
    if (ValidationUtil::ValidateDbURI(db_backend_url) != SERVER_SUCCESS) {
        std::cerr << "ERROR: invalid db_backend_url: " << db_backend_url << std::endl;
        okay = false;
    }

    // Both archive thresholds used to be read from the insert_buffer_size
    // key, so the values the user actually configured were never validated.
    // NOTE(review): the keys are spelled as in the YAML layout documented
    // above; switch to the named constants from ServerConfig.h if they exist.
    std::string archive_disk_threshold_str = db_config.GetValue("archive_disk_threshold", "0");
    if (ValidationUtil::ValidateStringIsNumber(archive_disk_threshold_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: archive_disk_threshold " << archive_disk_threshold_str << " is not a number" << std::endl;
        okay = false;
    }

    std::string archive_days_threshold_str = db_config.GetValue("archive_days_threshold", "0");
    if (ValidationUtil::ValidateStringIsNumber(archive_days_threshold_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: archive_days_threshold " << archive_days_threshold_str << " is not a number" << std::endl;
        okay = false;
    }

    std::string insert_buffer_size_str = db_config.GetValue(CONFIG_DB_INSERT_BUFFER_SIZE, "4");
    if (ValidationUtil::ValidateStringIsNumber(insert_buffer_size_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: insert_buffer_size " << insert_buffer_size_str << " is not a number" << std::endl;
        okay = false;
    } else {
        // The setting is expressed in GB; convert to bytes before comparing
        // with the memory size reported by CommonUtil::GetSystemMemInfo.
        uint64_t insert_buffer_size = static_cast<uint64_t>(std::stol(insert_buffer_size_str));
        insert_buffer_size *= GB;
        unsigned long total_mem = 0, free_mem = 0;
        CommonUtil::GetSystemMemInfo(total_mem, free_mem);
        if (insert_buffer_size >= total_mem) {
            std::cerr << "ERROR: insert_buffer_size exceed system memory" << std::endl;
            okay = false;
        }
    }

    std::string gpu_index_str = db_config.GetValue(CONFIG_DB_BUILD_INDEX_GPU, "0");
    if (ValidationUtil::ValidateStringIsNumber(gpu_index_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: gpu_index " << gpu_index_str << " is not a number" << std::endl;
        okay = false;
    } else {
        int32_t gpu_index = std::stol(gpu_index_str);
        if (ValidationUtil::ValidateGpuIndex(gpu_index) != SERVER_SUCCESS) {
            std::cerr << "ERROR: invalid gpu_index " << gpu_index_str << std::endl;
            okay = false;
        }
    }

    return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
}

// Validate the metric_config section: the is_startup switch must be a boolean
// string and the prometheus port must be numeric. Problems are written to
// std::cerr.
//
// @return SERVER_SUCCESS when both values are acceptable,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckMetricConfig() {
/*
    metric_config:
    is_startup: off                       # if monitoring start: on, off
    collector: prometheus                 # metrics collector: prometheus
    prometheus_config:                    # following are prometheus configure
    port: 8080                          # the port prometheus use to fetch metrics
    (not used) push_gateway_ip_address: 127.0.0.1  # push method configure: push gateway ip address
    (not used) push_gateway_port: 9091             # push method configure: push gateway port
*/
    ConfigNode metric_config = GetConfig(CONFIG_METRIC);
    bool valid = true;

    // Monitoring on/off switch.
    std::string startup_flag = metric_config.GetValue(CONFIG_METRIC_IS_STARTUP, "off");
    const bool startup_is_bool = (ValidationUtil::ValidateStringIsBool(startup_flag) == SERVER_SUCCESS);
    if (!startup_is_bool) {
        std::cerr << "ERROR: invalid is_startup config: " << startup_flag << std::endl;
        valid = false;
    }

    // Port of the nested prometheus_config node.
    std::string prometheus_port =
        metric_config.GetChild(CONFIG_PROMETHEUS).GetValue(CONFIG_METRIC_PROMETHEUS_PORT, "8080");
    const bool port_is_number = (ValidationUtil::ValidateStringIsNumber(prometheus_port) == SERVER_SUCCESS);
    if (!port_is_number) {
        std::cerr << "ERROR: port specified in prometheus_config " << prometheus_port << " is not a number"
                  << std::endl;
        valid = false;
    }

    if (valid) {
        return SERVER_SUCCESS;
    }
    return SERVER_INVALID_ARGUMENT;
}

// Validate the cache_config section: CPU and GPU cache capacities (checked
// against system and GPU memory), their free-percent ratios and the
// insert_cache_immediately switch. Errors and warnings are written to
// std::cerr; only errors affect the result.
//
// @return SERVER_SUCCESS when no error was found,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckCacheConfig() {
/*
    cache_config:
    cpu_cache_capacity: 16            # how many memory are used as cache, unit: GB, range: 0 ~ less than total memory
    cpu_cache_free_percent: 0.85      # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0
    insert_cache_immediately: false   # insert data will be load into cache immediately for hot query
    gpu_cache_capacity: 5             # how many memory are used as cache in gpu, unit: GB, RANGE: 0 ~ less than total memory
    gpu_cache_free_percent: 0.85      # old data will be erased from cache when cache is full, this value specify how much memory should be kept, range: greater than zero ~ 1.0
    gpu_ids:                          # gpu id
    - 0
    - 1
*/
    bool valid = true;
    ConfigNode cache_config = GetConfig(CONFIG_CACHE);

    // ---- cpu_cache_capacity (GB) ----
    std::string cpu_capacity_text = cache_config.GetValue(CONFIG_CPU_CACHE_CAPACITY, "16");
    if (ValidationUtil::ValidateStringIsNumber(cpu_capacity_text) == SERVER_SUCCESS) {
        uint64_t cpu_capacity_bytes = static_cast<uint64_t>(std::stol(cpu_capacity_text));
        cpu_capacity_bytes *= GB;

        unsigned long total_mem = 0;
        unsigned long free_mem = 0;
        CommonUtil::GetSystemMemInfo(total_mem, free_mem);

        if (cpu_capacity_bytes >= total_mem) {
            std::cerr << "ERROR: cpu_cache_capacity exceed system memory" << std::endl;
            valid = false;
        } else if (cpu_capacity_bytes > static_cast<double>(total_mem) * 0.9) {
            // More than 90% of total memory: allowed, but worth a warning.
            std::cerr << "Warning: cpu_cache_capacity value is too aggressive" << std::endl;
        }

        // The insert buffer configured in db_config shares system memory with
        // the CPU cache, so the two together must also stay below the total.
        uint64_t insert_buffer_bytes =
            static_cast<uint64_t>(GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_INSERT_BUFFER_SIZE, 4));
        insert_buffer_bytes *= GB;
        if (insert_buffer_bytes + cpu_capacity_bytes >= total_mem) {
            std::cerr << "ERROR: sum of cpu_cache_capacity and insert_buffer_size exceed system memory" << std::endl;
            valid = false;
        }
    } else {
        std::cerr << "ERROR: cpu_cache_capacity " << cpu_capacity_text << " is not a number" << std::endl;
        valid = false;
    }

    // ---- cpu_cache_free_percent ----
    std::string cpu_free_text = cache_config.GetValue(CACHE_FREE_PERCENT, "0.85");
    double cpu_free_ratio;
    if (ValidationUtil::ValidateStringIsDouble(cpu_free_text, cpu_free_ratio) != SERVER_SUCCESS) {
        std::cerr << "ERROR: cpu_cache_free_percent " << cpu_free_text << " is not a double" << std::endl;
        valid = false;
    } else if (cpu_free_ratio < std::numeric_limits<double>::epsilon() || cpu_free_ratio > 1.0) {
        std::cerr << "ERROR: invalid cpu_cache_free_percent " << cpu_free_text << std::endl;
        valid = false;
    }

    // ---- insert_cache_immediately ----
    std::string insert_immediately_text = cache_config.GetValue(CONFIG_INSERT_CACHE_IMMEDIATELY, "false");
    if (ValidationUtil::ValidateStringIsBool(insert_immediately_text) != SERVER_SUCCESS) {
        std::cerr << "ERROR: invalid insert_cache_immediately config: " << insert_immediately_text << std::endl;
        valid = false;
    }

    // ---- gpu_cache_capacity (GB) ----
    std::string gpu_capacity_text = cache_config.GetValue(CONFIG_GPU_CACHE_CAPACITY, "0");
    if (ValidationUtil::ValidateStringIsNumber(gpu_capacity_text) == SERVER_SUCCESS) {
        uint64_t gpu_capacity_bytes = static_cast<uint64_t>(std::stol(gpu_capacity_text));
        gpu_capacity_bytes *= GB;

        // The capacity is checked against the memory of the device that
        // db_config selects for index building.
        int gpu_index = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0);
        size_t gpu_memory;
        if (ValidationUtil::GetGpuMemory(gpu_index, gpu_memory) != SERVER_SUCCESS) {
            std::cerr << "ERROR: could not get gpu memory for device " << gpu_index << std::endl;
            valid = false;
        } else if (gpu_capacity_bytes >= gpu_memory) {
            std::cerr << "ERROR: gpu_cache_capacity " << gpu_capacity_bytes
                      << " exceed total gpu memory " << gpu_memory << std::endl;
            valid = false;
        } else if (gpu_capacity_bytes > static_cast<double>(gpu_memory) * 0.9) {
            std::cerr << "Warning: gpu_cache_capacity value is too aggressive" << std::endl;
        }
    } else {
        std::cerr << "ERROR: gpu_cache_capacity " << gpu_capacity_text << " is not a number" << std::endl;
        valid = false;
    }

    // ---- gpu_cache_free_percent ----
    std::string gpu_free_text = cache_config.GetValue(GPU_CACHE_FREE_PERCENT, "0.85");
    double gpu_free_ratio;
    if (ValidationUtil::ValidateStringIsDouble(gpu_free_text, gpu_free_ratio) != SERVER_SUCCESS) {
        std::cerr << "ERROR: gpu_cache_free_percent " << gpu_free_text << " is not a double" << std::endl;
        valid = false;
    } else if (gpu_free_ratio < std::numeric_limits<double>::epsilon() || gpu_free_ratio > 1.0) {
        // Unlike the CPU branch, this message prints the parsed number.
        std::cerr << "ERROR: invalid gpu_cache_free_percent " << gpu_free_ratio << std::endl;
        valid = false;
    }

    return valid ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT;
}

// Validate the engine_config section: use_blas_threshold must be numeric and
// omp_thread_num must be numeric and not larger than the number of threads
// the system reports as available. Problems are written to std::cerr.
//
// @return SERVER_SUCCESS when both values are acceptable,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckEngineConfig() {
/*
    engine_config:
    use_blas_threshold: 20
    omp_thread_num: 0             # how many compute threads be used by engine, 0 means use all cpu core to compute
*/
    bool okay = true;
    ConfigNode engine_config = GetConfig(CONFIG_ENGINE);

    std::string use_blas_threshold_str = engine_config.GetValue(CONFIG_DCBT, "20");
    if (ValidationUtil::ValidateStringIsNumber(use_blas_threshold_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: use_blas_threshold " << use_blas_threshold_str << " is not a number" << std::endl;
        okay = false;
    }

    std::string omp_thread_num_str = engine_config.GetValue(CONFIG_OMP_THREAD_NUM, "0");
    if (ValidationUtil::ValidateStringIsNumber(omp_thread_num_str) != SERVER_SUCCESS) {
        std::cerr << "ERROR: omp_thread_num " << omp_thread_num_str << " is not a number" << std::endl;
        okay = false;
    } else {
        int32_t omp_thread = std::stol(omp_thread_num_str);

        // The limit is the value written into sys_thread_cnt, which is also
        // what the error message prints. The previous code compared against
        // the function's return value instead.
        // NOTE(review): assumes GetSystemAvailableThreads() fills its argument
        // with the thread count and returns a status code - confirm against
        // CommonUtil.h. 8 remains the fallback if the argument is left untouched.
        uint32_t sys_thread_cnt = 8;
        CommonUtil::GetSystemAvailableThreads(sys_thread_cnt);

        // Widen both sides so the signed/unsigned comparison is exact.
        if (static_cast<int64_t>(omp_thread) > static_cast<int64_t>(sys_thread_cnt)) {
            std::cerr << "ERROR: omp_thread_num " << omp_thread_num_str << " > system available thread "
                      << sys_thread_cnt << std::endl;
            okay = false;
        }
    }

    return (okay ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT);
}

// Validate the resource_config section.
//
// Resources: each entry needs a numeric device_id and a boolean
// enable_executor; GPU entries additionally need numeric gpu_resource_num,
// pinned_memory and temp_memory, and the sum of the two memory values must be
// below the device's memory. Across all entries there must be a DISK and a
// CPU resource, at least one CPU/GPU resource with the executor enabled, and
// a GPU resource whose device_id equals db_config's index-building GPU.
//
// Connections: each entry needs a numeric speed and an endpoint of the form
// "<resource>===<resource>" naming two declared resources.
//
// Problems are written to std::cerr; checking continues after a failure.
//
// @return SERVER_SUCCESS when no problem was found,
//         SERVER_INVALID_ARGUMENT otherwise
ErrorCode
ServerConfig::CheckResourceConfig() {
/*

    resource_config:
    # resource list, length: 0~N
    # please set a DISK resource and a CPU resource least, or system will not return query result.
    #
      # example:
    # resource_name:               # resource name, just using in connections below
    #   type: DISK                 # resource type, optional: DISK/CPU/GPU
    #   device_id: 0
    #   enable_executor: false     # if is enable executor, optional: true, false

    resources:
    ssda:
    type: DISK
    device_id: 0
    enable_executor: false

    cpu:
    type: CPU
    device_id: 0
    enable_executor: false

    gpu0:
    type: GPU
    device_id: 0
    enable_executor: true
    gpu_resource_num: 2
    pinned_memory: 300
    temp_memory: 300

    # connection list, length: 0~N
    # example:
    # connection_name:
    #   speed: 100                                        # unit: MS/s
    #   endpoint: ${resource_name}===${resource_name}
    connections:
    io:
    speed: 500
    endpoint: ssda===cpu
    pcie0:
    speed: 11000
    endpoint: cpu===gpu0
*/
    bool valid = true;

    server::ConfigNode resource_config = GetConfig(CONFIG_RESOURCE);
    if (resource_config.GetChildren().empty()) {
        std::cerr << "ERROR: no context under resource" << std::endl;
        valid = false;
    }

    auto resources = resource_config.GetChild(CONFIG_RESOURCES).GetChildren();
    if (resources.empty()) {
        std::cerr << "no resources specified" << std::endl;
        valid = false;
    }

    // Facts collected while walking the resource list.
    std::set<std::string> known_resources;
    bool has_disk = false;
    bool has_cpu = false;
    bool has_executor = false;
    bool build_index_gpu_found = false;

    for (auto &entry : resources) {
        known_resources.emplace(entry.first);
        auto &conf = entry.second;
        auto type = conf.GetValue(CONFIG_RESOURCE_TYPE);

        // device_id stays -1 when the configured text is not numeric.
        int32_t device_id = -1;
        std::string device_id_text = conf.GetValue(CONFIG_RESOURCE_DEVICE_ID, "0");
        if (ValidationUtil::ValidateStringIsNumber(device_id_text) == SERVER_SUCCESS) {
            device_id = std::stol(device_id_text);
        } else {
            std::cerr << "ERROR: device_id " << device_id_text << " is not a number" << std::endl;
            valid = false;
        }

        std::string executor_text = conf.GetValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, "off");
        if (ValidationUtil::ValidateStringIsBool(executor_text) != SERVER_SUCCESS) {
            std::cerr << "ERROR: invalid enable_executor config: " << executor_text << std::endl;
            valid = false;
        }

        if (type == "DISK") {
            has_disk = true;
            continue;
        }

        if (type == "CPU") {
            has_cpu = true;
            if (conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
                has_executor = true;
            }
            continue;
        }

        if (type != "GPU") {
            // Other type strings get no type-specific checks.
            continue;
        }

        // ---- GPU resource ----
        int build_index_gpu = GetConfig(CONFIG_DB).GetInt32Value(CONFIG_DB_BUILD_INDEX_GPU, 0);
        if (device_id == build_index_gpu) {
            build_index_gpu_found = true;
        }
        if (conf.GetBoolValue(CONFIG_RESOURCE_ENABLE_EXECUTOR, false)) {
            has_executor = true;
        }

        std::string resource_num_text = conf.GetValue(CONFIG_RESOURCE_NUM, "2");
        if (ValidationUtil::ValidateStringIsNumber(resource_num_text) != SERVER_SUCCESS) {
            std::cerr << "ERROR: gpu_resource_num " << resource_num_text << " is not a number" << std::endl;
            valid = false;
        }

        // The memory sum is only checked when both operands are numeric.
        bool memory_values_ok = true;

        std::string pinned_text = conf.GetValue(CONFIG_RESOURCE_PIN_MEMORY, "300");
        if (ValidationUtil::ValidateStringIsNumber(pinned_text) != SERVER_SUCCESS) {
            std::cerr << "ERROR: pinned_memory " << pinned_text << " is not a number" << std::endl;
            valid = false;
            memory_values_ok = false;
        }

        std::string temp_text = conf.GetValue(CONFIG_RESOURCE_TEMP_MEMORY, "300");
        if (ValidationUtil::ValidateStringIsNumber(temp_text) != SERVER_SUCCESS) {
            std::cerr << "ERROR: temp_memory " << temp_text << " is not a number" << std::endl;
            valid = false;
            memory_values_ok = false;
        }

        if (!memory_values_ok) {
            continue;
        }

        size_t gpu_memory;
        if (ValidationUtil::GetGpuMemory(device_id, gpu_memory) != SERVER_SUCCESS) {
            std::cerr << "ERROR: could not get gpu memory for device " << device_id << std::endl;
            valid = false;
            continue;
        }

        size_t prealloc_total = std::stol(pinned_text) + std::stol(temp_text);
        if (prealloc_total >= gpu_memory) {
            std::cerr << "ERROR: sum of pinned_memory and temp_memory " << prealloc_total
                      << " exceeds total gpu memory " << gpu_memory << " for device " << device_id << std::endl;
            valid = false;
        }
    }

    // ---- requirements across the whole resource list ----
    if (!build_index_gpu_found) {
        std::cerr << "Building index GPU can't be found in resource config." << std::endl;
        valid = false;
    }
    if (!(has_disk && has_cpu)) {
        std::cerr << "No DISK or CPU resource" << std::endl;
        valid = false;
    }
    if (!has_executor) {
        std::cerr << "No CPU or GPU resource has executor enabled" << std::endl;
        valid = false;
    }

    // ---- connections ----
    const std::string delimiter = "===";
    auto connections = resource_config.GetChild(CONFIG_RESOURCE_CONNECTIONS).GetChildren();
    for (auto &link : connections) {
        auto &link_conf = link.second;

        std::string speed_text = link_conf.GetValue(CONFIG_SPEED_CONNECTIONS);
        if (ValidationUtil::ValidateStringIsNumber(speed_text) != SERVER_SUCCESS) {
            std::cerr << "ERROR: speed " << speed_text << " is not a number" << std::endl;
            valid = false;
        }

        std::string endpoint_text = link_conf.GetValue(CONFIG_ENDPOINT_CONNECTIONS);
        const auto split_at = endpoint_text.find(delimiter);
        if (split_at == std::string::npos) {
            std::cerr << "ERROR: invalid endpoint format: " << endpoint_text << std::endl;
            valid = false;
            continue;
        }

        // Both sides of the delimiter must name a resource declared above.
        std::string left_resource = endpoint_text.substr(0, split_at);
        if (known_resources.count(left_resource) == 0) {
            std::cerr << "ERROR: left resource " << left_resource << " does not exist" << std::endl;
            valid = false;
        }

        std::string right_resource = endpoint_text.substr(split_at + delimiter.length());
        if (known_resources.count(right_resource) == 0) {
            std::cerr << "ERROR: right resource " << right_resource << " does not exist" << std::endl;
            valid = false;
        }
    }

    return valid ? SERVER_SUCCESS : SERVER_INVALID_ARGUMENT;
}

G
groot 已提交
562 563
void
ServerConfig::PrintAll() const {
564
    if (const ConfigMgr *mgr = ConfigMgr::GetInstance()) {
G
groot 已提交
565 566 567 568 569 570 571
        std::string str = mgr->DumpString();
//        SERVER_LOG_INFO << "\n" << str;
        std::cout << "\n" << str << std::endl;
    }
}

// Read-only lookup: returns, by value, the child of the ConfigMgr root node
// registered under the given name.
//
// @param name  key of the configuration section
// @return copy of the matching child node
ConfigNode
ServerConfig::GetConfig(const std::string &name) const {
    // The pointer and reference are deliberately const so that the const
    // overloads of GetRootNode()/GetChild() are the ones selected.
    const ConfigMgr *const manager = ConfigMgr::GetInstance();
    const ConfigNode &root = manager->GetRootNode();
    return root.GetChild(name);
}

578 579 580 581
// Mutable lookup: returns a reference to the child of the ConfigMgr root node
// registered under the given name.
//
// @param name  key of the configuration section
// @return reference to the matching child node
ConfigNode &
ServerConfig::GetConfig(const std::string &name) {
    ConfigMgr *const manager = ConfigMgr::GetInstance();
    ConfigNode &root = manager->GetRootNode();
    return root.GetChild(name);
}


}
}
}