/**
 * \file python_module/src/cpp/megbrain_config.cpp
 *
 * This file is part of MegBrain, a deep learning framework developed by Megvii.
 *
 * \copyright Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
 *
 */

#include "./megbrain_config.h"
#include "./python_helper.h"

#include "megbrain/graph/event.h"
#include "megbrain/utils/debug.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/serialization/opr_registry.h"

#include <set>

#if defined(WIN32)
#include <io.h>
#include <windows.h>
#define F_OK 0
#define RTLD_LAZY 0
#define RTLD_GLOBAL 0
#define RTLD_NOLOAD 0
#define access(a, b) false

//! minimal dlopen() emulation on top of LoadLibrary(); the mode flags are
//! ignored (they are all defined to 0 above)
static void* dlopen(const char* file, int) {
    return static_cast<void*>(LoadLibrary(file));
}

//! dlerror() emulation; windows provides no equivalent of the dlfcn error
//! string, so a fixed message is returned (const char*, matching the %s
//! format it is passed to)
static const char* dlerror() {
    return "dlerror not available in windows";
}

//! dlsym() emulation on top of GetProcAddress()
static void* dlsym(void* handle, const char* name) {
    FARPROC symbol = GetProcAddress((HMODULE)handle, name);
    return reinterpret_cast<void*>(symbol);
}

#else

#include <dlfcn.h>

#endif

#if MGB_ENABLE_OPR_MM
#include "megbrain/opr/mm_handler.h"
#endif

#if MGB_CUDA
#include <cuda.h>
#endif

using namespace mgb;

namespace {
    // per-graph event receiver installed by _config::begin_set_opr_priority();
    // an entry maps a computing graph to the handler that overrides operator
    // priorities until _config::end_set_opr_priority() erases it
    std::unordered_map<ComputingGraph*,
        SyncEventConnecter::ReceiverHandler>
        set_priority_on_opr_inserted_handle;
    // guards concurrent access to the map above
    std::mutex set_priority_on_opr_inserted_handle_mtx;

} // anonymous namespace

/*!
 * \brief set a bool/int option of a computing graph by name
 *
 * \param cg computing graph whose options are modified
 * \param name option name matching the field in the graph options (nested
 *      fields use dots, e.g. "seq_opt.enable_mem_plan_opt")
 * \param val_int new value, assigned to the option unconditionally
 * \return previous value of the option, converted to bool
 * \throw MegBrainError if \p name does not name a known option
 */
bool _config::set_comp_graph_option(
        CompGraph &cg, const std::string &name, int val_int) {

// on a name match: save the old value, store val_int and return the old one;
// the static_assert restricts usage to bool or <=32-bit integer options so
// the int argument / bool return do not silently truncate wider types
#define SET_CG_OPTION(name_chk) \
    do { \
        static_assert( \
                std::is_same<decltype(opt.name_chk), bool>::value || \
                std::is_same<decltype(opt.name_chk), uint8_t>::value || \
                std::is_same<decltype(opt.name_chk), int16_t>::value || \
                std::is_same<decltype(opt.name_chk), uint16_t>::value || \
                std::is_same<decltype(opt.name_chk), int32_t>::value, \
                "not bool/int opt"); \
        if (name == #name_chk) { \
            auto ret = opt.name_chk; \
            opt.name_chk = val_int; \
            return ret; \
        } \
    } while(0)

    auto &&opt = cg.get().options();
    SET_CG_OPTION(seq_opt.enable_mem_plan_opt);
    SET_CG_OPTION(seq_opt.enable_mem_reuse_alloc);
    SET_CG_OPTION(seq_opt.enable_seq_comp_node_opt);
    SET_CG_OPTION(force_dynamic_alloc);
    SET_CG_OPTION(enable_grad_var_static_reshape);
    SET_CG_OPTION(async_exec_level);
    SET_CG_OPTION(graph_opt.jit);
    SET_CG_OPTION(graph_opt.tensorrt);
    SET_CG_OPTION(graph_opt_level);
    SET_CG_OPTION(allreduce_pack_max_size);
    SET_CG_OPTION(allreduce_pack_ignore_first);
    SET_CG_OPTION(var_sanity_check_first_run);
    SET_CG_OPTION(no_profiling_on_shape_change);
    SET_CG_OPTION(allocate_static_mem_after_graph_compile);
    SET_CG_OPTION(log_level);
    SET_CG_OPTION(enable_sublinear_memory_opt);
    SET_CG_OPTION(sublinear_mem_cofig.lb_memory);
    SET_CG_OPTION(sublinear_mem_cofig.genetic_nr_iter);
    SET_CG_OPTION(sublinear_mem_cofig.genetic_pool_size);
    SET_CG_OPTION(sublinear_mem_cofig.thresh_nr_try);
    SET_CG_OPTION(sublinear_mem_cofig.num_worker);
    SET_CG_OPTION(enable_var_mem_defragment);
    SET_CG_OPTION(eager_evaluation);
    SET_CG_OPTION(enable_memory_swap);
    throw MegBrainError(ssprintf(
                "invalid computing graph option name: %s", name.c_str()));
#undef SET_CG_OPTION
}

//! whether eager evaluation is enabled on the given computing graph
bool _config::comp_graph_is_eager(CompGraph &cg) {
    auto&& options = cg.get().options();
    return options.eager_evaluation;
}

//! record that \p var additionally depends on \p dep; both vars must belong
//! to the same owner graph
void _config::add_extra_vardep(const SymbolVar &var, const SymbolVar &dep) {
    auto var_node = var.node();
    auto dep_node = dep.node();
    auto graph = var_node->owner_graph();
    mgb_assert(graph == dep_node->owner_graph());
    graph->options().extra_vardeps[var_node].push_back(dep_node);
}

//! install a graph event handler that applies \p priority to every operator
//! inserted afterwards, until end_set_opr_priority() is called; a nonzero
//! existing priority is only lowered, never raised
void _config::begin_set_opr_priority(CompGraph& cg, int priority) {
    auto&& graph = cg.get();
    SyncEventConnecter::ReceiverHandler* receiver;
    {
        // only the map access is protected; the slot itself stays valid
        // after unlock since entries are erased only by end_set_opr_priority
        MGB_LOCK_GUARD(set_priority_on_opr_inserted_handle_mtx);
        receiver = &set_priority_on_opr_inserted_handle[&graph];
    }
    mgb_assert(!*receiver, "multiple calls to _begin_set_opr_priority()");

    *receiver = graph.event().register_receiver<cg::event::OprInserted>(
            [priority](const cg::event::OprInserted& event) {
                if (event.exc || !priority)
                    return;
                int& pri = event.opr->node_prop().attribute().priority;
                pri = pri ? std::min(pri, priority) : priority;
            });
}

//! uninstall the handler set up by begin_set_opr_priority()
void _config::end_set_opr_priority(CompGraph &cg) {
    MGB_LOCK_GUARD(set_priority_on_opr_inserted_handle_mtx);
    bool removed = set_priority_on_opr_inserted_handle.erase(&cg.get()) > 0;
    mgb_assert(removed, "end_set_opr_priority called "
            "before begin_set_opr_priority");
}

//! start associating \p tracker with operators inserted into \p cg
void _config::begin_set_exc_opr_tracker(CompGraph &cg, PyObject *tracker) {
    auto&& graph = cg.get();
    OprPyTracker::begin_set_tracker(graph, tracker);
}

//! stop associating a tracker with operators inserted into \p cg
void _config::end_set_exc_opr_tracker(CompGraph &cg) {
    auto&& graph = cg.get();
    OprPyTracker::end_set_tracker(graph);
}

//! tracker tuple for the operator owning var \p var_id, or None if no such
//! var exists in the graph
PyObject* _config::get_opr_tracker(CompGraph &cg, size_t var_id) {
    auto var = cg.get().find_var_by_id(var_id);
    if (var != nullptr)
        return OprPyTracker::get_tracker(var->owner_opr()).as_tuple();
    Py_RETURN_NONE;
}

//! mark the operator owning \p var as a sublinear-memory endpoint; no-op if
//! sublinear memory is compiled out
void _config::set_opr_sublinear_memory_endpoint(const SymbolVar &var) {
    MGB_MARK_USED_VAR(var);
#if MGB_ENABLE_SUBLINEAR
    auto opr = var.node()->owner_opr();
    auto&& endpoints = opr->owner_graph()->options()
            .opr_attribute.sublinear_memory_endpoint;
    endpoints.insert(opr);
#endif
}

//! forward the fork-cuda warning flag to the debug utilities; no-op when
//! debug utilities are compiled out
void _config::set_fork_cuda_warning_flag(int flag) {
#if MGB_ENABLE_DEBUG_UTIL
    debug::set_fork_cuda_warning_flag(flag);
#else
    // flag is intentionally ignored in non-debug builds
    MGB_MARK_USED_VAR(flag);
#endif
}

//! whether a CUDA context is currently bound to the calling thread; always
//! false when CUDA is compiled out
bool _config::is_cuda_ctx_set() {
#if MGB_CUDA
    CUcontext ctx;
    if (cuCtxGetCurrent(&ctx) != CUDA_SUCCESS)
        return false;
    return ctx != nullptr;
#else
    return false;
#endif
}

//! space-separated, de-duplicated, sorted list of the compute capabilities
//! (e.g. "61 70") of all visible cuda devices; empty if no device is present
std::string _config::get_cuda_gencode() {
#if MGB_CUDA
    int nr_dev;
    auto err = cudaGetDeviceCount(&nr_dev);
    if (err == cudaErrorNoDevice)
        return {};
    MGB_CUDA_CHECK(err);

    // std::set gives de-duplication and lexicographic ordering
    std::set<std::string> archs;
    for (int dev = 0; dev < nr_dev; ++dev) {
        cudaDeviceProp prop;
        MGB_CUDA_CHECK(cudaGetDeviceProperties(&prop, dev));
        archs.insert(std::to_string(prop.major) + std::to_string(prop.minor));
    }

    std::string result;
    for (auto&& arch : archs) {
        if (!result.empty())
            result.append(" ");
        result.append(arch);
    }
    return result;
#else
    mgb_throw(MegBrainError, "cuda disabled at compile time");
#endif
}

namespace {
#if MGB_CUDA
    //! directory from which an already-loaded shared library \p sl_name was
    //! loaded; asserts that the library is resident (RTLD_NOLOAD)
    std::string get_loaded_shared_lib_path(const char* sl_name) {
        auto handle = dlopen(sl_name, RTLD_GLOBAL | RTLD_LAZY | RTLD_NOLOAD);
        mgb_assert(handle != nullptr, "%s", dlerror());
        char path[PATH_MAX];
        mgb_assert(dlinfo(handle, RTLD_DI_ORIGIN, &path) != -1,
                   "%s", dlerror());
        return path;
    }
#endif
}

//! candidate cuda include directories, from $CUDA_BIN_PATH if set, otherwise
//! derived from the location of the loaded libcudart.so
std::vector<std::string> _config::get_cuda_include_path() {
#if MGB_CUDA
    auto cuda_root = getenv("CUDA_BIN_PATH");
    if (cuda_root) {
        std::string root{cuda_root};
        return {root, root + "/include"};
    }
    auto lib_dir = get_loaded_shared_lib_path("libcudart.so");
    return {lib_dir, lib_dir + "/../", lib_dir + "/../include"};
#else
    mgb_throw(MegBrainError, "cuda disabled at compile time");
#endif
}

//! candidate cuda library directories, from $CUDA_BIN_PATH if set, otherwise
//! the location of the loaded libcudart.so
std::vector<std::string> _config::get_cuda_lib_path() {
#if MGB_CUDA
    auto cuda_root = getenv("CUDA_BIN_PATH");
    if (cuda_root) {
        std::string root{cuda_root};
        return {root, root + "/lib64"};
    }
    return {get_loaded_shared_lib_path("libcudart.so")};
#else
    mgb_throw(MegBrainError, "cuda disabled at compile time");
#endif
}

//! cuda runtime version as reported by cudaRuntimeGetVersion()
int _config::get_cuda_version() {
#if MGB_CUDA
    int runtime_version = 0;
    MGB_CUDA_CHECK(cudaRuntimeGetVersion(&runtime_version));
    return runtime_version;
#else
    mgb_throw(MegBrainError, "cuda disabled at compile time");
#endif
}

//! whether this build was compiled with CUDA support (MGB_CUDA)
bool _config::is_compiled_with_cuda() {
#if MGB_CUDA
    return true;
#else
    return false;
#endif
}

/*!
 * \brief load an operator library as a plugin
 *
 * On first call, re-opens the module at \p self_path with RTLD_GLOBAL so the
 * plugin can resolve mgb symbols; this promotion is done only once per
 * process (guarded by a mutex).
 *
 * \param self_path path of the module that provides mgb symbols
 * \param lib_path path of the operator library to load; may be null to only
 *      perform the RTLD_GLOBAL promotion
 */
void _config::load_opr_library(const char* self_path, const char* lib_path) {
    static bool self_global = false;
    static std::mutex self_global_mtx;
    {
        MGB_LOCK_GUARD(self_global_mtx);
        if (!self_global) {
            auto hdl = dlopen(self_path, RTLD_LAZY | RTLD_GLOBAL);
            mgb_assert(hdl, "failed to set mgb to global: %s", dlerror());
            self_global = true;
        }
    }
    if (lib_path) {
        auto hdl = dlopen(lib_path, RTLD_LAZY);
        mgb_assert(hdl, "failed to load library %s: %s", lib_path, dlerror());
    }
}

302
std::vector<std::pair<size_t, std::string>> _config::dump_registered_oprs() {
303 304 305 306 307 308 309
#if MGB_ENABLE_DEBUG_UTIL
    return serialization::OprRegistry::dump_registries();
#else
    return {};
#endif
}

310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
#if MGB_ENABLE_OPR_MM
/*! see definition : src/cpp/megbrain_config.h.
 * Create mm server. port 0 is permitted, leave zmqrpc to decide which port
 * should be used.
 */
int _config::create_mm_server(const std::string& server_addr, int port) {
    return create_zmqrpc_server(server_addr, port);
}

void _config::group_barrier(const std::string& server_addr,
        int port, uint32_t size, uint32_t rank) {
    mgb_assert(rank < size, "invalid rank %d", rank);
    auto group_mgr = std::make_shared<GroupClientProxy>(
            ssprintf("%s:%d", server_addr.c_str(), port));
    uint32_t rsp = group_mgr->group_barrier(size, rank);
    mgb_assert(rsp != 0, "rank already registered: %d", rank);
    mgb_assert(size == rsp, "inconsistent size: %d, expect %d", size, rsp);
}

#else

int _config::create_mm_server(const std::string& server_addr, int port) {
    mgb_throw(mgb::MegBrainError, "OPR_MM suppport disable at compile time");
    return 0;
}

void _config::group_barrier(const std::string& server_addr,
        int port, uint32_t size, uint32_t rank) {
    mgb_throw(mgb::MegBrainError, "OPR_MM suppport disable at compile time");
}

#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}