From 0ba52ad7cc47224755a564fe08d6d81eaaf2c564 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 24 Apr 2020 13:18:45 +0800 Subject: [PATCH] feat(cross_build): add inference cross build support GitOrigin-RevId: 30381dec0ec65db0c00db32cde2d4228b1ed531c --- CMakeLists.txt | 46 +-- cmake/mkl.cmake | 6 +- .../conv_bias/conv1x1/conv1x1_strategy.h | 1 + src/CMakeLists.txt | 12 +- src/core/impl/utils/debug.cpp | 272 +++++++++--------- 5 files changed, 182 insertions(+), 155 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b331d4a..0356ecc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) -if(NOT MSVC) +if(NOT MSVC AND NOT APPLE) set(CMAKE_CXX_ARCHIVE_CREATE " Dqc ") set(CMAKE_CXX_ARCHIVE_APPEND " Dq ") set(CMAKE_CXX_ARCHIVE_FINISH " -D ") @@ -22,6 +22,15 @@ set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO naive fallback ) +option(MGE_WITH_JIT "Build MegEngine with JIT." ON) +option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON) +option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) +option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) +option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) +option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON) +option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF) +option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON) + if(${MGE_ARCH} STREQUAL "AUTO") if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") @@ -34,21 +43,12 @@ if(${MGE_ARCH} STREQUAL "AUTO") endif() CHECK_CXX_COMPILER_FLAG(-fuse-ld=gold CXX_SUPPORT_GOLD) -if(CXX_SUPPORT_GOLD) +if(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE) message("-- Using GNU gold linker.") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold") endif() -option(MGE_WITH_JIT "Build MegEngine with JIT." ON) -option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON) -option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) -option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) -option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) -option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON) -option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF) -option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON) - if(NOT MGE_WITH_JIT) if(MGE_WITH_HALIDE) message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled") @@ -79,13 +79,14 @@ find_package(PythonInterp 3 REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads) -if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA) - set_property(TARGET Threads::Threads - PROPERTY INTERFACE_COMPILE_OPTIONS "$<$:-Xcompiler=-pthread>" - "$<$>:-pthread>") +if(NOT "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "") + if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA) + set_property(TARGET Threads::Threads + PROPERTY INTERFACE_COMPILE_OPTIONS "$<$:-Xcompiler=-pthread>" + "$<$>:-pthread>") + endif() endif() - set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.") set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS) set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA") @@ -149,7 +150,11 @@ if(MSVC) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") - set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG") + if(ANDROID) + set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG") + else() + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + endif() endif() if(MGE_WITH_CUDA) @@ -469,3 +474,10 @@ if(TARGET _mgb) ) endif() +IF(APPLE) + set(CMAKE_THREAD_LIBS_INIT "-lpthread") + set(CMAKE_HAVE_THREADS_LIBRARY 1) + set(CMAKE_USE_WIN32_THREADS_INIT 0) + set(CMAKE_USE_PTHREADS_INIT 1) + set(THREADS_PREFER_PTHREAD_FLAG ON) +ENDIF() diff --git a/cmake/mkl.cmake b/cmake/mkl.cmake index c2653fb6..d222b0a8 100644 --- a/cmake/mkl.cmake +++ b/cmake/mkl.cmake @@ -41,7 +41,11 @@ if(MGE_MKL_USE_STATIC) endif() add_library(libmkl INTERFACE) - target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group) + if(APPLE) + target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) + else() + target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group) + endif() target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) else() find_library(MKL_CORE_LIBRARY diff --git a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h index 7bb6028b..f0904ec8 100644 --- a/dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h +++ b/dnn/src/fallback/conv_bias/conv1x1/conv1x1_strategy.h @@ -146,6 +146,7 @@ public: const ConvBiasImpl::NCBKernSizeParam& param, const ConvBiasImpl::NCBKernParam& ncb_param, const ConvBiasImpl::NCBKernIndex& ncb_index) override { + MEGDNN_MARK_USED_VAR(ncb_index); if (pack_mode == MatrixMulImpl::AlgoBase::PackMode::DEFAULT) { whole_bundle.set(ncb_param.workspace_ptr); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a0379de..21e06fcc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -129,10 +129,14 @@ if (MGB_WITH_FLATBUFFERS) target_sources(megbrain PRIVATE ${GENERATED_FLATBUFFERS_CONVERTER_PATH}/mgb_opr_param_defs_converter.inl) target_include_directories(megbrain PRIVATE ${GENERATED_FLATBUFFERS_CONVERTER_PATH}) endif() -if(UNIX) +if(UNIX AND NOT ANDROID AND NOT APPLE) target_link_libraries(megbrain dl rt atomic) endif() +if(ANDROID) + target_link_libraries(megbrain log) +endif() + add_library(megengine SHARED $) if(MGB_WITH_FLATBUFFERS) add_dependencies(megengine mgb_serialization_schema_fbs) @@ -140,7 +144,11 @@ endif() target_include_directories(megengine PRIVATE $) target_compile_definitions(megengine PUBLIC $) target_compile_options(megengine PUBLIC $) -target_link_libraries(megengine PRIVATE $ -Wl,--no-undefined -Wl,--version-script=${PROJECT_SOURCE_DIR}/python_module/src/version.ld) +if(APPLE) + target_link_libraries(megengine PRIVATE $) +else() + target_link_libraries(megengine PRIVATE $ -Wl,--no-undefined -Wl,--version-script=${PROJECT_SOURCE_DIR}/python_module/src/version.ld) +endif() set_target_properties(megengine PROPERTIES CXX_VISIBILITY_PRESET default) install(TARGETS megengine LIBRARY DESTINATION lib64) foreach(_PATH ${MGB_INC}) diff --git a/src/core/impl/utils/debug.cpp b/src/core/impl/utils/debug.cpp index 4423af65..8b97d745 100644 --- a/src/core/impl/utils/debug.cpp +++ b/src/core/impl/utils/debug.cpp @@ -6,39 +6,39 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #include "megbrain/utils/debug.h" -#include "megdnn/tensor_iter.h" -#include #include +#include +#include "megdnn/tensor_iter.h" using namespace mgb; using namespace debug; - #if MGB_ENABLE_DEBUG_UTIL -#include "megbrain/utils/metahelper.h" #include "megbrain/common.h" +#include "megbrain/utils/metahelper.h" -#include "megbrain/utils/thin/function.h" -#include -#include #include #include #include +#include +#include +#include "megbrain/utils/thin/function.h" #if MGB_CUDA #include #include #endif +#include #include #include #include -#include #ifdef __ANDROID__ #include @@ -54,17 +54,17 @@ struct AndroidBacktraceState { void** end; }; -static _Unwind_Reason_Code android_unwind_callback(struct _Unwind_Context* context, - void* arg) { +static _Unwind_Reason_Code android_unwind_callback( + struct _Unwind_Context* context, void* arg) { AndroidBacktraceState* state = static_cast(arg); - uintptr_t current_pc = _Unwind_GetIP(context); - if(current_pc == nullptr) + void* current_pc = reinterpret_cast(_Unwind_GetIP(context)); + if (current_pc == nullptr) return _URC_NO_REASON; if (state->current == state->end) { - return _URC_END_OF_STACK; + return _URC_END_OF_STACK; } else { - *state->current++ = reinterpret_cast(current_pc); + *state->current++ = current_pc; } return _URC_NO_REASON; @@ -89,26 +89,27 @@ struct MemmapEntry { uintptr_t low, high; std::string file; - MemmapEntry(uint64_t low_, uint64_t high_, const char *file_): - low(low_), high(high_), file(file_) - {} + MemmapEntry(uint64_t low_, uint64_t high_, const char* file_) + : low(low_), high(high_), file(file_) {} }; -void get_mem_map(int pid, thin_function callback) { +void get_mem_map( + int pid, + thin_function + callback) { char fpath[64]; if (pid) sprintf(fpath, "/proc/%d/maps", pid); else strcpy(fpath, "/proc/self/maps"); - FILE *fin = fopen(fpath, "r"); + FILE* fin = fopen(fpath, "r"); mgb_assert(fin, "failed to open %s", fpath); char linebuf[512]; while (fgets(linebuf, sizeof(linebuf), fin)) { uintptr_t begin, end; char perm[10], offset[20], dev[10], inode[20], path_mem[256], *path; - int nr = sscanf(linebuf, "%zx-%zx %s %s %s %s %s", - &begin, &end, perm, offset, dev, inode, path_mem); + int nr = sscanf(linebuf, "%zx-%zx %s %s %s %s %s", &begin, &end, perm, + offset, dev, inode, path_mem); if (nr == 6) path = nullptr; else { @@ -122,10 +123,12 @@ void get_mem_map(int pid, thin_function(getpid())); getchar(); @@ -133,9 +136,8 @@ class SigHandlerInit { if (signum == -1) { mgb_log_error("%s: std::terminate() called", msg0); } else { - mgb_log_error("%s: caught deadly signal %d(%s)", - msg0, - signum, strsignal(signum)); + mgb_log_error("%s: caught deadly signal %d(%s)", msg0, signum, + strsignal(signum)); } std::string bp; debug::backtrace(2).fmt_to_str(bp); @@ -143,14 +145,14 @@ class SigHandlerInit { exit(EXIT_FAILURE); } - public: - static void init_for_segv() { - struct sigaction action; - memset(&action, 0, sizeof(action)); - action.sa_handler = &death_handler; - sigaction(SIGSEGV, &action, nullptr); - std::set_terminate([](){death_handler(-1);}); - } +public: + static void init_for_segv() { + struct sigaction action; + memset(&action, 0, sizeof(action)); + action.sa_handler = &death_handler; + sigaction(SIGSEGV, &action, nullptr); + std::set_terminate([]() { death_handler(-1); }); + } }; #if MGB_CUDA @@ -164,7 +166,8 @@ class CudaCheckOnFork { if (flag() && !ScopedForkWarningSupress::supress()) { CUcontext ctx; if (cuCtxGetCurrent(&ctx) != CUDA_ERROR_NOT_INITIALIZED) { - mgb_log_debug("It is dangerous to call fork() after cuda " + mgb_log_debug( + "It is dangerous to call fork() after cuda " "context has been initialized; please ensure no cuda " "methods is invoked in the child process. You can set " "MGB_THROW_ON_FORK to find out where the fork() is " @@ -177,20 +180,17 @@ class CudaCheckOnFork { } } - public: - static void set_flag(int f) { - flag() = f; - } +public: + static void set_flag(int f) { flag() = f; } - static void init() { - int err = pthread_atfork(&CudaCheckOnFork::atfork_prepare, - nullptr, nullptr); - if (err) { - mgb_throw(SystemError, - "failed to setup atfork handler: %s", - strerror(err)); - } + static void init() { + int err = pthread_atfork(&CudaCheckOnFork::atfork_prepare, nullptr, + nullptr); + if (err) { + mgb_throw(SystemError, "failed to setup atfork handler: %s", + strerror(err)); } + } }; #endif @@ -206,7 +206,7 @@ class InitCaller { }; InitCaller InitCaller::inst; -} // anonymous namespace +} // anonymous namespace void (*ForkAfterCudaError::throw_)() = throw_fork_cuda_exc; @@ -221,7 +221,7 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { recursive_call = true; constexpr size_t MAX_DEPTH = 6; - void *stack_mem[MAX_DEPTH]; + void* stack_mem[MAX_DEPTH]; int depth = ::backtrace(stack_mem, MAX_DEPTH); auto stack = stack_mem; if (depth > nr_exclude) { @@ -234,19 +234,19 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { static std::mutex mtx; MGB_LOCK_GUARD(mtx); if (memmap.empty()) { - get_mem_map(0, [&](uintptr_t lo, uintptr_t hi, - const char * /*perm*/, const char *fname){ - if (fname && strlen(fname)) + get_mem_map(0, [&](uintptr_t lo, uintptr_t hi, const char* /*perm*/, + const char* fname) { + if (fname && strlen(fname)) memmap.emplace_back(lo, hi, fname); - }); + }); } } BacktraceResult result; - for (int i = 0; i < depth; ++ i) { + for (int i = 0; i < depth; ++i) { const char* fname = nullptr; auto addr = reinterpret_cast(stack[i]); - for (auto &&j: memmap) + for (auto&& j : memmap) if (j.low <= addr && j.high >= addr) { // theoretically we should examine file content to find whether // it is a shared library; but who would name an executable with @@ -264,12 +264,12 @@ BacktraceResult mgb::debug::backtrace(int nr_exclude) { return result; } -void BacktraceResult::fmt_to_str(std::string &dst) { +void BacktraceResult::fmt_to_str(std::string& dst) { char addr[128]; bool first = true; const char* prev_fname = nullptr; dst.append("bt:"); - for (auto &&i: stack) { + for (auto&& i : stack) { sprintf(addr, "%zx", i.second); if (i.first != prev_fname || first) { if (!first) @@ -296,85 +296,85 @@ void debug::set_fork_cuda_warning_flag(int flag) { #endif } -#endif // MGB_ENABLE_DEBUG_UTIL +#endif // MGB_ENABLE_DEBUG_UTIL namespace { - bool good_float(float val) { - return std::isfinite(val); - } +bool good_float(float val) { + return std::isfinite(val); +} - bool good_float(int) { - return true; - } +bool good_float(int) { + return true; +} #if MGB_ENABLE_LOGGING - // if not in MGB_ENABLE_LOGGING, num2str would become defined but not used - template - std::string num2str(T val) { - return std::to_string(val); - } +// if not in MGB_ENABLE_LOGGING, num2str would become defined but not used +template +std::string num2str(T val) { + return std::to_string(val); +} - std::string num2str(float val) { - union V { - uint32_t i; - float f; - }; - auto ret = std::to_string(val); - if (!good_float(val)) { - V v; - v.f = val; - ret.append(" (0x"); - ret.append(ssprintf("%x", v.i)); - ret.append(")"); - } - return ret; +std::string num2str(float val) { + union V { + uint32_t i; + float f; + }; + auto ret = std::to_string(val); + if (!good_float(val)) { + V v; + v.f = val; + ret.append(" (0x"); + ret.append(ssprintf("%x", v.i)); + ret.append(")"); } + return ret; +} #endif - template - Maybe do_compare_tensor_value( - const char *expr0, const char *expr1, - const HostTensorND &v0, const HostTensorND &v1, float maxerr) { - - auto it0 = megdnn::tensor_iter(v0.as_megdnn()).begin(), - it1 = megdnn::tensor_iter(v1.as_megdnn()).begin(); - for (size_t i = 0, it = v0.shape().total_nr_elems(); i < it; ++ i) { - ctype iv0 = *it0, iv1 = *it1; - double err = std::abs(iv0 - iv1) / std::max( - 1, std::min(std::abs(static_cast(iv0)), - std::abs((static_cast(iv1))))); - if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) { - TensorShape idx_shp; - idx_shp.ndim = v0.shape().ndim; - std::copy(it0.idx(), it0.idx() + idx_shp.ndim, idx_shp.shape); - return mgb_ssprintf_log( - "Unequal value\n" - "Value of: %s\n" - " Actual: %s\n" - "Expected: %s\n" - "Which is: %s\n" - "At index: %s/%s\n" - " error: %.6g", - expr1, num2str(iv1).c_str(), - expr0, num2str(iv0).c_str(), - idx_shp.to_string().c_str(), - v0.shape().to_string().c_str(), - err); - } - - ++ it0; - ++ it1; +template +Maybe do_compare_tensor_value(const char* expr0, const char* expr1, + const HostTensorND& v0, + const HostTensorND& v1, + float maxerr) { + auto it0 = megdnn::tensor_iter(v0.as_megdnn()).begin(), + it1 = megdnn::tensor_iter(v1.as_megdnn()).begin(); + for (size_t i = 0, it = v0.shape().total_nr_elems(); i < it; ++i) { + ctype iv0 = *it0, iv1 = *it1; + double err = std::abs(iv0 - iv1) / + std::max( + 1, std::min(std::abs(static_cast(iv0)), + std::abs((static_cast(iv1))))); + if (!good_float(iv0) || !good_float(iv1) || err >= maxerr) { + TensorShape idx_shp; + idx_shp.ndim = v0.shape().ndim; + std::copy(it0.idx(), it0.idx() + idx_shp.ndim, idx_shp.shape); + return mgb_ssprintf_log( + "Unequal value\n" + "Value of: %s\n" + " Actual: %s\n" + "Expected: %s\n" + "Which is: %s\n" + "At index: %s/%s\n" + " error: %.6g", + expr1, num2str(iv1).c_str(), expr0, num2str(iv0).c_str(), + idx_shp.to_string().c_str(), v0.shape().to_string().c_str(), + err); } - return None; + + ++it0; + ++it1; } + return None; +} -} // anonymous namespace +} // anonymous namespace -Maybe debug::compare_tensor_value( - const HostTensorND &v0, const char *expr0, - const HostTensorND &v1, const char *expr1, - float maxerr) { +Maybe debug::compare_tensor_value(const HostTensorND& v0, + const char* expr0, + const HostTensorND& v1, + const char* expr1, + float maxerr) { if (!v0.shape().eq_shape(v1.shape())) { return mgb_ssprintf_log( "Shape mismatch\n" @@ -382,8 +382,8 @@ Maybe debug::compare_tensor_value( " Actual: %s\n" "Expected: %s\n" "Which is: %s", - expr1, v1.shape().to_string().c_str(), - expr0, v0.shape().to_string().c_str()); + expr1, v1.shape().to_string().c_str(), expr0, + v0.shape().to_string().c_str()); } auto dtype = v0.layout().dtype; if (dtype != v1.layout().dtype) { @@ -393,12 +393,13 @@ Maybe debug::compare_tensor_value( " Actual: %s\n" "Expected: %s\n" "Which is: %s", - expr1, v1.layout().dtype.name(), - expr0, v0.layout().dtype.name()); + expr1, v1.layout().dtype.name(), expr0, + v0.layout().dtype.name()); } - switch(dtype.enumv()) { -#define cb(_dt) case DTypeTrait<_dt>::enumv: \ + switch (dtype.enumv()) { +#define cb(_dt) \ + case DTypeTrait<_dt>::enumv: \ return do_compare_tensor_value::ctype>( \ expr0, expr1, v0, v1, maxerr); MEGDNN_FOREACH_COMPUTING_DTYPE(cb) @@ -408,7 +409,8 @@ Maybe debug::compare_tensor_value( } } -std::string debug::dump_tensor(const HostTensorND& value, const std::string &name) { +std::string debug::dump_tensor(const HostTensorND& value, + const std::string& name) { struct Header { uint32_t name_len; uint32_t dtype; @@ -418,7 +420,7 @@ std::string debug::dump_tensor(const HostTensorND& value, const std::string &nam }; mgb_assert(value.layout().is_contiguous()); auto value_bytes = value.layout().span().dist_byte(); - std::string ret(name.size() + value_bytes + sizeof(Header), '\0'); + std::string ret(name.size() + value_bytes + sizeof(Header), '\0'); auto header = reinterpret_cast(&ret[0]); memset(header, 0, sizeof(Header)); header->name_len = name.length(); @@ -439,8 +441,8 @@ void debug::write_to_file(const char* filename, const std::string& content, strerror(errno)); auto nr = fwrite(content.data(), 1, content.size(), fout); mgb_throw_if(nr != content.size(), SystemError, - "failed to write to %s: num=%zu size=%zu %s", filename, - nr, content.size() ,strerror(errno)); + "failed to write to %s: num=%zu size=%zu %s", filename, nr, + content.size(), strerror(errno)); auto err = fclose(fout); mgb_throw_if(err, SystemError, "failed to close %s: %s", filename, strerror(errno)); -- GitLab