diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d407edb35425aca609802df4672ecf6104bddbd..400a577a7ae890a6ebd2ac6eb28b2ea4dce84677 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,7 +177,7 @@ if(MGE_WITH_CUDA) if(NOT MGE_CUDA_GENCODE) if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=0") + set(MEGDNN_THREADS_512 0) if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52") set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60") @@ -202,7 +202,7 @@ if(MGE_WITH_CUDA) message(FATAL_ERROR "Unsupported CUDA host arch.") endif() else() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=1") + set(MEGDNN_THREADS_512 1) endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}") @@ -287,35 +287,31 @@ option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON) # MKLDNN build if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") - add_definitions(-DMEGDNN_X86_WITH_MKL_DNN) include(cmake/MKL_DNN.cmake) + set(MEGDNN_X86_WITH_MKL_DNN 1) endif() # RTTI if(MGE_ENABLE_RTTI) - add_definitions(-DMEGDNN_ENABLE_MANGLING=0 -DMEGDNN_ENABLE_RTTI=1) + set(MEGDNN_ENABLE_MANGLING 0) + set(MEGDNN_ENABLE_RTTI 1) else() - add_definitions(-DMEGDNN_ENABLE_MANGLING=1 -DMEGDNN_ENABLE_RTTI=0) + set(MEGDNN_ENABLE_MANGLING 1) + set(MEGDNN_ENABLE_RTTI 0) endif() set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI}) # Logging -if(MGE_ENABLE_LOGGING) - add_definitions(-DMEGDNN_ENABLE_LOGGING=1) -else() - add_definitions(-DMEGDNN_ENABLE_LOGGING=0) -endif() set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING}) +set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING}) set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING}) # Exception -if(MGE_ENABLE_EXCEPTIONS) - add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=1) -else() +if(NOT MGE_ENABLE_EXCEPTIONS) message(STATUS 
"Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.") - add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=0) endif() set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS}) +set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS}) # JIT if(MGE_WITH_JIT AND MGE_WITH_HALIDE) @@ -330,8 +326,15 @@ if(CMAKE_THREAD_LIBS_INIT) set(MGB_HAVE_THREAD 1) endif() +if(MGE_WITH_TEST) + # use intra-op multi threads + set(MEGDNN_ENABLE_MULTI_THREADS 1) +endif() + # CUDA set(MGB_CUDA ${MGE_WITH_CUDA}) +set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA}) + # Debug info if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") @@ -357,8 +360,46 @@ endif() # Distributed communication set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED}) +# MGE_ARCH related flags +if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") + if(MGE_BLAS STREQUAL "MKL") + set(MEGDNN_X86_WITH_MKL 1) + elseif(MGE_BLAS STREQUAL "OpenBLAS") + set(MEGDNN_X86_WITH_OPENBLAS 1) + endif() +endif() + +# Enable Naive +if(MGE_ARCH STREQUAL "naive") + set(MEGDNN_NAIVE 1) + message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.") +endif() + +if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") + set(MEGDNN_X86 1) + if(MGE_ARCH STREQUAL "x86_64") + set(MEGDNN_X86_64 1) + set(MEGDNN_64_BIT 1) + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") + endif() + else() + set(MEGDNN_X86_32 1) + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") + endif() + endif() + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") + endif() +endif() + + +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}") + # Write out megbrain_build_config.h -configure_file(src/core/include/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h) +# It defines macros needed by both megbrain and dnn +configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h) 
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION include) add_subdirectory(dnn) diff --git a/dnn/CMakeLists.txt b/dnn/CMakeLists.txt index fa9dcd847dfa005183ea934f1bdc775a0ac75046..ecbc7d5987b852b5d55f0cfa629b0f0b7e3bb153 100644 --- a/dnn/CMakeLists.txt +++ b/dnn/CMakeLists.txt @@ -1,40 +1,3 @@ -if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") - if(${MGE_BLAS} STREQUAL "MKL") - add_definitions(-DMEGDNN_X86_WITH_MKL) - elseif(${MGE_BLAS} STREQUAL "OpenBLAS") - add_definitions(-DMEGDNN_X86_WITH_OPENBLAS) - endif() -endif() - -# Enable Naive -if(${MGE_ARCH} STREQUAL "naive") - add_definitions(-DMEGDNN_NAIVE=1) - message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.") -else() - add_definitions(-DMEGDNN_NAIVE=0) -endif() - - -if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") - add_definitions(-DMEGDNN_X86=1) - if(${MGE_ARCH} STREQUAL "x86_64") - add_definitions(-DMEGDNN_X86_64 -DMEGDNN_64_BIT) - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") - endif() - else() - add_definitions(-DMEGDNN_X86_32) - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") - endif() - endif() - if(NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") - endif() -endif() - - -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}") list(APPEND OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/scripts/opr_param_defs.py) set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) @@ -89,8 +52,6 @@ add_dependencies(opr_param_defs _opr_param_defs) if(MGE_WITH_TEST) - # use multi threads - add_definitions (-DMEGDNN_ENABLE_MULTI_THREADS=1) add_subdirectory(test) endif() diff --git a/dnn/include/megdnn/config/config.h b/dnn/include/megdnn/config/config.h index 5f144f2a007d8bb31740e674385b95686183d372..a428d4af7c71325df0d2ed12e30fe85d529f1521 100644 --- a/dnn/include/megdnn/config/config.h +++ b/dnn/include/megdnn/config/config.h @@ -9,22 +9,10 @@ * "AS IS" 
BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ +#include "megbrain_build_config.h" + #if !defined(__CUDACC__) -// Try to detect if no architecture flags defined. -#if !defined(MEGDNN_NAIVE) && !defined(MEGDNN_X86) && \ - !defined(MEGDNN_X86_64) && !defined(MEGDNN_X86_32) && \ - !defined(MEGDNN_64_BIT) && !defined(MEGDNN_MIPS) && \ - !defined(MEGDNN_ARMV7) && !defined(MEGDNN_AARCH64) -#if defined(__x86_64__) || defined(_M_X64) -#define MEGDNN_X86 1 -#define MEGDNN_X86_64 1 -#define MEGDNN_64_BIT 1 -#elif defined(__i386) || defined(_M_IX86) -#define MEGDNN_X86 1 -#define MEGDNN_X86_32 1 -#endif -#endif #endif // !defined(__CUDACC__) diff --git a/dnn/src/CMakeLists.txt b/dnn/src/CMakeLists.txt index f0cbbe7994e9f54d06e613e6535f49e128695021..be9d48c510ee97622abe1c37a4aafae22017b585 100644 --- a/dnn/src/CMakeLists.txt +++ b/dnn/src/CMakeLists.txt @@ -1,7 +1,9 @@ - set(LIBMEGDNN_DEF) file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) +# Build configure +list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) + if(NOT ${MGE_ARCH} STREQUAL "naive") file(GLOB_RECURSE SOURCES_ fallback/*.cpp) list(APPEND SOURCES ${SOURCES_}) @@ -24,7 +26,6 @@ if(MGE_WITH_CUDA) file(GLOB_RECURSE CUSOURCES cuda/*.cu) list(APPEND SOURCES ${CUSOURCES}) - list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_CUDA=1) endif() @@ -33,7 +34,7 @@ add_definitions(${LIBMEGDNN_DEF}) add_library(megdnn EXCLUDE_FROM_ALL STATIC ${SOURCES}) target_link_libraries(megdnn opr_param_defs) -target_include_directories(megdnn PUBLIC ${PROJECT_SOURCE_DIR}/dnn/include) +target_include_directories(megdnn PUBLIC ${PROJECT_BINARY_DIR}/genfiles ${PROJECT_SOURCE_DIR}/dnn/include) target_include_directories(megdnn PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . 
FILES_MATCHING PATTERN "*.h*") diff --git a/dnn/src/x86/conv_bias/f32/algos.cpp b/dnn/src/x86/conv_bias/f32/algos.cpp index 01a629bfb1c585e64c96a0b22517ba15ec72f02f..28f80d3eec1bc05c3e07ef6e7813e413f47db5c5 100644 --- a/dnn/src/x86/conv_bias/f32/algos.cpp +++ b/dnn/src/x86/conv_bias/f32/algos.cpp @@ -645,7 +645,7 @@ void ConvBiasImpl::AlgoMatrixMul::kimpl(const NCBKernParam& param, } } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN static inline void mkldnn_fp32_conv_instance( const ConvBiasImpl::NCBKernParam& param, const uint32_t ocpg, const uint32_t icpg, const uint32_t group, const uint32_t in, diff --git a/dnn/src/x86/conv_bias/f32/algos.h b/dnn/src/x86/conv_bias/f32/algos.h index e5a63351b2ed19ca0fbb4a7d811acb6e843ff1b8..8c06542bbb0c81fb89de0bb0f0a0a4a165d44eb3 100644 --- a/dnn/src/x86/conv_bias/f32/algos.h +++ b/dnn/src/x86/conv_bias/f32/algos.h @@ -186,7 +186,7 @@ public: void* type() const override; }; -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN class ConvBiasImpl::AlgoMkldnnConv final : public AlgoBase { static void kern_mkldnn_fp32(const NCBKernParam& param, const NCBKernIndex&); diff --git a/dnn/src/x86/conv_bias/int8/algos.cpp b/dnn/src/x86/conv_bias/int8/algos.cpp index 24487fd50ca9b8b956dae058cb4d739568b0b31f..96a079696fd9d35096b10bae2a3380451591d819 100644 --- a/dnn/src/x86/conv_bias/int8/algos.cpp +++ b/dnn/src/x86/conv_bias/int8/algos.cpp @@ -20,13 +20,13 @@ #include "src/x86/conv_bias/postprocess_helper.h" #include "src/x86/handle.h" #include "src/x86/utils.h" -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN #include #endif #include -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN using namespace dnnl; #endif using namespace megdnn; @@ -161,7 +161,7 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( return direct_conv_avx2_stride1::get_kimpls(param, bundle); } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN bool 
ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, AlgoSelectionStrategy) const { @@ -353,7 +353,7 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( #undef REORDER_MEMORY #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN /* ===================== mkldnn qint8 matmul algo ===================== */ bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, const NCBKernSizeParam& param, diff --git a/dnn/src/x86/conv_bias/int8/algos.h b/dnn/src/x86/conv_bias/int8/algos.h index 5a63c0b7ac8237e3c3f8c2fc328c2ea964e0da10..00135dd3a88efa9fa0a13055771294fb3ef40afc 100644 --- a/dnn/src/x86/conv_bias/int8/algos.h +++ b/dnn/src/x86/conv_bias/int8/algos.h @@ -58,7 +58,7 @@ public: void* type() const override; }; -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN /* ===================== mkldnn qint8 algo ===================== */ class ConvBiasImpl::AlgoMkldnnQint8 final : public AlgoBase { static void kern_mkldnn_s8x8x32(const NCBKernParam& param, diff --git a/dnn/src/x86/conv_bias/opr_impl.cpp b/dnn/src/x86/conv_bias/opr_impl.cpp index 2669ef2fd7c13fee6642beac038953f47ce0f4db..04d91d08d884b8f28e01bc382811d6ccce513a69 100644 --- a/dnn/src/x86/conv_bias/opr_impl.cpp +++ b/dnn/src/x86/conv_bias/opr_impl.cpp @@ -25,7 +25,7 @@ namespace { uint8_t x86_algo_type_storage; void* x86_algo_type = &x86_algo_type_storage; } // anonymous namespace -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN void* ConvBiasImpl::AlgoMkldnnQint8::type() const { return x86_algo_type; } @@ -78,7 +78,7 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { AlgoAVX2DirectConvStride2 avx2_stride2_direct; AlgoChanWiseAvx2Stride1Qint8 avx2_stride1_chanwsie_qint8; AlgoMatrixMul matmul; -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN AlgoMkldnnMatmulQint8 mkldnn_matmul_qint8; //! 
Because the mkldnnconv need handle AlgoMkldnnQint8 mkldnn_qint8; @@ -87,7 +87,7 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { SmallVector> refhold; public: AlgoPack() { -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN //! Create the mkldnn algo all_algos.emplace_back(&mkldnn_conv_fp32); all_algos.emplace_back(&mkldnn_matmul_qint8); diff --git a/dnn/src/x86/conv_bias/opr_impl.h b/dnn/src/x86/conv_bias/opr_impl.h index dc83ef0e364b4be42a552ff9267ab05ddc4f2036..bece5476cdf4b2af01f46ee9f1fa41d781a57cb1 100644 --- a/dnn/src/x86/conv_bias/opr_impl.h +++ b/dnn/src/x86/conv_bias/opr_impl.h @@ -32,7 +32,7 @@ public: class AlgoDirectAvx2Stride1Int8; class AlgoAVX2DirectConvStride2; class AlgoChanWiseAvx2Stride1Qint8; -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN class AlgoMkldnnConv; class AlgoMkldnnQint8; class AlgoMkldnnMatmulQint8; diff --git a/dnn/src/x86/handle.cpp b/dnn/src/x86/handle.cpp index 732b7229b10f6b548795eece1584296cb94fb8fb..b7c441c97f9077168f2aa479aa7d96f1cf8f9b20 100644 --- a/dnn/src/x86/handle.cpp +++ b/dnn/src/x86/handle.cpp @@ -32,7 +32,7 @@ #include "src/x86/warp_affine/opr_impl.h" #include "src/x86/warp_perspective/opr_impl.h" -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL #include #define STR_HELPER(x) #x @@ -57,11 +57,11 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t computing_handle, HandleType type) : fallback::HandleImpl::HandleImpl(computing_handle, type) { disable_denorm(); -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL vmlSetMode(VML_LA | VML_FTZDAZ_ON | VML_ERRMODE_ERRNO); #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN m_mkldnn_engine = dnnl::engine(dnnl::engine::kind::cpu, 0); m_mkldnn_stream = dnnl::stream(m_mkldnn_engine); #endif diff --git a/dnn/src/x86/handle.h b/dnn/src/x86/handle.h index 4131c7c9da846a7e2bc0395f749cdca5892cef6a..0443a6dcfad31f58618d788575cdc4af99edb14e 100644 --- a/dnn/src/x86/handle.h +++ b/dnn/src/x86/handle.h @@ -13,7 
+13,7 @@ #include "src/x86/profile.h" -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN #include #endif @@ -31,14 +31,14 @@ public: std::unique_ptr create_operator(); size_t alignment_requirement() const override; -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN dnnl::engine mkldnn_engine() { return m_mkldnn_engine; } dnnl::stream mkldnn_stream() { return m_mkldnn_stream; } #endif private: ProfileCache m_profile_cache = get_profile_cache(); -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN dnnl::engine m_mkldnn_engine; dnnl::stream m_mkldnn_stream; #endif diff --git a/dnn/src/x86/matrix_mul/algos.cpp b/dnn/src/x86/matrix_mul/algos.cpp index 50f830cbd96dd73d173ff7469390f98ed28846fd..af1c5aa8bdf802a66ceeed378ab03883d5115028 100644 --- a/dnn/src/x86/matrix_mul/algos.cpp +++ b/dnn/src/x86/matrix_mul/algos.cpp @@ -18,15 +18,15 @@ #include "src/x86/matrix_mul/f32/strategy.h" -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL #include #include -#elif defined(MEGDNN_X86_WITH_OPENBLAS) +#elif MEGDNN_X86_WITH_OPENBLAS #include #else #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN #include #endif @@ -39,7 +39,7 @@ using namespace x86; namespace { void f32_blas_kern(const MatrixMulImpl::KernParam& kern_param) { -#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) +#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS auto m = kern_param.M, n = kern_param.N, k = kern_param.K; bool trA = kern_param.trA, trB = kern_param.trB; const auto Aptr = kern_param.A(), @@ -55,7 +55,7 @@ void f32_blas_kern(const MatrixMulImpl::KernParam& kern_param) { #endif } -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL void f32_blas_kern_only_packA(const MatrixMulImpl::KernParam& kern_param, const void* a_panel, const void* b_panel) { MEGDNN_MARK_USED_VAR(b_panel); @@ -75,7 +75,7 @@ void f32_blas_kern_only_packA(const MatrixMulImpl::KernParam& kern_param, bool 
MatrixMulImpl::AlgoF32Blas::usable( const KernSizeParam& kern_size_param) const { -#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) +#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && kern_size_param.format == param::MatrixMul::Format::DEFAULT && kern_size_param.B_type == kern_size_param.A_type && @@ -93,7 +93,7 @@ MatrixMulImpl::kern_t MatrixMulImpl::AlgoF32Blas::get_kern( } /* ===================== AlgoF32BlasPackA====================== */ -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL bool MatrixMulImpl::AlgoF32MKLPackA::usable( const KernSizeParam& kern_size_param) const { return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && @@ -202,7 +202,7 @@ MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL(AlgoInt8x8x32Vnni, #endif /* ===================== Int8 mkldnn algo ===================== */ -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN namespace { void int8x8x32_kern_mkldnn(const MatrixMulImpl::KernParam& kern_param) { MEGDNN_MARK_USED_VAR(kern_param); diff --git a/dnn/src/x86/matrix_mul/algos.h b/dnn/src/x86/matrix_mul/algos.h index f388983ee9fbadbd62d6f403eab6fa201c7030ac..3664b0e7f206b4920516310b352e4ef148857289 100644 --- a/dnn/src/x86/matrix_mul/algos.h +++ b/dnn/src/x86/matrix_mul/algos.h @@ -28,7 +28,7 @@ public: PackMode packmode() const override { return PackMode::NO_PACK; } }; -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL class MatrixMulImpl::AlgoF32MKLPackA : public AlgoBase { public: bool is_reproducible() const override { return true; } @@ -106,7 +106,7 @@ public: }; #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN class MatrixMulImpl::AlgoInt8x8x32Mkldnn : public AlgoBase { public: bool is_reproducible() const override { return true; } diff --git a/dnn/src/x86/matrix_mul/opr_impl.cpp b/dnn/src/x86/matrix_mul/opr_impl.cpp index 
d85d6df694c85cbb1cf1f875e5ea6361d20b28c6..6101c5e6e8a0c64587ad860fac7818b5f37d8b3e 100644 --- a/dnn/src/x86/matrix_mul/opr_impl.cpp +++ b/dnn/src/x86/matrix_mul/opr_impl.cpp @@ -25,13 +25,13 @@ void* const MatrixMulImpl::sm_x86_algo_type = &x86_algo_type_storage; class MatrixMulImpl::AlgoPack : NonCopyableObj { AlgoF32Blas f32blas; -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL AlgoF32MKLPackA f32mkl_packa; #endif #if MEGDNN_X86_WITH_VNNI AlgoInt8x8x32Vnni algoint8x8x32vnni; #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN AlgoInt8x8x32Mkldnn algoint8x8x32mkldnn; #endif AlgoInt8x8x32AVX2M4N16K2 algoint8x8x32avx2_m4n16k2; @@ -42,7 +42,7 @@ class MatrixMulImpl::AlgoPack : NonCopyableObj { public: AlgoPack() { if (is_supported(SIMDType::VNNI)) { -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN all_algos.emplace_back(&algoint8x8x32mkldnn); #endif #if MEGDNN_X86_WITH_VNNI @@ -53,11 +53,11 @@ public: all_algos.emplace_back(&algoint8x8x32avx2_m2n4k16); all_algos.emplace_back(&algoint8x8x32sse_m4n8k2); all_algos.emplace_back(&algof32mk8_8x8); -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN all_algos.emplace_back(&algoint8x8x32mkldnn); #endif all_algos.emplace_back(&f32blas); -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL all_algos.emplace_back(&f32mkl_packa); #endif } diff --git a/dnn/src/x86/matrix_mul/opr_impl.h b/dnn/src/x86/matrix_mul/opr_impl.h index 5d56aeb9a8baaa51ca892d0553edb0d8bde9f924..77e0dbf6bbcb498d19727e26868e4f9e808b5ab5 100644 --- a/dnn/src/x86/matrix_mul/opr_impl.h +++ b/dnn/src/x86/matrix_mul/opr_impl.h @@ -26,14 +26,14 @@ public: protected: static void* const sm_x86_algo_type; class AlgoF32Blas; -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL class AlgoF32MKLPackA; #endif #if MEGDNN_X86_WITH_VNNI class AlgoInt8x8x32Vnni; #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN class AlgoInt8x8x32Mkldnn; #endif diff --git 
a/dnn/src/x86/pooling/opr_impl.cpp b/dnn/src/x86/pooling/opr_impl.cpp index d08c5e7db60e2564e56fa006d9584c9e9364297c..e7e14eb03d213b226bdac6fc06af5a7eb1c8bad8 100644 --- a/dnn/src/x86/pooling/opr_impl.cpp +++ b/dnn/src/x86/pooling/opr_impl.cpp @@ -17,7 +17,7 @@ #include "src/x86/pooling/pooling_special_cases.h" #include "src/x86/utils.h" -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN #include "mkldnn.hpp" #endif @@ -45,7 +45,7 @@ WorkspaceBundle get_bundle(const TensorLayout& src, const TensorLayout& dst, return ws; } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN template dnnl::memory tensor_to_mkl_memory(_megdnn_tensor_in src, const dnnl::engine& mkldnn_eng, @@ -164,7 +164,7 @@ void PoolingImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, return; } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN // Mkldnn provide optimized code for nhwc int8 pooling now. // Mkldnn can not change the layout automatic. diff --git a/dnn/src/x86/utils.cpp b/dnn/src/x86/utils.cpp index 57aecb3881515fc88645f314f3813988919a708f..11a0d7f7f9f3719614e29f4521d617b8231b75f6 100644 --- a/dnn/src/x86/utils.cpp +++ b/dnn/src/x86/utils.cpp @@ -18,7 +18,7 @@ #include #endif -#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) +#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS #include #endif diff --git a/dnn/test/x86/conv_bias.cpp b/dnn/test/x86/conv_bias.cpp index deb2a843100fa652de67392a824fbca3e165e75c..7511d4d3f6db201c6a70e622fc3c7da5699c2120 100644 --- a/dnn/test/x86/conv_bias.cpp +++ b/dnn/test/x86/conv_bias.cpp @@ -777,7 +777,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) { .execs({arg.src, arg.filter, {}, {}, {}}); \ } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) { cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); } @@ -846,14 +846,14 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32) { {arg.src, arg.filter, 
arg.bias, {}, {}}); \ } -#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) +#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS cb("IM2COLMATMUL:X86_F32_BLAS"); #endif #undef cb } -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) { using namespace conv_bias; std::vector args; @@ -973,7 +973,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) { .execs({arg.src, arg.filter, {}, {}, {}}); \ } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN if (x86::is_supported(x86::SIMDType::VNNI)) { cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); } @@ -1057,7 +1057,7 @@ TEST_F(X86, CONV_BIAS_MATMUL) { } } #if MEGDNN_WITH_BENCHMARK -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN static void x86_benchmark_fp32_mkldnn(Handle* handle) { constexpr size_t RUNS = 30; param::ConvBias param; @@ -1304,7 +1304,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { } /*********************************** End winograd ************************/ -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN static void x86_correctness_fp32_mkldnn_run( Checker& checker, UniformIntRNG& rng, Handle* handle, ConvBiasForward::BiasMode bias_mode, diff --git a/dnn/test/x86/convolution.cpp b/dnn/test/x86/convolution.cpp index fb19c533cc61ec10815b3458acca00983a670a35..039db8d4f9f0b63813c9786331e803d1d6032b99 100644 --- a/dnn/test/x86/convolution.cpp +++ b/dnn/test/x86/convolution.cpp @@ -20,7 +20,7 @@ #include "test/common/workspace_wrapper.h" namespace { -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN struct ConvArg { size_t batch_size, fh, sh, ph, ic, ih, iw, oc, groups; }; @@ -224,7 +224,7 @@ TEST_F(X86, DEFAULT_CONV_MATMUL) { } } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN TEST_F(X86, CONVOLUTION_FORWARD_INT8) { Checker checker(handle()); checker.set_before_exec_callback( @@ -369,7 +369,7 @@ TEST_F(X86, 
CONVOLUTION_DIRECT_MKLDNN_C8) { #endif #if MEGDNN_WITH_BENCHMARK -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN TEST_F(X86, BENCHMARK_CONVOLUTION_I8x8x32_MKLDNN) { using namespace convolution; using Param = param::Convolution; diff --git a/dnn/test/x86/matrix_mul.cpp b/dnn/test/x86/matrix_mul.cpp index c9760f1a83d040d3377d1c9b3a80ea72fb5a8517..1b6d8038697dd9c0a73ac2fc6d8ed8edb3c1e68b 100644 --- a/dnn/test/x86/matrix_mul.cpp +++ b/dnn/test/x86/matrix_mul.cpp @@ -26,7 +26,7 @@ TEST_F(X86, MATRIX_MUL_VNNI_8X8X32) { } #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN TEST_F(X86, MATRIX_MUL_MKLDNN_8X8X32) { if (is_supported(SIMDType::VNNI)) { matrix_mul::check_matrix_mul(dtype::Int8{}, dtype::Int8{}, @@ -52,7 +52,7 @@ TEST_F(X86, MATRIX_MUL_SSE_8X8X32) { handle(), "X86_INT8X8X32_SSE_4X8X2"); } -#if defined(MEGDNN_X86_WITH_MKL) +#if MEGDNN_X86_WITH_MKL TEST_F(X86, MATRIX_MUL_MKL_PACKA) { matrix_mul::check_matrix_mul(dtype::Float32{}, dtype::Float32{}, dtype::Float32{}, handle(), @@ -93,7 +93,7 @@ TEST_F(X86, BENCHMARK_MATRIX_MUL_8X8X32) { AlgoChecker("X86_INT8X8X32_VNNI")); #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN Benchmarker benchmarker_mkldnn(handle()); benchmarker_mkldnn.set_times(RUNS) .set_dtype(0, dtype::Int8{}) @@ -162,7 +162,7 @@ TEST_F(X86, BENCHMARK_MATRIX_MUL_8X8X32) { } #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN if (is_supported(SIMDType::VNNI)) { auto mkldnn_used = benchmarker_mkldnn.exec({{M, K}, {K, N}, {}}) / RUNS; diff --git a/dnn/test/x86/pooling.cpp b/dnn/test/x86/pooling.cpp index b500ddf57fe32960b78adce473551bf9f8a4f611..a4873d4b3793e6dbef7d60d4d536605dd3c6e51a 100644 --- a/dnn/test/x86/pooling.cpp +++ b/dnn/test/x86/pooling.cpp @@ -24,7 +24,7 @@ TEST_F(X86, POOLING) { } } -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN TEST_F(X86, POOLING88) { Checker checker(handle()); auto args = pooling::get_args(); @@ -105,7 +105,7 @@ 
TEST_F(X86_MULTI_THREADS, BENCHMARK_POOLING) { test_x86_megdnn_pooling(handle()); } #endif -#if defined(MEGDNN_X86_WITH_MKL_DNN) +#if MEGDNN_X86_WITH_MKL_DNN TEST_F(X86, POOLING_INT8) { auto args = pooling::get_args(); for (auto&& arg : args) { diff --git a/src/megbrain_build_config.h.in b/src/megbrain_build_config.h.in new file mode 100644 index 0000000000000000000000000000000000000000..3e0327bb8ecf18e33223e8c8f9b43b3b01d8631f --- /dev/null +++ b/src/megbrain_build_config.h.in @@ -0,0 +1,198 @@ +/** + * \file src/megbrain_build_config.h.in + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#ifndef _HEADER_MGB_BUILD_CONFIG +#define _HEADER_MGB_BUILD_CONFIG + +#cmakedefine01 MGB_CUDA +#cmakedefine01 MGB_ASSERT_LOC +#cmakedefine01 MGB_ENABLE_DEBUG_UTIL +#cmakedefine01 MGB_ENABLE_LOGGING +#cmakedefine01 MGB_ENABLE_GRAD +#cmakedefine01 MGB_VERBOSE_TYPEINFO_NAME +#cmakedefine01 MGB_BUILD_SLIM_SERVING +#cmakedefine01 MGB_ENABLE_EXCEPTION +#cmakedefine01 MGB_JIT +#cmakedefine01 MGB_JIT_HALIDE +#cmakedefine01 MGB_ENABLE_TENSOR_RT +#cmakedefine01 MGB_ENABLE_JSON +#cmakedefine01 MGB_HAVE_THREAD +#cmakedefine01 MGB_ENABLE_OPR_MM +#cmakedefine01 MEGDNN_ENABLE_MANGLING + +// DNN related flags +// Platform macro's +#cmakedefine01 MEGDNN_WITH_CUDA + + +#cmakedefine01 MEGDNN_X86_WITH_MKL +#cmakedefine01 MEGDNN_X86_WITH_OPENBLAS +#cmakedefine01 MEGDNN_X86_WITH_MKL_DNN +#cmakedefine01 MEGDNN_ENABLE_RTTI +#cmakedefine01 MEGDNN_ENABLE_LOGGING +#cmakedefine01 MEGDNN_X86_32 +#cmakedefine01 MEGDNN_ENABLE_EXCEPTIONS +#cmakedefine01 MEGDNN_NAIVE +#cmakedefine01 MEGDNN_X86 +#cmakedefine01 MEGDNN_X86_64 +#cmakedefine01 MEGDNN_64_BIT +#cmakedefine01
MEGDNN_THREADS_512 +#cmakedefine01 MEGDNN_ENABLE_MULTI_THREADS + +// whether cuda is available +#ifndef MGB_CUDA +#define MGB_CUDA 1 +#endif + + +// whether to include file/line location for assert message +#ifndef MGB_ASSERT_LOC +#define MGB_ASSERT_LOC 1 +#endif + +// whether to enable utils/debug.h and other debug methods +#ifndef MGB_ENABLE_DEBUG_UTIL +#define MGB_ENABLE_DEBUG_UTIL 1 +#endif + +// whether to enable logging +#ifndef MGB_ENABLE_LOGGING +#define MGB_ENABLE_LOGGING 1 +#endif + +// whether to enable registering opr grad functions +#ifndef MGB_ENABLE_GRAD +#define MGB_ENABLE_GRAD 1 +#endif + +// whether to include actual class name in mgb::Typeinfo object; if this is +// disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. +#ifndef MGB_VERBOSE_TYPEINFO_NAME +#define MGB_VERBOSE_TYPEINFO_NAME 1 +#endif + +// whether to enable configuring megbrain internals through env vars +#ifndef MGB_ENABLE_GETENV +#define MGB_ENABLE_GETENV 1 +#endif + +// whether to remove unnecessary features when used for serving +#ifndef MGB_BUILD_SLIM_SERVING +#define MGB_BUILD_SLIM_SERVING 0 +#endif + +// whether to enable exception +#ifndef MGB_ENABLE_EXCEPTION +#if __EXCEPTIONS +#define MGB_ENABLE_EXCEPTION 1 +#else +#define MGB_ENABLE_EXCEPTION 0 +#endif +#endif + +// whether <thread> is available and usable +#ifndef MGB_HAVE_THREAD +#define MGB_HAVE_THREAD 1 +#endif + +// whether to trade thread safety for memory usage +#ifndef MGB_THREAD_SAFE +#define MGB_THREAD_SAFE MGB_HAVE_THREAD +#endif + +// whether to enable JIT +#ifndef MGB_JIT +#define MGB_JIT 1 +#endif +#ifndef MGB_JIT_HALIDE +#define MGB_JIT_HALIDE 0 +#endif + + +// whether to enable TensorRT support +#ifndef MGB_ENABLE_TENSOR_RT +#define MGB_ENABLE_TENSOR_RT MGB_CUDA +#endif + +// whether to enable fastrun profile +#ifndef MGB_ENABLE_FASTRUN +#define MGB_ENABLE_FASTRUN 1 +#endif + + +/* ================= following are more fine-grained controls ================= */ + +// whether to enable json dumper 
+#ifndef MGB_ENABLE_JSON +#define MGB_ENABLE_JSON !MGB_BUILD_SLIM_SERVING +#endif + +// whether to enable distributed communication +#ifndef MGB_ENABLE_OPR_MM +#define MGB_ENABLE_OPR_MM 0 +#endif + +/* ================= DNN related flags ================= */ + +// whether to use mkl lib +#ifndef MEGDNN_X86_WITH_MKL +#define MEGDNN_X86_WITH_MKL 0 +#endif + +// whether to enable rtti +#ifndef MEGDNN_ENABLE_RTTI +#define MEGDNN_ENABLE_RTTI 1 +#endif + +// whether to enable mangling +#ifndef MEGDNN_ENABLE_MANGLING +#define MEGDNN_ENABLE_MANGLING !MEGDNN_ENABLE_RTTI +#endif + +// whether to enable logging +#ifndef MEGDNN_ENABLE_LOGGING +#define MEGDNN_ENABLE_LOGGING MGB_ENABLE_LOGGING +#endif + +// whether to enable exception +#ifndef MEGDNN_ENABLE_EXCEPTIONS +#define MEGDNN_ENABLE_EXCEPTIONS MGB_ENABLE_EXCEPTION +#endif + +// whether to build naive +#ifndef MEGDNN_NAIVE +#define MEGDNN_NAIVE 0 +#endif + +// whether to build x86 +#ifndef MEGDNN_X86 +#define MEGDNN_X86 0 +#endif + +// whether to use cuda thread 512 +#ifndef MEGDNN_THREADS_512 +#define MEGDNN_THREADS_512 0 +#endif + +// whether to enable intra-op multi threads +#ifndef MEGDNN_ENABLE_MULTI_THREADS +#define MEGDNN_ENABLE_MULTI_THREADS 1 +#endif + +#ifndef MEGDNN_X86_WITH_OPENBLAS +#define MEGDNN_X86_WITH_OPENBLAS 0 +#endif + +#ifndef MEGDNN_X86_WITH_MKL_DNN +#define MEGDNN_X86_WITH_MKL_DNN 0 +#endif + +#endif // _HEADER_MGB_BUILD_CONFIG