feat(cmake/windows/xp/sp2/inference): implement inference on windows xp

(os version >= sp2) build with cmake

* cmake build support(xp sp2):
(dbg)EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2=ON"
./scripts/cmake-build/host_build.sh -m -d

(opt)EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2=ON"
./scripts/cmake-build/host_build.sh -m

* cmake build support(xp sp3):
(dbg)EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP=ON"
./scripts/cmake-build/host_build.sh -m -d

(opt)EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP=ON"
./scripts/cmake-build/host_build.sh -m

* internal behavior:
will define MGB_HAVE_THREAD=0 when enable
-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2=ON

* refer to
https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
xp sp2(x86) does not fully support the vc runtime, caused by KERNEL32.dll not
implementing some base apis needed by c++ std functions, for example,
std::mutex/std::thread/std::condition_variable. As a workaround, we will
disable some MegEngine features on xp sp2 env, for example, multi-thread etc!

* about DNN_MUTEX/MGB_MUTEX/LITE_MUTEX, if your code will be built into the
inference code (even CPU backends), please replace std::mutex with DNN_MUTEX/MGB_MUTEX.

* about multi-thread, if your code needs multi-thread support, please
enable it when MGB_HAVE_THREAD=1

* about test build env status
1: Visual Studio 2019(MSVC version <= 14.26.28801)---- pass
2: Visual Studio 2019(MSVC version > 14.26.28801) ---- failed
   caused by this 'new' version will put VCR depends on win7
   KERNEL32.DLL, this may be fixed at Visual Studio 2019 later version
   but we do not test at this MR merge point
3: Visual Studio 2017   ---------- pass
4: Visual Studio 2014   ---------- pass
GitOrigin-RevId: ea6e1f8b4fea9aa03594e3af8d59708b4cdf7bdc
......@@ -66,6 +66,60 @@ option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
if(MSVC OR WIN32)
  # Build inference binaries that can be deployed on Windows XP.
  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)

  # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2: special mode for Windows XP SP2 (32bit).
  #
  # Internal behavior:
  #   1: MGB_HAVE_THREAD is forced to 0, which means only single thread is supported.
  #   2: some features are disabled, e.g. MGB_ENABLE_JSON and var sanity check. Do not
  #      worry too much about this: if you need those features to debug, run the same
  #      model in a non-XP-SP2 env, e.g. Win7 or XP SP3 (build without
  #      MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2).
  #   3: only MegEngine (load_and_run) and the MegEngineLite API are supported on XP SP2;
  #      some debug utils, e.g. megbrain_test/megdnn_test, can not run, mostly because
  #      of the gtest source code.
  #
  # Notes for SDK callers:
  #   1: as mutexes are removed, when you use MSVC native APIs (e.g. CreateThread) to
  #      start several MegEngine instances in the same process, please call the
  #      MegEngine API (init/run) as serially as possible. Also, please do not use
  #      std::thread std::mutex/std::this_thread_id at the SDK caller side!!!
  #
  # To check whether a dll/exe can be deployed on Windows XP SP2, please check out
  # scripts/misc/check_windows_xp_sp2_deploy.py
  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 "Enable deploy inference on Windows xp sp2" OFF)

  # A PE file linked by LLVM lld can not run in a Windows XP env, so we force the use of
  # link.exe, which is always located at
  # Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
  #
  # MGE_ARCH is quoted so that an empty/undefined value yields a clean "false"
  # instead of a malformed if() expression.
  if("${MGE_ARCH}" STREQUAL "i386")
    set(CMAKE_LINKER "link.exe")
    message(STATUS "Force use MSVS link when build for i386")
  endif()

  if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    # Reject configurations that can not work on Windows XP.
    if(NOT "${MGE_ARCH}" STREQUAL "i386")
      message(FATAL_ERROR "only support 32bit when build for Windows xp")
    endif()
    if(NOT MGE_INFERENCE_ONLY)
      message(FATAL_ERROR "only support inference when build for Windows xp")
    endif()
    if(MGE_WITH_CUDA)
      message(FATAL_ERROR "do not support CUDA when build for Windows xp")
    endif()

    # Windows XP SP3 has a thread issue; work around it: target the XP API level
    # (_WIN32_WINNT=0x0501) and turn off MSVC thread-safe initialization of local
    # statics (/Zc:threadSafeInit-).
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")

    # Mark the produced PE files as Windows XP compatible (subsystem version 5.01).
    add_link_options("/SUBSYSTEM:CONSOLE,5.01")

    # Some old libs (for example mkl for xp) use legacy stdio, so we force linking
    # legacy_stdio_definitions.
    add_link_options("/DEFAULTLIB:legacy_stdio_definitions.lib")

    if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
      # Lets the C/C++ sources compile out code paths unsupported on XP SP2.
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
    endif()
  else()
    # Regular Windows build: target the Windows 7 API level.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0601")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0601")
  endif()
endif()
if(MSVC OR WIN32)
message(STATUS "windows force cudnn static link")
......@@ -263,7 +317,7 @@ if(MSVC OR WIN32)
set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break")
set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819")
set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /wd4819")
set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")
......@@ -675,7 +729,7 @@ if(MGE_WITH_CUDA)
endif()
###########please add_subdirectory from here###############
if((${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") AND NOT APPLE)
if((${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") AND NOT APPLE AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
if(MGE_ENABLE_CPUINFO)
message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
......@@ -821,6 +875,14 @@ if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT OR ANDROID)
set(MGB_HAVE_THREAD 1)
endif()
# XP SP2 deployment on Windows: turn off thread support and JSON support,
# overriding whatever value was selected earlier in this file.
if((MSVC OR WIN32) AND MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  set(MGB_ENABLE_JSON 0)
  set(MGB_HAVE_THREAD 0)
  message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
endif()
if(MGE_WITH_TEST)
# use intra-op multi threads
set(MEGDNN_ENABLE_MULTI_THREADS 1)
......@@ -851,6 +913,13 @@ else()
set(MGB_ENABLE_DEBUG_UTIL 0)
endif()
# Debug utils are always disabled for Windows i386 builds, regardless of the
# value chosen above.
if(MSVC OR WIN32)
  # MGE_ARCH is quoted so that an empty/undefined value compares as "not i386"
  # instead of producing a malformed if() expression.
  if("${MGE_ARCH}" STREQUAL "i386")
    set(MGB_ENABLE_DEBUG_UTIL 0)
    message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
  endif()
endif()
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})
......
......@@ -4,6 +4,7 @@ find_path(MKL_ROOT_DIR
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32
$ENV{MKLDIR}
/opt/intel/mkl/*/
/opt/intel/cmkl/*/
......
......@@ -129,6 +129,11 @@ Windows shell env(bash from windows-git), infact if you can use git command on W
```
bash.exe ./third_party/prepare.sh
bash.exe ./third_party/install-mkl.sh
if you are using github MegEngine and build for Windows XP, please
1: download mkl for xp from: http://registrationcenter-download.intel.com/akdlm/irc_nas/4617/w_mkl_11.1.4.237.exe
2: install exe, then from install dir:
2a: cp include file to third_party/mkl/x86_32/include/
2b: cp lib file to third_party/mkl/x86_32/lib/
```
# How to build
......@@ -137,6 +142,10 @@ bash.exe ./third_party/install-mkl.sh
* command:
```
1: host build just use scripts:scripts/cmake-build/host_build.sh
1a: build for Windows for XP (sp3): (dbg) EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP=ON" ./scripts/cmake-build/host_build.sh -m -d
(opt) EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP=ON" ./scripts/cmake-build/host_build.sh -m
1b: build for Windows for XP (sp2): (dbg) EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2=ON" ./scripts/cmake-build/host_build.sh -m -d
(opt) EXTRA_CMAKE_ARGS="-DMGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2=ON" ./scripts/cmake-build/host_build.sh -m
2: cross build to ARM-Android: scripts/cmake-build/cross_build_android_arm_inference.sh
3: cross build to ARM-Linux: scripts/cmake-build/cross_build_linux_arm_inference.sh
4: cross build to IOS: scripts/cmake-build/cross_build_ios_arm_inference.sh
......
此差异已折叠。
......@@ -834,9 +834,7 @@ struct CpuCompNode::Pool {
void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); }
};
#if !__DEPLOY_ON_XP_SP2__
std::recursive_mutex mtx;
#endif
MGB_RECURSIVE_MUTEX mtx;
// use global memory pool to ensure object memory accessible even after
// global finalize
std::aligned_storage_t<sizeof(CompNodeRecorderImpl),
......@@ -868,9 +866,7 @@ void CpuCompNode::foreach (thin_function<void(CompNode)> callback) {
for (size_t i = 0;; ++i) {
CompNode cur;
{
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
if (i >= sm_pool->nr_used_impl_storage)
return;
cur = make_comp_node_from_impl(
......@@ -917,9 +913,7 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator,
locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT,
"failed to load cpu for device:%d stream:%d", locator.device,
locator.stream);
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
// encode both device ID and type into a int
mgb_assert(locator_logical.device >= -1 ||
......@@ -977,9 +971,7 @@ void CpuCompNode::sync_all() {
if (!sm_pool)
return;
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
for (auto&& i : sm_pool->locator2impl)
i.second->sync();
for (auto&& i : sm_pool->locator2impl_multi_thread)
......@@ -1061,9 +1053,7 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by(
auto waiter = [this, version]() {
while (m_record_nr_finish.load(std::memory_order_acquire) < version) {
#if !__DEPLOY_ON_XP_SP2__
std::unique_lock<std::mutex> lk{m_dev_wait_mtx};
#endif
if (m_record_nr_finish.load(std::memory_order_acquire) >= version) {
break;
}
......@@ -1092,12 +1082,10 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() {
}
m_record_nr_finish.fetch_add(1, std::memory_order_release);
#if !__DEPLOY_ON_XP_SP2__
if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) {
MGB_LOCK_GUARD(m_dev_wait_mtx);
m_dev_wait_cv.notify_all();
}
#endif
}
bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() {
......@@ -1116,15 +1104,11 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() {
m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release);
for (;;) {
#if !__DEPLOY_ON_XP_SP2__
std::unique_lock<std::mutex> lock{m_dev_wait_mtx};
#endif
if (finished()) {
break;
}
#if !__DEPLOY_ON_XP_SP2__
m_dev_wait_cv.wait(lock);
#endif
}
m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release);
}
......
......@@ -36,6 +36,7 @@ struct TaskElem {
size_t nr_parallelism;
};
#if MGB_HAVE_THREAD
/**
* \brief Worker and related flag
*/
......@@ -53,7 +54,6 @@ public:
bool affinity_flag{false};
};
#if MGB_HAVE_THREAD
/**
* \brief ThreadPool execute the task in multi-threads(nr_threads>1) mode , it
* will fallback to single-thread mode if nr_thread is 1.
......
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#if !__DEPLOY_ON_XP_SP2__
#include "megbrain/plugin/var_sanity_check.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/graph/event.h"
......@@ -363,5 +364,6 @@ void VarSanityCheck::setup_input_checker(bool add_debug_log,
env.dispatch_on_comp_node(cn, callback);
}
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -11,6 +11,7 @@
#pragma once
#if !__DEPLOY_ON_XP_SP2__
#include "megbrain/exception.h"
#include "megbrain/graph.h"
#include "megbrain/plugin/base.h"
......@@ -106,5 +107,6 @@ public:
VarNode* var, const ComputingGraph::VarReceiverInfo& recv);
};
} // namespace mgb
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -13,7 +13,6 @@ CONDA_BASE_URL=https://anaconda.org/intel
OS=$(uname -s)
FILE_PREFIX=null
TAR=tar
if [ $OS = "Darwin" ];then
FILE_PREFIX=osx
elif [ $OS = "Linux" ];then
......@@ -32,20 +31,24 @@ echo "config FILE_PREFIX to: $FILE_PREFIX"
rm -rf mkl
for platform in 32 64
do
if [ $OS = "Darwin" ]&&[ $platform = 32 ];then
echo "strip 32 bit file for Darwin"
continue
fi
mkdir -p mkl/x86_${platform}
for package in "mkl-include" "mkl-static"
if [[ -z ${ALREADY_INSTALL_MKL} ]]
then
echo "init mkl from software.intel.com"
for platform in 32 64
do
DOWNLOAD_FILE=${package}-${MKL_VERSION}-intel_${MKL_PATCH}.tar.bz2
echo "Installing ${DOWNLOAD_FILE} for x86_${platform}..."
URL=${CONDA_BASE_URL}/${package}/${MKL_VERSION}/download/$FILE_PREFIX-${platform}/${DOWNLOAD_FILE}
echo "try download mkl package from: ${URL}"
wget -q --show-progress "${URL}" -O mkl/x86_${platform}/${DOWNLOAD_FILE}
$TAR xvj -C mkl/x86_${platform} -f mkl/x86_${platform}/${DOWNLOAD_FILE}
if [ $OS = "Darwin" ]&&[ $platform = 32 ];then
echo "strip 32 bit file for Darwin"
continue
fi
mkdir -p mkl/x86_${platform}
for package in "mkl-include" "mkl-static"
do
DOWNLOAD_FILE=${package}-${MKL_VERSION}-intel_${MKL_PATCH}.tar.bz2
echo "Installing ${DOWNLOAD_FILE} for x86_${platform}..."
URL=${CONDA_BASE_URL}/${package}/${MKL_VERSION}/download/$FILE_PREFIX-${platform}/${DOWNLOAD_FILE}
echo "try download mkl package from: ${URL}"
wget -q --show-progress "${URL}" -O mkl/x86_${platform}/${DOWNLOAD_FILE}
$TAR xvj -C mkl/x86_${platform} -f mkl/x86_${platform}/${DOWNLOAD_FILE}
done
done
done
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部