Commit c68e6695 authored by: Megvii Engine Team

feat(bazel/windows/xp/sp2/inference): implement inference on windows xp

(os version >= sp2), built with bazel

* bazel build support (define __DEPLOY_ON_XP_SP2__ when deploying on xp sp2):
(dbg)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c dbg --copt "-D__DEPLOY_ON_XP_SP2__=1"

(opt)./bazel build //brain/megbrain:load_and_run --cpu='x86_windows_xp'
--compiler='clang_cl' -c opt --copt "-D__DEPLOY_ON_XP_SP2__=1"

* internal behavior:
MGB_HAVE_THREAD=0 will be defined when __DEPLOY_ON_XP_SP2__ is enabled,
as sketched below
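
A minimal sketch of that gating (an assumption for illustration: the real
wiring lives in the build configuration, which this diff does not show):

    #if __DEPLOY_ON_XP_SP2__
    // xp sp2 has no usable std::thread, so force single-thread mode
    #define MGB_HAVE_THREAD 0
    #else
    #define MGB_HAVE_THREAD 1
    #endif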

* refer to
https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
xp sp2 (x86) does not fully support the vc runtime, because KERNEL32.dll does
not implement some base apis needed by c++ std functions, for example
std::mutex/std::thread/std::condition_variable. As a workaround, we disable
some MegEngine features (for example, multi-threading) in the xp sp2 env!

* about DNN_MUTEX/MGB_MUTEX: if your code will be built into inference
code (even for CPU backends), please replace std::mutex with
DNN_MUTEX/MGB_MUTEX, as shown in the sketch below
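
A minimal sketch of the replacement pattern (KernCache is a hypothetical
class; DNN_MUTEX and MEGDNN_LOCK_GUARD are the aliases introduced in this
diff):

    #include <cstdint>
    #include <unordered_map>

    class KernCache {
        DNN_MUTEX m_mtx;  // was: std::mutex m_mtx;
        std::unordered_map<uint64_t, int> m_map;

    public:
        int get(uint64_t key) {
            // was: std::lock_guard<std::mutex> lg{m_mtx};
            MEGDNN_LOCK_GUARD(m_mtx);
            auto it = m_map.find(key);
            return it == m_map.end() ? -1 : it->second;
        }
    };

On xp sp2 builds DNN_MUTEX degrades to size_t and MEGDNN_LOCK_GUARD to a
no-op, so the same code still compiles in single-thread mode.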

* about multi-thread: if your code needs multi-thread support, please only
enable it when MGB_HAVE_THREAD=1, as in the sketch below
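
A minimal sketch of the expected guard (run_parallel is a hypothetical
helper, not part of this diff):

    #include <cstddef>
    #include <functional>
    #if MGB_HAVE_THREAD
    #include <thread>
    #include <vector>
    #endif

    // run `work` nr times: in parallel when threads are available,
    // inline on xp sp2 where MGB_HAVE_THREAD=0
    void run_parallel(const std::function<void()>& work, size_t nr) {
    #if MGB_HAVE_THREAD
        std::vector<std::thread> workers;
        for (size_t i = 0; i < nr; ++i)
            workers.emplace_back(work);
        for (auto& t : workers)
            t.join();
    #else
        for (size_t i = 0; i < nr; ++i)
            work();
    #endif
    }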

* about tested build env status
1: Visual Studio 2019 (MSVC version <= 14.26.28801) ---- pass
2: Visual Studio 2019 (MSVC version > 14.26.28801) ----- failed
   caused by this 'new' version making the VC runtime depend on win7
   KERNEL32.DLL; this may be fixed in a later Visual Studio 2019 version,
   but we did not test that at the time this MR was merged
3: Visual Studio 2017   ---------- pass
4: Visual Studio 2014   ---------- pass
GitOrigin-RevId: 65ac48b95e99f2c510fe5db449cc8182d682e113
Parent 125406e3
......@@ -60,10 +60,10 @@ T deserialize_pod(const std::string& data, size_t& offset) {
ErrorHandler* ErrorHandler::sm_inst;
ErrorHandler* ErrorHandler::inst() {
static std::mutex mtx;
static DNN_MUTEX mtx;
static DefaultErrorHandler default_handler;
if (megdnn_unlikely(!sm_inst)) {
std::lock_guard<std::mutex> lg{mtx};
MEGDNN_LOCK_GUARD(mtx);
if (!sm_inst) {
sm_inst = &default_handler;
}
......
......@@ -145,7 +145,7 @@ init_inter_tab_1d(InterpolationMode imode, float* tab, int tabsz) {
#if MEGDNN_X86
DEF_FUN(const int16_t*) get_linear_ic4_table() {
auto table_holder = &sm_tab_linear;
std::lock_guard<std::mutex> lg{table_holder->mtx};
MEGDNN_LOCK_GUARD(table_holder->mtx);
float* tab = nullptr;
short* itab = nullptr;
MEGDNN_MARK_USED_VAR(tab);
......@@ -175,7 +175,7 @@ DEF_FUN(const void*) get_table(InterpolationMode imode, bool fixpt) {
default:
megdnn_throw(("unsupported interpolation mode"));
}
std::lock_guard<std::mutex> lg{table_holder->mtx};
MEGDNN_LOCK_GUARD(table_holder->mtx);
float* tab = nullptr;
short* itab = nullptr;
......
......@@ -134,7 +134,7 @@ private:
};
struct TableHolderBase {
std::mutex mtx;
DNN_MUTEX mtx;
//! get table pointer; return whether already init
virtual bool get(float**, int16_t**) = 0;
......
......@@ -39,10 +39,10 @@ using Mode = param::Elemwise::Mode;
using ModeTrait = ElemwiseForward::ModeTrait;
const ModeTrait& ModeTrait::from_mode(Mode mode) {
static std::mutex mtx;
static DNN_MUTEX mtx;
static std::vector<ModeTrait> traits;
std::lock_guard<std::mutex> _lock(mtx);
MEGDNN_LOCK_GUARD(mtx);
if (traits.empty()) {
auto get = [&](Mode m) -> ModeTrait& {
......
......@@ -28,10 +28,10 @@ void check_dtype(const ModeTrait& trait, size_t i, const TensorLayout& src) {
} // anonymous namespace
const ModeTrait& ModeTrait::from_mode(Mode mode) {
static std::mutex mtx;
static DNN_MUTEX mtx;
static std::vector<ModeTrait> traits;
std::lock_guard<std::mutex> _lock(mtx);
MEGDNN_LOCK_GUARD(mtx);
auto make_check_dtype_func = [](DType expected) {
auto func = [expected](DType dtype) {
......
......@@ -70,7 +70,7 @@ protected:
MIDOUT_BEGIN(dnn_src_common_handle_impl, Opr, idx) {
static_assert(idx < NR_HELPER_OPRS, "invalid idx");
if (!self->m_helper_oprs[idx]) {
std::lock_guard<std::mutex> lg{self->m_helper_oprs_mtx};
MEGDNN_LOCK_GUARD(self->m_helper_oprs_mtx);
if (!self->m_helper_oprs[idx]) {
self->m_helper_oprs[idx] =
self->template create_operator<Opr>();
......@@ -88,7 +88,7 @@ protected:
private:
std::array<std::unique_ptr<OperatorBase>, NR_HELPER_OPRS> m_helper_oprs;
std::mutex m_helper_oprs_mtx;
DNN_MUTEX m_helper_oprs_mtx;
};
} // namespace megdnn
......
......@@ -38,7 +38,7 @@ const std::shared_ptr<Handle>& inplace_cpu_handle(int debug_level = 0);
*/
template <int nr_opr = 1>
class CpuOprDelegationStorage {
std::mutex m_mtx;
DNN_MUTEX m_mtx;
std::shared_ptr<Handle> m_handle;
std::unique_ptr<OperatorBase> m_oprs[nr_opr];
......
......@@ -604,7 +604,7 @@ TensorLayout LowbitsAlignedTensorFormatBase::collapse_contiguous_spec(
TensorFormat Image2DPack4TensorFormat::make_raw(
size_t align_axis, size_t align_size_in_elements,
Handle::HandleVendorType vendor_type) {
static std::mutex mtx;
static DNN_MUTEX mtx;
static std::unordered_map<uint64_t,
std::unique_ptr<Image2DPack4TensorFormat>>
cache;
......@@ -641,7 +641,7 @@ TensorFormat Image2DPack4TensorFormat::change_axis(size_t axis) const {
/* ===================== LowbitsAlignedToBytesTensorFormat
* ===================== */
TensorFormat LowbitsAlignedToBytesTensorFormat::make(size_t size_nbits) {
static std::mutex mtx;
static DNN_MUTEX mtx;
static std::unordered_map<
uint64_t, std::unique_ptr<LowbitsAlignedToBytesTensorFormat>>
cache;
......
......@@ -118,8 +118,17 @@
#define megdnn_layout_msg(layout) \
std::string(#layout "=" + (layout).to_string())
#define MEGDNN_LOCK_GUARD(var) \
std::lock_guard<std::remove_cv_t<decltype(var)>> _lock_guard_##var { var }
#if __DEPLOY_ON_XP_SP2__
#define DNN_MUTEX size_t
#define MEGDNN_LOCK_GUARD(var) MEGDNN_MARK_USED_VAR(var)
#else
#define DNN_MUTEX std::mutex
#define DNN_TOKENPASTE(x, y) x##y
#define DNN_TOKENPASTE2(x, y) DNN_TOKENPASTE(x, y)
#define DNN_LOCK_GUARD_CTOR(mtx) DNN_TOKENPASTE2(__lock_guard_, __LINE__)(mtx)
#define MEGDNN_LOCK_GUARD(mtx) \
std::lock_guard<decltype(mtx)> DNN_LOCK_GUARD_CTOR(mtx)
#endif
namespace megdnn {
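
For reference, a sketch of what the non-xp branch of MEGDNN_LOCK_GUARD above
expands to when used at, say, line 42 of a source file (the line number is
illustrative):

    // MEGDNN_LOCK_GUARD(m_mtx) on line 42 expands to
    std::lock_guard<decltype(m_mtx)> __lock_guard_42(m_mtx);

The DNN_TOKENPASTE2(__lock_guard_, __LINE__) indirection yields a guard name
that is unique per line, so two guards in the same scope cannot collide.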
......@@ -487,7 +496,7 @@ struct _SafeMultipliesImplUnsigned : public std::binary_function<T, T, T> {
"implicit conversion disallowed in SafeMultiplies");
megdnn_trap();
}
};
}; // namespace megdnn
template <>
struct SafeMultiplies<size_t> : public _SafeMultipliesImplUnsigned<size_t> {};
......
......@@ -81,7 +81,7 @@ public:
}
private:
std::mutex m_mtx;
DNN_MUTEX m_mtx;
std::unordered_map<StrategyHashKey, std::unique_ptr<T>, StrategyHasher,
StrategyHashKeyEqual>
m_map_strategies;
......@@ -99,4 +99,4 @@ MatrixMulImpl::KernSizeParam get_matmul_kern_param(
} // namespace fallback
} // namespace megdnn
// vim: syntax=cpp.doxygen
\ No newline at end of file
// vim: syntax=cpp.doxygen
......@@ -110,7 +110,7 @@ struct StrategyHashParamEqual {
};
class StrategyDelegationStorage {
std::mutex m_mtx;
DNN_MUTEX m_mtx;
std::unordered_map<StrategyHashParam, std::unique_ptr<StrategyBase>,
StrategyHashParamHash, StrategyHashParamEqual>
map_strategys;
......
......@@ -11,6 +11,10 @@
#include "./opr_impl.h"
#if __DEPLOY_ON_XP_SP2__
#define MEGDNN_NO_THREAD 1
#endif
#include "src/naive/handle.h"
#if !MEGDNN_NO_THREAD
#include <thread>
......@@ -20,10 +24,10 @@ namespace megdnn {
namespace naive {
void SleepForwardImpl::exec() {
double seconds = m_param.time;
#if MEGDNN_NO_THREAD
megdnn_trap();
#else
double seconds = m_param.time;
MEGDNN_DISPATCH_CPU_KERN_OPR(
std::this_thread::sleep_for(std::chrono::microseconds(
static_cast<uint64_t>(seconds * 1e6))););
......
......@@ -52,7 +52,7 @@ class InFilePersistentCache final : public PersistentCache {
std::unordered_map<std::string, std::unordered_map<BlobStorage, BlobStorage,
BlobStorage::Hash>>
m_cache;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
template <typename Input>
void read_cache(Input& inp);
......
......@@ -32,7 +32,7 @@ namespace {
std::atomic_flag
g_default_cpu_initialized,
g_exit_handler_registered[CompNode::NR_DEVICE_TYPE];
std::mutex g_device_map_mtx;
MGB_MUTEX g_device_map_mtx;
ThinHashMap<CompNode::DeviceType, ThinHashMap<int, int>> g_device_map;
CompNode::DeviceType g_unspec_locator_type;
......
......@@ -60,7 +60,11 @@ class CpuCompNode::WorkerQueue final
sys::set_cpu_affinity({m_locator.device});
#endif
}
#if __DEPLOY_ON_XP_SP2__
__builtin_trap();
#else
sys::set_thread_name(m_locator.to_string());
#endif
}
void on_sync_all_task_finish() override {
......@@ -830,7 +834,9 @@ struct CpuCompNode::Pool {
void operator()(CompNodeRecorderImpl* p) { p->~CompNodeRecorderImpl(); }
};
#if !__DEPLOY_ON_XP_SP2__
std::recursive_mutex mtx;
#endif
// use global memory pool to ensure object memory is accessible even after
// global finalize
std::aligned_storage_t<sizeof(CompNodeRecorderImpl),
......@@ -862,7 +868,9 @@ void CpuCompNode::foreach (thin_function<void(CompNode)> callback) {
for (size_t i = 0;; ++i) {
CompNode cur;
{
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
if (i >= sm_pool->nr_used_impl_storage)
return;
cur = make_comp_node_from_impl(
......@@ -909,7 +917,9 @@ CpuCompNode::Impl* CpuCompNode::load_cpu(Locator locator,
locator.device == Locator::DEVICE_MULTITHREAD_DEFAULT,
"failed to load cpu for device:%d stream:%d", locator.device,
locator.stream);
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
// encode both device ID and type into a int
mgb_assert(locator_logical.device >= -1 ||
......@@ -967,7 +977,9 @@ void CpuCompNode::sync_all() {
if (!sm_pool)
return;
#if !__DEPLOY_ON_XP_SP2__
MGB_LOCK_GUARD(sm_pool->mtx);
#endif
for (auto&& i : sm_pool->locator2impl)
i.second->sync();
for (auto&& i : sm_pool->locator2impl_multi_thread)
......@@ -1049,7 +1061,9 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::do_device_wait_by(
auto waiter = [this, version]() {
while (m_record_nr_finish.load(std::memory_order_acquire) < version) {
#if !__DEPLOY_ON_XP_SP2__
std::unique_lock<std::mutex> lk{m_dev_wait_mtx};
#endif
if (m_record_nr_finish.load(std::memory_order_acquire) >= version) {
break;
}
......@@ -1078,10 +1092,12 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::on_finish() {
}
m_record_nr_finish.fetch_add(1, std::memory_order_release);
#if !__DEPLOY_ON_XP_SP2__
if (m_dev_wait_nr_waiter.load(std::memory_order_acquire)) {
MGB_LOCK_GUARD(m_dev_wait_mtx);
m_dev_wait_cv.notify_all();
}
#endif
}
bool CpuCompNode::CpuDispatchableBase::EventImpl::do_finished() {
......@@ -1100,11 +1116,15 @@ void CpuCompNode::CpuDispatchableBase::EventImpl::host_wait_cv() {
m_dev_wait_nr_waiter.fetch_add(1, std::memory_order_release);
for (;;) {
#if !__DEPLOY_ON_XP_SP2__
std::unique_lock<std::mutex> lock{m_dev_wait_mtx};
#endif
if (finished()) {
break;
}
#if !__DEPLOY_ON_XP_SP2__
m_dev_wait_cv.wait(lock);
#endif
}
m_dev_wait_nr_waiter.fetch_sub(1, std::memory_order_release);
}
......
......@@ -45,9 +45,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() {
return;
}
if (sm_cpu_sync_level >= 1) {
#if __DEPLOY_ON_XP_SP2__
#if MGB_HAVE_THREAD
__builtin_trap();
#else
return;
#endif
#else
while (!finished()) {
std::this_thread::yield();
}
#endif
return;
}
mgb_assert(!sm_cpu_sync_level, "invalid cpu sync level: %d",
......@@ -57,9 +65,17 @@ void CompNodeImplHelper::EventImplHelper::host_wait() {
}
void CompNodeImplHelper::EventImplHelper::host_wait_cv() {
#if __DEPLOY_ON_XP_SP2__
#if MGB_HAVE_THREAD
__builtin_trap();
#else
return;
#endif
#else
while (!finished()) {
std::this_thread::yield();
}
#endif
}
double CompNodeImplHelper::EventImplHelper::elapsed_time_until(Event& end_) {
......
......@@ -49,7 +49,7 @@ namespace mgb {
* been performed.
*/
class CompNodeImplHelper::EventImplHelper: public Event {
std::mutex m_mtx;
MGB_MUTEX m_mtx;
bool m_recorded = false, m_finished = false;
......
......@@ -59,11 +59,15 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc(
size_t size, bool allow_from_parent, bool log_stat_on_error) {
mgb_assert(size);
#if !__DEPLOY_ON_XP_SP2__
m_mutex.lock();
#endif
auto iter = m_free_blk_size.lower_bound(FreeBlock{MemAddr{0, 0}, size});
if (iter == m_free_blk_size.end()) {
#if !__DEPLOY_ON_XP_SP2__
m_mutex.unlock();
#endif
if (!allow_from_parent) {
if (log_stat_on_error) {
print_memory_state();
......@@ -87,7 +91,9 @@ MemAllocImplHelper::MemAddr MemAllocImplHelper::do_alloc(
if (remain)
insert_free_unsafe({alloc_addr + size, remain});
#if !__DEPLOY_ON_XP_SP2__
m_mutex.unlock();
#endif
return alloc_addr;
}
......@@ -267,7 +273,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) {
{
// sleep to wait for async dealloc
using namespace std::literals;
#if !__DEPLOY_ON_XP_SP2__
std::this_thread::sleep_for(0.2s);
#endif
}
get = gather_stream_free_blk_and_release_full();
mgb_log("device %d: sync all device and try to "
......
......@@ -73,7 +73,7 @@ class MemAllocImplHelper: virtual public MemAllocBase {
//! map from address to size and size iter
std::map<size_t, FreeBlockAddrInfo> m_free_blk_addr;
std::mutex m_mutex;
MGB_MUTEX m_mutex;
struct BlkByAddrIter {
decltype(m_free_blk_addr.begin()) aiter;
......
......@@ -48,7 +48,11 @@ class ComputingGraphImpl::ComputingSequence::ExecContext {
std::unique_ptr<CompNodeSeqRecorder> m_recorder;
bool has_var_sanity_check() const {
#if __DEPLOY_ON_XP_SP2__
return false;
#else
return static_cast<bool>(m_comp_seq->m_var_sanity_check);
#endif
}
void try_reset_recorder() {
......@@ -305,10 +309,12 @@ void ComputingGraphImpl::ComputingSequence::preprocess(ExecContext* ctx) {
m_owner_graph->var_node_mem_manager().alloc_var_node_mem_static();
bool first_exec = m_first_exec;
#if !__DEPLOY_ON_XP_SP2__
if (!first_exec) {
// var sanity check only for first run
m_var_sanity_check.reset();
}
#endif
m_owner_graph->event().signal_inplace<event::CompSeqExecBeforeStart>(
m_owner_graph, this, &ctx->m_cleanup_callback, &m_used_comp_node,
......@@ -342,9 +348,13 @@ void ComputingGraphImpl::ComputingSequence::attach_to_graph() {
static_cast<ComputingSequence*>(gimpl->m_current_comp_seq);
prev_seq->cleanup();
}
#if !__DEPLOY_ON_XP_SP2__
//! disable VarSanityCheck when __DEPLOY_ON_XP_SP2__=1, because
//! VarSanityCheck depends on std::thread
if (gimpl->options().var_sanity_check_first_run) {
m_var_sanity_check = std::make_unique<VarSanityCheck>(gimpl);
}
#endif
gimpl->m_current_comp_seq = this;
}
......@@ -403,7 +413,9 @@ void ComputingGraphImpl::ComputingSequence::do_wait(bool explicit_user_wait) {
}
void ComputingGraphImpl::ComputingSequence::cleanup() {
#if !__DEPLOY_ON_XP_SP2__
m_var_sanity_check.reset();
#endif
if (has_uncaught_exception()) {
mgb_log_warn(
"fallback to simple graph waiting in dtor due to uncaught "
......
......@@ -30,7 +30,9 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable {
size_t m_run_id = 0;
size_t m_cg_event_version = 0;
mutable Maybe<double> m_prev_exec_time;
#if !__DEPLOY_ON_XP_SP2__
std::unique_ptr<VarSanityCheck> m_var_sanity_check;
#endif
std::unique_ptr<CompNodeSeqRecorder> m_comp_node_seq_recorder;
NormalExecEnv m_exec_env;
......@@ -46,7 +48,7 @@ class ComputingGraphImpl::ComputingSequence final : public AsyncExecutable {
class ExecContext;
std::unique_ptr<MegBrainError> m_async_exc;
std::mutex m_async_exc_mutex;
MGB_MUTEX m_async_exc_mutex;
/*!
* \brief check whether recording comp seq is enabled
......
......@@ -713,7 +713,9 @@ void PostExecActions::perform() {
for (auto&& i : m_items) {
if (enable) {
#if !__DEPLOY_ON_XP_SP2__
VarSanityCheck::check_var_after_exec(i.var, *i.recv_info);
#endif
if (i.shape_sync_hdl)
i.shape_sync_hdl->sync_from_var();
......
......@@ -141,7 +141,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final,
TagTraitBase) // {
struct InferResultCache {
Spinlock mtx;
#if __DEPLOY_ON_XP_SP2__
ThinHashMap<size_t, InpElement> storage;
#else
ThinHashMap<std::thread::id, InpElement> storage;
#endif
};
static TagTraitArray sm_empty_deps;
static InferResultCache sm_result_cache;
......@@ -167,7 +171,11 @@ MGB_DEFINE_CLS_WITH_SUPER(StaticInferManagerImpl::TagConstShapeTrait final,
{
// thread_local not supported on ios; so we use a manual impl
MGB_LOCK_GUARD(sm_result_cache.mtx);
#if __DEPLOY_ON_XP_SP2__
ret = &sm_result_cache.storage[0];
#else
ret = &sm_result_cache.storage[std::this_thread::get_id()];
#endif
}
ret->m_shape = &tag()->shape();
return ret;
......
......@@ -122,7 +122,7 @@ class StaticInferManagerImpl final: public StaticInferManager {
struct TagTraitContainer;
ComputingGraph * const m_owner_graph;
std::recursive_mutex m_mtx;
MGB_RECURSIVE_MUTEX m_mtx;
//! callbacks to be invoked in destructor
ThinHashMap<void*, thin_function<void()>> m_dtor_callbacks;
......
......@@ -20,7 +20,7 @@ using namespace cg;
/* ===================== MemAllocPlan ===================== */
std::mutex MemAllocPlan::ReadonlyFwdList::list_mutex;
MGB_MUTEX MemAllocPlan::ReadonlyFwdList::list_mutex;
void MemAllocPlan::ReadonlyFwdList::reset() {
MGB_LOCK_GUARD(list_mutex);
......
......@@ -440,7 +440,7 @@ class VarNodeMemManager {
ImpureMemPlanManager m_impure_mem_plan_mgr;
std::mutex m_dynamic_alloc_mtx;
MGB_MUTEX m_dynamic_alloc_mtx;
const size_t* m_run_id_ptr = nullptr;
SyncableCounter m_cpu_async_release_barrier;
......
......@@ -19,7 +19,13 @@ using namespace mgb;
using namespace sys;
int sys::get_cpu_count() {
#if __DEPLOY_ON_XP_SP2__
//! when deploying on xp sp2, we only support a single thread,
//! so just return 1 even if the cpu count is greater than 1
return 1;
#else
return std::max(std::thread::hardware_concurrency(), 1u);
#endif
}
#if defined(WIN32)
......@@ -153,9 +159,11 @@ bool sys::stderr_ansi_color() {
void sys::set_thread_name(const std::string &) {
}
#if !__DEPLOY_ON_XP_SP2__
std::string sys::get_thread_name(Maybe<std::thread::id>) {
return "@";
}
#endif
namespace {
class FakeTimedFuncInvoker final: public TimedFuncInvoker {
......@@ -254,6 +262,7 @@ void sys::set_thread_name(const std::string &name) {
#endif
}
#if !__DEPLOY_ON_XP_SP2__
std::string sys::get_thread_name(Maybe<std::thread::id> tid_) {
#if MGB_ENABLE_DEBUG_UTIL
MGB_LOCK_GUARD(thread_name_map_lock);
......@@ -269,10 +278,11 @@ std::string sys::get_thread_name(Maybe<std::thread::id> tid_) {
return "";
#endif
}
#endif
namespace {
class TimedFuncInvokerImpl final: public TimedFuncInvoker {
class TimedFuncInvokerImpl final : public TimedFuncInvoker {
/*
* server-client protocol:
*
......@@ -308,7 +318,7 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
bool m_watcher_should_stop = false;
std::condition_variable m_watcher_stop_cv;
std::mutex m_watcher_stop_mtx, m_global_mtx;
MGB_MUTEX m_watcher_stop_mtx, m_global_mtx;
void clear_sock_fd() {
if (m_peer_fd)
......@@ -567,8 +577,10 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
auto start = high_resolution_clock::now(),
end = start + timeout_due;
for (; ; ) {
#if !__DEPLOY_ON_XP_SP2__
std::unique_lock<std::mutex> lk(m_watcher_stop_mtx);
m_watcher_stop_cv.wait_until(lk, end);
#endif
if (m_watcher_should_stop)
return false;
......@@ -603,10 +615,9 @@ class TimedFuncInvokerImpl final: public TimedFuncInvoker {
} MGB_CATCH(..., {});
clear_sock_fd();
}
};
} // anonymous namespace
} // anonymous namespace
TimedFuncInvoker& TimedFuncInvoker::ins() {
static TimedFuncInvokerImpl impl;
......
......@@ -205,6 +205,21 @@ void __log__(LogLevel level, const char *file, const char *func, int line,
#define MGB_TOKENPASTE2(x, y) MGB_TOKENPASTE(x, y)
#define MGB_LOCK_GUARD_CTOR(mtx) MGB_TOKENPASTE2(__lock_guard_, __LINE__)(mtx)
#if __DEPLOY_ON_XP_SP2__
//! refer to
//! https://docs.microsoft.com/en-us/cpp/build/configuring-programs-for-windows-xp?view=msvc-160
//! xp sp2 does not fully support the vc runtime, because KERNEL32.dll does
//! not implement some base apis needed by c++ std functions, for example
//! std::mutex/std::thread/std::condition_variable. as a workaround, we
//! disable some MegEngine features on the xp sp2 env, for example multi-thread etc!
#define MGB_MUTEX size_t
#define MGB_RECURSIVE_MUTEX size_t
#define MGB_LOCK_GUARD(mtx) MGB_MARK_USED_VAR(mtx)
#define MGB_LOCK_GUARD_UNIQUE(mtx) MGB_MARK_USED_VAR(mtx)
#define MGB_LOCK_GUARD_SHARED(mtx) MGB_MARK_USED_VAR(mtx)
#else
#define MGB_MUTEX std::mutex
#define MGB_RECURSIVE_MUTEX std::recursive_mutex
#define MGB_LOCK_GUARD(mtx) \
std::lock_guard<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
......@@ -212,7 +227,8 @@ void __log__(LogLevel level, const char *file, const char *func, int line,
std::unique_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
#define MGB_LOCK_GUARD_SHARED(mtx) \
std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
std::shared_lock<decltype(mtx)> MGB_LOCK_GUARD_CTOR(mtx)
#endif
/*!
* \brief printf-like std::string constructor
......
......@@ -222,7 +222,7 @@ class MemAllocPlan final: public json::Serializable, public NonCopyableObj {
private:
class ReadonlyFwdList {
MemAllocPlan *m_prev = nullptr, *m_next = nullptr;
static std::mutex list_mutex;
static MGB_MUTEX list_mutex;
public:
MemAllocPlan* next() const { return m_next; }
void reset();
......
......@@ -27,11 +27,13 @@ namespace sys {
//! set name of caller thread
void set_thread_name(const std::string &name);
#if !__DEPLOY_ON_XP_SP2__
/*!
* \brief get name of given thread
* \param tid thread id, or None for the caller thread
*/
std::string get_thread_name(Maybe<std::thread::id> tid = None);
#endif
//! get number of CPU cores on this system
int get_cpu_count();
......
......@@ -35,14 +35,20 @@ class AsyncWorkerSet final: public NonCopyableObj {
};
class FutureThreadPoolBase : public NonCopyableObj {
#if !__DEPLOY_ON_XP_SP2__
std::vector<std::thread::id> m_ids;
#endif
public:
FutureThreadPoolBase(const Maybe<std::string>& = None) {}
#if __DEPLOY_ON_XP_SP2__
size_t start(size_t concurrency) { return concurrency; }
#else
const std::vector<std::thread::id>& start(size_t concurrency) {
m_ids.resize(concurrency, std::this_thread::get_id());
return m_ids;
}
#endif
void stop() {
}
......
......@@ -53,7 +53,7 @@ class SyncEventConnecter: public NonCopyableObj {
using ReceiverMap = ThinHashMap<Typeinfo*, ReceiverList>;
bool m_is_empty = true;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
//! map from type to receiver; use shared_ptr because it would be kept by
//! handlers
std::shared_ptr<ReceiverMap> m_receiver_map =
......
......@@ -83,7 +83,7 @@ namespace mgb {
std::string,
std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>>
m_cache;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
};
/*!
......
......@@ -33,7 +33,7 @@ namespace {
template<class Opr>
class StaticInferOpr {
intl::UniqPtrWithCN<Opr> m_opr;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
public:
class Lock {
......@@ -43,7 +43,9 @@ namespace {
explicit Lock(StaticInferOpr *owner):
m_owner{owner}
{
#if !__DEPLOY_ON_XP_SP2__
m_owner->m_mtx.lock();
#endif
}
public:
......@@ -54,8 +56,10 @@ namespace {
}
~Lock() {
#if !__DEPLOY_ON_XP_SP2__
if (m_owner)
m_owner->m_mtx.unlock();
#endif
}
Lock& operator = (const Lock &) = delete;
......
......@@ -277,7 +277,7 @@ SubTensorSpec FancyIndexingHelper::fancy_indexing_make_sub_spec(
mgb_assert(m_require_scalar_index || !fake_single_idx);
static DeviceTensorND fake_val;
static std::mutex fake_val_mtx;
static MGB_MUTEX fake_val_mtx;
if (mgb_unlikely(fake_val.empty())) {
MGB_LOCK_GUARD(fake_val_mtx);
......
......@@ -53,7 +53,7 @@ namespace {
MGB_TYPEINFO_OBJ_DECL;
public:
std::mutex mtx;
MGB_MUTEX mtx;
CompNode::UnorderedMap<DeviceTensorStorage> cn2storage;
};
MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer);
......
......@@ -377,7 +377,7 @@ MGB_DYN_TYPE_OBJ_FINAL_IMPL(SharedDeviceTensorWithFormat);
MGB_DYN_TYPE_OBJ_FINAL_IMPL(ImmutableTensor);
class ImmutableTensor::Value {
std::mutex m_mtx;
MGB_MUTEX m_mtx;
DeviceTensorND m_dev, m_static_infer;
std::string m_summary;
......@@ -527,7 +527,7 @@ class ImmutableTensor::DevValueCache final: public UserDataContainer::UserData {
std::unordered_map<TensorKey, Value, Hash> m_tensor2val;
std::unordered_map<ScalarKey, Value, Hash> m_scalar2val;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
void setup_value(Value &dest, const HostTensorND &val) {
dest.setup(m_comp_node, val);
......
......@@ -888,7 +888,7 @@ class LoopImpl::MutableStateSaver::Recorder final: public NonCopyableObj {
//! mutex for m_saved_buckets, used between copy_bucket_to_host() and the
//! async copy task in m_copy_threadpool
std::mutex m_saved_buckets_mtx;
MGB_MUTEX m_saved_buckets_mtx;
//! see on_fwd_finish()
TensorShape m_var_shape;
bool m_enabled = false;
......
......@@ -356,7 +356,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
next_report_time = timer.get_secs() + 1;
}
using namespace std::literals;
#if !__DEPLOY_ON_XP_SP2__
std::this_thread::sleep_for(1000us);
#endif
}
// release all free blocks owned by child process,
// in order to avoid main process running out of memory
......
......@@ -731,7 +731,7 @@ class PersistentOutputStorage::StorageHolder final
key.second);
}
};
std::mutex m_mtx;
MGB_MUTEX m_mtx;
std::unordered_map<Key, DeviceTensorStorage, KeyHash> m_storage;
public:
......
......@@ -125,9 +125,13 @@ void VarValueChecker::on_var_computed(VarNode *var) {
}
if (!m_init_val_dumped) {
#if !__DEPLOY_ON_XP_SP2__
m_var2val_mtx.lock();
auto &&val = m_var2val[var];
#endif
auto&& val = m_var2val[var];
#if !__DEPLOY_ON_XP_SP2__
m_var2val_mtx.unlock();
#endif
mgb_assert(!val);
val = std::make_shared<DeviceTensorND>();
......
......@@ -22,7 +22,7 @@ namespace mgb {
* This is intended to find potential bugs in megdnn.
*/
class CPUDispatchChecker final: public PluginBase {
std::mutex m_cn2nr_task_mtx,
MGB_MUTEX m_cn2nr_task_mtx,
m_failed_oprs_mtx_storage,
*m_failed_oprs_mtx = &m_failed_oprs_mtx_storage;
CompNode::UnorderedMap<size_t> m_cn2nr_task;
......
......@@ -60,7 +60,7 @@ class TextOprIODump final : public OprIODumpBase {
bool m_print_addr = true;
std::shared_ptr<FILE> m_fout;
size_t m_max_size = 5;
std::mutex m_mtx;
MGB_MUTEX m_mtx;
std::unique_ptr<LazyValueRecorder> m_lazy_value;
void dump_var(VarNode* var, bool lazy_sync) override;
......
......@@ -64,7 +64,7 @@ class VarSanityCheck final : public PluginBase {
//! map from caller thread to workspace map
ThinHashMap<std::thread::id, WorkspaceCache> m_workspace;
std::mutex m_workspace_mtx;
MGB_MUTEX m_workspace_mtx;
ThinHashMap<VarNode*, ChecksumResult> m_var2chksum;
/*! the ids of varnodes that have been modified by recv_opr
......@@ -72,7 +72,7 @@ class VarSanityCheck final : public PluginBase {
* cg::OperatorNodeBase::NodeProp::Flag:: FORCE_UPDATE_INPUT_VAR.
*/
ThinHashSet<VarNode*> m_modified_vars;
std::mutex m_id2chksum_mtx;
MGB_MUTEX m_id2chksum_mtx;
typedef void (VarSanityCheck::*input_checker_fn)(cg::OperatorNodeBase*,
VarNode*);
......
......@@ -50,7 +50,7 @@ namespace mgb {
size_t m_cur_var_idx, m_nr_exec;
VarNodeArray m_vars;
std::mutex m_var2val_mtx;
MGB_MUTEX m_var2val_mtx;
ThinHashMap<VarNode*, std::shared_ptr<DeviceTensorND>> m_var2val;
Checker m_checker;
......