Commit 95a30eb6 authored by: Megvii Engine Team

perf(imperative): speed up stackmanager guard

GitOrigin-RevId: 12d23b6f7ea00f852d3a7d0641bb723ada50cb3a
Parent commit: f7a5fe17
...@@ -138,8 +138,11 @@ Interpreter& Interpreter::inst() { ...@@ -138,8 +138,11 @@ Interpreter& Interpreter::inst() {
Handle ChannelImpl::put(const HostTensorND& value, bool no_cache) { Handle ChannelImpl::put(const HostTensorND& value, bool no_cache) {
MGB_LOCK_GUARD(m_spin); MGB_LOCK_GUARD(m_spin);
mgb_assert(check_available(), "Channel already closed"); mgb_assert(check_available(), "Channel already closed");
auto& state = get_channel_state(); std::optional<StackManager::Guard> guard;
auto _ = StackManager::Guard{"Put", &state.stack_manager}; if (Profiler::is_profiling()) {
auto& state = get_channel_state();
guard.emplace("Put", &state.stack_manager);
}
auto info = put_impl(value, no_cache); auto info = put_impl(value, no_cache);
return reinterpret_cast<Handle>(info); return reinterpret_cast<Handle>(info);
} }
...@@ -183,8 +186,11 @@ Handle ChannelImpl::put(const DeviceTensorND& data, const HostTensorND& hvalue) ...@@ -183,8 +186,11 @@ Handle ChannelImpl::put(const DeviceTensorND& data, const HostTensorND& hvalue)
} }
TensorInfo* ChannelImpl::put_impl( TensorInfo* ChannelImpl::put_impl(
const DeviceTensorND& data, const HostTensorND& hvalue) { const DeviceTensorND& data, const HostTensorND& hvalue) {
auto& state = get_channel_state(); std::optional<StackManager::Guard> guard;
auto _ = StackManager::Guard{"Put", &state.stack_manager}; if (Profiler::is_profiling()) {
auto& state = get_channel_state();
guard.emplace("Put", &state.stack_manager);
}
auto info = alloc(); auto info = alloc();
MGB_RECORD_EVENT(TensorCommandEvent, info->id, TensorCommandKind::Put); MGB_RECORD_EVENT(TensorCommandEvent, info->id, TensorCommandKind::Put);
constexpr int size_threshold = TensorShape::MAX_NDIM; constexpr int size_threshold = TensorShape::MAX_NDIM;
...@@ -253,8 +259,10 @@ void ChannelImpl::dispatch_default_cpu( ...@@ -253,8 +259,10 @@ void ChannelImpl::dispatch_default_cpu(
SmallVector<Handle>* outputs) { SmallVector<Handle>* outputs) {
auto& state = get_channel_state(); auto& state = get_channel_state();
auto name = op->trait()->make_name(*op); std::optional<StackManager::Guard> guard;
auto _ = StackManager::Guard(name, &state.stack_manager); if (Profiler::is_profiling()) {
guard.emplace(op->trait()->make_name(*op), &state.stack_manager);
}
auto [output_descs, validated] = auto [output_descs, validated] =
OpDef::infer_output_attrs_fallible(*op, input_descs); OpDef::infer_output_attrs_fallible(*op, input_descs);
...@@ -329,8 +337,9 @@ void ChannelImpl::dispatch_default_cpu( ...@@ -329,8 +337,9 @@ void ChannelImpl::dispatch_default_cpu(
return op_info; return op_info;
}; };
MGB_RECORD_EVENT( MGB_RECORD_EVENT(
OpDispatchEvent, op_id, name, op_info_getter, tinfo_to_tid(input_infos), OpDispatchEvent, op_id, guard.value().name(), op_info_getter,
tinfo_to_tid(output_infos), state.stack_manager.dump()); tinfo_to_tid(input_infos), tinfo_to_tid(output_infos),
state.stack_manager.dump());
} }
void ChannelImpl::dispatch_kernel( void ChannelImpl::dispatch_kernel(
...@@ -340,8 +349,10 @@ void ChannelImpl::dispatch_kernel( ...@@ -340,8 +349,10 @@ void ChannelImpl::dispatch_kernel(
auto& state = get_channel_state(); auto& state = get_channel_state();
auto& options = state.options; auto& options = state.options;
auto name = op->trait()->make_name(*op); std::optional<StackManager::Guard> guard;
auto _ = StackManager::Guard{name, &state.stack_manager}; if (Profiler::is_profiling()) {
guard.emplace(op->trait()->make_name(*op), &state.stack_manager);
}
auto [output_descs, validated] = auto [output_descs, validated] =
OpDef::infer_output_attrs_fallible(*op, input_descs); OpDef::infer_output_attrs_fallible(*op, input_descs);
...@@ -376,8 +387,9 @@ void ChannelImpl::dispatch_kernel( ...@@ -376,8 +387,9 @@ void ChannelImpl::dispatch_kernel(
return op_info; return op_info;
}; };
MGB_RECORD_EVENT( MGB_RECORD_EVENT(
OpDispatchEvent, cmd.id, name, op_info_getter, tinfo_to_tid(cmd.inputs), OpDispatchEvent, cmd.id, guard.value().name(), op_info_getter,
tinfo_to_tid(cmd.outputs), state.stack_manager.dump()); tinfo_to_tid(cmd.inputs), tinfo_to_tid(cmd.outputs),
state.stack_manager.dump());
m_worker.add_task( m_worker.add_task(
{Profiler::next_id(), std::move(cmd), {Profiler::next_id(), std::move(cmd),
get_channel_state().stack_manager.dump()}); get_channel_state().stack_manager.dump()});
......
...@@ -98,6 +98,7 @@ public: ...@@ -98,6 +98,7 @@ public:
m_manager->enter(name); m_manager->enter(name);
} }
} }
std::string name() const { return m_name; }
~Guard() { release(); } ~Guard() { release(); }
void release() { void release() {
if (m_manager) { if (m_manager) {
......
...@@ -186,7 +186,7 @@ inline stats::Timer::Timer(std::string name, bool default_enabled) ...@@ -186,7 +186,7 @@ inline stats::Timer::Timer(std::string name, bool default_enabled)
} }
#if MGE_ENABLE_STATS #if MGE_ENABLE_STATS
#define MGE_TIMER_SCOPE(name) auto name = Stats::name.time_scope() #define MGE_TIMER_SCOPE(name) auto name = Stats::name.time_scope_recursive()
#define MGE_TIMER_SCOPE_RELEASE(name) name.release() #define MGE_TIMER_SCOPE_RELEASE(name) name.release()
#define MGE_TIMER_SCOPE_ENABLE(name) auto name = Stats::name.enable_scope() #define MGE_TIMER_SCOPE_ENABLE(name) auto name = Stats::name.enable_scope()
#else #else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register