提交 f95bb7b7 编写于 作者: M Megvii Engine Team

perf(mge/profiler): reduce profiler overhead

GitOrigin-RevId: 4e1fff9dc43341e1fd2cc414823cf31efd8e5831
上级 9cc732f8
...@@ -525,20 +525,24 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) { ...@@ -525,20 +525,24 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
// Before wait // Before wait
//TODO: split operator wait and execute so that OpWait could be correctly recorded. //TODO: split operator wait and execute so that OpWait could be correctly recorded.
// Before execute // Before execute
if (m_worker_state.profiler->is_profiling()) {
m_worker_state.profiler->record_host<HostOpExecuteEvent>(event_data); m_worker_state.profiler->record_host<HostOpExecuteEvent>(event_data);
for (auto&& device: devices) { for (auto&& device: devices) {
sync_device_scope(device); sync_device_scope(device);
m_worker_state.profiler->record_device<DeviceOpExecuteEvent>(device, event_data); m_worker_state.profiler->record_device<DeviceOpExecuteEvent>(device, event_data);
} }
}
// Apply op // Apply op
// Here std::move is REQUIRED for removing duplicated references. // Here std::move is REQUIRED for removing duplicated references.
auto tensor_outputs = OpDef::apply_on_physical_tensor( auto tensor_outputs = OpDef::apply_on_physical_tensor(
*cmd.op, std::move(tensor_inputs)); *cmd.op, std::move(tensor_inputs));
// After execute // After execute
if (m_worker_state.profiler->is_profiling()) {
m_worker_state.profiler->record_host<HostOpFinishEvent>(event_data); m_worker_state.profiler->record_host<HostOpFinishEvent>(event_data);
for (auto&& device: devices) { for (auto&& device: devices) {
m_worker_state.profiler->record_device<DeviceOpFinishEvent>(device, event_data); m_worker_state.profiler->record_device<DeviceOpFinishEvent>(device, event_data);
} }
}
// End profiling operator // End profiling operator
mgb_assert(tensor_outputs.size() == cmd.outputs.size()); mgb_assert(tensor_outputs.size() == cmd.outputs.size());
for (size_t i = 0; i < tensor_outputs.size(); ++i) { for (size_t i = 0; i < tensor_outputs.size(); ++i) {
......
...@@ -140,27 +140,32 @@ public: ...@@ -140,27 +140,32 @@ public:
public: public:
template <typename TEvent, typename... TArgs> template <typename TEvent, typename... TArgs>
void record_host(TArgs&&... args) { void record_host(TArgs&&... args) {
auto instant = HostInstant{std::this_thread::get_id(), m_host_timer.get_msecs()};
MGB_LOCK_GUARD(m_lock); MGB_LOCK_GUARD(m_lock);
if (!m_event_mask.test(index_of<TEvent>())) { if (!m_event_mask.test(index_of<TEvent>())) {
return; return;
} }
mgb_assert(m_status != Stopped, "record after stop"); mgb_assert(m_status != Stopped, "record after stop");
auto instant = HostInstant{std::this_thread::get_id(), m_host_timer.get_msecs()};
m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}}); m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
} }
template <typename TEvent, typename... TArgs> template <typename TEvent, typename... TArgs>
void record_device(Device device, TArgs&&... args) { void record_device(Device device, TArgs&&... args) {
auto before = m_host_timer.get_msecs();
auto event = m_device_timer.get_device_time(device);
auto after = m_host_timer.get_msecs();
auto instant = DeviceInstant{before, event, after};
MGB_LOCK_GUARD(m_lock); MGB_LOCK_GUARD(m_lock);
if (!m_event_mask.test(index_of<TEvent>())) { if (!m_event_mask.test(index_of<TEvent>())) {
return; return;
} }
mgb_assert(m_status != Stopped, "record after stop"); mgb_assert(m_status != Stopped, "record after stop");
auto before = m_host_timer.get_msecs();
auto event = m_device_timer.get_device_time(device);
auto after = m_host_timer.get_msecs();
auto instant = DeviceInstant{before, event, after};
m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}}); m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
} }
// unsafe
bool is_profiling() {
MGB_LOCK_GUARD(m_lock);
return m_status == Profiling;
}
void start(Mask mask) { void start(Mask mask) {
MGB_LOCK_GUARD(m_lock); MGB_LOCK_GUARD(m_lock);
mgb_assert(m_status == NotStarted, "profiler already started"); mgb_assert(m_status == NotStarted, "profiler already started");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册