提交 f95bb7b7 编写于 作者: M Megvii Engine Team

perf(mge/profiler): reduce profiler overhead

GitOrigin-RevId: 4e1fff9dc43341e1fd2cc414823cf31efd8e5831
上级 9cc732f8
......@@ -525,19 +525,23 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
// Before wait
// TODO: split operator wait and execute so that OpWait can be recorded correctly.
// Before execute
m_worker_state.profiler->record_host<HostOpExecuteEvent>(event_data);
for (auto&& device: devices) {
sync_device_scope(device);
m_worker_state.profiler->record_device<DeviceOpExecuteEvent>(device, event_data);
if (m_worker_state.profiler->is_profiling()) {
m_worker_state.profiler->record_host<HostOpExecuteEvent>(event_data);
for (auto&& device: devices) {
sync_device_scope(device);
m_worker_state.profiler->record_device<DeviceOpExecuteEvent>(device, event_data);
}
}
// Apply op
// Here std::move is REQUIRED for removing duplicated references.
auto tensor_outputs = OpDef::apply_on_physical_tensor(
*cmd.op, std::move(tensor_inputs));
// After execute
m_worker_state.profiler->record_host<HostOpFinishEvent>(event_data);
for (auto&& device: devices) {
m_worker_state.profiler->record_device<DeviceOpFinishEvent>(device, event_data);
if (m_worker_state.profiler->is_profiling()) {
m_worker_state.profiler->record_host<HostOpFinishEvent>(event_data);
for (auto&& device: devices) {
m_worker_state.profiler->record_device<DeviceOpFinishEvent>(device, event_data);
}
}
// End profiling operator
mgb_assert(tensor_outputs.size() == cmd.outputs.size());
......
......@@ -140,27 +140,32 @@ public:
public:
/// Appends a host-side record for event type TEvent to m_record_list.
///
/// @tparam TEvent  event type; its index (index_of<TEvent>()) selects the bit
///                 tested in m_event_mask.
/// @param args     forwarded into the payload of the stored EventRecord<TEvent>.
///
/// Returns without recording anything when TEvent's bit is not set in
/// m_event_mask. Asserts (mgb_assert) that m_status is not Stopped.
template <typename TEvent, typename... TArgs>
void record_host(TArgs&&... args) {
    // NOTE(review): MGB_LOCK_GUARD presumably holds m_lock until the function
    // returns -- confirm against the macro definition.
    MGB_LOCK_GUARD(m_lock);
    if (!m_event_mask.test(index_of<TEvent>())) {
        return;
    }
    mgb_assert(m_status != Stopped, "record after stop");
    // The timestamp is captured exactly once, and only after the mask check,
    // so a masked-out event never queries the host timer.
    auto instant = HostInstant{std::this_thread::get_id(), m_host_timer.get_msecs()};
    m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
}
/// Appends a device-side record for event type TEvent to m_record_list.
///
/// @tparam TEvent  event type; its index (index_of<TEvent>()) selects the bit
///                 tested in m_event_mask.
/// @param device   device handed to m_device_timer.get_device_time().
/// @param args     forwarded into the payload of the stored EventRecord<TEvent>.
///
/// Returns without recording anything when TEvent's bit is not set in
/// m_event_mask. Asserts (mgb_assert) that m_status is not Stopped.
template <typename TEvent, typename... TArgs>
void record_device(Device device, TArgs&&... args) {
    // NOTE(review): MGB_LOCK_GUARD presumably holds m_lock until the function
    // returns -- confirm against the macro definition.
    MGB_LOCK_GUARD(m_lock);
    if (!m_event_mask.test(index_of<TEvent>())) {
        return;
    }
    mgb_assert(m_status != Stopped, "record after stop");
    // The timers are sampled exactly once, and only after the mask check, so a
    // masked-out event never touches the host or device timer. The device
    // query is bracketed by two host-timer readings, and all three values are
    // stored together in the DeviceInstant.
    auto before = m_host_timer.get_msecs();
    auto event = m_device_timer.get_device_time(device);
    auto after = m_host_timer.get_msecs();
    auto instant = DeviceInstant{before, event, after};
    m_record_list.emplace_back(EventRecord<TEvent>{std::move(instant), {std::forward<TArgs>(args)...}});
}
// Reports whether m_status currently equals Profiling.
// Labelled "unsafe" by the original author.
// NOTE(review): presumably because the answer may already be stale once the
// lock is released on return -- confirm the intended meaning.
bool is_profiling() {
    MGB_LOCK_GUARD(m_lock);
    const bool currently_profiling = (m_status == Profiling);
    return currently_profiling;
}
void start(Mask mask) {
MGB_LOCK_GUARD(m_lock);
mgb_assert(m_status == NotStarted, "profiler already started");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册