提交 65f2212f 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!2215 Support hccl profiling

Merge pull request !2215 from caifubi/support-hccl-profiling
......@@ -125,6 +125,12 @@ bool AscendKernelRuntime::Init() {
}
#endif
// Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}
ret = InitDevice();
if (!ret) {
return ret;
......@@ -133,11 +139,6 @@ bool AscendKernelRuntime::Init() {
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory();
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}
initialized_ = true;
return ret;
}
......
......@@ -28,6 +28,7 @@
#include "utils/context/ms_context.h"
#include "common/utils.h"
#include "utils/convert_utils.h"
#include "runtime/base.h"
using std::vector;
using Json = nlohmann::json;
......@@ -159,6 +160,12 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
MS_LOG(INFO) << "profiling config " << cfg;
auto ret = rtProfilerStart();
if (ret != RT_ERROR_NONE) {
MS_LOG(INFO) << "Call rtProfilerStart failed, ret:" << ret;
return false;
}
// call profiling startup API
ProfMgrCfg prof_cfg = {cfg};
prof_handle_ = ProfMgrStartUp(&prof_cfg);
......@@ -180,6 +187,12 @@ bool ProfilingManager::StopProfiling() {
MS_LOG(INFO) << "report data end, ret = " << reporter->Flush();
}
auto rt_ret = rtProfilerStop();
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call rtProfilerStop failed";
return false;
}
if (prof_handle_ != nullptr) {
int result = ProfMgrStop(prof_handle_);
if (result != 0) {
......
......@@ -133,3 +133,7 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId) { return RT_ERROR_
// Placeholder implementation: neither argument is read or written, and the
// call unconditionally reports RT_ERROR_NONE.
rtError_t rtGetFunctionByName(const char *stubName, void **stubFunc) {
  (void)stubName;  // intentionally unused
  (void)stubFunc;  // intentionally unused; the output pointer is left untouched
  return RT_ERROR_NONE;
}
// Placeholder implementation: the supplied callback is neither stored nor
// invoked, and the call unconditionally reports RT_ERROR_NONE.
rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback) {
  (void)callback;  // intentionally unused
  return RT_ERROR_NONE;
}
// Placeholder implementation: performs no work and unconditionally reports
// RT_ERROR_NONE.
rtError_t rtProfilerStart(void) {
  return RT_ERROR_NONE;
}
// Placeholder implementation: performs no work and unconditionally reports
// RT_ERROR_NONE.
rtError_t rtProfilerStop(void) {
  return RT_ERROR_NONE;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册