Commit 000a6c73 authored by zhoufeng

op debug: add operator overflow detection (op_debug_mode) to the Ascend data dump pipeline

Signed-off-by: zhoufeng <zhoufeng54@huawei.com>
Parent 875bdc2e
{
"DumpSettings": {
"net_name": "ResNet50",
"mode": 1,
"dump_mode": 1,
"op_debug_mode": 3,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"dump_mode": "0: dump all kernels, 1: dump kernels in kernels list",
"op_debug_mode": "0: close debug, 1: debug ai-core overflow, 2: debug atomic overflow, 3: debug all overflow",
"iteration": "specified iteration ",
"kernels": "op's full scope name which need to be dump"
}
......
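The op_debug_mode field is the new knob in this commit: per DumpSettingsSpec, 0 disables overflow debugging, 1 traps AI Core overflows, 2 traps atomic overflows, and 3 traps both. At runtime the parser apparently locates this file through the DATA_DUMP_CONFIG_PATH environment variable (see the constants in the diff below). As a minimal standalone sketch of how such a file can be consumed with nlohmann::json — the file name and main() wrapper here are illustrative, not the parser's actual entry point:

#include <fstream>
#include <nlohmann/json.hpp>

int main() {
  std::ifstream config_file("data_dump.json");  // hypothetical path
  nlohmann::json config = nlohmann::json::parse(config_file);
  const auto &settings = config.at("DumpSettings");
  uint32_t op_debug_mode = settings.at("op_debug_mode");
  // Valid range per DumpSettingsSpec: 0 (off), 1 (AI Core), 2 (atomic), 3 (all).
  return op_debug_mode <= 3 ? 0 : 1;
}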
graphengine @ 6d124110
-Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009
+Subproject commit 6d12411003164d88eaed62e1ead33761cbfa15ef
......@@ -20,9 +20,15 @@
#include "utils/context/ms_context.h"
#include "debug/common.h"
constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
static constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
static constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
static constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
static constexpr auto kConfigDumpMode = "dump_mode";
static constexpr auto kConfigOpDebugMode = "op_debug_mode";
static constexpr auto kConfigNetName = "net_name";
static constexpr auto kConfigIteration = "iteration";
static constexpr auto kConfigKernels = "kernels";
namespace mindspore {
void DataDumpParser::ResetParam() {
enable_ = false;
......@@ -132,8 +138,11 @@ bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
}
bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
if (dump_settings.find("mode") == dump_settings.end() || dump_settings.find("net_name") == dump_settings.end() ||
dump_settings.find("iteration") == dump_settings.end() || dump_settings.find("kernels") == dump_settings.end()) {
if (dump_settings.find(kConfigDumpMode) == dump_settings.end() ||
dump_settings.find(kConfigNetName) == dump_settings.end() ||
dump_settings.find(kConfigOpDebugMode) == dump_settings.end() ||
dump_settings.find(kConfigIteration) == dump_settings.end() ||
dump_settings.find(kConfigKernels) == dump_settings.end()) {
MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist.";
return false;
}
......@@ -141,10 +150,11 @@ bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
}
bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
auto mode = dump_settings.at("mode");
auto net_name = dump_settings.at("net_name");
auto iteration = dump_settings.at("iteration");
auto kernels = dump_settings.at("kernels");
auto mode = dump_settings.at(kConfigDumpMode);
auto op_debug_mode = dump_settings.at(kConfigOpDebugMode);
auto net_name = dump_settings.at(kConfigNetName);
auto iteration = dump_settings.at(kConfigIteration);
auto kernels = dump_settings.at(kConfigKernels);
if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) {
MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
enable_ = false;
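One gap worth flagging: the type guard above still validates only the four original keys, so the newly read op_debug_mode is never checked with is_number() before being assigned to op_debug_mode_ below. A fuller guard (hypothetical, not part of this commit) would read:

if (!(mode.is_number() && op_debug_mode.is_number() && net_name.is_string() &&
      iteration.is_number() && kernels.is_array())) {
  MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
  enable_ = false;
  return false;
}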
......@@ -155,6 +165,7 @@ bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
dump_mode_ = mode;
op_debug_mode_ = op_debug_mode;
net_name_ = net_name;
dump_step_ = iteration;
for (const auto &kernel : kernels) {
......
......@@ -38,6 +38,7 @@ class DataDumpParser {
bool enable() const { return enable_; }
const std::string &net_name() const { return net_name_; }
uint32_t dump_mode() const { return dump_mode_; }
uint32_t op_debug_mode() const { return op_debug_mode_; }
uint32_t dump_step() const { return dump_step_; }
void MatchKernel(const std::string &kernel_name);
void PrintUnusedKernel();
......@@ -54,6 +55,7 @@ class DataDumpParser {
std::mutex lock_;
bool enable_{false};
std::string net_name_;
uint32_t op_debug_mode_{0};
uint32_t dump_mode_{0};
uint32_t dump_step_{0};
std::map<std::string, uint32_t> kernel_map_;
......
......@@ -97,7 +97,10 @@ AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
void AscendKernelRuntime::ClearGraphModelMap() {
for (auto &iter : graph_data_dumper_) {
MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
iter.second->UnloadDumpInfo();
auto &data_dumper = iter.second;
MS_EXCEPTION_IF_NULL(data_dumper);
data_dumper->UnloadDumpInfo();
data_dumper->OpDebugUnregister();
}
graph_data_dumper_.clear();
// tell users which dump kernel name not used
......@@ -113,18 +116,29 @@ void AscendKernelRuntime::ClearGraphModelMap() {
}
void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " runtime resource";
auto iter = graph_model_map_.find(graph_id);
if (iter == graph_model_map_.end()) {
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " data dumper";
if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
MS_LOG(DEBUG) << "Unload dump info " << graph_id;
auto &data_dumper = dumper_iter->second;
MS_EXCEPTION_IF_NULL(data_dumper);
data_dumper->UnloadDumpInfo();
data_dumper->OpDebugUnregister();
graph_data_dumper_.erase(dumper_iter);
} else {
MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
return;
}
MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first;
auto ret = ModelRunner::Instance().UnloadModel(iter->first);
if (!ret) {
MS_LOG(ERROR) << "UnloadModel failed";
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " runtime resource";
if (auto model_iter = graph_model_map_.find(graph_id); model_iter != graph_model_map_.end()) {
MS_LOG(DEBUG) << "Ge UnloadModel " << graph_id;
auto ret = ModelRunner::Instance().UnloadModel(graph_id);
if (!ret) {
MS_LOG(ERROR) << "UnloadModel failed";
}
graph_model_map_.erase(model_iter);
} else {
MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
}
graph_model_map_.erase(iter);
}
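The split above also establishes a deliberate teardown order: the data dumper is released first, while the Davinci model is still loaded, because OpDebugUnregister ends up calling rtDebugUnRegister on the model handle; only afterwards is the model itself unloaded. Condensed (illustrative ordering, mirroring the diff):

// 1. data_dumper->UnloadDumpInfo();      // remove aicpu dump tasks
// 2. data_dumper->OpDebugUnregister();   // rtDebugUnRegister(model_handle_())
// 3. ModelRunner::Instance().UnloadModel(graph_id);
// 4. graph_model_map_.erase(model_iter);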
bool AscendKernelRuntime::NeedDestroyHccl() {
......@@ -505,15 +519,25 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
bool status =
ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
if (!status) {
MS_LOG(EXCEPTION) << "Load Task Failed";
MS_LOG(EXCEPTION) << "Load Model Failed";
}
std::function<void *()> model_handle =
std::bind(&ModelRunner::GetModelHandle, &ModelRunner::Instance(), model_iter->first);
DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));
status = ModelRunner::Instance().DistributeTask(model_iter->first);
if (!status) {
MS_LOG(EXCEPTION) << "Distribute Task Failed";
}
if (ProfilingManager::GetInstance().IsProfiling()) {
auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph));
}
LaunchDataDump(NOT_NULL(graph));
LaunchDataDump(graph->graph_id());
if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) {
MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed";
......@@ -522,20 +546,35 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
return true;
}
void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) {
void AscendKernelRuntime::DistributeDebugTask(NotNull<const session::KernelGraph *> graph,
NotNull<std::function<void *()>> model_handle) {
if (!DataDumpParser::GetInstance().DumpEnabled()) {
return;
}
auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph->graph_id());
auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map);
auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
MS_EXCEPTION_IF_NULL(data_dumper);
data_dumper->LoadDumpInfo();
auto ret = graph_data_dumper_.try_emplace(graph->graph_id(), data_dumper);
data_dumper->OpDebugRegister();
if (!ret.second) {
MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph->graph_id() << " data dumper failed";
}
}
void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
if (!DataDumpParser::GetInstance().DumpEnabled()) {
return;
}
auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
if (auto dumper_iter = graph_data_dumper_.find(graph_id); dumper_iter != graph_data_dumper_.end()) {
auto &data_dumper = dumper_iter->second;
MS_EXCEPTION_IF_NULL(data_dumper);
data_dumper->set_runtime_info(runtime_info_map);
data_dumper->LoadDumpInfo();
} else {
MS_LOG(EXCEPTION) << "GraphId:" << graph_id << " not found";
}
}
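Together these two functions split dumper setup around task distribution: DistributeDebugTask must run before DistributeTask so that rtDebugRegister can attach the overflow-detection task to the model, while LaunchDataDump must run after it, because the per-kernel runtime info (task/stream ids) only exists once tasks are distributed. The resulting load sequence, condensed from the LoadTask diff above (illustrative):

ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
DistributeDebugTask(NOT_NULL(graph), NOT_NULL(model_handle));  // create DataDumper, rtDebugRegister
ModelRunner::Instance().DistributeTask(model_iter->first);
LaunchDataDump(graph->graph_id());                             // set_runtime_info + LoadDumpInfo
ModelRunner::Instance().LoadModelComplete(model_iter->first);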
void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
for (auto iter : runtime_info_map) {
......
......@@ -63,12 +63,13 @@ class AscendKernelRuntime : public KernelRuntime {
bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
bool CheckGraphIdValid(GraphId graph_id) const;
static void DebugTaskIdName(GraphId graph_id);
void DistributeDebugTask(NotNull<const session::KernelGraph *> graph, NotNull<std::function<void *()>> model_handle);
void LaunchDataDump(GraphId graph_id);
rtContext_t rt_context_{nullptr};
bool initialized_{false};
unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
void LaunchDataDump(NotNull<const session::KernelGraph *> graph);
unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
};
......
......@@ -22,36 +22,53 @@
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/mem.h"
#include "runtime/kernel.h"
#include "runtime/rt_model.h"
#include "runtime/device/ascend/dump/ge_dump.h"
#include "proto/op_mapping_info.pb.h"
#include "utils/context/ms_context.h"
#include "debug/data_dump_parser.h"
constexpr uint32_t kAicpuLoadFlag = 1;
constexpr uint32_t kAicpuUnloadFlag = 0;
constexpr uint32_t kTupleTaskId = 0;
constexpr uint32_t kTupleStreamId = 1;
constexpr uint32_t kTupleArgs = 2;
constexpr uint32_t kCurrentStepTensorIndex = 0;
constexpr uint32_t kCurrentEpochTensorIndex = 1;
constexpr uint32_t kStepsPerEpochTensorIndex = 2;
static constexpr uint32_t kAicpuLoadFlag = 1;
static constexpr uint32_t kAicpuUnloadFlag = 0;
static constexpr uint32_t kTupleTaskId = 0;
static constexpr uint32_t kTupleStreamId = 1;
static constexpr uint32_t kTupleArgs = 2;
static constexpr uint32_t kCurrentStepTensorIndex = 0;
static constexpr uint32_t kCurrentEpochTensorIndex = 1;
static constexpr uint32_t kStepsPerEpochTensorIndex = 2;
static constexpr uint64_t kOpDebugShape = 2048;
static constexpr uint64_t kOpDebugHostMemSize = 2048;
static constexpr uint64_t kOpDebugDevMemSize = sizeof(void *);
static constexpr uint8_t kNoOverflow = 0;
static constexpr uint8_t kAiCoreOverflow = (0x1 << 0);
static constexpr uint8_t kAtomicOverflow = (0x1 << 1);
static constexpr uint8_t kAllOverflow = (kAiCoreOverflow | kAtomicOverflow);
static const std::map<uint32_t, std::string> kOverflowModeStr = {{kNoOverflow, "NoOverflow"},
{kAiCoreOverflow, "AiCoreOverflow"},
{kAtomicOverflow, "AtomicOverflow"},
{kAllOverflow, "AllOverflow"}};
constexpr const char *kNodeNameOpDebug = "Node_OpDebug";
constexpr const char *kOpTypeOpDebug = "Opdebug";
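The overflow constants form a two-bit mask, which is why op_debug_mode 3 in the JSON config means "debug all overflow": bit 0 selects AI Core overflow detection and bit 1 selects atomic overflow detection. A small illustration (not from the commit):

uint8_t mode = kAllOverflow;                   // 0x3
bool ai_core = (mode & kAiCoreOverflow) != 0;  // bit 0 -> true
bool atomic = (mode & kAtomicOverflow) != 0;   // bit 1 -> true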
namespace mindspore {
namespace device {
namespace ascend {
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
static void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
static void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
static void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
DataDumper::~DataDumper() {
ReleaseDevMem(&dev_load_mem_);
ReleaseDevMem(&dev_unload_mem_);
ReleaseDevMem(&op_debug_buffer_addr_);
ReleaseDevMem(&op_debug_dump_args_);
}
void DataDumper::LoadDumpInfo() {
MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
MS_EXCEPTION_IF_NULL(kernel_graph_);
aicpu::dump::OpMappingInfo dump_info;
SetOpDebugMappingInfo(NOT_NULL(&dump_info));
SetOpMappingInfo(NOT_NULL(&dump_info));
auto kernels = kernel_graph_->execution_order();
......@@ -134,7 +151,7 @@ bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
void DataDumper::UnloadDumpInfo() {
if (!load_flag_) {
MS_LOG(WARNING) << "Load not success, no need to unload";
MS_LOG(WARNING) << "[DataDump] Load not success, no need to unload";
return;
}
MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id_;
......@@ -194,6 +211,84 @@ void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aic
DumpKernelInput(kernel, args, dump_task);
}
void DataDumper::SetOpDebugMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
MS_LOG(INFO) << "[DataDump] Add op debug info to OpMappingInfo, task id = " << debug_task_id_
<< ", stream id = " << debug_stream_id_;
aicpu::dump::Task task;
task.set_end_graph(false);
task.set_task_id(debug_task_id_);
task.set_stream_id(debug_stream_id_);
task.mutable_op()->set_op_name(kNodeNameOpDebug);
task.mutable_op()->set_op_type(kOpTypeOpDebug);
aicpu::dump::Output output;
output.set_data_type(ge::proto::DataType::DT_UINT8);
output.set_format(GeFormat::kFormat_ND);
output.mutable_shape()->add_dim(kOpDebugShape);
output.set_original_name(kNodeNameOpDebug);
output.set_original_output_index(0);
output.set_original_output_format(GeFormat::kFormat_ND);
output.set_original_output_data_type(ge::proto::DataType::DT_UINT8);
// due to lhisi virtual addr bug, cannot use args now
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_)));
output.set_size(kOpDebugHostMemSize);
task.mutable_output()->Add(std::move(output));
dump_info->mutable_task()->Add(std::move(task));
}
void DataDumper::OpDebugRegister() {
uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode();
auto iter = kOverflowModeStr.find(op_debug_mode);
if (iter == kOverflowModeStr.end()) {
MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode;
}
MS_LOG(INFO) << "[DataDump] Op debug mode is " << iter->second;
if (op_debug_mode == kNoOverflow) {
return;
}
rtError_t rt_ret = rtMalloc(&op_debug_buffer_addr_, kOpDebugHostMemSize, RT_MEMORY_DDR);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
}
rt_ret = rtMalloc(&op_debug_dump_args_, kOpDebugDevMemSize, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
}
rt_ret =
rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
}
rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugRegister failed, ret = " << rt_ret;
}
MS_LOG(INFO) << "[DataDump] Distribute op debug task, task id = " << debug_task_id_
<< ", stream id = " << debug_stream_id_;
}
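Registration allocates two buffers: op_debug_buffer_addr_, a 2048-byte region (RT_MEMORY_DDR) handed to rtDebugRegister to receive overflow records, and op_debug_dump_args_, a pointer-sized device slot (RT_MEMORY_HBM) into which that buffer's address is copied via rtMemcpy. SetOpDebugMappingInfo then publishes the slot's address, rather than the buffer itself, as the debug task's output, per the "lhisi virtual addr" workaround noted above. Schematically (illustrative):

// op_debug_dump_args_ (device, sizeof(void*)) --holds--> op_debug_buffer_addr_ (2048 B, DDR)
// aicpu output.address = op_debug_dump_args_
// rtDebugRegister(model_handle_(), mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_)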
void DataDumper::OpDebugUnregister() {
uint32_t op_debug_mode = DataDumpParser::GetInstance().op_debug_mode();
if (op_debug_mode == kNoOverflow) {
MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister.";
return;
}
MS_LOG(INFO) << "[DataDump] Start.";
rtError_t rt_ret = rtDebugUnRegister(model_handle_());
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugUnRegister failed, ret = " << rt_ret;
}
}
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
std::string proto_str;
size_t proto_size = dump_info.ByteSizeLong();
......
......@@ -21,6 +21,7 @@
#include <memory>
#include <string>
#include <vector>
#include <functional>
#include "backend/session/kernel_graph.h"
namespace aicpu {
......@@ -36,25 +37,38 @@ namespace ascend {
using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
class DataDumper {
public:
DataDumper(const session::KernelGraph *kernel_graph,
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
: load_flag_(false),
DataDumper(const session::KernelGraph *kernel_graph, NotNull<std::function<void *()>> model_handle)
: model_handle_(model_handle),
debug_task_id_(-1),
debug_stream_id_(-1),
op_debug_buffer_addr_(nullptr),
op_debug_dump_args_(nullptr),
load_flag_(false),
dev_load_mem_(nullptr),
dev_unload_mem_(nullptr),
graph_id_(UINT32_MAX),
kernel_graph_(kernel_graph),
runtime_info_map_(runtime_info_map) {}
kernel_graph_(kernel_graph) {}
~DataDumper();
void set_runtime_info(const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info) {
runtime_info_map_ = runtime_info;
}
void LoadDumpInfo();
void UnloadDumpInfo();
void OpDebugRegister();
void OpDebugUnregister();
private:
void ReleaseDevMem(void **ptr) const;
bool KernelNeedDump(const CNodePtr &kernel) const;
void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
void SetOpDebugMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;
std::function<void *()> model_handle_;
uint32_t debug_task_id_;
uint32_t debug_stream_id_;
void *op_debug_buffer_addr_;
void *op_debug_dump_args_;
bool load_flag_;
void *dev_load_mem_;
void *dev_unload_mem_;
......
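The constructor change plus the new set_runtime_info setter give DataDumper a two-phase lifecycle matching the runtime changes above: the model handle is available at construction, before task distribution, while the runtime info map is injected later. A hedged usage sketch, assembled from the call sites in the runtime diff:

auto data_dumper = std::make_shared<DataDumper>(graph.get(), model_handle);
data_dumper->OpDebugRegister();                   // before DistributeTask
data_dumper->set_runtime_info(runtime_info_map);  // after DistributeTask
data_dumper->LoadDumpInfo();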
......@@ -38,6 +38,10 @@ bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, g
return true;
}
void *ModelRunner::GetModelHandle(uint32_t model_id) const { return nullptr; }
bool ModelRunner::DistributeTask(uint32_t model_id) { return true; }
const std::vector<uint32_t> &ModelRunner::GetTaskIdList(uint32_t model_id) const {
static std::vector<uint32_t> task_id_list;
return task_id_list;
......
......@@ -28,6 +28,8 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
} // namespace tasksink
void DataDumper::LoadDumpInfo() {}
void DataDumper::UnloadDumpInfo() {}
void DataDumper::OpDebugRegister() {}
void DataDumper::OpDebugUnregister() {}
DataDumper::~DataDumper() {}
} // namespace ascend
} // namespace device
......