提交 c577952c 编写于 作者: C caifubi

Async Data Dump

上级 da9452ee
......@@ -24,7 +24,7 @@ usage()
{
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
echo ""
echo "Options:"
......@@ -48,6 +48,7 @@ usage()
echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
echo " -Q Enable dump memory, default off"
echo " -D Enable dumping of function graph ir, default on"
echo " -S Enable async data dump, default off"
echo " -z Compile dataset & mindrecord, default on"
echo " -M Enable MPI and NCCL for GPU training, gpu default on"
echo " -V Specify the minimum required cuda version, default CUDA 10.1"
......@@ -88,6 +89,7 @@ checkopts()
ENABLE_TIMELINE="off"
ENABLE_DUMP2PROTO="on"
ENABLE_DUMPE2E="off"
ENABLE_DATA_DUMP="off"
ENABLE_DUMP_IR="on"
COMPILE_MINDDATA="on"
ENABLE_MPI="off"
......@@ -102,7 +104,7 @@ checkopts()
ENABLE_PYTHON="on"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
......@@ -218,6 +220,11 @@ checkopts()
ENABLE_DUMPE2E="$OPTARG"
echo "enable dump end to end"
;;
S)
check_on_off $OPTARG S
ENABLE_DATA_DUMP="$OPTARG"
echo "enable data dump"
;;
D)
check_on_off $OPTARG D
ENABLE_DUMP_IR="$OPTARG"
......@@ -321,6 +328,9 @@ build_mindspore()
if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
......
......@@ -116,6 +116,10 @@ if(ENABLE_DUMP_E2E)
add_compile_definitions(ENABLE_DUMP_E2E)
endif()
# When the ENABLE_DATA_DUMP option is on, define the ENABLE_DATA_DUMP preprocessor symbol
# so that the C++ code guarded by `#ifdef ENABLE_DATA_DUMP` is compiled in.
if(ENABLE_DATA_DUMP)
add_compile_definitions(ENABLE_DATA_DUMP)
endif()
if(ENABLE_DEBUGGER)
add_compile_definitions(ENABLE_DEBUGGER)
endif()
{
"DumpSettings": {
"net_name": "ResNet50",
"mode": 1,
"iteration": 0,
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
},
"DumpSettingsSpec": {
"net_name": "net name eg:ResNet50",
"mode": "0: dump all kernels, 1: dump kernels in kernels list",
"iteration": "specified iteration ",
"kernels": "op's full scope name which need to be dump"
}
}
\ No newline at end of file
graphengine @ 18cf6901
Subproject commit 1c2672868fda8b1d012c99e5aca73725ac869ba9
Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7
......@@ -109,8 +109,12 @@ if (ENABLE_D)
file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
add_compile_definitions(ENABLE_D)
endif ()
......
......@@ -19,6 +19,15 @@ if (ENABLE_DEBUGGER)
)
endif (ENABLE_DEBUGGER)
# With ENABLE_D on, common.cc is always added to the debug sources;
# data_dump_parser.cc is added only when ENABLE_DATA_DUMP is on as well.
if (ENABLE_D)
list(APPEND _DEBUG_SRC_LIST
"${CMAKE_CURRENT_SOURCE_DIR}/common.cc"
)
if (ENABLE_DATA_DUMP)
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/data_dump_parser.cc")
endif(ENABLE_DATA_DUMP)
endif()
if (ENABLE_DUMP_E2E)
list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
endif (ENABLE_DUMP_E2E)
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/common.h"
#include <memory>
#include <optional>
#include "utils/system/env.h"
#include "utils/system/file_system.h"
#include "utils/log_adapter.h"
#include "utils/context/ms_context.h"
namespace mindspore {
std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
std::string out_path;
auto path_split_pos = input_path.find_last_of('/');
if (path_split_pos == std::string::npos) {
path_split_pos = input_path.find_last_of('\\');
}
// get real path
char real_path[PATH_MAX] = {0};
if (path_split_pos != std::string::npos) {
std::string prefix_path = input_path.substr(0, path_split_pos);
if (prefix_path.length() >= PATH_MAX) {
MS_LOG(ERROR) << "Prefix path is too longer!";
return std::nullopt;
}
std::string last_path = input_path.substr(path_split_pos, input_path.length() - path_split_pos);
auto ret = CreateNotExistDirs(prefix_path);
if (!ret) {
MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
return std::nullopt;
}
if (nullptr == realpath(prefix_path.c_str(), real_path)) {
MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
return std::nullopt;
}
out_path = std::string(real_path) + last_path;
}
if (path_split_pos == std::string::npos) {
if (input_path.length() >= PATH_MAX) {
MS_LOG(ERROR) << "Prefix path is too longer!";
return std::nullopt;
}
if (nullptr == realpath(input_path.c_str(), real_path)) {
MS_LOG(ERROR) << "File " << input_path << " does not exit, it will be created.";
}
out_path = std::string(real_path);
}
return out_path;
}
bool Common::CreateNotExistDirs(const std::string &path) {
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
MS_EXCEPTION_IF_NULL(fs);
char temp_path[PATH_MAX] = {0};
if (path.length() > PATH_MAX) {
MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
return false;
}
for (uint32_t i = 0; i < path.length(); i++) {
temp_path[i] = path[i];
if (temp_path[i] == '\\' || temp_path[i] == '/') {
if (i != 0) {
char tmp_char = temp_path[i];
temp_path[i] = '\0';
std::string path_handle(temp_path);
if (!fs->FileExist(temp_path)) {
MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
if (!fs->CreateDir(temp_path)) {
MS_LOG(ERROR) << "Create " << path_handle << " dir error";
return false;
}
}
temp_path[i] = tmp_char;
}
}
}
if (!fs->FileExist(path)) {
MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
if (!fs->CreateDir(path)) {
MS_LOG(ERROR) << "Create " << path << " dir error";
return false;
}
}
return true;
}
std::optional<std::string> Common::GetConfigFile(const std::string &env) {
if (env.empty()) {
MS_LOG(EXCEPTION) << "Invalid env";
}
auto config_path_str = std::getenv(env.c_str());
if (config_path_str == nullptr) {
MS_LOG(ERROR) << "Please export env:" << env;
return {};
}
MS_LOG(INFO) << "Async Dump Getenv env:" << env << "=" << config_path_str;
std::string dump_config_file(config_path_str);
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
MS_EXCEPTION_IF_NULL(fs);
if (!fs->FileExist(dump_config_file)) {
MS_LOG(ERROR) << dump_config_file << " not exist.";
return {};
}
return dump_config_file;
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
#include <string>
#include <optional>
#include "utils/contract.h"
namespace mindspore {
// Static path/config helpers shared by the dump code.
class Common {
public:
Common() = default;
~Common() = default;
// Resolves input_path to an absolute path, creating its directory part when missing.
// Returns std::nullopt on failure.
static std::optional<std::string> GetRealPath(const std::string &input_path);
// Returns the value of the environment variable `env` when it names an existing file,
// otherwise an empty optional.
static std::optional<std::string> GetConfigFile(const std::string &env);
private:
// Creates `path` and all of its missing ancestor directories; returns false on failure.
static bool CreateNotExistDirs(const std::string &path);
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_COMMON_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "debug/data_dump_parser.h"
#include <fstream>
#include "utils/context/ms_context.h"
#include "debug/common.h"
// Name of the environment variable holding the dump config json path (read in ParseDumpConfig).
constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
// Name of the environment variable that switches data dump on; DumpEnabled() requires its value to be 1.
constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
// Name of the environment variable holding the dump output path (read in GetDumpPath).
constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
namespace mindspore {
// Puts every parsed setting back to its initial value: dump disabled, empty net name,
// mode 0, step 0 and an empty kernel set.
void DataDumpParser::ResetParam() {
  kernel_set_.clear();
  net_name_.clear();
  dump_step_ = 0;
  dump_mode_ = 0;
  enable_ = false;
}
// Tells whether data dump is switched on through the ENABLE_DATA_DUMP environment variable.
// Returns false (with a warning) when the variable is unset or does not parse to 1.
// Raises through MS_LOG(EXCEPTION) when dump is requested while running in PyNative mode.
bool DataDumpParser::DumpEnabled() const {
  const char *env_switch = std::getenv(kEnableDataDump);
  if (env_switch == nullptr) {
    MS_LOG(WARNING) << "[DataDump] enable dump is null. Please export ENABLE_DATA_DUMP";
    return false;
  }
  if (std::atoi(env_switch) != 1) {
    MS_LOG(WARNING) << "[DataDump] Please export ENABLE_DATA_DUMP=1";
    return false;
  }

  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  const bool is_pynative = (context->execution_mode() == kPynativeMode);
  if (is_pynative) {
    MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump";
  }
  return true;
}
// Returns the value of the DATA_DUMP_PATH environment variable, or an empty optional
// (after logging an error) when the variable is not set.
std::optional<std::string> DataDumpParser::GetDumpPath() const {
  const char *env_path = std::getenv(kDataDumpPath);
  if (env_path != nullptr) {
    return std::string(env_path);
  }
  MS_LOG(ERROR) << "[DataDump] dump path is null. Please export DATA_DUMP_PATH";
  return std::nullopt;
}
void DataDumpParser::ParseDumpConfig() {
std::lock_guard<std::mutex> guard(lock_);
MS_LOG(INFO) << "[DataDump] parse start";
if (!DumpEnabled()) {
MS_LOG(INFO) << "[DataDump] dump not enable";
return;
}
ResetParam();
auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah);
if (!dump_config_file.has_value()) {
MS_LOG(EXCEPTION) << "[DataDump] Get config file failed";
}
std::ifstream json_file(dump_config_file.value());
if (!json_file.is_open()) {
MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed.";
}
nlohmann::json j;
json_file >> j;
if (j.find("DumpSettings") == j.end()) {
MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist.";
}
nlohmann::json dump_settings = j.at("DumpSettings");
// convert json to string
std::stringstream ss;
ss << dump_settings;
std::string cfg = ss.str();
MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg;
if (!IsConfigExist(dump_settings)) {
MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid";
}
if (!ParseDumpSetting(dump_settings)) {
MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed";
}
}
// Tells whether the kernel named op_full_name is selected for dumping.
// Always false when dump is disabled; with dump mode 0 every kernel is selected,
// otherwise only the kernels present in kernel_set_.
bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
  if (!DumpEnabled()) {
    return false;
  }
  const bool dump_everything = (dump_mode_ == 0);
  return dump_everything || kernel_set_.count(op_full_name) > 0;
}
// Checks that dump_settings carries all of the keys "mode", "net_name", "iteration" and "kernels".
// Logs an error and returns false as soon as one of them is missing.
bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
  const char *const required_keys[] = {"mode", "net_name", "iteration", "kernels"};
  for (const char *key : required_keys) {
    if (dump_settings.find(key) == dump_settings.end()) {
      MS_LOG(ERROR) << "[DataDump] DumpSettings keys are not exist.";
      return false;
    }
  }
  return true;
}
// Copies the validated "DumpSettings" object into the parser state.
//
// Expects "mode" and "iteration" to be numbers, "net_name" to be a string and "kernels" to be an
// array of strings. On any type mismatch an error is logged, enable_ is cleared and false is
// returned without touching the other members. On success enable_ is set, mode / net name / step
// are stored and every kernel name is added to kernel_set_.
//
// Kernel names are read with get<std::string>() so the stored name is the decoded JSON string;
// serializing the element and stripping quote characters would keep JSON escape sequences in the
// name and would accept non-string entries.
bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
  const auto &mode = dump_settings.at("mode");
  const auto &net_name = dump_settings.at("net_name");
  const auto &iteration = dump_settings.at("iteration");
  const auto &kernels = dump_settings.at("kernels");
  if (!(mode.is_number() && net_name.is_string() && iteration.is_number() && kernels.is_array())) {
    MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
    enable_ = false;
    return false;
  }
  // Validate every kernel entry up front so a bad entry cannot leave a half-filled kernel set.
  for (const auto &kernel : kernels) {
    if (!kernel.is_string()) {
      MS_LOG(ERROR) << "[DataDump] Kernel name in Dump config json must be a string, but got: " << kernel.dump();
      enable_ = false;
      return false;
    }
  }

  enable_ = true;
  dump_mode_ = mode.get<uint32_t>();
  net_name_ = net_name.get<std::string>();
  dump_step_ = iteration.get<uint32_t>();
  for (const auto &kernel : kernels) {
    const auto kernel_str = kernel.get<std::string>();
    MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str;
    kernel_set_.insert(kernel_str);
  }
  return true;
}
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
#include <string>
#include <set>
#include <mutex>
#include <optional>
#include "nlohmann/json.hpp"
#include "common/utils.h"
namespace mindspore {
// Singleton holding the async data dump settings read from the environment and the dump config json.
class DataDumpParser {
public:
// Returns the single, function-local static instance.
static DataDumpParser &GetInstance() {
static DataDumpParser instance;
return instance;
}
// Re-reads the dump config json when dump is enabled; failures raise through MS_LOG(EXCEPTION).
void ParseDumpConfig();
// True when dump is enabled and either dump mode is 0 or op_full_name is in the kernel set.
bool NeedDump(const std::string &op_full_name) const;
// True when the ENABLE_DATA_DUMP environment variable is set to 1.
bool DumpEnabled() const;
// Value of the DATA_DUMP_PATH environment variable, or an empty optional when unset.
std::optional<std::string> GetDumpPath() const;
// Read-only accessors for the parsed settings.
bool enable() const { return enable_; }
const std::string &net_name() const { return net_name_; }
uint32_t dump_mode() const { return dump_mode_; }
uint32_t dump_step() const { return dump_step_; }
const std::set<std::string> &kernel_set() const { return kernel_set_; }
private:
DataDumpParser() = default;
virtual ~DataDumpParser() = default;
DISABLE_COPY_AND_ASSIGN(DataDumpParser);
// Restores all settings below to their initial values.
void ResetParam();
// Checks that the keys "mode", "net_name", "iteration" and "kernels" are all present.
bool IsConfigExist(const nlohmann::json &dump_settings) const;
// Type-checks the settings and stores them in the members below.
bool ParseDumpSetting(const nlohmann::json &dump_settings);
// Held by ParseDumpConfig for its whole duration.
std::mutex lock_;
// Set once a config has been parsed successfully.
bool enable_{false};
// "net_name" from the config.
std::string net_name_;
// "mode" from the config; 0 makes NeedDump accept every kernel.
uint32_t dump_mode_{0};
// "iteration" from the config.
uint32_t dump_step_{0};
// Kernel names listed under "kernels" in the config.
std::set<std::string> kernel_set_;
};
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
......@@ -17,12 +17,14 @@
#include <limits.h>
#include <fstream>
#include <string>
#include <optional>
#include <nlohmann/json.hpp>
#include "utils/log_adapter.h"
#include "utils/system/file_system.h"
#include "utils/system/env.h"
#include "utils/convert_utils.h"
#include "utils/context/ms_context.h"
#include "debug/common.h"
using json = nlohmann::json;
......@@ -158,100 +160,19 @@ bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len)
return false;
}
std::string realpath;
bool ret = GetRealPath(filename, &realpath);
if (!ret) {
auto realpath = Common::GetRealPath(filename);
if (!realpath.has_value()) {
MS_LOG(ERROR) << "Get real path failed.";
return false;
}
std::ofstream fd;
fd.open(realpath, std::ios::binary | std::ios::out);
fd.open(realpath.value(), std::ios::binary | std::ios::out);
if (!fd.is_open()) {
MS_LOG(ERROR) << "Open file " << realpath << " fail.";
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
return false;
}
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
fd.close();
return true;
}
bool Dump::GetRealPath(const std::string &inpath, std::string *outpath) {
MS_EXCEPTION_IF_NULL(outpath);
auto path_split_pos = inpath.find_last_of('/');
if (path_split_pos == std::string::npos) {
path_split_pos = inpath.find_last_of('\\');
}
// get real path
char real_path[PATH_MAX] = {0};
if (path_split_pos != std::string::npos) {
std::string prefix_path = inpath.substr(0, path_split_pos);
if (prefix_path.length() >= PATH_MAX) {
MS_LOG(ERROR) << "Prefix path is too longer!";
return false;
}
std::string last_path = inpath.substr(path_split_pos, inpath.length() - path_split_pos);
auto ret = CreateNotExistDirs(prefix_path);
if (ret == false) {
MS_LOG(ERROR) << "CreateNotExistDirs Failed!";
return false;
}
if (nullptr == realpath(prefix_path.c_str(), real_path)) {
MS_LOG(ERROR) << "dir " << prefix_path << " does not exit.";
return false;
}
*outpath = std::string(real_path) + last_path;
}
if (path_split_pos == std::string::npos) {
if (inpath.length() >= PATH_MAX) {
MS_LOG(ERROR) << "Prefix path is too longer!";
return false;
}
if (nullptr == realpath(inpath.c_str(), real_path)) {
MS_LOG(ERROR) << "File " << inpath << " does not exit, it will be created.";
}
*outpath = std::string(real_path);
}
return true;
}
bool Dump::CreateNotExistDirs(const std::string &path) {
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
MS_EXCEPTION_IF_NULL(fs);
char temp_path[PATH_MAX] = {0};
if (path.length() > PATH_MAX) {
MS_LOG(ERROR) << "Path lens is max than " << PATH_MAX;
return false;
}
for (uint32_t i = 0; i < path.length(); i++) {
temp_path[i] = path[i];
if (temp_path[i] == '\\' || temp_path[i] == '/') {
if (i != 0) {
char tmp_char = temp_path[i];
temp_path[i] = '\0';
std::string path_handle(temp_path);
if (!fs->FileExist(temp_path)) {
MS_LOG(INFO) << "Dir " << path_handle << " does not exit, creating...";
if (!fs->CreateDir(temp_path)) {
MS_LOG(ERROR) << "Create " << path_handle << " dir error";
return false;
}
}
temp_path[i] = tmp_char;
}
}
}
if (!fs->FileExist(path)) {
MS_LOG(INFO) << "Dir " << path << " does not exit, creating...";
if (!fs->CreateDir(path)) {
MS_LOG(ERROR) << "Create " << path << " dir error";
return false;
}
}
return true;
}
} // namespace mindspore
......@@ -59,10 +59,6 @@ class Dump {
uint32_t cur_iter_;
std::vector<std::string> dump_kernels_;
static bool GetRealPath(const std::string &inpath, std::string *outpath);
static bool CreateNotExistDirs(const std::string &path);
private:
bool ParseDumpConfig(const std::string &dump_config_file);
bool IsConfigExist(const nlohmann::json &dumpSettings);
......
......@@ -42,6 +42,7 @@
#include "device/ascend/ascend_memory_manager.h"
#include "debug/tensor_load.h"
using ge::model_runner::ModelRunner;
using mindspore::device::ascend::ProfilingManager;
using mindspore::device::ascend::ProfilingUtils;
using mindspore::device::ascend::tasksink::TaskGenerator;
......@@ -90,9 +91,16 @@ std::string GetRankId() {
AscendKernelRuntime::~AscendKernelRuntime() { graph_model_map_.clear(); }
void AscendKernelRuntime::ClearGraphModelMap() {
#ifdef ENABLE_DATA_DUMP
for (auto &iter : graph_data_dumper_) {
MS_LOG(INFO) << "[DataDump] Unload data dumper:" << iter.first;
iter.second->UnloadDumpInfo();
}
graph_data_dumper_.clear();
#endif
for (auto &iter : graph_model_map_) {
MS_LOG(INFO) << "Ge UnloadModel " << iter.first;
auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter.first);
auto ret = ModelRunner::Instance().UnloadModel(iter.first);
if (!ret) {
MS_LOG(ERROR) << "UnloadModel failed";
}
......@@ -107,7 +115,7 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id) {
return;
}
MS_LOG(DEBUG) << "Ge UnloadModel " << iter->first;
auto ret = ge::model_runner::ModelRunner::Instance().UnloadModel(iter->first);
auto ret = ModelRunner::Instance().UnloadModel(iter->first);
if (!ret) {
MS_LOG(ERROR) << "UnloadModel failed";
}
......@@ -159,6 +167,10 @@ bool AscendKernelRuntime::Init() {
}
#endif
#ifdef ENABLE_DATA_DUMP
DataDumpParser::GetInstance().ParseDumpConfig();
#endif
// Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
......@@ -440,7 +452,7 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
<< ", wait_active_stream_list size:" << wait_active_stream_list.size()
<< ", force_copy_stream_list size:" << force_copy_stream_list.size();
std::vector<std::shared_ptr<ge::model_runner::OpInfo>> empty_list;
std::shared_ptr<ge::model_runner::DavinciModel> model = std::make_shared<ge::model_runner::DavinciModel>(
auto model = std::make_shared<ge::model_runner::DavinciModel>(
task_info_list, empty_list, empty_list, empty_list, empty_list, wait_active_stream_list, force_copy_stream_list, 0,
0, 0, 0, 0, 0, resource_manager.get_cur_stream_num(), label_assign_instance.GetLabelNum(NOT_NULL(graph)),
resource_manager.get_cur_event_num(), 0);
......@@ -477,21 +489,45 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
std::shared_ptr<ge::ModelListener> listener;
MS_LOG(INFO) << "LoadDavinciModel mode_id:" << model_iter->first;
bool status = ge::model_runner::ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first,
model_iter->second, listener);
bool status =
ModelRunner::Instance().LoadDavinciModel(device_id_, 0, model_iter->first, model_iter->second, listener);
if (!status) {
MS_LOG(EXCEPTION) << "Load Task Failed";
}
if (ProfilingManager::GetInstance().IsProfiling()) {
auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first);
auto stream_ids = ge::model_runner::ModelRunner::Instance().GetStreamIdList(model_iter->first);
auto task_ids = ModelRunner::Instance().GetTaskIdList(model_iter->first);
auto stream_ids = ModelRunner::Instance().GetStreamIdList(model_iter->first);
ProfilingUtils::ReportProfilingData(task_ids, stream_ids, NOT_NULL(graph));
}
#ifdef ENABLE_DATA_DUMP
LaunchDataDump(NOT_NULL(graph));
#endif
if (!ModelRunner::Instance().LoadModelComplete(model_iter->first)) {
MS_LOG(ERROR) << "Call ge runtime LoadModelComplete failed";
return false;
}
return true;
}
#ifdef ENABLE_DATA_DUMP
// Builds a DataDumper for `graph`, loads its dump info and keeps the dumper in graph_data_dumper_
// keyed by graph id. Does nothing when data dump is not enabled; an already registered graph id
// only produces a warning.
void AscendKernelRuntime::LaunchDataDump(NotNull<const session::KernelGraph *> graph) {
  const bool dump_on = DataDumpParser::GetInstance().DumpEnabled();
  if (!dump_on) {
    return;
  }
  const auto graph_id = graph->graph_id();
  auto runtime_info_map = ModelRunner::Instance().GetRuntimeInfoMap(graph_id);
  auto data_dumper = std::make_shared<DataDumper>(graph.get(), runtime_info_map);
  MS_EXCEPTION_IF_NULL(data_dumper);
  data_dumper->LoadDumpInfo();

  const bool inserted = graph_data_dumper_.try_emplace(graph_id, data_dumper).second;
  if (!inserted) {
    MS_LOG(WARNING) << "[DataDump] Insert graphId:" << graph_id << " data dumper failed";
  }
}
#endif
void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id);
auto task_ids = ModelRunner::Instance().GetTaskIdList(graph_id);
auto graph_task_names = ProfilingUtils::graph_kernel_name();
auto iter = graph_task_names.find(graph_id);
if (iter != graph_task_names.end()) {
......@@ -524,7 +560,7 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
return false;
}
bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
bool status = ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
if (!status) {
MS_LOG(ERROR) << "Run task failed";
DebugTaskIdName(graph->graph_id());
......
......@@ -24,6 +24,10 @@
#include "framework/ge_runtime/davinci_model.h"
#include "device/kernel_runtime_manager.h"
#include "session/session_basic.h"
#ifdef ENABLE_DATA_DUMP
#include "debug/data_dump_parser.h"
#include "device/ascend/dump/data_dumper.h"
#endif
using ge::model_runner::TaskInfo;
using std::unordered_map;
......@@ -66,6 +70,10 @@ class AscendKernelRuntime : public KernelRuntime {
bool initialized_{false};
unordered_map<GraphId, vector<std::shared_ptr<TaskInfo>>> task_map_;
unordered_map<GraphId, std::shared_ptr<ge::model_runner::DavinciModel>> graph_model_map_;
#ifdef ENABLE_DATA_DUMP
void LaunchDataDump(NotNull<const session::KernelGraph *> graph);
unordered_map<GraphId, std::shared_ptr<DataDumper>> graph_data_dumper_;
#endif
};
MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime);
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef ENABLE_DATA_DUMP
#include "device/ascend/dump/data_dumper.h"
#include <map>
#include <memory>
#include <string>
#include "utility"
#include "session/anf_runtime_algorithm.h"
#include "runtime/mem.h"
#include "runtime/kernel.h"
#include "device/ascend/dump/ge_dump.h"
#include "proto/op_mapping_info.pb.h"
#include "utils/context/ms_context.h"
#include "debug/data_dump_parser.h"
// Values passed to OpMappingInfo::set_flag: the load flag in SetOpMappingInfo, the unload flag in UnloadDumpInfo.
constexpr uint32_t kAicpuLoadFlag = 1;
constexpr uint32_t kAicpuUnloadFlag = 0;
// Positions read with std::get<> from a runtime_info_map_ entry: task id, stream id and kernel args.
constexpr uint32_t kTupleTaskId = 0;
constexpr uint32_t kTupleStreamId = 1;
constexpr uint32_t kTupleArgs = 2;
// Positions of the control tensors inside the kernel graph's input_ctrl_tensors() list.
constexpr uint32_t kCurrentStepTensorIndex = 0;
constexpr uint32_t kCurrentEpochTensorIndex = 1;
constexpr uint32_t kStepsPerEpochTensorIndex = 2;
namespace mindspore {
namespace device {
namespace ascend {
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task);
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr);
// Frees the device buffers that hold the load and the unload dump descriptions, in that order.
DataDumper::~DataDumper() {
  for (void **dev_mem : {&dev_load_mem_, &dev_unload_mem_}) {
    ReleaseDevMem(dev_mem);
  }
}
// Builds the OpMappingInfo describing every kernel of the graph that has to be dumped, uploads it
// through RtLoadDumpData and marks the dumper as loaded. The names of the selected kernels are
// remembered in dump_kernel_names_ for the later unload.
void DataDumper::LoadDumpInfo() {
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  aicpu::dump::OpMappingInfo dump_info;
  SetOpMappingInfo(NOT_NULL(&dump_info));

  auto kernels = kernel_graph_->execution_order();
  for (const auto &kernel : kernels) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (KernelNeedDump(kernel)) {
      const std::string op_name = kernel->fullname_with_scope();
      MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << op_name;
      dump_kernel_names_.emplace_back(op_name);

      aicpu::dump::Task task;
      ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
      MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
      dump_info.mutable_task()->Add(std::move(task));
    }
  }

  RtLoadDumpData(dump_info, &dev_load_mem_);
  load_flag_ = true;
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
}
// Fills the graph-level fields of dump_info: dump path, model name, dump step, model id and the
// load flag. When the graph carries at least three control tensors with valid device pointers,
// their addresses are recorded as well; otherwise only an info message is logged.
// Raises through MS_LOG(EXCEPTION) when no dump path is configured.
void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  MS_EXCEPTION_IF_NULL(kernel_graph_);

  const auto &parser = DataDumpParser::GetInstance();
  auto dump_path = parser.GetDumpPath();
  if (!dump_path.has_value()) {
    MS_LOG(EXCEPTION) << "Dump path invalid";
  }
  // The path handed over is "<dump path>_<device id>/".
  const std::string device_suffix = "_" + std::to_string(context_ptr->device_id()) + "/";
  dump_info->set_dump_path(dump_path.value() + device_suffix);
  MS_LOG(INFO) << "[DataDump] dump_path:" << dump_path.value();

  const auto graph_id = kernel_graph_->graph_id();
  dump_info->set_model_name(parser.net_name() + "_" + std::to_string(graph_id));
  dump_info->set_dump_step(std::to_string(parser.dump_step()));
  dump_info->set_model_id(graph_id);
  dump_info->set_flag(kAicpuLoadFlag);

  const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();
  const bool has_ctrl_tensors = (input_ctrl_tensors != nullptr && input_ctrl_tensors->size() >= 3);
  if (!has_ctrl_tensors) {
    MS_LOG(INFO) << "[DataDump] Not data sink mode, input_ctrl_tensor";
    return;
  }

  const auto &current_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex);
  const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex);
  const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex);
  MS_EXCEPTION_IF_NULL(current_step_tensor);
  MS_EXCEPTION_IF_NULL(currnet_epoch_tensor);
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor);
  MS_EXCEPTION_IF_NULL(current_step_tensor->device_address());
  MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address());
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address());

  void *current_step = current_step_tensor->device_address()->ptr_;
  void *current_epoch = currnet_epoch_tensor->device_address()->ptr_;
  void *steps_per_epoch = steps_per_epoch_tensor->device_address()->ptr_;
  if (current_epoch == nullptr || current_step == nullptr || steps_per_epoch == nullptr) {
    MS_LOG(INFO) << "Invalid ctrl tensor device address";
    return;
  }
  dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch));
  dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step));
  dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch));
}
// Decides whether `kernel` takes part in the data dump.
//
// Only TBE, AICPU and AKG kernels are candidates. For those the decision is delegated to
// DataDumpParser::NeedDump, so dump mode 0 selects every candidate kernel and any other mode
// selects only the kernels listed in the dump config.
// Raises (via MS_EXCEPTION_IF_NULL) when `kernel` is null.
bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
  // The node must be validated before it is handed to AnfAlgo.
  MS_EXCEPTION_IF_NULL(kernel);
  const auto kernel_type = AnfAlgo::GetKernelType(kernel);
  if (kernel_type != TBE_KERNEL && kernel_type != AICPU_KERNEL && kernel_type != AKG_KERNEL) {
    return false;
  }
  // Looking only at kernel_set() would dump nothing in "dump all kernels" mode.
  return DataDumpParser::GetInstance().NeedDump(kernel->fullname_with_scope());
}
// Sends an unload request for every kernel that was registered by LoadDumpInfo.
// Does nothing (apart from a warning) when no load has completed. Raises through
// MS_LOG(EXCEPTION) when a remembered kernel name is missing from runtime_info_map_.
void DataDumper::UnloadDumpInfo() {
  if (!load_flag_) {
    MS_LOG(WARNING) << "Load not success, no need to unload";
    return;
  }
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  const auto graph_id = kernel_graph_->graph_id();
  MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id;

  aicpu::dump::OpMappingInfo op_mapping_info;
  op_mapping_info.set_model_id(graph_id);
  op_mapping_info.set_flag(kAicpuUnloadFlag);

  for (const auto &kernel_name : dump_kernel_names_) {
    auto iter = runtime_info_map_.find(kernel_name);
    if (iter == runtime_info_map_.end()) {
      MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
    }
    MS_EXCEPTION_IF_NULL(iter->second);
    // Only the task id is filled in for an unload request.
    aicpu::dump::Task task;
    task.set_task_id(std::get<kTupleTaskId>(*iter->second));
    MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task());
    op_mapping_info.mutable_task()->Add(std::move(task));
  }

  RtLoadDumpData(op_mapping_info, &dev_unload_mem_);
}
void DataDumper::ReleaseDevMem(void **ptr) const {
if (ptr == nullptr) {
return;
}
if (*ptr != nullptr) {
rtError_t rt_error = rtFree(*ptr);
if (rt_error != RT_ERROR_NONE) {
MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error;
}
*ptr = nullptr;
}
}
// Fills dump_task for one kernel: task id and stream id taken from runtime_info_map_, the op name
// and type, and the kernel's outputs followed by its inputs.
// Raises through MS_LOG(EXCEPTION) when the kernel has no entry in runtime_info_map_.
void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const {
  dump_task->set_end_graph(false);

  const std::string op_name = kernel->fullname_with_scope();
  auto iter = runtime_info_map_.find(op_name);
  if (iter == runtime_info_map_.end()) {
    MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
  }
  MS_EXCEPTION_IF_NULL(iter->second);
  const auto &runtime_info = *iter->second;
  auto task_id = std::get<kTupleTaskId>(runtime_info);
  auto stream_id = std::get<kTupleStreamId>(runtime_info);
  auto args = std::get<kTupleArgs>(runtime_info);
  MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id;

  dump_task->set_task_id(task_id);
  dump_task->set_stream_id(stream_id);
  MS_EXCEPTION_IF_NULL(dump_task->mutable_op());
  dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope());
  dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get()));

  DumpKernelOutput(kernel, args, dump_task);
  DumpKernelInput(kernel, args, dump_task);
}
// Serializes dump_info, copies it into a freshly allocated device buffer and registers it with
// the runtime through rtDatadumpInfoLoad.
//
// dump_info: description of the kernels to load or unload.
// ptr:       receives the device buffer allocated by rtMalloc; the caller owns it afterwards.
//            A null `ptr` is rejected with an error log before any runtime call is made.
//
// Raises through MS_LOG(EXCEPTION) when serialization yields nothing or any runtime call fails.
void RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
  // The output slot must be checked before rtMalloc writes through it.
  if (ptr == nullptr) {
    MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr";
    return;
  }

  std::string proto_str;
  size_t proto_size = dump_info.ByteSizeLong();
  bool ret = dump_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size " << proto_size << ".";
  }

  rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed";
  }
  // A reported success with no buffer cannot be copied into; fail like the other runtime errors.
  if (*ptr == nullptr) {
    MS_LOG(EXCEPTION) << "[DataDump] rtMalloc failed, ptr is nullptr";
  }

  rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed";
  }

  MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";
  rt_ret = rtDatadumpInfoLoad(*ptr, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed";
  }
}
void DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
uint64_t offset = sizeof(void *) * input_size;
for (size_t i = 0; i < output_size; ++i) {
auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
aicpu::dump::Output output;
output.set_data_type(GetGeDataType(data_type));
output.set_format(GetGeFormat(output_format, output_shape.size()));
MS_EXCEPTION_IF_NULL(output.mutable_shape());
for (auto dim : output_shape) {
output.mutable_shape()->add_dim(dim);
}
output.set_original_output_format(GetGeFormat(output_format, output_shape.size()));
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
MS_EXCEPTION_IF_NULL(task->mutable_output());
task->mutable_output()->Add(std::move(output));
offset += sizeof(void *);
}
}
void DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
auto input_size = AnfAlgo::GetInputTensorNum(kernel);
uint64_t offset = 0;
for (size_t i = 0; i < input_size; ++i) {
aicpu::dump::Input input;
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
auto input_node = input_node_with_index.first;
auto input_index = input_node_with_index.second;
std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
if (output_type == kTypeUnknown) {
MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph";
output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index);
}
auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
input.set_data_type(GetGeDataType(output_type));
input.set_format(GetGeFormat(output_format, output_shape.size()));
MS_EXCEPTION_IF_NULL(input.mutable_shape());
for (auto dim : output_shape) {
input.mutable_shape()->add_dim(dim);
}
input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
MS_EXCEPTION_IF_NULL(task->mutable_input());
task->mutable_input()->Add(std::move(input));
offset += sizeof(void *);
}
}
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
#ifdef ENABLE_DATA_DUMP
#include <tuple>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "session/kernel_graph.h"
namespace aicpu {
namespace dump {
class OpMappingInfo;
class Task;
} // namespace dump
} // namespace aicpu
namespace mindspore {
namespace device {
namespace ascend {
// tuple(task_id, stream_id, args)
using RuntimeInfo = std::tuple<uint32_t, uint32_t, void *>;
// Builds the dump description for the kernels of one kernel graph and hands it to the
// device runtime (LoadDumpInfo / UnloadDumpInfo).
//
// The object keeps raw device-buffer pointers (dev_load_mem_, dev_unload_mem_) and declares
// a destructor, so it is made non-copyable: a copy would share those raw pointers with the
// original. NOTE(review): the destructor body is not visible here; it presumably releases
// both buffers through ReleaseDevMem — confirm.
class DataDumper {
 public:
  // kernel_graph:     graph whose kernels are dumped; stored as a raw pointer.
  //                   NOTE(review): presumably non-owning, so the graph must outlive this
  //                   object — confirm against the caller.
  // runtime_info_map: per-kernel runtime info keyed by kernel name; copied into this object.
  DataDumper(const session::KernelGraph *kernel_graph,
             const std::map<std::string, std::shared_ptr<RuntimeInfo>> &runtime_info_map)
      : load_flag_(false),
        dev_load_mem_(nullptr),
        dev_unload_mem_(nullptr),
        kernel_graph_(kernel_graph),
        runtime_info_map_(runtime_info_map) {}
  ~DataDumper();

  // Non-copyable: see the class comment.
  DataDumper(const DataDumper &) = delete;
  DataDumper &operator=(const DataDumper &) = delete;

  void LoadDumpInfo();
  void UnloadDumpInfo();

 private:
  // Frees *ptr with rtFree (if set) and resets it to nullptr.
  void ReleaseDevMem(void **ptr) const;
  bool KernelNeedDump(const CNodePtr &kernel) const;
  void SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const;
  // Fills `dump_task` with ids, op info and input/output descriptors for `kernel`.
  void ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const;

  bool load_flag_;
  void *dev_load_mem_;    // device buffer slot; NOTE(review): presumably filled by LoadDumpInfo — confirm
  void *dev_unload_mem_;  // device buffer slot filled when the unload description is sent
  std::vector<std::string> dump_kernel_names_;  // names of the kernels included in the dump
  const session::KernelGraph *kernel_graph_;
  std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map_;
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_DATADUMP_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
#include <map>
#include <string>
#include "proto/ge_dtype.pb.h"
#include "ir/dtype/type_id.h"
#include "utils/utils.h"
namespace mindspore {
namespace device {
namespace ascend {
// Translates a MindSpore TypeId into the matching ge::proto::DataType value.
// Logs the incoming id at INFO level; raises (MS_LOG(EXCEPTION)) for ids without a mapping.
static ge::proto::DataType GetGeDataType(TypeId type_id) {
  static const std::map<TypeId, ge::proto::DataType> kTypeIdToGeType = {
    {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED},
    {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT},
    {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16},
    {TypeId::kNumberTypeInt8, ge::proto::DT_INT8},
    {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8},
    {TypeId::kNumberTypeInt16, ge::proto::DT_INT16},
    {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16},
    {TypeId::kNumberTypeInt32, ge::proto::DT_INT32},
    {TypeId::kNumberTypeInt64, ge::proto::DT_INT64},
    {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32},
    {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64},
    {TypeId::kNumberTypeBool, ge::proto::DT_BOOL},
    {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE},
  };
  MS_LOG(INFO) << "Vm origin type_id:" << type_id;
  const auto found = kTypeIdToGeType.find(type_id);
  if (found == kTypeIdToGeType.end()) {
    MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
  }
  return found->second;
}
// Numeric tensor-format codes returned by GetGeFormat and written into the `format`
// fields of the dump descriptors.
// Enumerators count up implicitly from 0; the explicit `= 20` and `= 24` anchors equal
// the values the implicit count would give at those positions.
// NOTE(review): presumably mirrors the format enum of the GE runtime, in which case the
// order and values must not be changed — confirm.
enum GeFormat {
kFormat_NCHW = 0, // NCHW
kFormat_NHWC, // NHWC
kFormat_ND, // Nd Tensor
kFormat_NC1HWC0, // NC1HWC0
kFormat_FRACTAL_Z, // FRACTAL_Z
kFormat_NC1C0HWPAD,
kFormat_NHWC1C0,
kFormat_FSR_NCHW,
kFormat_FRACTAL_DECONV,
kFormat_C1HWNC0,
kFormat_FRACTAL_DECONV_TRANSPOSE,
kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS,
kFormat_NC1HWC0_C04, // NC1HWC0 with C0 = 4
kFormat_FRACTAL_Z_C04, // FRACTAL_Z with C0 = 4
kFormat_CHWN,
kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS,
kFormat_HWCN,
kFormat_NC1KHKWHWC0, // KH, KW: kernel height and kernel width; max-pooling max output format
kFormat_BN_WEIGHT,
kFormat_FILTER_HWCK, // filter input tensor format
kFormat_HASHTABLE_LOOKUP_LOOKUPS = 20,
kFormat_HASHTABLE_LOOKUP_KEYS,
kFormat_HASHTABLE_LOOKUP_VALUE,
kFormat_HASHTABLE_LOOKUP_OUTPUT,
kFormat_HASHTABLE_LOOKUP_HITS = 24,
kFormat_C1HWNCoC0,
kFormat_MD,
kFormat_NDHWC,
kFormat_FRACTAL_ZZ,
kFormat_FRACTAL_NZ,
kFormat_NCDHW,
kFormat_DHWCN, // 3D filter input tensor format
kFormat_NDC1HWC0,
kFormat_FRACTAL_Z_3D,
kFormat_CN,
kFormat_NC,
kFormat_DHWNC,
kFormat_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format
kFormat_RESERVED,
kFormat_ALL
};
// Translates a MindSpore op-format string into a GeFormat code.
// The default format depends on rank: a shape of size 4 maps to NCHW, anything else to ND.
// Every other format is looked up in a fixed table; an unknown format raises
// (MS_LOG(EXCEPTION)). The request is logged at INFO level.
static GeFormat GetGeFormat(const std::string &format, size_t shape_size) {
  static const std::map<std::string, GeFormat> kFormatTable = {
    // The default-format entry is never reached through the lookup below, because the
    // default format is resolved by the rank check first.
    {kOpFormat_DEFAULT, kFormat_NCHW},
    {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0},
    {kOpFormat_ND, kFormat_ND},
    {kOpFormat_NCHW, kFormat_NCHW},
    {kOpFormat_NHWC, kFormat_NHWC},
    {kOpFormat_HWCN, kFormat_HWCN},
    {kOpFormat_NC1HWC0, kFormat_NC1HWC0},
    {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z},
    {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ},
    {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0},
    {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04},
    {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04},
    {kOpFormat_NDHWC, kFormat_NDHWC},
  };
  MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
  if (format == kOpFormat_DEFAULT) {
    if (shape_size == 4) {
      return kFormat_NCHW;
    }
    return kFormat_ND;
  }
  const auto found = kFormatTable.find(format);
  if (found == kFormatTable.end()) {
    MS_LOG(EXCEPTION) << "Invalid format:" << format;
  }
  return found->second;
}
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_DUMP_GE_DUMP_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package ge.proto;
// Tensor element data-type codes.
// The C++ helper GetGeDataType maps MindSpore TypeId values onto a subset of these
// (DT_UNDEFINED through DT_DOUBLE, excluding DT_STRING and later entries).
enum DataType
{
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
DT_FLOAT = 1; // float type
DT_FLOAT16 = 2; // fp16 type
DT_INT8 = 3; // int8 type
DT_UINT8 = 4; // uint8 type
DT_INT16 = 5; // int16 type
DT_UINT16 = 6; // uint16 type
DT_INT32 = 7; // int32 type
DT_INT64 = 8; // int64 type
DT_UINT32 = 9; // unsigned int32
DT_UINT64 = 10; // unsigned int64
DT_BOOL = 11; // bool type
DT_DOUBLE = 12; // double type
DT_STRING = 13; // string type
DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */
DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */
DT_COMPLEX64 = 16; // complex64 type
DT_COMPLEX128 = 17; // complex128 type
DT_QINT8 = 18; // qint8 type
DT_QINT16 = 19; // qint16 type
DT_QINT32 = 20; // qint32 type
DT_QUINT8 = 21; // quint8 type
DT_QUINT16 = 22; // quint16 type
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
}
\ No newline at end of file
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package aicpu.dump;
// Tensor shape as an ordered list of dimension sizes.
// The C++ dump code appends one `dim` per entry of the tensor's device shape.
message Shape {
repeated uint64 dim = 1;
}
// Descriptor of one kernel output tensor to dump.
// DumpKernelOutput fills data_type, format, shape, original_output_format and address;
// the remaining fields are not set by that function.
message Output {
int32 data_type = 1; // ge.proto.DataType value (from GetGeDataType)
int32 format = 2; // GeFormat value (from GetGeFormat)
Shape shape = 3; // device shape of the output
uint64 address = 4; // kernel args base plus a pointer-sized slot offset for this output
string original_name = 5;
int32 original_output_index = 6;
int32 original_output_data_type = 7;
int32 original_output_format = 8; // set to the same GeFormat value as `format`
uint64 size = 9;
};
// Descriptor of one kernel input tensor to dump.
// DumpKernelInput fills data_type, format, shape and address; `size` is not set there.
message Input {
int32 data_type = 1; // ge.proto.DataType value (from GetGeDataType)
int32 format = 2; // GeFormat value (from GetGeFormat)
Shape shape = 3; // device shape of the producing node's output
uint64 address = 4; // kernel args base plus a pointer-sized slot offset for this input
uint64 size = 5;
}
// Identifies the operator a dump task belongs to.
message Op {
string op_name = 1; // set to the kernel's full name with scope
string op_type = 2; // set to the kernel's CNode name
};
// One dump task: the runtime task/stream of a kernel plus the tensors to dump for it.
message Task {
uint32 task_id = 1; // task id taken from the kernel's runtime info
uint32 stream_id = 2; // stream id taken from the kernel's runtime info
Op op = 3;
repeated Output output = 4; // one entry per kernel output
bool end_graph = 5; // ConstructDumpTask sets this to false
repeated Input input = 6; // one entry per kernel input
};
// Top-level message handed to the device runtime: which tasks to dump (or stop dumping)
// for one model. Several scalar fields are wrapped in single-member `oneof`s.
// NOTE(review): presumably so that "unset" can be told apart from a zero value — confirm.
message OpMappingInfo {
string dump_path = 1;
oneof model_name_param {
string model_name = 2;
}
oneof model_id_param {
uint32 model_id = 3; // the C++ dump code sets this to the kernel graph id
}
oneof step_id {
uint64 step_id_addr = 4;
}
oneof iterations_per_loop {
uint64 iterations_per_loop_addr = 5;
}
oneof loop_cond {
uint64 loop_cond_addr = 6;
}
uint32 flag = 7; // 0x01 load, 0x00 unload
repeated Task task = 8; // one entry per kernel to dump
string dump_step = 9;
};
......@@ -127,6 +127,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
AddressPtrList kernel_outputs;
auto kernel_mod = AnfAlgo::GetKernelMod(anf_node_ptr);
MS_EXCEPTION_IF_NULL(kernel_mod);
kernel_mod->set_kernel_name(anf_node_ptr->fullname_with_scope());
if (AnfAlgo::GetCNodeName(anf_node_ptr) != kAtomicAddrCleanOpName) {
for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(anf_node_ptr); ++i) {
auto real_input_index = AnfAlgo::GetRealInputIndex(anf_node_ptr, i);
......
......@@ -34,6 +34,7 @@ class CPUKernelRuntime;
namespace ascend {
class AscendKernelRuntime;
class AscendMemoryManager;
class DataDumper;
namespace tasksink {
class TaskGenerator;
} // namespace tasksink
......@@ -90,6 +91,7 @@ class DeviceAddress {
friend class mindspore::device::gpu::GPUMemoryManager;
friend class mindspore::device::ascend::AscendKernelRuntime;
friend class mindspore::device::ascend::AscendMemoryManager;
friend class mindspore::device::ascend::DataDumper;
};
using DeviceAddressPtr = std::shared_ptr<DeviceAddress>;
......
......@@ -34,6 +34,7 @@
#include "device/ascend/kernel_select_ascend.h"
#include "runtime/base.h"
#include "device/ascend/ascend_stream_assign.h"
namespace mindspore {
namespace device {
using device::ascend::ProfilingUtils;
......@@ -117,6 +118,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
std::vector<AnfNodePtr> *mute_inputs = kernel_graph_ptr->MutableInputs();
MS_EXCEPTION_IF_NULL(mute_inputs);
mute_inputs->push_back(switch_loop_input[kLoopCountParamName]);
mute_inputs->push_back(switch_loop_input[kEpochParamName]);
mute_inputs->push_back(switch_loop_input[kIterLoopParamName]);
mute_inputs->push_back(switch_loop_input[kZeroParamName]);
mute_inputs->push_back(switch_loop_input[kOneParamName]);
......@@ -316,6 +318,13 @@ void KernelAdjust::CreateSwitchOpParameters(const std::shared_ptr<session::Kerne
one->set_abstract(paremeter_abstract_ptr);
ParameterPtr one_new = kernel_graph_ptr->NewParameter(one);
(*switch_loop_input)[kOneParamName] = one_new;
ParameterPtr epoch = std::make_shared<Parameter>(kernel_graph_ptr);
MS_EXCEPTION_IF_NULL(epoch);
epoch->set_name(kEpochParamName);
epoch->set_abstract(paremeter_abstract_ptr);
ParameterPtr epoch_new = kernel_graph_ptr->NewParameter(epoch);
(*switch_loop_input)[kEpochParamName] = epoch_new;
}
kernel::KernelBuildInfo::KernelBuildInfoBuilder KernelAdjust::CreateMngKernelBuilder(
......@@ -510,6 +519,14 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
*val = 0;
inputs->push_back(loop_count_tensor);
// Epoch in device
tensor::TensorPtr epoch_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
MS_EXCEPTION_IF_NULL(epoch_tensor);
val = static_cast<int32_t *>(epoch_tensor->data_c());
MS_EXCEPTION_IF_NULL(val);
*val = 0;
inputs->push_back(epoch_tensor);
tensor::TensorPtr iter_loop_tensor = std::make_shared<tensor::Tensor>(kInt32->type_id(), shp);
MS_EXCEPTION_IF_NULL(iter_loop_tensor);
val = static_cast<int32_t *>(iter_loop_tensor->data_c());
......@@ -531,6 +548,7 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
MS_EXCEPTION_IF_NULL(val);
*val = 1;
inputs->push_back(one_tensor);
MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
}
......
......@@ -37,6 +37,7 @@ constexpr auto kLoopCountParamName = "loop_count";
constexpr auto kIterLoopParamName = "iter_loop";
constexpr auto kZeroParamName = "zero";
constexpr auto kOneParamName = "one";
constexpr auto kEpochParamName = "loop_epoch";
constexpr auto kStreamNeedActivedFirst = "stream_need_active_first";
constexpr uint32_t kSecondStreamSwitchLabel = 2;
......
......@@ -26,6 +26,7 @@
#include "kernel/aicpu/aicpu_kernel_build.h"
#include "utils/convert_utils.h"
#include "kernel/aicpu/aicpu_util.h"
#include "utils/context/ms_context.h"
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
......@@ -144,8 +145,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs);
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};
......
......@@ -26,6 +26,7 @@
#include "runtime/rt.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"
#include "utils/context/ms_context.h"
namespace mindspore {
namespace kernel {
......@@ -123,8 +124,8 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
MS_LOG(DEBUG) << "The block_dim is:" << block_dim;
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data, input_data_addrs,
output_data_addrs, workspace_addrs);
kernel_name_, stream_id, stub_func, block_dim, args, args_size, sm_desc, binary, binary_size, meta_data,
input_data_addrs, output_data_addrs, workspace_addrs, NeedDump());
return {task_info_ptr};
}
} // namespace kernel
......
......@@ -21,6 +21,9 @@
#include <memory>
#include "framework/ge_runtime/task_info.h"
#include "kernel/kernel.h"
#ifdef ENABLE_DATA_DUMP
#include "debug/data_dump_parser.h"
#endif
using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
namespace mindspore {
......@@ -31,6 +34,13 @@ class AscendKernelMod : public KernelMod {
const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; }
// Reports whether this kernel's data should be dumped.
// When ENABLE_DATA_DUMP is defined, the decision is delegated to DataDumpParser using
// kernel_name_; otherwise the feature is compiled out and the result is always false.
virtual bool NeedDump() {
#ifdef ENABLE_DATA_DUMP
return DataDumpParser::GetInstance().NeedDump(kernel_name_);
#else
return false;
#endif
}
protected:
uint32_t block_dim_{1};
......
......@@ -18,6 +18,7 @@
#include "device/ascend/tasksink/runtime_utils.h"
#include "session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "utils/context/ms_context.h"
using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
using ge::model_runner::HcclTaskInfo;
......@@ -146,10 +147,12 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
<< ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_)
<< ", data_type=" << static_cast<int>(data_type);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
HcclTaskInfoPtr task_info_ptr = std::make_shared<HcclTaskInfo>(
stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0, private_def, nullptr,
hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel, RuntimeUtils::HcomUnbindModel,
RuntimeUtils::HcomDistribute);
kernel_name_, stream_id, hccl_type, input_data_addr, output_data_addr, workspace_address, workspace_num, 0,
private_def, nullptr, hccl_count_, root_id_, op_type_, data_type, group_, RuntimeUtils::HcomBindModel,
RuntimeUtils::HcomUnbindModel, RuntimeUtils::HcomDistribute, NeedDump());
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -129,6 +129,10 @@ class KernelMod {
virtual std::vector<size_t> GenParameters() { return {}; }
virtual ~KernelMod() = default;
// Stores the kernel's name in kernel_name_.
void set_kernel_name(const std::string &kernel_name) { kernel_name_ = kernel_name; }
protected:
std::string kernel_name_;
};
using KernelModPtr = std::shared_ptr<KernelMod>;
} // namespace kernel
......
......@@ -58,8 +58,9 @@ std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &in
}
stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr,
inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE, false);
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -55,7 +55,8 @@ std::vector<TaskInfoPtr> LabelGotoKernel::GenTask(const std::vector<AddressPtr>
const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "LabelGotoKernel GenTask label:" << label_ << ", stream id:" << stream_id;
std::vector<TaskInfoPtr> task_info_list;
std::shared_ptr<LabelGotoTaskInfo> task_info_ptr = std::make_shared<LabelGotoTaskInfo>(stream_id, label_);
std::shared_ptr<LabelGotoTaskInfo> task_info_ptr =
std::make_shared<LabelGotoTaskInfo>(kernel_name_, stream_id, label_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
task_info_list.emplace_back(task_info_ptr);
return task_info_list;
......
......@@ -55,7 +55,7 @@ std::vector<TaskInfoPtr> LabelSetKernel::GenTask(const std::vector<AddressPtr> &
const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "LabelSetKernel GenTask label:" << label_ << ", stream id:" << stream_id;
std::vector<TaskInfoPtr> task_info_list;
std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(stream_id, label_);
std::shared_ptr<LabelSetTaskInfo> task_info_ptr = std::make_shared<LabelSetTaskInfo>(kernel_name_, stream_id, label_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
task_info_list.emplace_back(task_info_ptr);
return task_info_list;
......
......@@ -67,7 +67,7 @@ std::vector<TaskInfoPtr> LabelSwitchKernel::GenTask(const std::vector<AddressPtr
MS_LOG(INFO) << "LabelSwitchKernel GenTask label size:" << label_size_ << ", stream id:" << stream_id;
std::vector<TaskInfoPtr> task_info_list;
cond_ = inputs[0]->addr;
auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(stream_id, label_size_, label_list_, cond_);
auto task_info_ptr = std::make_shared<LabelSwitchTaskInfo>(kernel_name_, stream_id, label_size_, label_list_, cond_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
task_info_list.emplace_back(task_info_ptr);
return task_info_list;
......
......@@ -23,6 +23,7 @@
#include "common/utils.h"
#include "session/anf_runtime_algorithm.h"
#include "common/trans.h"
#include "utils/context/ms_context.h"
using ge::model_runner::MemcpyAsyncTaskInfo;
using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
......@@ -118,8 +119,9 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
}
stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr =
std::make_shared<MemcpyAsyncTaskInfo>(kernel_name_, stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr,
inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE, NeedDump());
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -63,7 +63,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt
<< ", outputs size:" << outputs.size();
stream_id_ = stream_id;
std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr =
std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_);
std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_);
return {task_info_ptr};
}
} // namespace kernel
......
......@@ -60,7 +60,7 @@ std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, co
const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id;
stream_id_ = stream_id;
EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_);
EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(kernel_name_, stream_id, event_id_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -57,7 +57,7 @@ std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, co
const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id;
stream_id_ = stream_id;
EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_);
EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(kernel_name_, stream_id, event_id_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -72,7 +72,8 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt
stream_id_ = stream_id;
std::vector<TaskInfoPtr> task_info_list;
for (auto &index : active_streams_index_) {
std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index);
std::shared_ptr<StreamActiveTaskInfo> task_info_ptr =
std::make_shared<StreamActiveTaskInfo>(kernel_name_, stream_id, index);
MS_EXCEPTION_IF_NULL(task_info_ptr);
task_info_list.emplace_back(task_info_ptr);
MS_LOG(INFO) << "StreamActiveKernel GenTask: streamId:" << stream_id << ", Active streamId:" << index;
......
......@@ -91,8 +91,8 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt
auto ites_per_loop = inputs[1]->addr;
MS_LOG(INFO) << "cond_:" << static_cast<int>(cond_) << ", true_stream_index_:" << true_stream_index_
<< ", stream_id:" << stream_id;
std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr =
std::make_shared<StreamSwitchTaskInfo>(stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
std::shared_ptr<StreamSwitchTaskInfo> task_info_ptr = std::make_shared<StreamSwitchTaskInfo>(
kernel_name_, stream_id, true_stream_index_, loop_cnt, ites_per_loop, cond_, data_type_);
MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr};
}
......
......@@ -17,7 +17,7 @@
#include "kernel/tbe/tbe_kernel_mod.h"
#include <algorithm>
#include "runtime/rt.h"
#include "nlohmann/json.hpp"
#include "utils/context/ms_context.h"
#include "graphengine/inc/framework/ge_runtime/task_info.h"
namespace mindspore {
......@@ -99,9 +99,9 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in
MS_LOG(INFO) << "block_dim is:" << block_dim_;
TbeTaskInfoPtr task_info_ptr =
make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0,
meta_data, input_data_addrs, output_data_addrs, workspace_addrs);
TbeTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::TbeTaskInfo>(
kernel_name_, stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0, meta_data, input_data_addrs,
output_data_addrs, workspace_addrs, NeedDump());
return {task_info_ptr};
}
......
......@@ -36,7 +36,7 @@ namespace session {
using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
class KernelGraph : public FuncGraph {
public:
KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false) {
KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) {
inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
execution_order_ = {};
executable_ = true;
......@@ -154,6 +154,8 @@ class KernelGraph : public FuncGraph {
AnfNodePtr GetFrontNodeByInternalOutput(const AnfNodePtr &node) const;
void AddFinalOutputKernel(const AnfNodePtr &node);
bool IsFinalOutputKernel(const AnfNodePtr &node) const;
// Returns the epoch counter stored on this graph (initialized to 0 by the constructor).
uint32_t current_epoch() const { return current_epoch_; }
// Overwrites the epoch counter stored on this graph.
void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }
private:
// remove value node form graph
......@@ -216,6 +218,7 @@ class KernelGraph : public FuncGraph {
std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
std::unordered_map<AnfNodePtr, AnfNodePtr> internal_outputs_to_front_map_;
std::set<AnfNodePtr> final_output_kernels_;
uint32_t current_epoch_;
};
} // namespace session
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
......
......@@ -187,6 +187,18 @@ size_t LoadCtrlInputTensor(const std::shared_ptr<KernelGraph> &graph, std::vecto
// set loop_count to zero
MS_EXCEPTION_IF_NULL(inputs);
inputs->push_back(tensor);
auto epoch_tensor = (*inputs_params)[1];
MS_EXCEPTION_IF_NULL(epoch_tensor);
auto *epoch_val = static_cast<int32_t *>(epoch_tensor->data_c());
MS_EXCEPTION_IF_NULL(epoch_val);
*epoch_val = graph->current_epoch();
epoch_tensor->set_dirty(true);
inputs->push_back(epoch_tensor);
MS_LOG(INFO) << "Load epoch_val:" << *epoch_val;
graph->set_current_epoch(graph->current_epoch() + 1);
return inputs_params->size();
}
......@@ -814,13 +826,13 @@ void SessionBasic::AddParameterToGraphInputs(const std::vector<AnfNodePtr> &para
void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const {
std::vector<tensor::TensorPtr> inputs(inputs_const);
size_t input_ctrl_size = 1;
size_t input_ctrl_size = 2;
MS_EXCEPTION_IF_NULL(kernel_graph);
if (kernel_graph->input_ctrl_tensors()) {
input_ctrl_size = LoadCtrlInputTensor(kernel_graph, &inputs);
}
auto input_nodes = kernel_graph->inputs();
if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) {
if ((inputs.size() + input_ctrl_size) - 2 != input_nodes.size()) {
MS_LOG(EXCEPTION) << "Tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
<< ", input_ctrl_size:" << input_ctrl_size;
}
......
......@@ -32,6 +32,8 @@ bool ModelRunner::LoadDavinciModel(uint32_t device_id, uint64_t session_id, uint
// Stub: ignores model_id and always reports success.
bool ModelRunner::UnloadModel(uint32_t model_id) { return true; }
// Stub: ignores model_id and always reports success.
bool ModelRunner::LoadModelComplete(uint32_t model_id) { return true; }
// Stub: ignores all arguments, leaves output_data untouched and always reports success.
bool ModelRunner::RunModel(uint32_t model_id, const ge::InputData &input_data, ge::OutputData *output_data) {
return true;
}
......@@ -45,6 +47,11 @@ const std::vector<uint32_t> &ModelRunner::GetStreamIdList(uint32_t model_id) con
static std::vector<uint32_t> stream_id_list;
return stream_id_list;
}
// Stub: ignores model_id and returns a reference to a function-local static map that this
// function never populates.
const std::map<std::string, std::shared_ptr<RuntimeInfo>> &ModelRunner::GetRuntimeInfoMap(uint32_t model_id) const {
static std::map<std::string, std::shared_ptr<RuntimeInfo>> runtime_info_map;
return runtime_info_map;
}
} // namespace model_runner
} // namespace ge
......
......@@ -15,7 +15,6 @@
*/
#include "device/ascend/ascend_stream_assign.h"
#include "device/ascend/ascend_label_assign.h"
#include "device/ascend/tasksink/task_generator.h"
#include "device/kernel_adjust.h"
namespace mindspore {
......@@ -31,13 +30,6 @@ void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr)
// Stub: does nothing; wait_active_stream_list is left unmodified.
void AscendStreamAssign::GetWaitStreams(vector<uint32_t> *wait_active_stream_list) { return; }
// Stub: does nothing; streams is left unmodified.
void AscendStreamAssign::GetHcomStreams(std::vector<uint32_t> *streams) { return; }
namespace tasksink {
// Stub: generates no tasks, leaves task_info_list untouched and always reports success.
bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
uint32_t graph_id) {
return true;
}
} // namespace tasksink
} // namespace ascend
// Stub: does nothing; the kernel graph is left unmodified.
void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
// Stub: loads nothing and always reports success.
bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return true; }
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/ascend/tasksink/task_generator.h"
namespace mindspore {
namespace device {
namespace ascend {
namespace tasksink {
// Stub: generates no tasks, leaves task_info_list untouched and always reports success.
bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *const task_info_list,
uint32_t graph_id) {
return true;
}
} // namespace tasksink
} // namespace ascend
} // namespace device
} // namespace mindspore
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册