提交 4fbde056 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!772 save task and graph description to framework file

Merge pull request !772 from caifubi/feature-profiling-report-data
...@@ -337,8 +337,8 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) { ...@@ -337,8 +337,8 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
return false; return false;
} }
if (ProfilingManager::GetInstance().IsProfiling()) { if (ProfilingManager::GetInstance().IsProfiling()) {
std::vector<uint32_t> task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first); auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(model_iter->first);
ProfilingUtils::ReportProfilingData(graph->graph_id(), task_ids); ProfilingUtils::ReportProfilingData(task_ids, NOT_NULL(graph));
} }
return true; return true;
} }
......
...@@ -15,12 +15,15 @@ ...@@ -15,12 +15,15 @@
*/ */
#include <map> #include <map>
#include "device/ascend/profiling/reporter/graph_desc_reporter.h"
#include "device/ascend/profiling/profiling_utils.h" #include "device/ascend/profiling/profiling_utils.h"
#include "kernel/kernel.h" #include "kernel/kernel.h"
#include "device/ascend/profiling/profiling_manager.h" #include "device/ascend/profiling/profiling_manager.h"
#include "session/anf_runtime_algorithm.h" #include "session/anf_runtime_algorithm.h"
#include "common/utils.h" #include "common/utils.h"
#include "utils/utils.h" #include "utils/utils.h"
#include "device/ascend/profiling/reporter/task_desc_reporter.h"
#include "utils/context/ms_context.h"
namespace mindspore { namespace mindspore {
namespace device { namespace device {
...@@ -30,6 +33,7 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_"; ...@@ -30,6 +33,7 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
constexpr char kFpStartNode[] = "PROFILING_FP_START"; constexpr char kFpStartNode[] = "PROFILING_FP_START";
constexpr char kBpEndNode[] = "PROFILING_BP_END"; constexpr char kBpEndNode[] = "PROFILING_BP_END";
constexpr char kIterEndNode[] = "PROFILING_ITER_END"; constexpr char kIterEndNode[] = "PROFILING_ITER_END";
std::unordered_map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_; std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
uint32_t ProfilingUtils::custom_node_index_ = 1; uint32_t ProfilingUtils::custom_node_index_ = 1;
...@@ -77,7 +81,7 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exe ...@@ -77,7 +81,7 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exe
return std::string(trace_begin); return std::string(trace_begin);
} }
std::string fp_start_str = ""; std::string fp_start_str;
std::set<std::string> getnext_outputs; std::set<std::string> getnext_outputs;
GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs)); GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs));
if (getnext_outputs.empty()) { if (getnext_outputs.empty()) {
...@@ -97,8 +101,8 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exe ...@@ -97,8 +101,8 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exe
void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order, void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
NotNull<std::set<std::string> *> getnext_outputs) { NotNull<std::set<std::string> *> getnext_outputs) {
for (auto cnode : cnode_exec_order) { for (const auto &cnode : cnode_exec_order) {
for (auto input : cnode->inputs()) { for (const auto &input : cnode->inputs()) {
auto prev_cnode = AnfAlgo::VisitKernel(input, 0); auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
if (!prev_cnode.first->isa<CNode>()) { if (!prev_cnode.first->isa<CNode>()) {
continue; continue;
...@@ -120,7 +124,7 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe ...@@ -120,7 +124,7 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe
if (trace_bp_end != nullptr) { if (trace_bp_end != nullptr) {
return std::string(trace_bp_end); return std::string(trace_bp_end);
} }
std::string bp_end_str = ""; std::string bp_end_str;
// Contain hccl kernel // Contain hccl kernel
auto iter = cnode_exec_order.rbegin(); auto iter = cnode_exec_order.rbegin();
while (iter != cnode_exec_order.rend()) { while (iter != cnode_exec_order.rend()) {
...@@ -154,7 +158,7 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe ...@@ -154,7 +158,7 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe
} }
std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) { std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
std::string last_tbe_kernel_name = ""; std::string last_tbe_kernel_name;
// find last tbe_kernel // find last tbe_kernel
for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) { for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) {
if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) { if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) {
...@@ -276,40 +280,51 @@ void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const Profili ...@@ -276,40 +280,51 @@ void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const Profili
} }
void ProfilingUtils::SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names) { void ProfilingUtils::SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names) {
auto iter = graph_kernel_name_.find(graph_id); auto ret = graph_kernel_name_.try_emplace(graph_id, kernel_names);
if (iter == graph_kernel_name_.end()) { if (!ret.second) {
graph_kernel_name_[graph_id] = kernel_names; MS_LOG(ERROR) << "[profiling]graph " << graph_id << " kernel names already exist";
} else {
MS_LOG(ERROR) << "[profiling]graph kernel names already exist";
} }
} }
void ProfilingUtils::ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids) { void ProfilingUtils::SetGraphProfilingCNode(uint32_t graph_id, const std::vector<CNodePtr> &profiling_cnode_list) {
auto iter = graph_kernel_name_.find(graph_id); auto ret = graph_profiling_cnode_.try_emplace(graph_id, profiling_cnode_list);
if (iter == graph_kernel_name_.end()) { if (!ret.second) {
MS_LOG(ERROR) << "[profiling]graph id " << graph_id << " not in graph_kernel_name_"; MS_LOG(ERROR) << "[profiling]graph " << graph_id << " profiling cnode list already exist";
return;
} }
auto &kernel_names = iter->second; }
MS_LOG(INFO) << "kernel_names size:" << kernel_names.size() << ", task_ids size:" << task_ids.size(); bool ProfilingUtils::ValidComputeGraph(NotNull<const session::KernelGraph *> graph_ptr) {
if (kernel_names.size() != task_ids.size()) { for (const auto &node : graph_ptr->execution_order()) {
MS_LOG(ERROR) << "[profiling]kernel name and task id not match"; if (AnfAlgo::GetKernelType(node) == TBE_KERNEL) {
return; return true;
}
std::map<uint32_t, std::string> op_task_id_map;
size_t size = kernel_names.size();
for (size_t i = 0; i < size; ++i) {
auto it = op_task_id_map.find(task_ids[i]);
if (it != op_task_id_map.end()) {
MS_LOG(WARNING) << "task_id " << task_ids[i] << " exist, " << kernel_names[i];
continue;
} }
op_task_id_map[task_ids[i]] = kernel_names[i];
} }
if (!ProfilingManager::GetInstance().ReportProfilingData(op_task_id_map)) { return false;
MS_LOG(ERROR) << "ReportProfilingData failed"; }
void ProfilingUtils::ReportProfilingData(const std::vector<uint32_t> &task_ids,
NotNull<const session::KernelGraph *> graph) {
if (!ValidComputeGraph(graph)) {
MS_LOG(WARNING) << "Not a valid compute graph:" << graph->graph_id();
return;
}
auto ret = graph_profiling_cnode_.find(graph->graph_id());
if (ret == graph_profiling_cnode_.end()) {
MS_LOG(ERROR) << "Graph id not found";
return;
} }
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
TaskDescReporter task_reporter(context->device_id(), "vm.task_desc_info_" + std::to_string(graph->graph_id()),
ret->second);
task_reporter.set_task_ids(task_ids);
task_reporter.ReportData();
GraphDescReporter graph_reporter(context->device_id(), "vm.graph_desc_info_" + std::to_string(graph->graph_id()),
ret->second);
graph_reporter.ReportData();
} }
} // namespace ascend } // namespace ascend
} // namespace device } // namespace device
......
...@@ -80,12 +80,14 @@ class ProfilingUtils { ...@@ -80,12 +80,14 @@ class ProfilingUtils {
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list); NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
// Mapping graph id and the kernels' name in the graph // Mapping graph id and the kernels' name in the graph
static void SetGraphProfilingCNode(uint32_t graph_id, const std::vector<CNodePtr> &profiling_cnode_list);
static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names); static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names);
// Mapping task_id and kernel name for device to generate the time cost of specific kernel. // Mapping task_id and kernel name for device to generate the time cost of specific kernel.
// Device calculate the time cost of the task which is marked by task id. // Device calculate the time cost of the task which is marked by task id.
// But we need data of (kernel name , time cost) // But we need data of (kernel name , time cost)
static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids); static void ReportProfilingData(const std::vector<uint32_t> &task_ids, NotNull<const session::KernelGraph *> graph);
// Get profiling trace point from envs. // Get profiling trace point from envs.
// export PROFILING_FP_START='full name of the first cnode to execute' // export PROFILING_FP_START='full name of the first cnode to execute'
...@@ -122,7 +124,10 @@ class ProfilingUtils { ...@@ -122,7 +124,10 @@ class ProfilingUtils {
static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order, static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
NotNull<std::set<std::string> *> getnext_outputs); NotNull<std::set<std::string> *> getnext_outputs);
static bool ValidComputeGraph(NotNull<const session::KernelGraph *> graph_ptr);
// graph id --> (kernel name list) // graph id --> (kernel name list)
static std::unordered_map<uint32_t, std::vector<CNodePtr>> graph_profiling_cnode_;
static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_; static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
static uint32_t custom_node_index_; static uint32_t custom_node_index_;
}; };
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "device/ascend/profiling/reporter/desc_reporter.h"
#include "device/ascend/profiling/plugin_impl.h"
#include "utils/log_adapter.h"
constexpr size_t kReportMaxLen = 2048;
namespace mindspore {
namespace device {
namespace ascend {
// The destructor is declared pure virtual in the header; it still needs an
// out-of-line definition because derived destructors implicitly invoke it.
DescReporter::~DescReporter() = default;
// Send one serialized description string to the profiling plugin reporter,
// splitting it into chunks of at most kReportMaxLen bytes because each
// ReporterData payload is size-bounded.
// @param data      Serialized description text to report.
// @param file_name Tag copied into each chunk; must fit the msprof tag field.
// @throws via MS_LOG(EXCEPTION) when the reporter is missing, the tag is too
//         long, or the underlying Report call fails.
void DescReporter::ReportByLine(const std::string &data, const std::string &file_name) const {
  auto reporter = PluginImpl::GetPluginReporter();
  MS_EXCEPTION_IF_NULL(reporter);
  // Fail fast with a clear diagnostic instead of relying on memcpy_s to
  // reject an oversized tag on every loop iteration.
  if (file_name.length() > MSPROF_ENGINE_MAX_TAG_LEN) {
    MS_LOG(EXCEPTION) << "memcpy_s report data tag failed";
  }
  const auto tot_size = data.size();
  size_t cur_size = 0;
  while (cur_size < tot_size) {
    const size_t report_size = std::min(tot_size - cur_size, kReportMaxLen);
    Msprof::Engine::ReporterData report_data{};
    report_data.deviceId = device_id_;
    report_data.dataLen = report_size;
    // The reporter API takes a mutable unsigned char pointer; use named casts
    // (const_cast + reinterpret_cast) rather than a C-style cast so the
    // constness removal is explicit and greppable.
    report_data.data = reinterpret_cast<unsigned char *>(const_cast<char *>(data.c_str())) + cur_size;
    auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, file_name.c_str(), file_name.length());
    if (ret != 0) {
      MS_LOG(EXCEPTION) << "memcpy_s report data tag failed";
    }
    auto report_ret = reporter->Report(&report_data);
    if (report_ret != 0) {
      MS_LOG(EXCEPTION) << "report data failed";
    }
    cur_size += report_size;
  }
}
void DescReporter::ReportData() {
for (const auto &desc : prof_desc_) {
auto data = desc->ToString();
ReportByLine(data, file_name_);
}
}
} // namespace ascend
} // namespace device
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_DESC_REPORTER_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_DESC_REPORTER_H_
#include <string>
#include <utility>
#include <vector>
#include <memory>
#include "toolchain/prof_reporter.h"
#include "device/ascend/profiling/reporter/profiling_desc.h"
#include "utils/contract.h"
#include "session/kernel_graph.h"
namespace mindspore {
namespace device {
namespace ascend {
// Base class for profiling description reporters. Owns the list of kernel
// cnodes to describe and the accumulated ProfDesc entries; subclasses fill
// prof_desc_ and then delegate to DescReporter::ReportData() to emit them.
class DescReporter {
 public:
  // Pure virtual destructor makes the class abstract; defined in the .cc file.
  virtual ~DescReporter() = 0;
  // file_name doubles as the report tag, e.g. "vm.task_desc_info_<graph_id>".
  DescReporter(int device_id, std::string file_name, std::vector<CNodePtr> cnode_list)
      : device_id_(device_id), file_name_(std::move(file_name)), cnode_list_(std::move(cnode_list)) {}
  // Emits every entry in prof_desc_ via ReportByLine.
  virtual void ReportData();

 protected:
  // Reports one serialized string, chunked to the plugin's maximum payload size.
  void ReportByLine(const std::string &data, const std::string &file_name) const;
  int device_id_;
  std::string file_name_;
  std::vector<CNodePtr> cnode_list_;
  // Descriptors collected by subclasses before reporting.
  std::vector<std::shared_ptr<ProfDesc>> prof_desc_;
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_DESC_REPORTER_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <memory>
#include "device/ascend/profiling/reporter/graph_desc_reporter.h"
#include "session/anf_runtime_algorithm.h"
namespace mindspore {
namespace device {
namespace ascend {
void GraphDescReporter::ReportData() {
for (const auto &node : cnode_list_) {
if (AnfAlgo::GetKernelType(node) != TBE_KERNEL) {
MS_LOG(WARNING) << "Skip non tbe kernel";
continue;
}
std::vector<DataElement> input_data_list;
std::vector<DataElement> output_data_list;
auto op_name = node->fullname_with_scope();
auto op_type = AnfAlgo::GetCNodeName(node);
auto input_size = AnfAlgo::GetInputTensorNum(node);
for (size_t i = 0; i < input_size; ++i) {
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
auto input_node = input_node_with_index.first;
auto input_index = input_node_with_index.second;
DataElement element{};
element.index_ = i;
element.data_type_ = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
element.data_format_ = AnfAlgo::GetOutputFormat(input_node, input_index);
element.data_shape_ = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
input_data_list.emplace_back(element);
}
auto output_size = AnfAlgo::GetOutputTensorNum(node);
for (size_t i = 0; i < output_size; ++i) {
DataElement element{};
element.index_ = i;
element.data_type_ = AnfAlgo::GetOutputDeviceDataType(node, i);
element.data_format_ = AnfAlgo::GetOutputFormat(node, i);
element.data_shape_ = AnfAlgo::GetOutputDeviceShape(node, i);
output_data_list.emplace_back(element);
}
auto graph_desc = std::make_shared<GraphDesc>(op_name, op_type, input_data_list, output_data_list);
prof_desc_.emplace_back(graph_desc);
}
DescReporter::ReportData();
}
} // namespace ascend
} // namespace device
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_GRAPH_DESC_REPORTER_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_GRAPH_DESC_REPORTER_H_
#include <utility>
#include <string>
#include <vector>
#include "device/ascend/profiling/reporter/desc_reporter.h"
namespace mindspore {
namespace device {
namespace ascend {
// Reports per-op graph description data (op name/type and input/output tensor
// info) to the profiling framework file (tag "vm.graph_desc_info_<graph_id>").
class GraphDescReporter : public DescReporter {
 public:
  // device_id is `int` for consistency with the DescReporter base class and
  // the sibling TaskDescReporter (it was `uint32_t` here, silently converted
  // by the base initializer); callers passing uint32_t still convert.
  GraphDescReporter(int device_id, const std::string &file_name, std::vector<CNodePtr> cnode_list)
      : DescReporter(device_id, file_name, std::move(cnode_list)) {}
  // Builds a GraphDesc for each TBE kernel, then emits via the base class.
  void ReportData() override;
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_GRAPH_DESC_REPORTER_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <iterator>
#include <sstream>
#include <algorithm>
#include "device/ascend/profiling/reporter/profiling_desc.h"
namespace mindspore {
namespace device {
namespace ascend {
// Serialize one task record as: "<op_name> <block_dim> <task_id> <stream_id>\n".
std::string TaskDesc::ToString() {
  std::ostringstream oss;
  oss << op_name_ << " " << block_dim_ << " " << task_id_ << " " << stream_id_ << "\n";
  return oss.str();
}
std::string GraphDesc::ToString() {
std::string desc;
desc.append("op_name:").append(op_name_).append(" op_type:").append(op_type_);
int input_id = 0;
for (const auto &element : input_data_list_) {
desc.append(" input_id:")
.append(std::to_string(input_id++))
.append(" input_format:")
.append(element.data_format_)
.append(" input_data_type:")
.append(std::to_string(element.data_type_))
.append(" input_shape:")
.append(DataShapeToString(element.data_shape_));
}
input_id = 0;
for (const auto &element : output_data_list_) {
desc.append(" output_id:")
.append(std::to_string(input_id++))
.append(" output_format:")
.append(element.data_format_)
.append(" output_data_type:")
.append(std::to_string(element.data_type_))
.append(" output_shape:")
.append((DataShapeToString(element.data_shape_)));
}
desc.append("\n");
return desc;
}
// Render a shape as a double-quoted comma-separated list,
// e.g. {2, 3, 4} -> "\"2,3,4\"" and {} -> "\"\"".
std::string GraphDesc::DataShapeToString(const std::vector<size_t> &shape) {
  std::string result = "\"";
  for (size_t i = 0; i < shape.size(); ++i) {
    if (i != 0) {
      result += ",";
    }
    result += std::to_string(shape[i]);
  }
  result += "\"";
  return result;
}
} // namespace ascend
} // namespace device
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_PROFILING_DESC_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_PROFILING_DESC_H_
#include <string>
#include <utility>
#include <vector>
namespace mindspore {
namespace device {
namespace ascend {
// Abstract base for one record of profiling description data; subclasses
// (TaskDesc, GraphDesc) serialize themselves via ToString().
class ProfDesc {
 public:
  explicit ProfDesc(std::string op_name) : op_name_(std::move(op_name)) {}
  // Virtual destructor: instances are held and destroyed polymorphically
  // (DescReporter stores std::shared_ptr<ProfDesc>), so a polymorphic base
  // must have one.
  virtual ~ProfDesc() = default;
  virtual std::string ToString() = 0;

 protected:
  std::string op_name_;  // full name of the op this record describes
};
// Describes one launched task: the owning op plus the runtime ids needed to
// correlate device profiling records with kernels.
class TaskDesc : public ProfDesc {
 public:
  TaskDesc(std::string op_name, uint32_t task_id, uint32_t block_dim, uint32_t stream_id)
      : ProfDesc(std::move(op_name)), task_id_(task_id), block_dim_(block_dim), stream_id_(stream_id) {}
  // Serializes as "<op_name> <block_dim> <task_id> <stream_id>\n".
  std::string ToString() override;

 private:
  uint32_t task_id_;    // runtime task id assigned at task generation
  uint32_t block_dim_;  // kernel block dim recorded by the kernel mod
  uint32_t stream_id_;  // stream the task was submitted on
};
// Device-side description of a single tensor (one input or output of an op).
struct DataElement {
  size_t index_;                    // position among the op's inputs/outputs
  std::string data_format_;         // device data format string
  int data_type_;                   // device data type enum value
  std::vector<size_t> data_shape_;  // device shape dimensions
};
// Describes one op in a graph: op name/type plus device info for every input
// and output tensor.
class GraphDesc : public ProfDesc {
 public:
  GraphDesc(std::string op_name, std::string op_type, std::vector<DataElement> input_data_list,
            std::vector<DataElement> output_data_list)
      : ProfDesc(std::move(op_name)),
        op_type_(std::move(op_type)),
        input_data_list_(std::move(input_data_list)),
        output_data_list_(std::move(output_data_list)) {}
  // Serializes as one space-separated line listing each tensor's id, format,
  // data type and shape.
  std::string ToString() override;

 private:
  std::string op_type_;
  std::vector<DataElement> input_data_list_;
  std::vector<DataElement> output_data_list_;
  // Renders a shape as a quoted comma-separated list, e.g. "\"2,3,4\"".
  [[nodiscard]] static std::string DataShapeToString(const std::vector<size_t> &shape);
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_PROFILING_DESC_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "device/ascend/profiling/reporter/task_desc_reporter.h"
#include "session/anf_runtime_algorithm.h"
#include "kernel/ascend_kernel_mod.h"
namespace mindspore {
namespace device {
namespace ascend {
// Build a TaskDesc for every TBE kernel in cnode_list_ and report them.
// task_ids_ must contain exactly one id per cnode (sizes are checked up
// front); kernels skipped as non-TBE still consume their task id so later
// kernels stay aligned with their ids.
void TaskDescReporter::ReportData() {
  MS_LOG(INFO) << "cnode_list.size()=" << cnode_list_.size() << " task_ids_.size()=" << task_ids_.size();
  if (cnode_list_.size() != task_ids_.size()) {
    MS_LOG(ERROR) << "cnode list size not equal task ids size";
    return;
  }
  size_t task_index = 0;
  for (const auto &node : cnode_list_) {
    // Validate the node before dereferencing it. The original checked only
    // after AnfAlgo::GetKernelType/GetKernelMod had already used the node.
    MS_EXCEPTION_IF_NULL(node);
    if (AnfAlgo::GetKernelType(node) != TBE_KERNEL) {
      MS_LOG(WARNING) << "Skip non tbe kernel";
      ++task_index;
      continue;
    }
    auto kernel_mod = AnfAlgo::GetKernelMod(node);
    // block_dim/stream_id are only available through AscendKernelMod.
    auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod);
    MS_EXCEPTION_IF_NULL(ascend_kernel_mod);
    auto desc_ptr = std::make_shared<TaskDesc>(node->fullname_with_scope(), task_ids_[task_index++],
                                               ascend_kernel_mod->block_dim(), ascend_kernel_mod->stream_id());
    prof_desc_.emplace_back(desc_ptr);
  }
  DescReporter::ReportData();
}
} // namespace ascend
} // namespace device
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_TASK_DESC_REPORTER_H_
#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_TASK_DESC_REPORTER_H_
#include <utility>
#include <string>
#include <vector>
#include "device/ascend/profiling/reporter/desc_reporter.h"
namespace mindspore {
namespace device {
namespace ascend {
// Reports per-task launch information (task id, block dim, stream id) to the
// profiling framework file; runtime task ids are attached separately via
// set_task_ids before ReportData is called.
class TaskDescReporter : public DescReporter {
 public:
  TaskDescReporter(int device_id, const std::string &file_name, std::vector<CNodePtr> cnode_list)
      : DescReporter(device_id, file_name, std::move(cnode_list)) {}
  // Builds one TaskDesc per kernel, then emits via the base class.
  void ReportData() override;
  // Expected to hold one id per cnode; ReportData validates the sizes match.
  void set_task_ids(const std::vector<uint32_t> &task_ids) { task_ids_ = task_ids; }

 private:
  std::vector<uint32_t> task_ids_;
};
} // namespace ascend
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_MINDSPORE_CCSRC_DEVICE_ASCEND_PROFILING_REPORTER_TASK_DESC_REPORTER_H_
...@@ -83,7 +83,6 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP ...@@ -83,7 +83,6 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP
bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id,
std::vector<TaskInfoPtr> *task_info_list) { std::vector<TaskInfoPtr> *task_info_list) {
MS_LOG(INFO) << "LaunchKernel start...";
MS_EXCEPTION_IF_NULL(task_info_list); MS_EXCEPTION_IF_NULL(task_info_list);
MS_EXCEPTION_IF_NULL(anf_node_ptr); MS_EXCEPTION_IF_NULL(anf_node_ptr);
AddressPtrList kernel_inputs; AddressPtrList kernel_inputs;
...@@ -132,6 +131,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i ...@@ -132,6 +131,7 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
std::vector<TaskInfoPtr> *task_info_list, uint32_t graph_id) { std::vector<TaskInfoPtr> *task_info_list, uint32_t graph_id) {
uint32_t current_op_index = 0; uint32_t current_op_index = 0;
std::vector<CNodePtr> profiling_cnode_list;
std::vector<std::string> kernel_name_list; std::vector<std::string> kernel_name_list;
for (const auto &anf_node_ptr : anf_node_list) { for (const auto &anf_node_ptr : anf_node_list) {
size_t old_size = task_info_list->size(); size_t old_size = task_info_list->size();
...@@ -143,11 +143,16 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, ...@@ -143,11 +143,16 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
return false; return false;
} }
for (size_t i = old_size; i < task_info_list->size(); ++i) { for (size_t i = old_size; i < task_info_list->size(); ++i) {
profiling_cnode_list.emplace_back(anf_node_ptr);
kernel_name_list.emplace_back(anf_node_ptr->fullname_with_scope()); kernel_name_list.emplace_back(anf_node_ptr->fullname_with_scope());
} }
current_op_index++; current_op_index++;
} }
ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list); ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list);
if (ProfilingManager::GetInstance().IsProfiling()) {
ProfilingUtils::SetGraphProfilingCNode(graph_id, profiling_cnode_list);
}
return true; return true;
} }
} // namespace tasksink } // namespace tasksink
......
...@@ -127,10 +127,12 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std:: ...@@ -127,10 +127,12 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
return true; return true;
} }
vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) { const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
MS_LOG(INFO) << "AicpuOpKernelMod GenTask start"; MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
stream_id_ = stream_id;
node_so_ = AICPU_OPS_SO_NAME; node_so_ = AICPU_OPS_SO_NAME;
std::vector<void *> input_data_addrs; std::vector<void *> input_data_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs), (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(input_data_addrs),
......
...@@ -29,8 +29,8 @@ class AicpuOpKernelMod : public AscendKernelMod { ...@@ -29,8 +29,8 @@ class AicpuOpKernelMod : public AscendKernelMod {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override; const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
void SetInputList(const std::vector<int64_t> &inputList); void SetInputList(const std::vector<int64_t> &inputList);
void SetOutputList(const std::vector<int64_t> &outputList); void SetOutputList(const std::vector<int64_t> &outputList);
......
...@@ -29,6 +29,12 @@ class AscendKernelMod : public KernelMod { ...@@ -29,6 +29,12 @@ class AscendKernelMod : public KernelMod {
public: public:
virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) = 0; const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
uint32_t stream_id() { return stream_id_; }
protected:
uint32_t block_dim_{1};
uint32_t stream_id_{0};
}; };
} // namespace kernel } // namespace kernel
} // namespace mindspore } // namespace mindspore
......
...@@ -124,11 +124,13 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const { ...@@ -124,11 +124,13 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const { return workspace_size_list_; } const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) { const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
if (inputs.empty() || outputs.empty()) { if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "inputs or outputs is empty"; MS_LOG(EXCEPTION) << "inputs or outputs is empty";
} }
stream_id_ = stream_id;
std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_); std::string hccl_type = AnfAlgo::GetCNodeName(anf_node_);
MS_EXCEPTION_IF_NULL(inputs.at(0)); MS_EXCEPTION_IF_NULL(inputs.at(0));
auto input_data_addr = inputs.at(0)->addr; auto input_data_addr = inputs.at(0)->addr;
......
...@@ -38,8 +38,8 @@ class HcclKernel : public AscendKernelMod { ...@@ -38,8 +38,8 @@ class HcclKernel : public AscendKernelMod {
const std::vector<size_t> &GetInputSizeList() const override; const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override; const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override; const std::vector<size_t> &GetWorkspaceSizeList() const override;
vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
protected: protected:
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_; std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;
......
...@@ -52,13 +52,13 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect ...@@ -52,13 +52,13 @@ bool AssignKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vect
return true; return true;
} }
std::vector<TaskInfoPtr> AssignKernel::GenTask(const vector<mindspore::kernel::AddressPtr> &inputs, std::vector<TaskInfoPtr> AssignKernel::GenTask(const std::vector<AddressPtr> &inputs,
const vector<mindspore::kernel::AddressPtr> &workspace, const std::vector<AddressPtr> &workspace,
const vector<mindspore::kernel::AddressPtr> &outputs, const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
uint32_t stream_id) {
if (inputs.size() != 2) { if (inputs.size() != 2) {
MS_LOG(EXCEPTION) << "inputs size is not two"; MS_LOG(EXCEPTION) << "inputs size is not two";
} }
stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE); stream_id, inputs[0]->addr, inputs[0]->size, inputs[1]->addr, inputs[1]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
......
...@@ -93,10 +93,9 @@ void MemCpyAsyncKernel::GetInputOutputTotalCount(const AnfNodePtr &anf_node) { ...@@ -93,10 +93,9 @@ void MemCpyAsyncKernel::GetInputOutputTotalCount(const AnfNodePtr &anf_node) {
output_size_list_.emplace_back(total_size); output_size_list_.emplace_back(total_size);
} }
std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const vector<mindspore::kernel::AddressPtr> &inputs, std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr> &inputs,
const vector<mindspore::kernel::AddressPtr> & /*workspace*/, const std::vector<AddressPtr> &,
const vector<mindspore::kernel::AddressPtr> &outputs, const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
uint32_t stream_id) {
if (inputs.size() != 1) { if (inputs.size() != 1) {
MS_LOG(EXCEPTION) << "MemCpyAsync op inputs is not one"; MS_LOG(EXCEPTION) << "MemCpyAsync op inputs is not one";
} }
...@@ -105,6 +104,7 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const vector<mindspore::kern ...@@ -105,6 +104,7 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const vector<mindspore::kern
MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one"; MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one";
} }
stream_id_ = stream_id;
std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>( std::shared_ptr<MemcpyAsyncTaskInfo> task_info_ptr = std::make_shared<MemcpyAsyncTaskInfo>(
stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE); stream_id, outputs[0]->addr, outputs[0]->size, inputs[0]->addr, inputs[0]->size, RT_MEMCPY_DEVICE_TO_DEVICE);
MS_EXCEPTION_IF_NULL(task_info_ptr); MS_EXCEPTION_IF_NULL(task_info_ptr);
......
文件模式从 100755 更改为 100644
...@@ -62,6 +62,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt ...@@ -62,6 +62,7 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt
const std::vector<AddressPtr> &outputs, uint32_t stream_id) { const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
MS_LOG(INFO) << "gen task inputs size:" << inputs.size() << ", workspace size:" << workspace.size() MS_LOG(INFO) << "gen task inputs size:" << inputs.size() << ", workspace size:" << workspace.size()
<< ", outputs size:" << outputs.size(); << ", outputs size:" << outputs.size();
stream_id_ = stream_id;
std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr = std::shared_ptr<ProfilerTraceTaskInfo> task_info_ptr =
std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_); std::make_shared<ProfilerTraceTaskInfo>(stream_id, log_id_, notify_, flags_);
return {task_info_ptr}; return {task_info_ptr};
......
...@@ -57,6 +57,7 @@ bool RecvKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector ...@@ -57,6 +57,7 @@ bool RecvKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector
std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, std::vector<TaskInfoPtr> RecvKernel::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t stream_id) { const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id; MS_LOG(INFO) << "RecvKernel GenTask event_id_:" << event_id_ << ", stream_id_:" << stream_id;
stream_id_ = stream_id;
EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_); EventWaitTaskInfoPtr task_info_ptr = std::make_shared<EventWaitTaskInfo>(stream_id, event_id_);
MS_EXCEPTION_IF_NULL(task_info_ptr); MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr}; return {task_info_ptr};
......
...@@ -54,6 +54,7 @@ bool SendKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector ...@@ -54,6 +54,7 @@ bool SendKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector
std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &, std::vector<TaskInfoPtr> SendKernel::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t stream_id) { const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id; MS_LOG(INFO) << "SendKernel GenTask event id:" << event_id_ << ", stream id:" << stream_id;
stream_id_ = stream_id;
EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_); EventRecordTaskInfoPtr task_info_ptr = std::make_shared<EventRecordTaskInfo>(stream_id, event_id_);
MS_EXCEPTION_IF_NULL(task_info_ptr); MS_EXCEPTION_IF_NULL(task_info_ptr);
return {task_info_ptr}; return {task_info_ptr};
......
...@@ -30,8 +30,8 @@ class SendKernel : public RtKernel { ...@@ -30,8 +30,8 @@ class SendKernel : public RtKernel {
bool Init(const AnfNodePtr &anf_node) override; bool Init(const AnfNodePtr &anf_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override; const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
private: private:
uint32_t event_id_; uint32_t event_id_;
......
...@@ -67,6 +67,7 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt ...@@ -67,6 +67,7 @@ std::vector<TaskInfoPtr> StreamActiveKernel::GenTask(const std::vector<AddressPt
const std::vector<AddressPtr> &, uint32_t stream_id) { const std::vector<AddressPtr> &, uint32_t stream_id) {
MS_LOG(INFO) << "StreamActiveKernel GenTask active stream size:" << active_streams_index_.size() MS_LOG(INFO) << "StreamActiveKernel GenTask active stream size:" << active_streams_index_.size()
<< ", stream id:" << stream_id; << ", stream id:" << stream_id;
stream_id_ = stream_id;
std::vector<TaskInfoPtr> task_info_list; std::vector<TaskInfoPtr> task_info_list;
for (auto &index : active_streams_index_) { for (auto &index : active_streams_index_) {
std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index); std::shared_ptr<StreamActiveTaskInfo> task_info_ptr = std::make_shared<StreamActiveTaskInfo>(stream_id, index);
......
...@@ -32,8 +32,8 @@ class StreamActiveKernel : public RtKernel { ...@@ -32,8 +32,8 @@ class StreamActiveKernel : public RtKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override; const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
private: private:
std::vector<uint32_t> active_streams_index_; std::vector<uint32_t> active_streams_index_;
......
...@@ -76,6 +76,7 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt ...@@ -76,6 +76,7 @@ std::vector<TaskInfoPtr> StreamSwitchKernel::GenTask(const std::vector<AddressPt
if (inputs.size() != 2) { if (inputs.size() != 2) {
MS_LOG(ERROR) << "stream switch inputs size is " << inputs.size() << ", is not two"; MS_LOG(ERROR) << "stream switch inputs size is " << inputs.size() << ", is not two";
} }
stream_id_ = stream_id;
MS_EXCEPTION_IF_NULL(inputs[0]); MS_EXCEPTION_IF_NULL(inputs[0]);
MS_EXCEPTION_IF_NULL(inputs[1]); MS_EXCEPTION_IF_NULL(inputs[1]);
auto loop_cnt = inputs[0]->addr; auto loop_cnt = inputs[0]->addr;
......
...@@ -66,9 +66,9 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu ...@@ -66,9 +66,9 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu
return true; return true;
} }
vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &inputs, std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspaces, const std::vector<AddressPtr> &workspaces,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) { const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
if (kernel_pack_ == nullptr) { if (kernel_pack_ == nullptr) {
MS_EXCEPTION(ArgumentError) << "kernel pack should not be nullptr."; MS_EXCEPTION(ArgumentError) << "kernel pack should not be nullptr.";
} }
...@@ -90,19 +90,19 @@ vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &inputs, ...@@ -90,19 +90,19 @@ vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
[](const AddressPtr &workspace) -> void * { return workspace->addr; }); [](const AddressPtr &workspace) -> void * { return workspace->addr; });
} }
uint32_t block_dim = 1; // default blockdim equal to 1. stream_id_ = stream_id;
auto funcstub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); auto funcstub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim_);
if (funcstub == 0) { if (funcstub == 0) {
MS_EXCEPTION(ArgumentError) << "GenFuncStub failed."; MS_EXCEPTION(ArgumentError) << "GenFuncStub failed.";
} }
std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_); std::string stub_func = KernelManager::GetStubFuncName(kernel_pack_);
MS_LOG(INFO) << "block_dim is:" << block_dim; MS_LOG(INFO) << "block_dim is:" << block_dim_;
TbeTaskInfoPtr task_info_ptr = TbeTaskInfoPtr task_info_ptr =
make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim, args, 0, sm_desc, nullptr, 0, meta_data, make_shared<ge::model_runner::TbeTaskInfo>(stream_id, stub_func, block_dim_, args, 0, sm_desc, nullptr, 0,
input_data_addrs, output_data_addrs, workspace_addrs); meta_data, input_data_addrs, output_data_addrs, workspace_addrs);
return {task_info_ptr}; return {task_info_ptr};
} }
......
...@@ -40,8 +40,8 @@ class TbeKernelMod : public AscendKernelMod { ...@@ -40,8 +40,8 @@ class TbeKernelMod : public AscendKernelMod {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override; const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override;
vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces, std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override; const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
std::vector<size_t> GenParameters() override; std::vector<size_t> GenParameters() override;
private: private:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册