提交 4149274b 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!4218 add data saver module for gpu profiler

Merge pull request !4218 from yelihua/temp-dev
......@@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea
}
// Forwards to the CUPTI entry point "cuptiGetDeviceId", which is looked up through GetCUPTIFunc.
// The previous lookup used the symbol name "cuptiSubscribe", which is a different API.
CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) {
  // Function-local static: the symbol is resolved on the first call only and reused afterwards.
  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId"));
  return func_ptr(context, deviceId);
}
} // namespace gpu
......
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "profiler/device/gpu/data_saver.h"
#include <fstream>
#include <numeric>
#include "utils/log_adapter.h"
namespace mindspore {
namespace profiler {
namespace gpu {
// Builds the per-operator detail record from the raw OpInfo and the operator's share of the total time.
// op_info is dereferenced unconditionally, so callers must pass a non-null pointer.
OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
    : op_info_(op_info), proportion_(proportion) {
  // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
  op_full_name_ = op_info->op_name;

  // The short name starts after the last '/'; a name without any '/' is used as a whole.
  const auto slash_pos = op_full_name_.rfind('/');
  const std::string::size_type name_begin = (slash_pos == std::string::npos) ? 0 : slash_pos + 1;
  op_name_ = op_full_name_.substr(name_begin);

  // The op type is the part of the short name before the last '-'. When there is no '-' inside the
  // short name, the whole short name is the type (this is what the unchecked arithmetic produced before).
  const auto dash_pos = op_full_name_.rfind('-');
  const bool has_type_suffix = dash_pos != std::string::npos && dash_pos >= name_begin;
  op_type_ = has_type_suffix ? op_full_name_.substr(name_begin, dash_pos - name_begin) : op_name_;

  // Guard the average against a zero occurrence count; op_avg_time_ keeps its default of 0 in that case.
  if (op_info->op_count > 0) {
    op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
  }
}
// Wraps a single event as an aggregate of one occurrence.
// Grid/block dimensions are rendered as quoted "x,y,z" strings for kernel activities and stay empty otherwise.
ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
  if (basic_info_->activity_type == ActivityType::kKernel) {
    const auto &kernel = basic_info_->kernel_info;
    grid_dim_ = "\"" + std::to_string(kernel.grid_x) + ',' + std::to_string(kernel.grid_y) + ',' +
                std::to_string(kernel.grid_z) + "\"";
    block_dim_ = "\"" + std::to_string(kernel.block_x) + ',' + std::to_string(kernel.block_y) + ',' +
                 std::to_string(kernel.block_z) + "\"";
  } else {
    grid_dim_ = "";
    block_dim_ = "";
  }

  // With a single sample, total, average, maximum and minimum are all the same duration.
  const float duration = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
  count_ = 1;
  total_duration_ = duration;
  avg_duration_ = duration;
  max_duration_ = duration;
  min_duration_ = duration;
}
// Merges the statistics of `other` into this aggregate and returns *this.
// count_ and total_duration_ are summed; max_duration_ / min_duration_ become the extremes of both
// operands. avg_duration_ is not touched here; DataSaver::ParseEvent recomputes it after all merges.
ActivityData &ActivityData::operator+=(const ActivityData &other) {
  count_ += other.count_;
  total_duration_ += other.total_duration_;
  // Compare like with like (max against max, min against min) and update both independently, so the
  // result is also correct when `other` already aggregates several events.
  if (other.max_duration_ > max_duration_) {
    max_duration_ = other.max_duration_;
  }
  if (other.min_duration_ < min_duration_) {
    min_duration_ = other.min_duration_;
  }
  return *this;
}
void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
op_detail_infos_.reserve(op_info_maps.size());
float total_time_sum = GetTotalOpTime(op_info_maps);
for (auto item : op_info_maps) {
float proportion = item.second.op_host_cost_time / total_time_sum;
auto op_info = std::make_shared<OpInfo>(item.second);
OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
op_detail_infos_.emplace_back(op_detail_info);
AddOpDetailInfoForType(op_detail_info);
}
// update average time of op type
for (auto &op_type : op_type_infos_) {
// device_infos: <type_name, op_type_info>
op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
}
MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
}
void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
// Construct OpType object according to op detail info
OpType op_type = OpType{op_detail_info.op_type_, op_detail_info.op_info_->op_count,
op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_};
// Set the OpType into op_type_infos_ map
std::string type_name = op_detail_info.op_type_;
auto iter = op_type_infos_.find(type_name);
if (iter == op_type_infos_.end()) {
op_type_infos_.emplace(type_name, op_type);
} else {
iter->second += op_type;
}
}
// Returns the sum of op_host_cost_time over all entries of op_info_maps, accumulated as float.
float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
  float total = 0;
  for (auto entry = op_info_maps.begin(); entry != op_info_maps.end(); ++entry) {
    total = total + entry->second.op_host_cost_time;
  }
  MS_LOG(DEBUG) << "The total op time is " << total;
  return total;
}
void DataSaver::ParseEvent(const std::vector<Event> &events) {
// Put Kernel activity events into activity_infos_
for (const auto &event : events) {
if (event.op_name.empty() || event.api_type != CUPTIApiType::kActivity ||
event.activity_type != ActivityType::kKernel) {
continue;
}
AddKernelEvent(event);
}
// update average time of kernel op cost
for (auto &device_infos : activity_infos_) {
// device_infos: <device_id, DeviceActivityInfos>
for (auto &activity_info : device_infos.second) {
// activity_info: <kernel_name, Activity>
activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
}
MS_LOG(DEBUG) << "Get " << device_infos.second.size() << " activity items for device:" << device_infos.first;
}
}
void DataSaver::AddKernelEvent(const Event &event) {
// Put kernel event to activity_infos according to device id
uint32_t device_id = event.device_id;
auto iter = activity_infos_.find(device_id);
if (iter == activity_infos_.end()) {
auto res_flag = activity_infos_.emplace(device_id, DeviceActivityInfos());
AddKernelEventToDevice(event, &res_flag.first->second);
} else {
AddKernelEventToDevice(event, &iter->second);
}
}
void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
// Combine kernel activity with same kernel name
auto event_ptr = std::make_shared<Event>(event);
ActivityData activity_data = ActivityData(event_ptr);
std::string kernel_name = event.kernel_name;
auto iter = device_activity_infos->find(kernel_name);
if (iter == device_activity_infos->end()) {
device_activity_infos->emplace(kernel_name, activity_data);
} else {
iter->second += activity_data;
}
}
// Writes the parsed profiling data as CSV files into out_path_dir: op details, op types, and kernel
// activities. Nothing is written when the directory is empty or when any of the three parsed
// collections is empty.
void DataSaver::WriteFile(std::string out_path_dir) {
  if (out_path_dir.empty()) {
    MS_LOG(WARNING) << "Output directory is empty. Ignore the writing data.";
    return;
  }
  if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
    MS_LOG(WARNING) << "No operation detail infos to write.";
    return;
  }
  // not support multi-device for operator info per process yet
  // The op detail / op type file names therefore use the id of the first device in activity_infos_.
  device_id_ = std::to_string(activity_infos_.begin()->first);
  WriteOpDetail(out_path_dir);
  WriteOpType(out_path_dir);
  WriteActivity(out_path_dir);
}
// Writes the per-type statistics to "<saver_base_dir>/gpu_op_type_info_<device_id_>.csv":
// one header line followed by one line per op type. Logs a warning and returns if the file cannot be opened.
void DataSaver::WriteOpType(const std::string &saver_base_dir) {
  const std::string csv_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
  std::ofstream out(csv_path);
  if (!out.is_open()) {
    MS_LOG(WARNING) << "Open file '" << csv_path << "' failed!";
    return;
  }

  out << OpType().GetHeader() << std::endl;
  for (auto entry = op_type_infos_.begin(); entry != op_type_infos_.end(); ++entry) {
    out << entry->second << std::endl;
  }
  out.close();
  MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << csv_path;
}
// Writes the per-operator records to "<saver_base_dir>/gpu_op_detail_info_<device_id_>.csv":
// one header line followed by one line per operator. Logs a warning and returns if the file cannot be opened.
void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
  const std::string csv_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
  std::ofstream out(csv_path);
  if (!out.is_open()) {
    MS_LOG(WARNING) << "Open file '" << csv_path << "' failed!";
    return;
  }

  out << OpDetailInfo().GetHeader() << std::endl;
  for (auto record = op_detail_infos_.begin(); record != op_detail_infos_.end(); ++record) {
    out << *record << std::endl;
  }
  out.close();
  MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << csv_path;
}
// Writes one CSV per device, "<saver_base_dir>/gpu_activity_data_<device_id>.csv", containing a header
// line followed by one line per aggregated kernel activity of that device.
void DataSaver::WriteActivity(const std::string &saver_base_dir) {
  const std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
  // Iterate by reference: each element owns a whole per-device map, which a by-value loop would deep-copy.
  for (const auto &device_info : activity_infos_) {
    const std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
    std::ofstream ofs(file_path);
    if (!ofs.is_open()) {
      MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
      // A failure for one device must not prevent the remaining devices from being written.
      continue;
    }
    // write activity data into file
    ofs << ActivityData().GetHeader() << std::endl;
    for (const auto &activity_data : device_info.second) {
      ofs << activity_data.second << std::endl;
    }
    ofs.close();
    MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
  }
}
} // namespace gpu
} // namespace profiler
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_DATA_SAVER_H
#define MINDSPORE_DATA_SAVER_H
#include <iostream>
#include <unordered_map>
#include <vector>
#include <string>
#include <memory>
#include "profiler/device/gpu/gpu_profiling.h"
namespace mindspore {
namespace profiler {
namespace gpu {
// One CSV row of per-operator profiling data.
struct OpDetailInfo {
  std::string op_type_;                       // part of the short name before its last '-'
  std::string op_name_;                       // part of the full name after its last '/'
  std::string op_full_name_;                  // copied from OpInfo::op_name
  std::shared_ptr<OpInfo> op_info_{nullptr};  // raw counters; dereferenced when the row is streamed
  float op_avg_time_{0};                      // host cost time divided by occurrence count
  float proportion_{0};                       // share of the total op time, supplied by the caller

  OpDetailInfo() = default;
  OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);

  // Column names, in the order operator<< writes the values.
  std::string GetHeader() const {
    const std::string header =
      "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
      "cuda_activity_cost_time(us),cuda_activity_call_count";
    return header;
  }

  // Streams the record as one comma-separated line without a trailing newline.
  friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
    const auto &info = event.op_info_;
    os << "Device," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ',';
    os << info->op_count << ',' << info->op_host_cost_time << ',' << event.op_avg_time_ << ',';
    os << event.proportion_ << ',' << info->cupti_activity_time << ',' << info->op_kernel_count;
    return os;
  }
};
// Statistics accumulated over all operators that share one op type; one CSV row per type.
struct OpType {
  std::string op_type_;  // type name
  int count_{0};         // summed occurrence count
  float total_time_{0};  // summed time
  float avg_time_{0};    // not maintained by operator+=; set by the owner after merging
  float proportion_{0};  // summed proportion

  // Column names, in the order operator<< writes the values.
  std::string GetHeader() const {
    const std::string header = "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)";
    return header;
  }

  // Streams the record as one comma-separated line without a trailing newline.
  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
    os << event.op_type_ << ',' << event.count_ << ',' << event.total_time_ << ',';
    os << event.proportion_ << ',' << event.avg_time_;
    return os;
  }

  // Adds the count, total time and proportion of `other`; the type name and average are left as they are.
  OpType &operator+=(const OpType &other) {
    count_ += other.count_;
    total_time_ += other.total_time_;
    proportion_ += other.proportion_;
    return *this;
  }
};
// Aggregated statistics of the activities merged under one name; one CSV row per entry.
struct ActivityData {
  std::shared_ptr<Event> basic_info_{nullptr};  // event the aggregate was created from; dereferenced when streamed
  std::string block_dim_;                       // quoted "x,y,z" for kernel activities, otherwise empty
  std::string grid_dim_;                        // quoted "x,y,z" for kernel activities, otherwise empty
  int count_{0};                                // number of merged occurrences
  float total_duration_{0};
  float avg_duration_{0};
  float max_duration_{0};
  float min_duration_{0};

  ActivityData() = default;
  explicit ActivityData(std::shared_ptr<Event> data);

  // Column names, in the order operator<< writes the values.
  std::string GetHeader() const {
    const std::string header =
      "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences,"
      "total_duration(us),avg_duration(us),max_duration(us),min_duration(us)";
    return header;
  }

  // Streams the record as one comma-separated line without a trailing newline; the kernel name is quoted.
  friend std::ostream &operator<<(std::ostream &os, const ActivityData &event) {
    const auto &info = event.basic_info_;
    os << "\"" << info->kernel_name << "\"," << info->kernel_type << ',' << info->op_name << ',';
    os << info->stream_id << ',' << event.block_dim_ << ',' << event.grid_dim_ << ',' << event.count_ << ',';
    os << event.total_duration_ << ',' << event.avg_duration_ << ',';
    os << event.max_duration_ << ',' << event.min_duration_;
    return os;
  }

  ActivityData &operator+=(const ActivityData &other);
};
// Container aliases used by DataSaver.
using OpInfoMap = std::unordered_map<std::string, OpInfo>;
using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>; // <kernel_name, ActivityData>
using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>; // <device_id, DeviceActivityInfos>
using OpTypeInfos = std::unordered_map<std::string, OpType>; // <op_type, OpType>
using OpDetailInfos = std::vector<OpDetailInfo>;
// Aggregates GPU profiling results (per-operator info and kernel activity events) and writes them
// out as CSV files. Instances are not copyable.
class DataSaver {
public:
DataSaver() = default;
~DataSaver() = default;
DataSaver(const DataSaver &) = delete;
DataSaver &operator=(const DataSaver &) = delete;
// Fills op_detail_infos_ and op_type_infos_ from the given operator info map.
void ParseOpInfo(const OpInfoMap &op_info_maps);
// Fills activity_infos_ from the kernel activity events that carry an op name; other events are ignored.
void ParseEvent(const std::vector<Event> &events);
// Writes the op detail, op type and activity CSV files into the directory out_path.
// Logs a warning and writes nothing if out_path is empty or any parsed collection is empty.
void WriteFile(std::string out_path);
private:
// Merges one operator record into op_type_infos_ under its op type name.
void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
// Returns the sum of op_host_cost_time over all entries.
float GetTotalOpTime(const OpInfoMap &op_info_maps);
// Adds a kernel event to the table of the device given by event.device_id.
void AddKernelEvent(const Event &event);
// Adds a kernel event to the given per-device table, merging events with the same kernel name.
void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos);
// Each writer creates its CSV file below saver_base_dir and logs a warning if the file cannot be opened.
void WriteOpType(const std::string &saver_base_dir);
void WriteOpDetail(const std::string &saver_base_dir);
void WriteActivity(const std::string &saver_base_dir);
// Device id used in the op detail / op type file names; set by WriteFile from the first entry of activity_infos_.
std::string device_id_;
// <device_id, <kernel_name, ActivityData>>
AllActivityInfos activity_infos_;
// <op_type, OpType>
OpTypeInfos op_type_infos_;
// One record per entry passed to ParseOpInfo.
OpDetailInfos op_detail_infos_;
};
} // namespace gpu
} // namespace profiler
} // namespace mindspore
#endif // MINDSPORE_DATA_SAVER_H
......@@ -19,6 +19,7 @@
#include <chrono>
#include "profiler/device/gpu/gpu_profiling.h"
#include "profiler/device/gpu/cupti_interface.h"
#include "profiler/device/gpu/data_saver.h"
#include "utils/log_adapter.h"
#include "pybind_api/api_register.h"
......@@ -478,7 +479,11 @@ void GPUProfiler::Stop() {
void GPUProfiler::SaveProfileData() {
if (profile_data_path_.empty()) {
MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data.";
return;
} else {
DataSaver dataSaver;
dataSaver.ParseOpInfo(op_info_map_);
dataSaver.ParseEvent(events_);
dataSaver.WriteFile(profile_data_path_);
}
op_info_map_.clear();
op_name_map_.clear();
......
......@@ -43,17 +43,21 @@ class MinddataParser:
node_name, node_start, node_end, queue_size = "", 0, 0, 0
if node_info:
node_name = node_info[0].replace("Node:", "")
if len(node_info) > 2:
if len(node_info) > 3 and "queue" in node_info[1]:
queue_size = node_info[1].replace("queue size:", "")
queue_size = int(queue_size) if queue_size.isdigit() else queue_size
node_start = node_info[2].replace("Run start:", "")
node_start = int(node_start) if node_start.isdigit() else node_start
node_end = node_info[3].replace("Run end:", "")
node_end = int(node_end) if node_end.isdigit() else node_end
elif len(node_info) > 3 and "Run" in node_info[1]:
queue_size = node_info[3].replace("queue size:", "")
queue_size = int(queue_size) if queue_size.isdigit() else queue_size
node_start = node_info[1].replace("Run start:", "")
if node_start.isdigit():
node_start = int(node_start)
node_start = int(node_start) if node_start.isdigit() else node_start
node_end = node_info[2].replace("Run end:", "")
if node_end.isdigit():
node_end = int(node_end)
if len(node_info) > 3:
queue_size = node_info[3].replace("queue size:", "")
if queue_size.isdigit():
queue_size = int(queue_size)
node_end = int(node_end) if node_end.isdigit() else node_end
one_step_list = [node_name, node_start, node_end, queue_size]
result.append(one_step_list)
......
......@@ -79,35 +79,42 @@ class Profiler:
optypes_to_deal='', optypes_not_deal='Variable', job_id=""):
# get device_id and device_target
self._get_devid_and_devtarget()
self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
data_path = os.path.join(self._container_path, "data")
if not os.path.exists(data_path):
os.makedirs(data_path, exist_ok=True)
self._output_path = validate_and_normalize_path(output_path)
self._output_path = os.path.join(self._output_path, "profiler")
if not os.path.exists(self._output_path):
os.makedirs(self._output_path, exist_ok=True)
os.environ['PROFILING_MODE'] = 'true'
os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
os.environ['DEVICE_ID'] = self._dev_id
os.environ['AICPU_PROFILING_MODE'] = 'true'
os.environ['PROFILING_DIR'] = str(self._container_path)
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
self._subgraph = check_subgraph(subgraph)
self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
self._detail = check_bool(is_detail, 'is_detail')
self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
self._profiling_job_id = job_id
# add job id env through user input later
self._job_id_env = 0
self._start_time = int(time.time() * 10000000)
logger.info("Profiling: profiling start time: %d", self._start_time)
if self._device_target and self._device_target == "GPU":
from mindspore._c_expression import GPUProfiler
self._gpu_profiler = GPUProfiler.get_instance()
self._gpu_profiler.init(self._output_path)
self._gpu_profiler.step_profiling_enable(True)
elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
data_path = os.path.join(self._container_path, "data")
if not os.path.exists(data_path):
os.makedirs(data_path, exist_ok=True)
os.environ['PROFILING_MODE'] = 'true'
os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
os.environ['DEVICE_ID'] = self._dev_id
os.environ['AICPU_PROFILING_MODE'] = 'true'
os.environ['PROFILING_DIR'] = str(self._container_path)
# use context interface to open profiling, for the new mindspore version(after 2020.5.21)
context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
self._subgraph = check_subgraph(subgraph)
self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
self._detail = check_bool(is_detail, 'is_detail')
self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
self._profiling_job_id = job_id
# add job id env through user input later
self._job_id_env = 0
self._start_time = int(time.time() * 10000000)
logger.info("Profiling: profiling start time: %d", self._start_time)
def analyse(self):
"""
......@@ -123,71 +130,74 @@ class Profiler:
>>> model.train()
>>> profiler.analyse()
"""
release()
job_id = self._get_profiling_job_id()
logger.info("Profiling: job id is %s ", job_id)
source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
# parse hwts.log.data.45.dev file, and get task profiling data
hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
result = hwtslog_parser.execute()
if not result:
logger.error("Profiling: fail to parse hwts log file.")
return
# parse Framework file, and get the relation of op and tasks
framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
framework_parser.parse()
op_task_dict = framework_parser.to_task_id_full_op_name_dict()
if not op_task_dict:
logger.error("Profiling: fail to parse framework files.")
return
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
optime_parser = OPComputeTimeParser(
hwts_output_filename, opcompute_output_filename,
op_task_dict, self._output_path, self._dev_id
)
optime_parser.execute()
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
aicpu_data_parser.execute()
# Parsing minddata AICPU profiling
MinddataParser.execute(source_path, self._output_path, self._dev_id)
# parse minddata pipeline operator and queue
try:
pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
pipeline_parser.parse()
except ProfilerException as err:
logger.warning(err.message)
# analyse op compute time info
try:
self._analyser_op_info()
except ProfilerException as err:
logger.warning(err.message)
# analyse step trace info
try:
self._analyse_step_trace(source_path, framework_parser)
except ProfilerException as err:
logger.warning(err.message)
# analyse timeline info
try:
self._analyse_timeline(aicpu_data_parser, optime_parser)
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
logger.warning('Fail to write timeline data: %s', err)
if self._device_target and self._device_target == "GPU":
self._gpu_profiler.stop()
elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
release()
job_id = self._get_profiling_job_id()
logger.info("Profiling: job id is %s ", job_id)
source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
# parse hwts.log.data.45.dev file, and get task profiling data
hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
result = hwtslog_parser.execute()
if not result:
logger.error("Profiling: fail to parse hwts log file.")
return
# parse Framework file, and get the relation of op and tasks
framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
framework_parser.parse()
op_task_dict = framework_parser.to_task_id_full_op_name_dict()
if not op_task_dict:
logger.error("Profiling: fail to parse framework files.")
return
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
optime_parser = OPComputeTimeParser(
hwts_output_filename, opcompute_output_filename,
op_task_dict, self._output_path, self._dev_id
)
optime_parser.execute()
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
aicpu_data_parser.execute()
# Parsing minddata AICPU profiling
MinddataParser.execute(source_path, self._output_path, self._dev_id)
# parse minddata pipeline operator and queue
try:
pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
pipeline_parser.parse()
except ProfilerException as err:
logger.warning(err.message)
# analyse op compute time info
try:
self._analyser_op_info()
except ProfilerException as err:
logger.warning(err.message)
# analyse step trace info
try:
self._analyse_step_trace(source_path, framework_parser)
except ProfilerException as err:
logger.warning(err.message)
# analyse timeline info
try:
self._analyse_timeline(aicpu_data_parser, optime_parser)
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
logger.warning('Fail to write timeline data: %s', err)
def _analyse_step_trace(self, source_path, framework_parser):
"""
......@@ -416,12 +426,12 @@ class Profiler:
dev_id = "0"
logger.error("Fail to get DEVICE_ID, use 0 instead.")
if device_target and device_target != "Davinci" \
and device_target != "Ascend":
if device_target and device_target not in ["Davinci", "Ascend", "GPU"]:
msg = "Profiling: unsupport backend: %s" % device_target
raise RuntimeError(msg)
self._dev_id = dev_id
self._device_target = device_target
@staticmethod
def trainable_parameters(network):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册