!4218 add data saver module for gpu profiler

Merge pull request !4218 from yelihua/temp-dev

!4218 add data saver module for gpu profiler
Merge pull request !4218 from yelihua/temp-dev
4149274b · mindspore-ci-bot · Gitee · d1ad3367 · 31e61f71 · 4149274b
6 changed files
--- a/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/cupti_interface.cc
@@ -126,7 +126,7 @@ CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *strea
 }

 CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) {
-  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiSubscribe"));
+  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId"));
  return func_ptr(context, deviceId);
 }
 }  // namespace gpu

--- a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiler/device/gpu/data_saver.h"
+#include <fstream>
+#include <numeric>
+#include "utils/log_adapter.h"
+
+namespace mindspore {
+namespace profiler {
+namespace gpu {
+
+// Build the per-operator detail record: keep the shared OpInfo and its proportion, and derive
+// the short name, the type and the average host time from it.
+OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
+    : op_info_(op_info), proportion_(proportion) {
+  // The full name looks like 'xxx/xxx/{op_type}-op{node_id}'.
+  op_full_name_ = op_info->op_name;
+  // rfind returns npos when there is no '/', and npos + 1 wraps to 0, i.e. the start of the string.
+  const auto short_name_pos = op_full_name_.rfind('/') + 1;
+  const auto last_dash_pos = op_full_name_.rfind('-');
+  // Short name: everything after the last '/'.
+  op_name_ = op_full_name_.substr(short_name_pos);
+  // Type: for names of the form above, the text between the last '/' and the last '-'.
+  op_type_ = op_full_name_.substr(short_name_pos, last_dash_pos - short_name_pos);
+  op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
+}
+
+// Build a single-occurrence activity record from one event.
+ActivityData::ActivityData(std::shared_ptr<Event> data) : basic_info_(data) {
+  // Only kernel activities get launch dimensions; for any other type both strings stay empty.
+  if (basic_info_->activity_type == ActivityType::kKernel) {
+    const auto &kernel = basic_info_->kernel_info;
+    // The "x,y,z" triple is wrapped in double quotes because it contains commas itself.
+    grid_dim_ = "\"" + std::to_string(kernel.grid_x) + ',' + std::to_string(kernel.grid_y) + ',' +
+                std::to_string(kernel.grid_z) + "\"";
+    block_dim_ = "\"" + std::to_string(kernel.block_x) + ',' + std::to_string(kernel.block_y) + ',' +
+                 std::to_string(kernel.block_z) + "\"";
+  }
+  // With exactly one sample, total, average, maximum and minimum are all the same value.
+  count_ = 1;
+  total_duration_ = (basic_info_->end_time_stamp - basic_info_->start_time_stamp) / kTimeUnit;
+  avg_duration_ = max_duration_ = min_duration_ = total_duration_;
+}
+
+// Merge the statistics of `other` into this record.
+// count_ and total_duration_ are summed; max_duration_ / min_duration_ become the extremes over
+// both records. avg_duration_ is NOT updated here: DataSaver::ParseEvent recomputes it once all
+// events have been merged.
+ActivityData &ActivityData::operator+=(const ActivityData &other) {
+  this->count_ += other.count_;
+  this->total_duration_ += other.total_duration_;
+  // Compare like with like: other's maximum against our maximum, other's minimum against our
+  // minimum. The two checks are independent so that merging an already-aggregated record
+  // (where max != min) can update both extremes at once.
+  if (other.max_duration_ > this->max_duration_) {
+    this->max_duration_ = other.max_duration_;
+  }
+  if (other.min_duration_ < this->min_duration_) {
+    this->min_duration_ = other.min_duration_;
+  }
+  return *this;
+}
+
+// Convert the raw operator map into per-operator detail records (op_detail_infos_) and
+// per-type aggregates (op_type_infos_).
+// op_info_maps: the operator records to convert; each value's op_host_cost_time is used to
+// compute that operator's share of the total.
+void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
+  op_detail_infos_.reserve(op_info_maps.size());
+  const float total_time_sum = GetTotalOpTime(op_info_maps);
+  // Iterate by const reference: copying each entry would duplicate its key string and OpInfo.
+  for (const auto &item : op_info_maps) {
+    // When no time was recorded at all the division would be 0/0; report a zero share instead.
+    const float proportion = total_time_sum > 0 ? item.second.op_host_cost_time / total_time_sum : 0.0f;
+    auto op_info = std::make_shared<OpInfo>(item.second);
+    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
+    op_detail_infos_.emplace_back(op_detail_info);
+    AddOpDetailInfoForType(op_detail_info);
+  }
+  // update average time of op type
+  for (auto &op_type : op_type_infos_) {
+    // op_type: <type_name, OpType>
+    op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
+  }
+  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
+  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
+}
+
+// Fold one operator's figures into the aggregate kept for its op type.
+void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
+  // Per-type contribution of this single operator; the average (4th field) is left at 0 here.
+  OpType contribution{op_detail_info.op_type_, op_detail_info.op_info_->op_count,
+                      op_detail_info.op_info_->op_host_cost_time, 0, op_detail_info.proportion_};
+  const std::string &type_name = op_detail_info.op_type_;
+  auto existing = op_type_infos_.find(type_name);
+  if (existing != op_type_infos_.end()) {
+    // Type already known: accumulate into its entry.
+    existing->second += contribution;
+    return;
+  }
+  // First operator of this type: it starts the aggregate.
+  op_type_infos_.emplace(type_name, contribution);
+}
+
+// Return the sum of op_host_cost_time over every entry of op_info_maps.
+float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
+  float total = 0;
+  for (const auto &entry : op_info_maps) {
+    total = total + entry.second.op_host_cost_time;
+  }
+  MS_LOG(DEBUG) << "The total op time is " << total;
+  return total;
+}
+
+// Aggregate the kernel activity events into activity_infos_ and finalize their averages.
+void DataSaver::ParseEvent(const std::vector<Event> &events) {
+  // Keep only events that carry an op name and are kernel activity records.
+  for (const auto &event : events) {
+    const bool is_named_kernel_activity = !event.op_name.empty() && event.api_type == CUPTIApiType::kActivity &&
+                                          event.activity_type == ActivityType::kKernel;
+    if (is_named_kernel_activity) {
+      AddKernelEvent(event);
+    }
+  }
+  // All events are merged now, so the average of each kernel can be derived from total and count.
+  for (auto &device_entry : activity_infos_) {
+    // device_entry: <device_id, DeviceActivityInfos>
+    auto &kernels = device_entry.second;
+    for (auto &kernel_entry : kernels) {
+      // kernel_entry: <kernel_name, ActivityData>
+      ActivityData &stats = kernel_entry.second;
+      stats.avg_duration_ = stats.total_duration_ / stats.count_;
+    }
+    MS_LOG(DEBUG) << "Get " << kernels.size() << " activity items for device:" << device_entry.first;
+  }
+}
+
+// Route a kernel event to the activity table of the device it belongs to.
+void DataSaver::AddKernelEvent(const Event &event) {
+  const uint32_t device_id = event.device_id;
+  auto device_iter = activity_infos_.find(device_id);
+  if (device_iter == activity_infos_.end()) {
+    // First event for this device: create its (empty) table and continue with that entry.
+    device_iter = activity_infos_.emplace(device_id, DeviceActivityInfos()).first;
+  }
+  AddKernelEventToDevice(event, &device_iter->second);
+}
+
+// Add one kernel event to a device's table, merging it with earlier events of the same kernel name.
+void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos) {
+  // The record keeps its own copy of the event.
+  ActivityData single_record = ActivityData(std::make_shared<Event>(event));
+  const std::string kernel_name = event.kernel_name;
+  // emplace leaves an existing entry untouched and reports that through `.second`.
+  auto inserted = device_activity_infos->emplace(kernel_name, single_record);
+  if (!inserted.second) {
+    inserted.first->second += single_record;
+  }
+}
+
+// Write the parsed operator and activity data as CSV files below out_path_dir.
+// Nothing is written when out_path_dir is empty, or when any of the three parsed containers
+// (op details, op types, activities) is empty.
+void DataSaver::WriteFile(std::string out_path_dir) {
+  if (out_path_dir.empty()) {
+    MS_LOG(WARNING) << "Output directory is empty. Ignore the writing data.";
+    return;
+  }
+  if (op_detail_infos_.empty() || op_type_infos_.empty() || activity_infos_.empty()) {
+    MS_LOG(WARNING) << "No operation detail infos to write.";
+    return;
+  }
+  // not support multi-device for operator info per process yet:
+  // the op type / op detail files are named after the first device found in activity_infos_.
+  device_id_ = std::to_string(activity_infos_.begin()->first);
+  WriteOpDetail(out_path_dir);
+  WriteOpType(out_path_dir);
+  WriteActivity(out_path_dir);
+}
+
+// Write one CSV row per op type to '<saver_base_dir>/gpu_op_type_info_<device_id_>.csv'.
+// A file that cannot be opened is reported with a warning and skipped.
+void DataSaver::WriteOpType(const std::string &saver_base_dir) {
+  std::string file_path = saver_base_dir + "/gpu_op_type_info_" + device_id_ + ".csv";
+  std::ofstream ofs(file_path);
+  // check if the file is writable
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+  // write op type info into file; '\n' instead of std::endl avoids flushing after every row
+  // (close() below flushes once), and the const reference avoids copying each map entry.
+  ofs << OpType().GetHeader() << '\n';
+  for (const auto &op_type_info : op_type_infos_) {
+    ofs << op_type_info.second << '\n';
+  }
+  ofs.close();
+  MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
+}
+
+// Write one CSV row per operator to '<saver_base_dir>/gpu_op_detail_info_<device_id_>.csv'.
+// A file that cannot be opened is reported with a warning and skipped.
+void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
+  std::string file_path = saver_base_dir + "/gpu_op_detail_info_" + device_id_ + ".csv";
+  std::ofstream ofs(file_path);
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+  // write op detail info into file; '\n' instead of std::endl avoids flushing after every row
+  // (close() below flushes once), and the const reference avoids copying three strings and a
+  // shared_ptr per row.
+  ofs << OpDetailInfo().GetHeader() << '\n';
+  for (const auto &op_detail : op_detail_infos_) {
+    ofs << op_detail << '\n';
+  }
+  ofs.close();
+  MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
+}
+
+// Write one CSV file per device, '<saver_base_dir>/gpu_activity_data_<device_id>.csv',
+// with one row per kernel name.
+void DataSaver::WriteActivity(const std::string &saver_base_dir) {
+  std::string file_path_base = saver_base_dir + "/gpu_activity_data_";
+  // Iterate by const reference: a by-value loop would copy each device's whole kernel table.
+  for (const auto &device_info : activity_infos_) {
+    std::string file_path = file_path_base + std::to_string(device_info.first) + ".csv";
+    std::ofstream ofs(file_path);
+    if (!ofs.is_open()) {
+      MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+      // Each device has its own file; a failure for one must not drop the remaining devices.
+      continue;
+    }
+    // write activity data into file; '\n' avoids a flush per row, close() flushes once
+    ofs << ActivityData().GetHeader() << '\n';
+    for (const auto &activity_data : device_info.second) {
+      ofs << activity_data.second << '\n';
+    }
+    ofs.close();
+    MS_LOG(INFO) << "Write " << device_info.second.size() << " activity infos into file: " << file_path;
+  }
+}
+
+}  // namespace gpu
+}  // namespace profiler
+}  // namespace mindspore
--- a/mindspore/ccsrc/profiler/device/gpu/data_saver.h
+++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_DATA_SAVER_H
+#define MINDSPORE_DATA_SAVER_H
+#include <iostream>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include <memory>
+#include "profiler/device/gpu/gpu_profiling.h"
+namespace mindspore {
+namespace profiler {
+namespace gpu {
+
+// One CSV row of the op detail file: a single operator together with the values derived from
+// its OpInfo.
+struct OpDetailInfo {
+  std::string op_type_;
+  std::string op_name_;
+  std::string op_full_name_;
+  std::shared_ptr<OpInfo> op_info_{nullptr};
+  float op_avg_time_{0};
+  float proportion_{0};
+
+  OpDetailInfo() = default;
+
+  OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
+
+  // Column names, in the same order as the values emitted by operator<<.
+  std::string GetHeader() const {
+    return "op_side,op_type,op_name,op_full_name,op_occurrences,op_total_time(us),op_avg_time(us),total_proportion,"
+           "cuda_activity_cost_time(us),cuda_activity_call_count";
+  }
+
+  // Emit the record as one comma-separated row (no trailing newline).
+  // Dereferences op_info_, so it must not be used on a default-constructed object.
+  friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &detail) {
+    const auto &info = *detail.op_info_;
+    os << "Device," << detail.op_type_ << ',' << detail.op_name_ << ',' << detail.op_full_name_ << ',';
+    os << info.op_count << ',' << info.op_host_cost_time << ',' << detail.op_avg_time_ << ',';
+    os << detail.proportion_ << ',' << info.cupti_activity_time << ',' << info.op_kernel_count;
+    return os;
+  }
+};
+
+// Aggregate over all operators that share one op type; one CSV row of the op type file.
+struct OpType {
+  std::string op_type_;
+  int count_{0};
+  float total_time_{0};
+  float avg_time_{0};
+  float proportion_{0};
+
+  // Column names, in the same order as the values emitted by operator<<.
+  std::string GetHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }
+
+  // Emit the aggregate as one comma-separated row (no trailing newline).
+  friend std::ostream &operator<<(std::ostream &os, const OpType &type_info) {
+    os << type_info.op_type_ << ',' << type_info.count_ << ',' << type_info.total_time_ << ',';
+    os << type_info.proportion_ << ',' << type_info.avg_time_;
+    return os;
+  }
+
+  // Accumulate count, total time and proportion; op_type_ and avg_time_ are left untouched.
+  OpType &operator+=(const OpType &other) {
+    count_ += other.count_;
+    total_time_ += other.total_time_;
+    proportion_ += other.proportion_;
+    return *this;
+  }
+};
+
+// Statistics of one kernel name on one device; one CSV row of the activity file.
+struct ActivityData {
+  std::shared_ptr<Event> basic_info_{nullptr};
+  std::string block_dim_;
+  std::string grid_dim_;
+  int count_{0};
+  float total_duration_{0};
+  float avg_duration_{0};
+  float max_duration_{0};
+  float min_duration_{0};
+
+  ActivityData() = default;
+
+  explicit ActivityData(std::shared_ptr<Event> data);
+
+  // Column names, in the same order as the values emitted by operator<<.
+  std::string GetHeader() const {
+    return "name,type,op_full_name,stream_id,block_dim,grid_dim,occurrences,"
+           "total_duration(us),avg_duration(us),max_duration(us),min_duration(us)";
+  }
+
+  // Emit the record as one comma-separated row (no trailing newline); the kernel name is quoted.
+  // Dereferences basic_info_, so it must not be used on a default-constructed object.
+  friend std::ostream &operator<<(std::ostream &os, const ActivityData &activity) {
+    const auto &info = *activity.basic_info_;
+    os << "\"" << info.kernel_name << "\"," << info.kernel_type << ',' << info.op_name << ',';
+    os << info.stream_id << ',' << activity.block_dim_ << ',' << activity.grid_dim_ << ',';
+    os << activity.count_ << ',' << activity.total_duration_ << ',' << activity.avg_duration_ << ',';
+    os << activity.max_duration_ << ',' << activity.min_duration_;
+    return os;
+  }
+
+  ActivityData &operator+=(const ActivityData &other);
+};
+
+// Container aliases used by DataSaver.
+using OpInfoMap = std::unordered_map<std::string, OpInfo>;
+using DeviceActivityInfos = std::unordered_map<std::string, ActivityData>;   // <kernel_name, ActivityData>
+using AllActivityInfos = std::unordered_map<uint32_t, DeviceActivityInfos>;  // <device_id, DeviceActivityInfos>
+using OpTypeInfos = std::unordered_map<std::string, OpType>;                 // <op_type, OpType>
+using OpDetailInfos = std::vector<OpDetailInfo>;
+
+// Aggregates profiler operator records and kernel activity events and writes them out as CSV files.
+// Usage as seen in GPUProfiler::SaveProfileData: ParseOpInfo, then ParseEvent, then WriteFile.
+class DataSaver {
+ public:
+  DataSaver() = default;
+
+  ~DataSaver() = default;
+
+  // Not copyable.
+  DataSaver(const DataSaver &) = delete;
+
+  DataSaver &operator=(const DataSaver &) = delete;
+
+  // Build the per-operator detail list and the per-op-type aggregates from op_info_maps.
+  void ParseOpInfo(const OpInfoMap &op_info_maps);
+
+  // Aggregate the kernel activity events (non-empty op name, activity API type, kernel activity type)
+  // per device and per kernel name.
+  void ParseEvent(const std::vector<Event> &events);
+
+  // Write the op detail, op type and activity CSV files below out_path.
+  // Does nothing (apart from a warning) if out_path is empty or any parsed container is empty.
+  void WriteFile(std::string out_path);
+
+ private:
+  // Fold one operator's figures into the aggregate of its op type.
+  void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
+
+  // Sum of op_host_cost_time over all entries of op_info_maps.
+  float GetTotalOpTime(const OpInfoMap &op_info_maps);
+
+  // Route a kernel event to the table of its device, creating the table on first use.
+  void AddKernelEvent(const Event &event);
+
+  // Insert the event into device_activity_infos, or merge it into the entry with the same kernel name.
+  void AddKernelEventToDevice(const Event &event, DeviceActivityInfos *device_activity_infos);
+
+  // Write '<saver_base_dir>/gpu_op_type_info_<device_id_>.csv'.
+  void WriteOpType(const std::string &saver_base_dir);
+
+  // Write '<saver_base_dir>/gpu_op_detail_info_<device_id_>.csv'.
+  void WriteOpDetail(const std::string &saver_base_dir);
+
+  // Write one '<saver_base_dir>/gpu_activity_data_<device_id>.csv' per device.
+  void WriteActivity(const std::string &saver_base_dir);
+
+  // Device id used in the op type / op detail file names; set by WriteFile from the first
+  // entry of activity_infos_.
+  std::string device_id_;
+  AllActivityInfos activity_infos_;
+  OpTypeInfos op_type_infos_;
+  OpDetailInfos op_detail_infos_;
+};
+}  // namespace gpu
+}  // namespace profiler
+}  // namespace mindspore
+
+#endif  // MINDSPORE_DATA_SAVER_H
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
@@ -19,6 +19,7 @@
 #include <chrono>
 #include "profiler/device/gpu/gpu_profiling.h"
 #include "profiler/device/gpu/cupti_interface.h"
+#include "profiler/device/gpu/data_saver.h"
 #include "utils/log_adapter.h"
 #include "pybind_api/api_register.h"

@@ -478,7 +479,11 @@ void GPUProfiler::Stop() {
 void GPUProfiler::SaveProfileData() {
  if (profile_data_path_.empty()) {
    MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data.";
-    return;
+  } else {
+    DataSaver dataSaver;
+    dataSaver.ParseOpInfo(op_info_map_);
+    dataSaver.ParseEvent(events_);
+    dataSaver.WriteFile(profile_data_path_);
  }
  op_info_map_.clear();
  op_name_map_.clear();

--- a/mindspore/profiler/parser/minddata_parser.py
+++ b/mindspore/profiler/parser/minddata_parser.py
@@ -43,17 +43,21 @@ class MinddataParser:
                        node_name, node_start, node_end, queue_size = "", 0, 0, 0
                        if node_info:
                            node_name = node_info[0].replace("Node:", "")
-                        if len(node_info) > 2:
+
+                        if len(node_info) > 3 and "queue" in node_info[1]:
+                            queue_size = node_info[1].replace("queue size:", "")
+                            queue_size = int(queue_size) if queue_size.isdigit() else queue_size
+                            node_start = node_info[2].replace("Run start:", "")
+                            node_start = int(node_start) if node_start.isdigit() else node_start
+                            node_end = node_info[3].replace("Run end:", "")
+                            node_end = int(node_end) if node_end.isdigit() else node_end
+                        elif len(node_info) > 3 and "Run" in node_info[1]:
+                            queue_size = node_info[3].replace("queue size:", "")
+                            queue_size = int(queue_size) if queue_size.isdigit() else queue_size
                            node_start = node_info[1].replace("Run start:", "")
-                            if node_start.isdigit():
-                                node_start = int(node_start)
+                            node_start = int(node_start) if node_start.isdigit() else node_start
                            node_end = node_info[2].replace("Run end:", "")
-                            if node_end.isdigit():
-                                node_end = int(node_end)
-                        if len(node_info) > 3:
-                            queue_size = node_info[3].replace("queue size:", "")
-                            if queue_size.isdigit():
-                                queue_size = int(queue_size)
+                            node_end = int(node_end) if node_end.isdigit() else node_end

                        one_step_list = [node_name, node_start, node_end, queue_size]
                        result.append(one_step_list)

--- a/mindspore/profiler/profiling.py
+++ b/mindspore/profiler/profiling.py
@@ -79,35 +79,42 @@ class Profiler:
                 optypes_to_deal='', optypes_not_deal='Variable', job_id=""):
        # get device_id and device_target
        self._get_devid_and_devtarget()
-        self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
-        data_path = os.path.join(self._container_path, "data")
-        if not os.path.exists(data_path):
-            os.makedirs(data_path, exist_ok=True)
        self._output_path = validate_and_normalize_path(output_path)
        self._output_path = os.path.join(self._output_path, "profiler")
        if not os.path.exists(self._output_path):
            os.makedirs(self._output_path, exist_ok=True)

-        os.environ['PROFILING_MODE'] = 'true'
-        os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
-        os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
-        os.environ['DEVICE_ID'] = self._dev_id
-        os.environ['AICPU_PROFILING_MODE'] = 'true'
-        os.environ['PROFILING_DIR'] = str(self._container_path)
-
-        # use context interface to open profiling, for the new mindspore version(after 2020.5.21)
-        context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
-
-        self._subgraph = check_subgraph(subgraph)
-        self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
-        self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
-        self._detail = check_bool(is_detail, 'is_detail')
-        self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
-        self._profiling_job_id = job_id
-        # add job id env through user input later
-        self._job_id_env = 0
-        self._start_time = int(time.time() * 10000000)
-        logger.info("Profiling: profiling start time: %d", self._start_time)
+        if self._device_target and self._device_target == "GPU":
+            from mindspore._c_expression import GPUProfiler
+            self._gpu_profiler = GPUProfiler.get_instance()
+            self._gpu_profiler.init(self._output_path)
+            self._gpu_profiler.step_profiling_enable(True)
+        elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
+            self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
+            data_path = os.path.join(self._container_path, "data")
+            if not os.path.exists(data_path):
+                os.makedirs(data_path, exist_ok=True)
+
+            os.environ['PROFILING_MODE'] = 'true'
+            os.environ['PROFILING_OPTIONS'] = 'training_trace:task_trace'
+            os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
+            os.environ['DEVICE_ID'] = self._dev_id
+            os.environ['AICPU_PROFILING_MODE'] = 'true'
+            os.environ['PROFILING_DIR'] = str(self._container_path)
+
+            # use context interface to open profiling, for the new mindspore version(after 2020.5.21)
+            context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
+
+            self._subgraph = check_subgraph(subgraph)
+            self._valid_optype_name = optypes_to_deal.split(",") if optypes_to_deal else []
+            self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
+            self._detail = check_bool(is_detail, 'is_detail')
+            self._withfullpath = check_bool(is_show_op_path, 'is_show_op_path')
+            self._profiling_job_id = job_id
+            # add job id env through user input later
+            self._job_id_env = 0
+            self._start_time = int(time.time() * 10000000)
+            logger.info("Profiling: profiling start time: %d", self._start_time)

    def analyse(self):
        """
@@ -123,71 +130,74 @@ class Profiler:
            >>> model.train()
            >>> profiler.analyse()
        """
-        release()
-
-        job_id = self._get_profiling_job_id()
-        logger.info("Profiling: job id is %s ", job_id)
-
-        source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
-        # parse hwts.log.data.45.dev file, and get task profiling data
-        hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
-        hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
-        hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
-        result = hwtslog_parser.execute()
-        if not result:
-            logger.error("Profiling: fail to parse hwts log file.")
-            return
-
-        # parse Framework file, and get the relation of op and tasks
-        framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
-        framework_parser.parse()
-        op_task_dict = framework_parser.to_task_id_full_op_name_dict()
-        if not op_task_dict:
-            logger.error("Profiling: fail to parse framework files.")
-            return
-
-        # get op compute time from hwts data and framework data, write output_op_compute_time.txt
-        opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
-        opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
-        optime_parser = OPComputeTimeParser(
-            hwts_output_filename, opcompute_output_filename,
-            op_task_dict, self._output_path, self._dev_id
-        )
-        optime_parser.execute()
-
-        # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
-        output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
-        output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
-        aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
-        aicpu_data_parser.execute()
-
-        # Parsing minddata AICPU profiling
-        MinddataParser.execute(source_path, self._output_path, self._dev_id)
-
-        # parse minddata pipeline operator and queue
-        try:
-            pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
-            pipeline_parser.parse()
-        except ProfilerException as err:
-            logger.warning(err.message)
-
-        # analyse op compute time info
-        try:
-            self._analyser_op_info()
-        except ProfilerException as err:
-            logger.warning(err.message)
-
-        # analyse step trace info
-        try:
-            self._analyse_step_trace(source_path, framework_parser)
-        except ProfilerException as err:
-            logger.warning(err.message)
-
-        # analyse timeline info
-        try:
-            self._analyse_timeline(aicpu_data_parser, optime_parser)
-        except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
-            logger.warning('Fail to write timeline data: %s', err)
+        if self._device_target and self._device_target == "GPU":
+            self._gpu_profiler.stop()
+        elif self._device_target and (self._device_target == "Ascend" or self._device_target != "Davinci"):
+            release()
+
+            job_id = self._get_profiling_job_id()
+            logger.info("Profiling: job id is %s ", job_id)
+
+            source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
+            # parse hwts.log.data.45.dev file, and get task profiling data
+            hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
+            hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
+            hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
+            result = hwtslog_parser.execute()
+            if not result:
+                logger.error("Profiling: fail to parse hwts log file.")
+                return
+
+            # parse Framework file, and get the relation of op and tasks
+            framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
+            framework_parser.parse()
+            op_task_dict = framework_parser.to_task_id_full_op_name_dict()
+            if not op_task_dict:
+                logger.error("Profiling: fail to parse framework files.")
+                return
+
+            # get op compute time from hwts data and framework data, write output_op_compute_time.txt
+            opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
+            opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
+            optime_parser = OPComputeTimeParser(
+                hwts_output_filename, opcompute_output_filename,
+                op_task_dict, self._output_path, self._dev_id
+            )
+            optime_parser.execute()
+
+            # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
+            output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
+            output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
+            aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
+            aicpu_data_parser.execute()
+
+            # Parsing minddata AICPU profiling
+            MinddataParser.execute(source_path, self._output_path, self._dev_id)
+
+            # parse minddata pipeline operator and queue
+            try:
+                pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
+                pipeline_parser.parse()
+            except ProfilerException as err:
+                logger.warning(err.message)
+
+            # analyse op compute time info
+            try:
+                self._analyser_op_info()
+            except ProfilerException as err:
+                logger.warning(err.message)
+
+            # analyse step trace info
+            try:
+                self._analyse_step_trace(source_path, framework_parser)
+            except ProfilerException as err:
+                logger.warning(err.message)
+
+            # analyse timeline info
+            try:
+                self._analyse_timeline(aicpu_data_parser, optime_parser)
+            except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
+                logger.warning('Fail to write timeline data: %s', err)

    def _analyse_step_trace(self, source_path, framework_parser):
        """
@@ -416,12 +426,12 @@ class Profiler:
            dev_id = "0"
            logger.error("Fail to get DEVICE_ID, use 0 instead.")

-        if device_target and device_target != "Davinci" \
-            and device_target != "Ascend":
+        if device_target and device_target not in ["Davinci", "Ascend", "GPU"]:
            msg = "Profiling: unsupport backend: %s" % device_target
            raise RuntimeError(msg)

        self._dev_id = dev_id
+        self._device_target = device_target

    @staticmethod
    def trainable_parameters(network):