Commit 70749776 authored by mindspore-ci-bot, committed by Gitee

!4070 Add a time profile tool to calculate the running time of each operator.

Merge pull request !4070 from yeyunpeng2020/master_profile
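The change builds a standalone timeprofile executable. A hypothetical invocation, using only the flags that TimeProfileFlags defines in this change (paths and values are illustrative; the exact --flag=value syntax is whatever FlagParser accepts):

./timeprofile --modelPath=./model.ms --inDataPath=./input.bin --loopCount=10 --numThreads=2 --cpuBindMode=1

The tool runs the model loopCount times, timing every kernel through the session callbacks, then prints two tables (avg(ms), percent, calledTimes, opTotalTime) aggregated by op name and by op type, plus the total and kernel-only run time.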
......@@ -153,4 +153,5 @@ if (BUILD_DEVICE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile)
endif()
......@@ -27,7 +27,8 @@
namespace mindspore {
namespace session {
struct CallBackParam {
-std::string name_callback_aram;
+std::string name_callback_param;
+std::string type_callback_param;
};
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
......
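The rename fixes the misspelled name_callback_aram and adds the kernel type. A minimal sketch of a callback reading the renamed fields, assuming the rest of the KernelCallBack signature matches the lambdas used elsewhere in this change (the body is illustrative):

session::KernelCallBack before = [](std::vector<tensor::MSTensor *> inputs,
                                    std::vector<tensor::MSTensor *> outputs,
                                    const session::CallBackParam &param) -> bool {
  // Identify the kernel about to run by name and type.
  MS_LOG(INFO) << "running " << param.name_callback_param << " (" << param.type_callback_param << ")";
  return true;  // the executor checks this return value, see the !before(...) guard below
};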
......@@ -43,7 +43,8 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
output->MallocData();
}
session::CallBackParam callbackParam;
-callbackParam.name_callback_aram = kernel->Name();
+callbackParam.name_callback_param = kernel->Name();
+callbackParam.type_callback_param = kernel->type_str();
if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {
......
......@@ -48,7 +48,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
output->MallocData();
}
session::CallBackParam callbackParam;
-callbackParam.name_callback_aram = kernel->Name();
+callbackParam.name_callback_param = kernel->Name();
if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {
......
......@@ -798,14 +798,14 @@ STATUS PostTrainingQuantizer::DoInference() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tData = static_cast<const float *>(tensor->MutableData());
size_t shapeSize = tensor->ElementsNum();
vector<float> data(tData, tData + shapeSize);
-this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetInputDivergInfo());
+this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetInputDivergInfo());
return true;
};
// func
......@@ -813,14 +813,14 @@ STATUS PostTrainingQuantizer::DoInference() {
const std::vector<mindspore::tensor::MSTensor *> &afterInputs,
const std::vector<mindspore::tensor::MSTensor *> &afterOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, afterOutputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) {
return false;
}
auto tensor = afterOutputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetOutputDivergInfo());
+this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetOutputDivergInfo());
return true;
};
status = session_->RunGraph(beforeCallBack, afterCallBack);
......@@ -851,14 +851,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->UpdateDataFrequency(callParam.name_callback_aram, data, tensor->shape(),
+this->calibrator_->UpdateDataFrequency(callParam.name_callback_param, data, tensor->shape(),
this->calibrator_->GetInputDivergInfo());
return true;
};
......@@ -867,14 +867,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const mindspore::session::CallBackParam &call_param) {
-if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_aram, after_outputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_param, after_outputs) != RET_OK) {
return false;
}
auto tensor = after_outputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->UpdateDataFrequency(call_param.name_callback_aram, data, tensor->shape(),
+this->calibrator_->UpdateDataFrequency(call_param.name_callback_param, data, tensor->shape(),
this->calibrator_->GetOutputDivergInfo());
return true;
};
......
# common sources shared with the other lite tools
set(COMMON_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
)
add_executable(timeprofile
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/time_profile.cc
${COMMON_SRC})
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY})
else()
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY} pthread)
endif()
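For reference, one plausible way to build just this target once the device build is configured (BUILD_DEVICE comes from the top-level CMakeLists in this diff; the cache value and the use of make are assumptions about the local setup):

cmake -DBUILD_DEVICE=on ..
make timeprofile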
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/time_profile/time_profile.h"
int main(int argc, const char **argv) { return mindspore::lite::RunTimeProfile(argc, argv); }
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/time_profile/time_profile.h"
#define __STDC_FORMAT_MACROS
#include <cinttypes>
#undef __STDC_FORMAT_MACROS
#include <cmath>
#include <cstring>
#include <algorithm>
#include <utility>
#include "include/ms_tensor.h"
#include "utils/log_adapter.h"
#include "include/context.h"
namespace mindspore {
namespace lite {
int TimeProfile::GenerateRandomData(size_t size, void *data) {
MS_ASSERT(data != nullptr);
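// Despite the name, fill the buffer with a deterministic byte pattern;
// timing only needs well-formed input, not true randomness.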
char *castedData = static_cast<char *>(data);
for (size_t i = 0; i < size; i++) {
castedData[i] = static_cast<char>(i);
}
return RET_OK;
}
int TimeProfile::GenerateInputData() {
for (auto tensor : ms_inputs_) {
MS_ASSERT(tensor != nullptr);
auto input_data = tensor->MutableData();
if (input_data == nullptr) {
MS_LOG(ERROR) << "MallocData for inTensor failed";
return RET_ERROR;
}
MS_ASSERT(tensor->GetData() != nullptr);
auto tensor_byte_size = tensor->Size();
auto status = GenerateRandomData(tensor_byte_size, input_data);
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate RandomData for inTensor failed: " << status;
return status;
}
}
return RET_OK;
}
int TimeProfile::ReadInputFile() {
if (ms_inputs_.empty()) {
return RET_OK;
}
auto inTensor = ms_inputs_.at(0);
MS_ASSERT(inTensor != nullptr);
size_t size;
char *bin_buf = ReadFile(_flags->in_data_path_.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "Read input data file failed: " << _flags->in_data_path_;
return RET_ERROR;
}
auto tensor_data_size = inTensor->Size();
if (size != tensor_data_size) {
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
delete[] bin_buf;
return RET_ERROR;
}
auto input_data = inTensor->MutableData();
memcpy(input_data, bin_buf, tensor_data_size);
delete[] bin_buf;
return RET_OK;
}
int TimeProfile::LoadInput() {
ms_inputs_ = session_->GetInputs();
if (_flags->in_data_path_.empty()) {
auto status = GenerateInputData();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
if (status != RET_OK) {
MS_LOG(ERROR) << "ReadInputFile error, " << status;
return status;
}
}
return RET_OK;
}
int TimeProfile::InitSession() {
size_t size = 0;
char *graph_buf = ReadFile(_flags->model_path_.c_str(), &size);
if (graph_buf == nullptr) {
MS_LOG(ERROR) << "Load graph failed, path: " << _flags->model_path_;
return RET_ERROR;
}
// The buffer is only used here to validate the model path; release it right away.
delete[] graph_buf;
auto ctx = new lite::Context;
ctx->cpu_bind_mode_ = static_cast<CpuBindMode>(_flags->cpu_bind_mode_);
ctx->device_ctx_.type = lite::DT_CPU;
ctx->thread_num_ = _flags->num_threads_;
session_ = session::LiteSession::CreateSession(ctx);
if (session_ == nullptr) {
MS_LOG(ERROR) << "New session failed while running.";
return RET_ERROR;
}
return RET_OK;
}
int TimeProfile::InitCallbackParameter() {
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const session::CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "before_inputs is empty";
}
if (before_outputs.empty()) {
MS_LOG(INFO) << "before_outputs is empty";
}
if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) {
op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f)));
}
if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) {
op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f)));
}
op_call_times_total_++;
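// A single shared timestamp is enough because kernels run sequentially on this thread.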
op_begin_ = GetTimeUs();
return true;
};
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const session::CallBackParam &call_param) {
uint64_t opEnd = GetTimeUs();
if (after_inputs.empty()) {
MS_LOG(INFO) << "after_inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "after_outputs is empty";
}
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
op_cost_total_ += cost;
op_times_by_type_[call_param.type_callback_param].first++;
op_times_by_type_[call_param.type_callback_param].second += cost;
op_times_by_name_[call_param.name_callback_param].first++;
op_times_by_name_[call_param.name_callback_param].second += cost;
return true;
};
return RET_OK;
}
int TimeProfile::Init() {
if (this->_flags == nullptr) {
return RET_ERROR;
}
MS_LOG(INFO) << "ModelPath = " << _flags->model_path_;
MS_LOG(INFO) << "InDataPath = " << _flags->in_data_path_;
MS_LOG(INFO) << "LoopCount = " << _flags->loop_count_;
MS_LOG(INFO) << "NumThreads = " << _flags->num_threads_;
if (_flags->cpu_bind_mode_ == -1) {
MS_LOG(INFO) << "cpuBindMode = MID_CPU";
} else if (_flags->cpu_bind_mode_ == 1) {
MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU";
} else {
MS_LOG(INFO) << "cpuBindMode = NO_BIND";
}
if (_flags->model_path_.empty()) {
MS_LOG(ERROR) << "modelPath is required";
return RET_ERROR;
}
auto status = InitSession();
if (status != RET_OK) {
MS_LOG(ERROR) << "Init session failed.";
return RET_ERROR;
}
status = this->LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Load input failed.";
return RET_ERROR;
}
status = InitCallbackParameter();
if (status != RET_OK) {
MS_LOG(ERROR) << "Init callback Parameter failed.";
return RET_ERROR;
}
return RET_OK;
}
int TimeProfile::PrintResult(const std::vector<std::string> &title,
const std::map<std::string, std::pair<int, float>> &result) {
std::vector<size_t> columnLenMax(5);
std::vector<std::vector<std::string>> rows;
for (auto &iter : result) {
char stringBuf[5][100] = {};
std::vector<std::string> columns;
int len;
len = iter.first.size();
if (len > columnLenMax.at(0)) {
columnLenMax.at(0) = len + 4;
}
columns.push_back(iter.first);
len = sprintf_s(stringBuf[1], 100, "%f", iter.second.second / _flags->loop_count_);
if (len > columnLenMax.at(1)) {
columnLenMax.at(1) = len + 4;
}
columns.emplace_back(stringBuf[1]);
len = sprintf_s(stringBuf[2], 100, "%f", iter.second.second / op_cost_total_);
if (len > columnLenMax.at(2)) {
columnLenMax.at(2) = len + 4;
}
columns.emplace_back(stringBuf[2]);
len = sprintf_s(stringBuf[3], 100, "%d", iter.second.first);
if (len > columnLenMax.at(3)) {
columnLenMax.at(3) = len + 4;
}
columns.emplace_back(stringBuf[3]);
len = sprintf_s(stringBuf[4], 100, "%f", iter.second.second);
if (len > columnLenMax.at(4)) {
columnLenMax.at(4) = len + 4;
}
columns.emplace_back(stringBuf[4]);
rows.push_back(columns);
}
printf("-------------------------------------------------------------------------\n");
for (int i = 0; i < 5; i++) {
auto printBuf = title[i];
if (printBuf.size() > columnLenMax.at(i)) {
columnLenMax.at(i) = printBuf.size();
}
printBuf.resize(columnLenMax.at(i), ' ');
printf("%s", printBuf.c_str());
}
printf("\n");
for (int i = 0; i < rows.size(); i++) {
for (int j = 0; j < 5; j++) {
auto printBuf = rows[i][j];
printBuf.resize(columnLenMax.at(j), ' ');
printf("%s\t", printBuf.c_str());
}
printf("\n");
}
return RET_OK;
}
int TimeProfile::RunTimeProfile() {
uint64_t time_avg = 0;
// Load graph
std::string modelName = _flags->model_path_.substr(_flags->model_path_.find_last_of("/") + 1);
MS_LOG(INFO) << "start reading model file";
size_t size = 0;
char *graphBuf = ReadFile(_flags->model_path_.c_str(), &size);
if (graphBuf == nullptr) {
MS_LOG(ERROR) << "Load graph failed while running %s", modelName.c_str();
return 1;
}
auto model = lite::Model::Import(graphBuf, size);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed: " << modelName;
delete[] graphBuf;
return RET_ERROR;
}
auto ret = session_->CompileGraph(model.get());
if (ret != RET_OK) {
MS_LOG(ERROR) << "Compile graph failed.";
delete[] graphBuf;
return RET_ERROR;
}
// load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}
// run graph and test
for (int i = 0; i < _flags->loop_count_; i++) {
session_->BindThread(true);
uint64_t run_begin = GetTimeUs();
ret = session_->RunGraph(before_call_back_, after_call_back_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run graph failed.";
}
auto outputs = session_->GetOutputs();
uint64_t run_end = GetTimeUs();
uint64_t time = run_end - run_begin;
time_avg += time;
session_->BindThread(false);
/*
for(auto &output : outputs) {
for (auto &outputTensor : output.second) {
delete outputTensor;
}
}*/
outputs.clear();
}
time_avg /= _flags->loop_count_;
float runCost = static_cast<float>(time_avg) / 1000.0f;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run session failed.";
}
const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
PrintResult(per_op_name, op_times_by_name_);
PrintResult(per_op_type, op_times_by_type_);
printf("\n total time: %5.5f ms, kernel cost: %5.5f ms \n\n", runCost, op_cost_total_ / _flags->loop_count_);
printf("-------------------------------------------------------------------------\n");
for (auto &msInput : ms_inputs_) {
delete msInput;
}
ms_inputs_.clear();
delete[] graphBuf;
return ret;
}
int RunTimeProfile(int argc, const char **argv) {
TimeProfileFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get() << std::endl;
std::cerr << flags.Usage() << std::endl;
return -1;
}
if (flags.help) {
std::cerr << flags.Usage() << std::endl;
return 0;
}
TimeProfile time_profile(&flags);
auto ret = time_profile.Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init TimeProfile failed.";
return RET_ERROR;
}
ret = time_profile.RunTimeProfile();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run TimeProfile failed.";
return RET_ERROR;
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINNIE_TIMEPROFILE_TIMEPROFILE_H_
#define MINNIE_TIMEPROFILE_TIMEPROFILE_H_
#include <getopt.h>
#include <signal.h>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "tools/common/flag_parser.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "schema/model_generated.h"
#include "include/model.h"
#include "include/lite_session.h"
namespace mindspore {
namespace lite {
class MS_API TimeProfileFlags : public virtual FlagParser {
public:
TimeProfileFlags() {
AddFlag(&TimeProfileFlags::model_path_, "modelPath", "Input model path", "");
AddFlag(&TimeProfileFlags::in_data_path_, "inDataPath", "Input data path, if not set, use random input", "");
AddFlag(&TimeProfileFlags::cpu_bind_mode_, "cpuBindMode",
"Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, default value: 1", 1);
AddFlag(&TimeProfileFlags::loop_count_, "loopCount", "Run loop count", 10);
AddFlag(&TimeProfileFlags::num_threads_, "numThreads", "Run threads number", 2);
}
~TimeProfileFlags() override = default;
public:
std::string model_path_;
std::string in_data_path_;
int cpu_bind_mode_ = 1;
int loop_count_;
int num_threads_;
};
class MS_API TimeProfile {
public:
explicit TimeProfile(TimeProfileFlags *flags) : _flags(flags) {}
~TimeProfile() = default;
int Init();
int RunTimeProfile();
private:
int GenerateRandomData(size_t size, void *data);
int GenerateInputData();
int LoadInput();
int ReadInputFile();
int InitCallbackParameter();
int InitSession();
int PrintResult(const std::vector<std::string>& title, const std::map<std::string, std::pair<int, float>>& result);
private:
TimeProfileFlags *_flags;
std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
session::LiteSession *session_ = nullptr;
// callback parameters
uint64_t op_begin_ = 0;
int op_call_times_total_ = 0;
float op_cost_total_ = 0.0f;
std::map<std::string, std::pair<int, float>> op_times_by_type_;
std::map<std::string, std::pair<int, float>> op_times_by_name_;
session::KernelCallBack before_call_back_;
session::KernelCallBack after_call_back_;
};
int MS_API RunTimeProfile(int argc, const char **argv);
} // namespace lite
} // namespace mindspore
#endif // MINNIE_TIMEPROFILE_TIMEPROFILE_H_