Commit 70749776 authored by mindspore-ci-bot, committed by Gitee

!4070 Add a time profile tool to calculate the running time of each operator.

Merge pull request !4070 from yeyunpeng2020/master_profile
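The change builds a standalone timeprofile executable. A hypothetical invocation, using only the flags that TimeProfileFlags defines in this change (paths and values are illustrative; the exact --flag=value syntax is whatever FlagParser accepts):

./timeprofile --modelPath=./model.ms --inDataPath=./input.bin --loopCount=10 --numThreads=2 --cpuBindMode=1

The tool runs the model loopCount times, timing every kernel through the session callbacks, then prints two tables (avg(ms), percent, calledTimes, opTotalTime) aggregated by op name and by op type, plus the total and kernel-only run time.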
......@@ -153,4 +153,5 @@ if (BUILD_DEVICE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile)
endif()
......@@ -27,7 +27,8 @@
namespace mindspore {
namespace session {
struct CallBackParam {
-std::string name_callback_aram;
+std::string name_callback_param;
+std::string type_callback_param;
};
using KernelCallBack = std::function<bool(std::vector<tensor::MSTensor *> inputs,
......
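The rename fixes the misspelled name_callback_aram and adds the kernel type. A minimal sketch of a callback reading the renamed fields, assuming the rest of the KernelCallBack signature matches the lambdas used elsewhere in this change (the body is illustrative):

session::KernelCallBack before = [](std::vector<tensor::MSTensor *> inputs,
                                    std::vector<tensor::MSTensor *> outputs,
                                    const session::CallBackParam &param) -> bool {
  // Identify the kernel about to run by name and type.
  MS_LOG(INFO) << "running " << param.name_callback_param << " (" << param.type_callback_param << ")";
  return true;  // the executor checks this return value, see the !before(...) guard below
};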
......@@ -43,7 +43,8 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
output->MallocData();
}
session::CallBackParam callbackParam;
-callbackParam.name_callback_aram = kernel->Name();
+callbackParam.name_callback_param = kernel->Name();
+callbackParam.type_callback_param = kernel->type_str();
if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {
......
......@@ -48,7 +48,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
output->MallocData();
}
session::CallBackParam callbackParam;
-callbackParam.name_callback_aram = kernel->Name();
+callbackParam.name_callback_param = kernel->Name();
if (before != nullptr) {
if (!before(PackToMSTensors(kernel->GetInputs()), PackToMSTensors(kernel->GetOutputs()), callbackParam)) {
......
......@@ -798,14 +798,14 @@ STATUS PostTrainingQuantizer::DoInference() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tData = static_cast<const float *>(tensor->MutableData());
size_t shapeSize = tensor->ElementsNum();
vector<float> data(tData, tData + shapeSize);
-this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetInputDivergInfo());
+this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetInputDivergInfo());
return true;
};
// func
......@@ -813,14 +813,14 @@ STATUS PostTrainingQuantizer::DoInference() {
const std::vector<mindspore::tensor::MSTensor *> &afterInputs,
const std::vector<mindspore::tensor::MSTensor *> &afterOutputs,
const mindspore::session::CallBackParam &callParam) -> bool {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, afterOutputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, afterOutputs) != RET_OK) {
return false;
}
auto tensor = afterOutputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->RecordMaxValue(callParam.name_callback_aram, data, this->calibrator_->GetOutputDivergInfo());
+this->calibrator_->RecordMaxValue(callParam.name_callback_param, data, this->calibrator_->GetOutputDivergInfo());
return true;
};
status = session_->RunGraph(beforeCallBack, afterCallBack);
......@@ -851,14 +851,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &beforeInputs,
const std::vector<mindspore::tensor::MSTensor *> &beforeOutputs,
const mindspore::session::CallBackParam &callParam) {
-if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_aram, beforeInputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(callParam.name_callback_param, beforeInputs) != RET_OK) {
return false;
}
auto tensor = beforeInputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->UpdateDataFrequency(callParam.name_callback_aram, data, tensor->shape(),
+this->calibrator_->UpdateDataFrequency(callParam.name_callback_param, data, tensor->shape(),
this->calibrator_->GetInputDivergInfo());
return true;
};
......@@ -867,14 +867,14 @@ STATUS PostTrainingQuantizer::CollectDataFrequency() {
[&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const mindspore::session::CallBackParam &call_param) {
-if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_aram, after_outputs) != RET_OK) {
+if (PostTrainingQuantizer::CheckTensorVec(call_param.name_callback_param, after_outputs) != RET_OK) {
return false;
}
auto tensor = after_outputs[0];
const float *tensor_data = static_cast<const float *>(tensor->MutableData());
size_t shape_size = tensor->ElementsNum();
vector<float> data(tensor_data, tensor_data + shape_size);
-this->calibrator_->UpdateDataFrequency(call_param.name_callback_aram, data, tensor->shape(),
+this->calibrator_->UpdateDataFrequency(call_param.name_callback_param, data, tensor->shape(),
this->calibrator_->GetOutputDivergInfo());
return true;
};
......
# common sources shared with the other lite tools
set(COMMON_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
)
add_executable(timeprofile
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/time_profile.cc
${COMMON_SRC})
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY})
else()
target_link_libraries(timeprofile mindspore-lite ${SECUREC_LIBRARY} pthread)
endif()
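For reference, one plausible way to build just this target once the device build is configured (BUILD_DEVICE comes from the top-level CMakeLists in this diff; the cache value and the use of make are assumptions about the local setup):

cmake -DBUILD_DEVICE=on ..
make timeprofile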
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/time_profile/time_profile.h"
int main(int argc, const char **argv) { return mindspore::lite::RunTimeProfile(argc, argv); }
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/time_profile/time_profile.h"
#define __STDC_FORMAT_MACROS
#include <cinttypes>
#undef __STDC_FORMAT_MACROS
#include <cmath>
#include <cstring>
#include <algorithm>
#include <utility>
#include "include/ms_tensor.h"
#include "utils/log_adapter.h"
#include "include/context.h"
namespace mindspore {
namespace lite {
int TimeProfile::GenerateRandomData(size_t size, void *data) {
MS_ASSERT(data != nullptr);
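// Despite the name, fill the buffer with a deterministic byte pattern;
// timing only needs well-formed input, not true randomness.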
char *castedData = static_cast<char *>(data);
for (size_t i = 0; i < size; i++) {
castedData[i] = static_cast<char>(i);
}
return RET_OK;
}
int TimeProfile::GenerateInputData() {
for (auto tensor : ms_inputs_) {
MS_ASSERT(tensor != nullptr);
auto input_data = tensor->MutableData();
if (input_data == nullptr) {
MS_LOG(ERROR) << "MallocData for inTensor failed";
return RET_ERROR;
}
MS_ASSERT(tensor->GetData() != nullptr);
auto tensor_byte_size = tensor->Size();
auto status = GenerateRandomData(tensor_byte_size, input_data);
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate RandomData for inTensor failed: " << status;
return status;
}
}
return RET_OK;
}
int TimeProfile::ReadInputFile() {
if (ms_inputs_.empty()) {
return RET_OK;
}
auto inTensor = ms_inputs_.at(0);
MS_ASSERT(inTensor != nullptr);
size_t size;
char *bin_buf = ReadFile(_flags->in_data_path_.c_str(), &size);
if (bin_buf == nullptr) {
MS_LOG(ERROR) << "Read input data file failed: " << _flags->in_data_path_;
return RET_ERROR;
}
auto tensor_data_size = inTensor->Size();
if (size != tensor_data_size) {
MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
delete[] bin_buf;
return RET_ERROR;
}
auto input_data = inTensor->MutableData();
memcpy(input_data, bin_buf, tensor_data_size);
delete[] bin_buf;
return RET_OK;
}
int TimeProfile::LoadInput() {
ms_inputs_ = session_->GetInputs();
if (_flags->in_data_path_.empty()) {
auto status = GenerateInputData();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error " << status;
return status;
}
} else {
auto status = ReadInputFile();
if (status != RET_OK) {
MS_LOG(ERROR) << "ReadInputFile error, " << status;
return status;
}
}
return RET_OK;
}
int TimeProfile::InitSession() {
size_t size = 0;
char *graph_buf = ReadFile(_flags->model_path_.c_str(), &size);
if (graph_buf == nullptr) {
MS_LOG(ERROR) << "Load graph failed, path: " << _flags->model_path_;
return RET_ERROR;
}
// The buffer is only used here to validate the model path; release it right away.
delete[] graph_buf;
auto ctx = new lite::Context;
ctx->cpu_bind_mode_ = static_cast<CpuBindMode>(_flags->cpu_bind_mode_);
ctx->device_ctx_.type = lite::DT_CPU;
ctx->thread_num_ = _flags->num_threads_;
session_ = session::LiteSession::CreateSession(ctx);
if (session_ == nullptr) {
MS_LOG(ERROR) << "New session failed while running.";
return RET_ERROR;
}
return RET_OK;
}
int TimeProfile::InitCallbackParameter() {
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
const session::CallBackParam &callParam) {
if (before_inputs.empty()) {
MS_LOG(INFO) << "before_inputs is empty";
}
if (before_outputs.empty()) {
MS_LOG(INFO) << "before_outputs is empty";
}
if (op_times_by_type_.find(callParam.type_callback_param) == op_times_by_type_.end()) {
op_times_by_type_.insert(std::make_pair(callParam.type_callback_param, std::make_pair(0, 0.0f)));
}
if (op_times_by_name_.find(callParam.name_callback_param) == op_times_by_name_.end()) {
op_times_by_name_.insert(std::make_pair(callParam.name_callback_param, std::make_pair(0, 0.0f)));
}
op_call_times_total_++;
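// A single shared timestamp is enough because kernels run sequentially on this thread.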
op_begin_ = GetTimeUs();
return true;
};
// after callback
after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
const session::CallBackParam &call_param) {
uint64_t opEnd = GetTimeUs();
if (after_inputs.empty()) {
MS_LOG(INFO) << "after_inputs is empty";
}
if (after_outputs.empty()) {
MS_LOG(INFO) << "after_outputs is empty";
}
float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
op_cost_total_ += cost;
op_times_by_type_[call_param.type_callback_param].first++;
op_times_by_type_[call_param.type_callback_param].second += cost;
op_times_by_name_[call_param.name_callback_param].first++;
op_times_by_name_[call_param.name_callback_param].second += cost;
return true;
};
return RET_OK;
}
int TimeProfile::Init() {
if (this->_flags == nullptr) {
return RET_ERROR;
}
MS_LOG(INFO) << "ModelPath = " << _flags->model_path_;
MS_LOG(INFO) << "InDataPath = " << _flags->in_data_path_;
MS_LOG(INFO) << "LoopCount = " << _flags->loop_count_;
MS_LOG(INFO) << "NumThreads = " << _flags->num_threads_;
if (_flags->cpu_bind_mode_ == -1) {
MS_LOG(INFO) << "cpuBindMode = MID_CPU";
} else if (_flags->cpu_bind_mode_ == 1) {
MS_LOG(INFO) << "cpuBindMode = HIGHER_CPU";
} else {
MS_LOG(INFO) << "cpuBindMode = NO_BIND";
}
if (_flags->model_path_.empty()) {
MS_LOG(ERROR) << "modelPath is required";
return RET_ERROR;
}
auto status = InitSession();
if (status != RET_OK) {
MS_LOG(ERROR) << "Init session failed.";
return RET_ERROR;
}
status = this->LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Load input failed.";
return RET_ERROR;
}
status = InitCallbackParameter();
if (status != RET_OK) {
MS_LOG(ERROR) << "Init callback Parameter failed.";
return RET_ERROR;
}
return RET_OK;
}
int TimeProfile::PrintResult(const std::vector<std::string> &title,
const std::map<std::string, std::pair<int, float>> &result) {
std::vector<size_t> columnLenMax(5);
std::vector<std::vector<std::string>> rows;
for (auto &iter : result) {
char stringBuf[5][100] = {};
std::vector<std::string> columns;
int len;
len = iter.first.size();
if (len > columnLenMax.at(0)) {
columnLenMax.at(0) = len + 4;
}
columns.push_back(iter.first);
len = sprintf_s(stringBuf[1], 100, "%f", iter.second.second / _flags->loop_count_);
if (len > columnLenMax.at(1)) {
columnLenMax.at(1) = len + 4;
}
columns.emplace_back(stringBuf[1]);
len = sprintf_s(stringBuf[2], 100, "%f", iter.second.second / op_cost_total_);
if (len > columnLenMax.at(2)) {
columnLenMax.at(2) = len + 4;
}
columns.emplace_back(stringBuf[2]);
len = sprintf_s(stringBuf[3], 100, "%d", iter.second.first);
if (len > columnLenMax.at(3)) {
columnLenMax.at(3) = len + 4;
}
columns.emplace_back(stringBuf[3]);
len = sprintf_s(stringBuf[4], 100, "%f", iter.second.second);
if (len > columnLenMax.at(4)) {
columnLenMax.at(4) = len + 4;
}
columns.emplace_back(stringBuf[4]);
rows.push_back(columns);
}
printf("-------------------------------------------------------------------------\n");
for (int i = 0; i < 5; i++) {
auto printBuf = title[i];
if (printBuf.size() > columnLenMax.at(i)) {
columnLenMax.at(i) = printBuf.size();
}
printBuf.resize(columnLenMax.at(i), ' ');
printf("%s", printBuf.c_str());
}
printf("\n");
for (int i = 0; i < rows.size(); i++) {
for (int j = 0; j < 5; j++) {
auto printBuf = rows[i][j];
printBuf.resize(columnLenMax.at(j), ' ');
printf("%s\t", printBuf.c_str());
}
printf("\n");
}
return RET_OK;
}
int TimeProfile::RunTimeProfile() {
uint64_t time_avg = 0;
// Load graph
std::string modelName = _flags->model_path_.substr(_flags->model_path_.find_last_of("/") + 1);
MS_LOG(INFO) << "start reading model file";
size_t size = 0;
char *graphBuf = ReadFile(_flags->model_path_.c_str(), &size);
if (graphBuf == nullptr) {
MS_LOG(ERROR) << "Load graph failed while running %s", modelName.c_str();
return 1;
}
auto model = lite::Model::Import(graphBuf, size);
if (model == nullptr) {
MS_LOG(ERROR) << "Import model failed: " << modelName;
delete[] graphBuf;
return RET_ERROR;
}
auto ret = session_->CompileGraph(model.get());
if (ret != RET_OK) {
MS_LOG(ERROR) << "Compile graph failed.";
delete[] graphBuf;
return RET_ERROR;
}
// load input
MS_LOG(INFO) << "start generate input data";
auto status = LoadInput();
if (status != RET_OK) {
MS_LOG(ERROR) << "Generate input data error";
return status;
}
// run graph and test
for (int i = 0; i < _flags->loop_count_; i++) {
session_->BindThread(true);
uint64_t run_begin = GetTimeUs();
ret = session_->RunGraph(before_call_back_, after_call_back_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run graph failed.";
}
auto outputs = session_->GetOutputs();
uint64_t run_end = GetTimeUs();
uint64_t time = run_end - run_begin;
time_avg += time;
session_->BindThread(false);
/*
for(auto &output : outputs) {
for (auto &outputTensor : output.second) {
delete outputTensor;
}
}*/
outputs.clear();
}
time_avg /= _flags->loop_count_;
float runCost = static_cast<float>(time_avg) / 1000.0f;
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run session failed.";
}
const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
PrintResult(per_op_name, op_times_by_name_);
PrintResult(per_op_type, op_times_by_type_);
printf("\n total time: %5.5f ms, kernel cost: %5.5f ms \n\n", runCost, op_cost_total_ / _flags->loop_count_);
printf("-------------------------------------------------------------------------\n");
for (auto &msInput : ms_inputs_) {
delete msInput;
}
ms_inputs_.clear();
delete[] graphBuf;
return ret;
}
int RunTimeProfile(int argc, const char **argv) {
TimeProfileFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get() << std::endl;
std::cerr << flags.Usage() << std::endl;
return -1;
}
if (flags.help) {
std::cerr << flags.Usage() << std::endl;
return 0;
}
TimeProfile time_profile(&flags);
auto ret = time_profile.Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Init TimeProfile failed.";
return RET_ERROR;
}
ret = time_profile.RunTimeProfile();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Run TimeProfile failed.";
return RET_ERROR;
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINNIE_TIMEPROFILE_TIMEPROFILE_H_
#define MINNIE_TIMEPROFILE_TIMEPROFILE_H_
#include <getopt.h>
#include <signal.h>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include "tools/common/flag_parser.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "schema/model_generated.h"
#include "include/model.h"
#include "include/lite_session.h"
namespace mindspore {
namespace lite {
class MS_API TimeProfileFlags : public virtual FlagParser {
public:
TimeProfileFlags() {
AddFlag(&TimeProfileFlags::model_path_, "modelPath", "Input model path", "");
AddFlag(&TimeProfileFlags::in_data_path_, "inDataPath", "Input data path, if not set, use random input", "");
AddFlag(&TimeProfileFlags::cpu_bind_mode_, "cpuBindMode",
"Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, default value: 1", 1);
AddFlag(&TimeProfileFlags::loop_count_, "loopCount", "Run loop count", 10);
AddFlag(&TimeProfileFlags::num_threads_, "numThreads", "Run threads number", 2);
}
~TimeProfileFlags() override = default;
public:
std::string model_path_;
std::string in_data_path_;
int cpu_bind_mode_ = 1;
int loop_count_;
int num_threads_;
};
class MS_API TimeProfile {
public:
explicit TimeProfile(TimeProfileFlags *flags) : _flags(flags) {}
~TimeProfile() = default;
int Init();
int RunTimeProfile();
private:
int GenerateRandomData(size_t size, void *data);
int GenerateInputData();
int LoadInput();
int ReadInputFile();
int InitCallbackParameter();
int InitSession();
int PrintResult(const std::vector<std::string>& title, const std::map<std::string, std::pair<int, float>>& result);
private:
TimeProfileFlags *_flags;
std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
session::LiteSession *session_ = nullptr;
// callback parameters
uint64_t op_begin_ = 0;
int op_call_times_total_ = 0;
float op_cost_total_ = 0.0f;
std::map<std::string, std::pair<int, float>> op_times_by_type_;
std::map<std::string, std::pair<int, float>> op_times_by_name_;
session::KernelCallBack before_call_back_;
session::KernelCallBack after_call_back_;
};
int MS_API RunTimeProfile(int argc, const char **argv);
} // namespace lite
} // namespace mindspore
#endif // MINNIE_TIMEPROFILE_TIMEPROFILE_H_