提交 0b753607 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] cache om-models by inputs shape (#3242)

上级 5b2a618f
......@@ -19,8 +19,8 @@ namespace paddle {
namespace lite {
namespace npu {
std::unique_ptr<hiai::AiModelMngerClient> Device::Build(
std::string& model_name, // NOLINT
std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
const std::string model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
) {
......@@ -41,15 +41,15 @@ std::unique_ptr<hiai::AiModelMngerClient> Device::Build(
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
// Create a HiAI model manager client to load the HiAI om model
std::unique_ptr<hiai::AiModelMngerClient> model_client(
std::shared_ptr<hiai::AiModelMngerClient> model_client(
new hiai::AiModelMngerClient());
if (model_client->Init(nullptr) != hiai::AI_SUCCESS) {
LOG(WARNING) << "[NPU] AiModelMngerClient init failed)!";
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
model_name = "model_" + std::to_string(model_count_++) + ".om";
auto model_desc = std::make_shared<hiai::AiModelDescription>(
model_name, freq_level(), framework_type(), model_type(), device_type());
model_desc->SetModelBuffer(om_model_buf.data, om_model_buf.length);
......
......@@ -40,8 +40,8 @@ class Device {
// Build the HiAI IR graph to om model, return HiAI model manager client to
// load om model and run inference.
std::unique_ptr<hiai::AiModelMngerClient> Build(
std::string& model_name, // NOLINT
std::shared_ptr<hiai::AiModelMngerClient> Build(
const std::string model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
); // NOLINT
......@@ -51,7 +51,6 @@ class Device {
int framework_type_{0};
int model_type_{0};
int device_type_{0};
int model_count_{0};
};
} // namespace npu
......
......@@ -407,7 +407,7 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
cpp::OpDesc subgraph_op_desc;
subgraph_op_desc.SetType("subgraph");
// Create a new sub block desc for storing all of Ops an Vars of the target
// Create a new sub block desc for storing all of Ops and Vars of the target
// subgraph and sub_block_idx is set as an attribute of subgraph op,
// sub_block_idx < 0 means it's a new subgraph op
int sub_block_idx = -(subgraph_idx + 1);
......
......@@ -17,6 +17,7 @@
#include "lite/api/paddle_api.h"
#include "lite/api/test_helper.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/string.h"
DEFINE_string(model_file, "", "model file path of combined protobuf model");
DEFINE_string(params_file, "", "params file path of combined protobuf model");
......@@ -31,43 +32,17 @@ namespace lite {
// The helper functions for loading and running model from command line and
// verifying output data
// Splits `text` on ':' and returns the pieces in order.
// As written, an empty `text` yields an empty vector, an empty piece between
// two adjacent ':' is kept, and a trailing ':' does not add a final empty
// piece (the loop stops once the remaining text is empty).
std::vector<std::string> TypeParsing(std::string text) {
std::vector<std::string> types;
while (!text.empty()) {
// index is npos when no ':' remains; substr(0, npos) then takes the rest.
size_t index = text.find_first_of(":");
std::string type = text.substr(0, index);
VLOG(3) << type;
types.push_back(type);
if (index == std::string::npos) {
break;
} else {
// Drop the consumed piece together with its ':' separator.
text = text.substr(index + 1);
}
}
return types;
// NOTE(review): this statement is unreachable because of the return above.
// It presumably is the intended replacement for the hand-rolled loop
// (Split likely comes from lite/utils/string.h) - TODO confirm and keep only
// one of the two implementations.
return Split(text, ":");
}
std::vector<std::vector<int64_t>> ShapeParsing(std::string text) {
std::vector<std::vector<int64_t>> shapes;
while (!text.empty()) {
size_t index = text.find_first_of(":");
std::string slice = text.substr(0, index);
std::vector<int64_t> shape;
while (!slice.empty()) {
size_t index = slice.find_first_of(",");
int d = atoi(slice.substr(0, index).c_str());
VLOG(3) << d;
shape.push_back(d);
if (index == std::string::npos) {
break;
} else {
slice = slice.substr(index + 1);
}
}
shapes.push_back(shape);
if (index == std::string::npos) {
break;
} else {
text = text.substr(index + 1);
std::vector<std::string> shape_strings = Split(text, ":");
shapes.resize(shape_strings.size());
for (int i = 0; i < shape_strings.size(); i++) {
std::vector<std::string> shape_nums = Split(shape_strings[i], ",");
for (auto shape_num : shape_nums) {
shapes[i].push_back(atoi(shape_num.c_str()));
}
}
return shapes;
......
......@@ -85,22 +85,31 @@ int SubgraphEngine::BuildDeviceProgram() {
<< "[NPU] No input nodes found for building NPU model";
CHECK(!device_onames_.empty())
<< "[NPU] No output nodes found for building NPU model";
// Build the HiAI IR graph to HiAI om model as the device program
device_program_ = lite::npu::Device::Global().Build(
if (device_program_map_.count(inputs_shape_) > 0) {
return status;
}
auto device_client = lite::npu::Device::Global().Build(
model_name_, device_inodes, device_onodes);
if (device_program_ == nullptr) {
if (device_client == nullptr) {
LOG(WARNING) << "[NPU] Build model failed!";
return subgraph::FAILED;
}
auto device_program = std::make_shared<device_program_t>(device_client);
device_program_map_[inputs_shape_] = device_program;
// Query and check the dimensions of valid input and output tensors
std::vector<hiai::TensorDimension> device_idims, device_odims;
if (device_program_->GetModelIOTensorDim(
if (device_program->client->GetModelIOTensorDim(
model_name_, device_idims, device_odims) != hiai::AI_SUCCESS) {
LOG(WARNING)
<< "[NPU] Get the dimensions of input and output tensors failed!";
return subgraph::FAILED;
}
device_program->device_idims = device_idims;
device_program->device_odims = device_odims;
CHECK_EQ(device_idims.size(), device_inames_.size());
CHECK_EQ(device_odims.size(), device_onames_.size());
origin_idims_.resize(device_inames_.size());
......@@ -109,6 +118,7 @@ int SubgraphEngine::BuildDeviceProgram() {
origin_odims_.resize(device_onames_.size());
origin_otensors_.resize(device_onames_.size());
device_otensors_.resize(device_onames_.size());
for (int i = 0; i < device_inames_.size(); i++) {
auto node = graph.Get(device_inames_[i]);
auto precision = node->precision();
......@@ -130,6 +140,8 @@ int SubgraphEngine::BuildDeviceProgram() {
device_itensors_[i].reset(new hiai::AiTensor);
device_itensors_[i]->Init(&(device_idims[i]));
}
device_program->origin_idims = origin_idims_;
for (int i = 0; i < device_onames_.size(); i++) {
auto node = graph.Get(device_onames_[i]);
auto precision = node->precision();
......@@ -170,6 +182,8 @@ int SubgraphEngine::BuildDeviceProgram() {
<< PrecisionToStr(precision);
break;
}
device_program->origin_odims = origin_odims_;
CHECK_EQ(origin_odims_[i].production(),
device_odims[i].GetNumber() * device_odims[i].GetChannel() *
device_odims[i].GetHeight() * device_odims[i].GetWidth());
......@@ -181,14 +195,25 @@ int SubgraphEngine::BuildDeviceProgram() {
int SubgraphEngine::LaunchDeviceProgram() {
// Copy the data of origin input tensors to the buffer of input HiAI tensors
// init device_itensors_, device_otensors_, origin_otensors_
auto device_program = device_program_map_[inputs_shape_];
for (size_t i = 0; i < device_itensors_.size(); i++) {
device_itensors_[i]->Init(&(device_program->device_idims[i]));
std::memcpy(device_itensors_[i]->GetBuffer(),
origin_itensors_[i]->raw_data(),
origin_itensors_[i]->memory_size());
}
for (size_t i = 0; i < device_otensors_.size(); i++) {
device_otensors_[i]->Init(&(device_program->device_odims[i]));
}
for (size_t i = 0; i < origin_otensors_.size(); i++) {
origin_otensors_[i]->Resize(device_program->origin_odims[i]);
}
// Run the HiAI model by name
std::string key = "model_name"; // Note: key seems must be model_name
model_context_.AddPara(key, model_name_);
hiai::AiContext model_context;
model_context.AddPara(key, model_name_);
auto GetCurrentUS = []() -> double {
struct timeval time;
gettimeofday(&time, NULL);
......@@ -196,11 +221,11 @@ int SubgraphEngine::LaunchDeviceProgram() {
};
int istamp;
auto start_time = GetCurrentUS();
CHECK_EQ(
device_program_->Process(
model_context_, device_itensors_, device_otensors_, 1000, istamp),
CHECK_EQ(device_program->client->Process(
model_context, device_itensors_, device_otensors_, 1000, istamp),
hiai::AI_SUCCESS);
VLOG(3) << "[NPU] Process cost " << GetCurrentUS() - start_time << " us";
// Copy the data of output HiAI tensor to the buffer of origin output tensors
for (size_t i = 0; i < device_otensors_.size(); i++) {
std::memcpy(const_cast<void*>(origin_otensors_[i]->raw_data()),
......@@ -210,6 +235,18 @@ int SubgraphEngine::LaunchDeviceProgram() {
return 0;
}
bool SubgraphEngine::InputShapeChanged() {
std::vector<std::vector<int64_t>> new_shape;
for (auto origin_itensor : origin_itensors_) {
new_shape.push_back(origin_itensor->dims().Vectorize());
}
inputs_shape_ = new_shape;
if (device_program_map_.count(inputs_shape_) > 0) {
return false;
}
return true;
}
void SubgraphCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
engine_.reset(new SubgraphEngine(ctx_.get(),
......
......@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
......@@ -38,17 +39,29 @@ class SubgraphEngine : public subgraph::Engine {
: subgraph::Engine(
ctx, block_idx, block_desc, input_names, output_names, scope) {}
struct device_program_t {
explicit device_program_t(std::shared_ptr<hiai::AiModelMngerClient> _client)
: client(_client) {}
std::shared_ptr<hiai::AiModelMngerClient> client{nullptr};
std::vector<DDim> origin_idims{};
std::vector<DDim> origin_odims{};
std::vector<hiai::TensorDimension> device_idims{};
std::vector<hiai::TensorDimension> device_odims{};
};
protected:
int BuildDeviceProgram() override;
int LaunchDeviceProgram() override;
bool InputShapeChanged() override;
std::string model_name_;
hiai::AiContext model_context_;
std::vector<std::string> device_inames_;
std::vector<std::string> device_onames_;
std::vector<std::shared_ptr<hiai::AiTensor>> device_itensors_;
std::vector<std::shared_ptr<hiai::AiTensor>> device_otensors_;
std::unique_ptr<hiai::AiModelMngerClient> device_program_{nullptr};
std::string model_name_{"model.om"};
std::vector<std::vector<int64_t>> inputs_shape_{};
std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<device_program_t>>
device_program_map_{};
std::vector<std::string> device_inames_{};
std::vector<std::string> device_onames_{};
std::vector<std::shared_ptr<hiai::AiTensor>> device_itensors_{};
std::vector<std::shared_ptr<hiai::AiTensor>> device_otensors_{};
};
class SubgraphCompute : public KernelLite<TARGET(kNPU), PRECISION(kAny)> {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册