Commit aa05c93e authored by Z zhupengyang, committed by GitHub

[NPU] save subgraph model cache (#3589)

Parent 950b7382
......@@ -36,6 +36,11 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
}
mode_ = config.power_mode();
threads_ = config.threads();
#ifdef LITE_WITH_NPU
Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
config.subgraph_model_cache_dir());
#endif
}
std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
......
......@@ -118,18 +118,27 @@ class LITE_API ConfigBase {
std::string model_dir_;
int threads_{1};
PowerMode mode_{LITE_POWER_NO_BIND};
// to save subgraph model for npu/xpu/...
std::string subgraph_model_cache_dir_{""};
public:
explicit ConfigBase(PowerMode mode = LITE_POWER_NO_BIND, int threads = 1);
// set Model_dir
void set_model_dir(const std::string& x) { model_dir_ = x; }
const std::string& model_dir() const { return model_dir_; }
// set Power_mode
void set_power_mode(PowerMode mode);
PowerMode power_mode() const { return mode_; }
// set Thread
void set_threads(int threads);
int threads() const { return threads_; }
// set Power_mode
void set_power_mode(PowerMode mode);
PowerMode power_mode() const { return mode_; }
// set subgraph_model_dir
void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
subgraph_model_cache_dir_ = subgraph_model_cache_dir;
}
const std::string& subgraph_model_cache_dir() const {
return subgraph_model_cache_dir_;
}
};
/// CxxConfig is the config for the Full feature predictor.
......
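For context, here is a minimal usage sketch of the new `ConfigBase` option from the application side. It only uses calls that already appear in this diff (`set_model_from_file`, `set_power_mode`, `set_threads`, `set_subgraph_model_cache_dir`, `CreatePaddlePredictor`); the model path and cache directory are placeholders, not part of the commit.

```cpp
#include <string>
#include "lite/api/paddle_api.h"

// Sketch only: enable the NPU subgraph model cache on a MobileConfig.
void RunWithNpuModelCache() {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("./mobilenet_v1_opt.nb");  // hypothetical model path
  config.set_power_mode(paddle::lite_api::PowerMode::LITE_POWER_HIGH);
  config.set_threads(1);
  // New in this commit: directory where generated NPU om models are cached.
  config.set_subgraph_model_cache_dir("/data/local/tmp");  // hypothetical directory
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  // Fill inputs and call predictor->Run() as usual.
}
```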
......@@ -14,15 +14,50 @@
#include "lite/backends/npu/device.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/io.h"
namespace paddle {
namespace lite {
namespace npu {
bool WriteToOMFile(const domi::ModelBufferData& om_model_buff,
std::string om_file_path) {
FILE* fp;
fp = fopen(om_file_path.c_str(), "wb");
CHECK(fp != nullptr) << om_file_path << " open failed!";
uint32_t write_size =
(uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
CHECK_EQ(write_size, om_model_buff.length) << "write om file failed !";
fclose(fp);
return true;
}
bool ReadFromOMFile(domi::ModelBufferData* om_model_buff,
std::string om_file_path) {
FILE* fp;
fp = fopen(om_file_path.c_str(), "rb");
CHECK(fp != nullptr) << om_file_path << " open failed!";
fseek(fp, 0, SEEK_END);
uint32_t model_length = (uint32_t)ftell(fp);
fseek(fp, 0, SEEK_SET);
om_model_buff->data = malloc(model_length);
om_model_buff->length = model_length;
uint32_t read_size =
(uint32_t)fread(om_model_buff->data, 1, model_length, fp);
CHECK_EQ(read_size, model_length) << "read om file failed !";
fclose(fp);
return true;
}
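The two helpers above use C stdio together with the HiAI `domi::ModelBufferData` type. As a point of comparison, a self-contained sketch of the same binary read/write pattern using `<fstream>`; it is an illustration under those assumptions, not the commit's code path.

```cpp
#include <cstddef>
#include <fstream>
#include <string>
#include <vector>

// Write an opaque binary blob (e.g. a cached om model) to disk.
bool WriteBlob(const std::string& path, const std::vector<char>& blob) {
  std::ofstream ofs(path, std::ios::binary);
  if (!ofs) return false;  // e.g. cache directory is not writable
  ofs.write(blob.data(), static_cast<std::streamsize>(blob.size()));
  return ofs.good();
}

// Read the blob back; a missing file behaves like a cache miss.
bool ReadBlob(const std::string& path, std::vector<char>* blob) {
  std::ifstream ifs(path, std::ios::binary | std::ios::ate);
  if (!ifs) return false;
  const std::streamoff size = ifs.tellg();
  blob->resize(static_cast<std::size_t>(size));
  ifs.seekg(0, std::ios::beg);
  ifs.read(blob->data(), static_cast<std::streamsize>(size));
  return ifs.good();
}
```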
std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
const std::string model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
std::vector<ge::Operator>& output_nodes, // NOLINT
const std::string model_cache_full_dir = "" // NOLINT
) {
VLOG(3) << "[NPU] Build model";
// Build the HiAI IR graph to the HiAI om model
......@@ -32,6 +67,11 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
om_model.SetGraph(ir_graph);
domi::HiaiIrBuild ir_build;
domi::ModelBufferData om_model_buf;
if (!model_cache_full_dir.empty() && IsFileExists(model_cache_full_dir)) {
VLOG(3) << "Will read om model from " << model_cache_full_dir;
ReadFromOMFile(&om_model_buf, model_cache_full_dir);
} else {
if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
LOG(WARNING) << "[NPU] CreateModelBuff failed!";
return nullptr;
......@@ -41,6 +81,11 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
ir_build.ReleaseModelBuff(om_model_buf);
return nullptr;
}
if (!model_cache_full_dir.empty()) {
VLOG(3) << "Will write om model to " << model_cache_full_dir;
WriteToOMFile(om_model_buf, model_cache_full_dir);
}
}
// Create a HiAI model manager client to load the HiAI om model
std::shared_ptr<hiai::AiModelMngerClient> model_client(
......
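Read across the three hunks above, the cache handling added to `Device::Build` amounts to: if a cache path was supplied and the file exists, load the om model from it and skip the IR build; otherwise build as before and, if a cache path was supplied, write the result out. The sketch below restates only that control flow, with the HiAI-specific steps replaced by placeholder callables (all names here are illustrative).

```cpp
#include <functional>
#include <string>

struct OmBuffer {};  // stand-in for the built om model buffer

bool BuildWithCache(
    const std::string& cache_path,  // may be empty when caching is disabled
    const std::function<bool(const std::string&)>& file_exists,
    const std::function<bool(OmBuffer*)>& read_cached_om,
    const std::function<bool(OmBuffer*)>& build_om,
    const std::function<void(const OmBuffer&, const std::string&)>& write_om,
    OmBuffer* om) {
  if (!cache_path.empty() && file_exists(cache_path)) {
    // Cache hit: load the previously generated om model.
    return read_cached_om(om);
  }
  // Cache miss (or caching disabled): build the om model from the IR graph.
  if (!build_om(om)) return false;
  // Persist it so later runs with the same cache path take the fast path.
  if (!cache_path.empty()) write_om(*om, cache_path);
  return true;
}
```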
......@@ -43,7 +43,8 @@ class Device {
std::shared_ptr<hiai::AiModelMngerClient> Build(
const std::string model_name, // NOLINT
std::vector<ge::Operator>& input_nodes, // NOLINT
std::vector<ge::Operator>& output_nodes // NOLINT
std::vector<ge::Operator>& output_nodes, // NOLINT
const std::string model_cache_name // NOLINT
); // NOLINT
private:
......
......@@ -17,6 +17,10 @@
namespace paddle {
namespace lite {
#ifdef LITE_WITH_NPU
std::string Context<TargetType::kNPU>::subgraph_model_cache_dir_{""}; // NOLINT
#endif
#ifdef LITE_WITH_XPU
std::string Context<TargetType::kXPU>::_multi_encoder_precision; // NOLINT
thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr};
......
......@@ -85,6 +85,16 @@ class Context<TargetType::kNPU> {
NPUContext& operator=(const NPUContext& ctx) {}
std::string name() const { return "NPUContext"; }
static void SetSubgraphModelCacheDir(std::string subgraph_model_cache_dir) {
subgraph_model_cache_dir_ = subgraph_model_cache_dir;
}
static std::string SubgraphModelCacheDir() {
return subgraph_model_cache_dir_;
}
private:
static std::string subgraph_model_cache_dir_;
};
#endif
......
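The cache directory is held as a process-wide static on the NPU context, so the setter called from `LightPredictorImpl::Init` and the getter used later by the subgraph kernel see the same value; this is also why the separate definition in `context.cc` above is required. A stripped-down sketch of that pattern (the class name here is illustrative):

```cpp
#include <string>

class CacheDirHolder {
 public:
  static void Set(const std::string& dir) { dir_ = dir; }
  static const std::string& Get() { return dir_; }

 private:
  static std::string dir_;  // declared in the header ...
};

// ... and defined exactly once in a .cc file, as context.cc does above.
std::string CacheDirHolder::dir_;
```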
......@@ -132,6 +132,7 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
mobile_config.set_model_from_file(optimized_model_dir + ".nb");
mobile_config.set_power_mode(lite_api::PowerMode::LITE_POWER_HIGH);
mobile_config.set_threads(1);
// mobile_config.set_subgraph_model_cache_dir("/data/local/tmp");
predictor = lite_api::CreatePaddlePredictor(mobile_config);
FillInputTensors(predictor, input_tensor_shape, input_tensor_type, 1);
// Run optimized model
......
......@@ -33,13 +33,15 @@ class Engine {
cpp::BlockDesc *block_desc,
const std::vector<std::string> &input_names,
const std::vector<std::string> &output_names,
lite::Scope *scope)
lite::Scope *scope,
std::string model_cache_dir = "")
: ctx_(ctx),
block_idx_(block_idx),
block_desc_(block_desc),
input_names_(input_names),
output_names_(output_names),
scope_(scope) {}
scope_(scope),
model_cache_dir_(model_cache_dir) {}
virtual ~Engine() = default;
virtual int Build();
......@@ -73,6 +75,7 @@ class Engine {
std::vector<Tensor *> origin_itensors_;
std::vector<Tensor *> origin_otensors_;
std::vector<Instruction> origin_program_;
std::string model_cache_dir_{""};
};
} // namespace subgraph
......
......@@ -15,6 +15,7 @@
#include "lite/kernels/npu/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <algorithm>
#include <utility>
#include "hiai_ir_build.h" // NOLINT
#include "lite/backends/npu/device.h"
......@@ -22,12 +23,41 @@
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/paddle_use_bridges.h"
#include "lite/kernels/npu/bridges/utility.h"
#include "lite/utils/io.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
std::string SubgraphEngine::GenerateModelCacheName() const {
auto inames = device_inames_;
auto onames = device_onames_;
std::sort(inames.begin(), inames.end());
std::sort(onames.begin(), onames.end());
std::string model_cache_name = "";
for (auto iname : inames) {
auto itensor = scope_->FindTensor(iname);
std::replace(iname.begin(), iname.end(), '/', '_');
model_cache_name += "_" + iname;
for (auto i : itensor->dims().Vectorize()) {
model_cache_name += "_" + std::to_string(i);
}
}
for (auto oname : onames) {
auto otensor = scope_->FindTensor(oname);
std::replace(oname.begin(), oname.end(), '/', '_');
model_cache_name += "_" + oname;
for (auto i : otensor->dims().Vectorize()) {
model_cache_name += "_" + std::to_string(i);
}
}
model_cache_name += "_.om";
return model_cache_name;
}
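`GenerateModelCacheName` keys the cache file on the sorted input/output tensor names and their shapes. Below is a standalone sketch of the same naming scheme over plain `(name, dims)` pairs, followed by a hypothetical example of the resulting file name; it mirrors the function above rather than calling into the kernel code.

```cpp
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

using NamedDims = std::pair<std::string, std::vector<int64_t>>;

// '/' in tensor names becomes '_', each dim is appended, suffix is "_.om".
std::string CacheName(std::vector<NamedDims> inputs,
                      std::vector<NamedDims> outputs) {
  std::sort(inputs.begin(), inputs.end());
  std::sort(outputs.begin(), outputs.end());
  std::string name;
  for (auto tensors : {inputs, outputs}) {
    for (auto& t : tensors) {
      std::replace(t.first.begin(), t.first.end(), '/', '_');
      name += "_" + t.first;
      for (auto d : t.second) name += "_" + std::to_string(d);
    }
  }
  return name + "_.om";
}

// Hypothetical example: input {"image", {1, 3, 224, 224}} and output
// {"fc/out", {1, 1000}} yield "_image_1_3_224_224_fc_out_1_1000_.om".
```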
int SubgraphEngine::BuildDeviceProgram() {
int status = 0;
// Convert all of ops and their input vars and weights and added into the NPU
......@@ -88,8 +118,11 @@ int SubgraphEngine::BuildDeviceProgram() {
if (device_program_map_.count(inputs_shape_) > 0) {
return status;
}
std::string model_cache_full_dir =
model_cache_dir_.empty() ? "" : model_cache_dir_ + "/" +
GenerateModelCacheName();
auto device_client = lite::npu::Device::Global().Build(
model_name_, device_inodes, device_onodes);
model_name_, device_inodes, device_onodes, model_cache_full_dir);
if (device_client == nullptr) {
LOG(WARNING) << "[NPU] Build model failed!";
return subgraph::FAILED;
......@@ -280,7 +313,8 @@ void SubgraphCompute::PrepareForRun() {
param.sub_block_desc,
param.input_data_names,
param.output_data_names,
param.scope));
param.scope,
NPUContext::SubgraphModelCacheDir()));
CHECK(engine_);
engine_->Build();
}
......
......@@ -35,9 +35,15 @@ class SubgraphEngine : public subgraph::Engine {
cpp::BlockDesc *block_desc,
const std::vector<std::string> &input_names,
const std::vector<std::string> &output_names,
Scope *scope)
: subgraph::Engine(
ctx, block_idx, block_desc, input_names, output_names, scope) {}
Scope *scope,
std::string model_cache_dir = "")
: subgraph::Engine(ctx,
block_idx,
block_desc,
input_names,
output_names,
scope,
model_cache_dir) {}
struct device_program_t {
explicit device_program_t(std::shared_ptr<hiai::AiModelMngerClient> _client)
......@@ -58,6 +64,8 @@ class SubgraphEngine : public subgraph::Engine {
void InitDeviceTensor() override;
bool InputShapeChanged() override;
std::string GenerateModelCacheName() const;
std::string model_name_{"model.om"};
std::vector<std::vector<int64_t>> inputs_shape_{};
std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<device_program_t>>
......