From aa05c93e82ee5b4e1127c21aac60cc71f49e1825 Mon Sep 17 00:00:00 2001
From: zhupengyang <1165938320@qq.com>
Date: Wed, 13 May 2020 16:36:23 +0800
Subject: [PATCH] [NPU] save subgraph model cache (#3589)

---
 lite/api/light_api_impl.cc                   |  5 ++
 lite/api/paddle_api.h                        | 15 ++++-
 lite/backends/npu/device.cc                  | 67 ++++++++++++++++----
 lite/backends/npu/device.h                   |  9 +--
 lite/core/context.cc                         |  4 ++
 lite/core/context.h                          | 10 +++
 lite/core/mir/subgraph/subgraph_pass_test.cc |  1 +
 lite/kernels/npu/bridges/engine.h            |  7 +-
 lite/kernels/npu/subgraph_compute.cc         | 38 ++++++++++-
 lite/kernels/npu/subgraph_compute.h          | 14 +++-
 10 files changed, 145 insertions(+), 25 deletions(-)

diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index cdf5b7fb06..e76e89af43 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -36,6 +36,11 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   }
   mode_ = config.power_mode();
   threads_ = config.threads();
+
+#ifdef LITE_WITH_NPU
+  Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
+      config.subgraph_model_cache_dir());
+#endif
 }
 
 std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index f4c7bae753..593a2eb702 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -118,18 +118,27 @@ class LITE_API ConfigBase {
   std::string model_dir_;
   int threads_{1};
   PowerMode mode_{LITE_POWER_NO_BIND};
+  // to save subgraph model for npu/xpu/...
+  std::string subgraph_model_cache_dir_{""};
 
  public:
   explicit ConfigBase(PowerMode mode = LITE_POWER_NO_BIND, int threads = 1);
   // set Model_dir
   void set_model_dir(const std::string& x) { model_dir_ = x; }
   const std::string& model_dir() const { return model_dir_; }
-  // set Power_mode
-  void set_power_mode(PowerMode mode);
-  PowerMode power_mode() const { return mode_; }
   // set Thread
   void set_threads(int threads);
   int threads() const { return threads_; }
+  // set Power_mode
+  void set_power_mode(PowerMode mode);
+  PowerMode power_mode() const { return mode_; }
+  // set subgraph_model_dir
+  void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  const std::string& subgraph_model_cache_dir() const {
+    return subgraph_model_cache_dir_;
+  }
 };
 
 /// CxxConfig is the config for the Full feature predictor.
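The new option travels from `MobileConfig` into the NPU context when the predictor is initialized (see `light_api_impl.cc` above). A minimal caller-side sketch of the intended usage (the model path and cache directory are placeholders, not part of the patch):

```cpp
#include "paddle_api.h"  // public Paddle-Lite API used by MobileConfig

int main() {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("/data/local/tmp/mobilenet_v1.nb");
  // First run: the NPU subgraph is compiled and the resulting .om blob is
  // written into this directory. Later runs: the blob is loaded from disk
  // instead of being rebuilt, which skips the HiAI IR build step.
  config.set_subgraph_model_cache_dir("/data/local/tmp");
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  return 0;
}
```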
diff --git a/lite/backends/npu/device.cc b/lite/backends/npu/device.cc
index 345b239c32..f9803aa881 100644
--- a/lite/backends/npu/device.cc
+++ b/lite/backends/npu/device.cc
@@ -14,15 +14,50 @@
 #include "lite/backends/npu/device.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/io.h"
 
 namespace paddle {
 namespace lite {
 namespace npu {
 
+bool WriteToOMFile(const domi::ModelBufferData& om_model_buff,
+                   std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "wb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  uint32_t write_size =
+      (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
+  CHECK_EQ(write_size, om_model_buff.length) << "write om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
+bool ReadFromOMFile(domi::ModelBufferData* om_model_buff,
+                    std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "rb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  fseek(fp, 0, SEEK_END);
+  uint32_t model_length = (uint32_t)ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+  om_model_buff->data = malloc(model_length);
+  om_model_buff->length = model_length;
+  uint32_t read_size =
+      (uint32_t)fread(om_model_buff->data, 1, model_length, fp);
+  CHECK_EQ(read_size, model_length) << "read om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
 std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
-    const std::string model_name,            // NOLINT
-    std::vector<ge::Operator>& input_nodes,  // NOLINT
-    std::vector<ge::Operator>& output_nodes  // NOLINT
+    const std::string model_name,                // NOLINT
+    std::vector<ge::Operator>& input_nodes,      // NOLINT
+    std::vector<ge::Operator>& output_nodes,     // NOLINT
+    const std::string model_cache_full_dir = ""  // NOLINT
     ) {
   VLOG(3) << "[NPU] Build model";
   // Build the HiAI IR graph to the HiAI om model
@@ -32,14 +67,24 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
   om_model.SetGraph(ir_graph);
   domi::HiaiIrBuild ir_build;
   domi::ModelBufferData om_model_buf;
-  if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
-    LOG(WARNING) << "[NPU] CreateModelBuff failed!";
-    return nullptr;
-  }
-  if (!ir_build.BuildIRModel(om_model, om_model_buf)) {
-    LOG(WARNING) << "[NPU] BuildIRModel failed!";
-    ir_build.ReleaseModelBuff(om_model_buf);
-    return nullptr;
+
+  if (!model_cache_full_dir.empty() && IsFileExists(model_cache_full_dir)) {
+    VLOG(3) << "Will read om model from " << model_cache_full_dir;
+    ReadFromOMFile(&om_model_buf, model_cache_full_dir);
+  } else {
+    if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
+      LOG(WARNING) << "[NPU] CreateModelBuff failed!";
+      return nullptr;
+    }
+    if (!ir_build.BuildIRModel(om_model, om_model_buf)) {
+      LOG(WARNING) << "[NPU] BuildIRModel failed!";
+      ir_build.ReleaseModelBuff(om_model_buf);
+      return nullptr;
+    }
+    if (!model_cache_full_dir.empty()) {
+      VLOG(3) << "Will write om model to " << model_cache_full_dir;
+      WriteToOMFile(om_model_buf, model_cache_full_dir);
+    }
   }
 
   // Create a HiAI model manager client to load the HiAI om model
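The two helpers above are a plain stdio round trip: `WriteToOMFile` dumps `length` bytes starting at `data`, and `ReadFromOMFile` sizes its buffer from the file itself. A self-contained sketch of that byte-level contract, with `OMBuffer` standing in for `domi::ModelBufferData` and a placeholder path:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

struct OMBuffer {  // stand-in for domi::ModelBufferData
  void* data{nullptr};
  uint32_t length{0};
};

int main() {
  const std::string path = "/tmp/demo.om";  // placeholder cache file

  // Write side: dump `length` bytes from `data`, as WriteToOMFile does.
  OMBuffer out;
  out.length = 5;
  out.data = malloc(out.length);
  memcpy(out.data, "HIAI!", out.length);
  FILE* fp = fopen(path.c_str(), "wb");
  if (fp == nullptr) return 1;
  fwrite(out.data, 1, out.length, fp);
  fclose(fp);

  // Read side: size the buffer from the file length, as ReadFromOMFile does.
  OMBuffer in;
  fp = fopen(path.c_str(), "rb");
  if (fp == nullptr) return 1;
  fseek(fp, 0, SEEK_END);
  in.length = static_cast<uint32_t>(ftell(fp));
  fseek(fp, 0, SEEK_SET);
  in.data = malloc(in.length);
  size_t read_size = fread(in.data, 1, in.length, fp);
  fclose(fp);

  bool ok = read_size == in.length && in.length == out.length &&
            memcmp(in.data, out.data, in.length) == 0;
  free(out.data);
  free(in.data);
  return ok ? 0 : 1;
}
```

Note that the real helpers `CHECK`-fail (abort) on any I/O error rather than falling back to a rebuild, and an existing cache file is loaded as-is; nothing verifies that it still matches the current graph.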
diff --git a/lite/backends/npu/device.h b/lite/backends/npu/device.h
index 6733a7f6df..fa8469bf2e 100644
--- a/lite/backends/npu/device.h
+++ b/lite/backends/npu/device.h
@@ -41,10 +41,11 @@ class Device {
   // Build the HiAI IR graph to om model, return HiAI model manager client to
   // load om model and run inference.
   std::shared_ptr<hiai::AiModelMngerClient> Build(
-      const std::string model_name,            // NOLINT
-      std::vector<ge::Operator>& input_nodes,  // NOLINT
-      std::vector<ge::Operator>& output_nodes  // NOLINT
-      );                                       // NOLINT
+      const std::string model_name,             // NOLINT
+      std::vector<ge::Operator>& input_nodes,   // NOLINT
+      std::vector<ge::Operator>& output_nodes,  // NOLINT
+      const std::string model_cache_name        // NOLINT
+      );                                        // NOLINT
 
  private:
   int freq_level_{3};
diff --git a/lite/core/context.cc b/lite/core/context.cc
index 66d0c39463..eb8f90d7fa 100644
--- a/lite/core/context.cc
+++ b/lite/core/context.cc
@@ -17,6 +17,10 @@
 namespace paddle {
 namespace lite {
 
+#ifdef LITE_WITH_NPU
+std::string Context<TargetType::kNPU>::subgraph_model_cache_dir_{""};  // NOLINT
+#endif
+
 #ifdef LITE_WITH_XPU
 std::string Context<TargetType::kXPU>::_multi_encoder_precision;  // NOLINT
 thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr};
diff --git a/lite/core/context.h b/lite/core/context.h
index 324b5552ac..f8013ac500 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -85,6 +85,16 @@ class Context<TargetType::kNPU> {
   NPUContext& operator=(const NPUContext& ctx) {}
   std::string name() const { return "NPUContext"; }
+
+  static void SetSubgraphModelCacheDir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  static std::string SubgraphModelCacheDir() {
+    return subgraph_model_cache_dir_;
+  }
+
+ private:
+  static std::string subgraph_model_cache_dir_;
 };
 #endif
diff --git a/lite/core/mir/subgraph/subgraph_pass_test.cc b/lite/core/mir/subgraph/subgraph_pass_test.cc
index ee2d67e918..0fa69df8b5 100644
--- a/lite/core/mir/subgraph/subgraph_pass_test.cc
+++ b/lite/core/mir/subgraph/subgraph_pass_test.cc
@@ -132,6 +132,7 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
   mobile_config.set_model_from_file(optimized_model_dir + ".nb");
   mobile_config.set_power_mode(lite_api::PowerMode::LITE_POWER_HIGH);
   mobile_config.set_threads(1);
+  // mobile_config.set_subgraph_model_cache_dir("/data/local/tmp");
   predictor = lite_api::CreatePaddlePredictor(mobile_config);
   FillInputTensors(predictor, input_tensor_shape, input_tensor_type, 1);
   // Run optimized model
diff --git a/lite/kernels/npu/bridges/engine.h b/lite/kernels/npu/bridges/engine.h
index 34ec923889..9f90277be8 100644
--- a/lite/kernels/npu/bridges/engine.h
+++ b/lite/kernels/npu/bridges/engine.h
@@ -33,13 +33,15 @@ class Engine {
          cpp::BlockDesc *block_desc,
          const std::vector<std::string> &input_names,
          const std::vector<std::string> &output_names,
-         lite::Scope *scope)
+         lite::Scope *scope,
+         std::string model_cache_dir = "")
       : ctx_(ctx),
         block_idx_(block_idx),
         block_desc_(block_desc),
         input_names_(input_names),
        output_names_(output_names),
-        scope_(scope) {}
+        scope_(scope),
+        model_cache_dir_(model_cache_dir) {}
   virtual ~Engine() = default;
 
   virtual int Build();
@@ -73,6 +75,7 @@ class Engine {
   std::vector<Tensor *> origin_itensors_;
   std::vector<Tensor *> origin_otensors_;
   std::vector<Instruction> origin_program_;
+  std::string model_cache_dir_{""};
 };
 
 }  // namespace subgraph
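The cache directory is plumbed through as a process-wide static on the NPU context rather than as a per-predictor field. A minimal reproduction of that pattern (class and member names here are stand-ins for the real `Context<TargetType::kNPU>`):

```cpp
#include <iostream>
#include <string>

// One static string: written once at predictor init, read later by every
// NPU subgraph kernel in the same process.
class NpuCtx {
 public:
  static void SetSubgraphModelCacheDir(std::string dir) { cache_dir_ = dir; }
  static std::string SubgraphModelCacheDir() { return cache_dir_; }

 private:
  static std::string cache_dir_;
};
std::string NpuCtx::cache_dir_{""};  // single definition, as in context.cc

int main() {
  // What LightPredictorImpl::Init does with config.subgraph_model_cache_dir():
  NpuCtx::SetSubgraphModelCacheDir("/data/local/tmp");
  // What SubgraphCompute::PrepareForRun reads when constructing the engine:
  std::cout << NpuCtx::SubgraphModelCacheDir() << std::endl;
  return 0;
}
```

One consequence of the static: a single cache directory applies to every predictor in the process, and the last `MobileConfig` to initialize wins.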
diff --git a/lite/kernels/npu/subgraph_compute.cc b/lite/kernels/npu/subgraph_compute.cc
index da2fd3ead2..d2609ff61e 100644
--- a/lite/kernels/npu/subgraph_compute.cc
+++ b/lite/kernels/npu/subgraph_compute.cc
@@ -15,6 +15,7 @@
 #include "lite/kernels/npu/subgraph_compute.h"
 #include <sys/time.h>
 #include <time.h>
+#include <algorithm>
 #include "hiai_ir_build.h"  // NOLINT
 #include "lite/backends/npu/device.h"
@@ -22,12 +23,41 @@
 #include "lite/kernels/npu/bridges/graph.h"
 #include "lite/kernels/npu/bridges/paddle_use_bridges.h"
 #include "lite/kernels/npu/bridges/utility.h"
+#include "lite/utils/io.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace npu {
 
+std::string SubgraphEngine::GenerateModelCacheName() const {
+  auto inames = device_inames_;
+  auto onames = device_onames_;
+  std::sort(inames.begin(), inames.end());
+  std::sort(onames.begin(), onames.end());
+
+  std::string model_cache_name = "";
+  for (auto iname : inames) {
+    auto itensor = scope_->FindTensor(iname);
+    std::replace(iname.begin(), iname.end(), '/', '_');
+    model_cache_name += "_" + iname;
+    for (auto i : itensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  for (auto oname : onames) {
+    auto otensor = scope_->FindTensor(oname);
+    std::replace(oname.begin(), oname.end(), '/', '_');
+    model_cache_name += "_" + oname;
+    for (auto i : otensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  model_cache_name += "_.om";
+
+  return model_cache_name;
+}
+
 int SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the NPU
@@ -88,8 +118,11 @@
   if (device_program_map_.count(inputs_shape_) > 0) {
     return status;
   }
+  std::string model_cache_full_dir =
+      model_cache_dir_.empty() ? "" : model_cache_dir_ + "/" +
+                                          GenerateModelCacheName();
   auto device_client = lite::npu::Device::Global().Build(
-      model_name_, device_inodes, device_onodes);
+      model_name_, device_inodes, device_onodes, model_cache_full_dir);
   if (device_client == nullptr) {
     LOG(WARNING) << "[NPU] Build model failed!";
     return subgraph::FAILED;
   }
@@ -280,7 +313,8 @@ void SubgraphCompute::PrepareForRun() {
                                param.sub_block_desc,
                                param.input_data_names,
                                param.output_data_names,
-                               param.scope));
+                               param.scope,
+                               NPUContext::SubgraphModelCacheDir()));
   CHECK(engine_);
   engine_->Build();
 }
diff --git a/lite/kernels/npu/subgraph_compute.h b/lite/kernels/npu/subgraph_compute.h
index db84fc1883..9f0b5a9441 100644
--- a/lite/kernels/npu/subgraph_compute.h
+++ b/lite/kernels/npu/subgraph_compute.h
@@ -35,9 +35,15 @@ class SubgraphEngine : public subgraph::Engine {
                 cpp::BlockDesc *block_desc,
                 const std::vector<std::string> &input_names,
                 const std::vector<std::string> &output_names,
-                Scope *scope)
-      : subgraph::Engine(
-            ctx, block_idx, block_desc, input_names, output_names, scope) {}
+                Scope *scope,
+                std::string model_cache_dir = "")
+      : subgraph::Engine(ctx,
+                         block_idx,
+                         block_desc,
+                         input_names,
+                         output_names,
+                         scope,
+                         model_cache_dir) {}
 
   struct device_program_t {
     explicit device_program_t(std::shared_ptr<hiai::AiModelMngerClient> _client)
@@ -58,6 +64,8 @@ class SubgraphEngine : public subgraph::Engine {
   void InitDeviceTensor() override;
   bool InputShapeChanged() override;
 
+  std::string GenerateModelCacheName() const;
+
   std::string model_name_{"model.om"};
   std::vector<std::vector<int64_t>> inputs_shape_{};
   std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<device_program_t>>
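To make the naming scheme concrete, here is a standalone program that reproduces the string construction of `GenerateModelCacheName` for hypothetical tensor names and shapes (the scope lookup is replaced by fixed data):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (name, dims) pairs standing in for the sorted input/output tensors.
  std::vector<std::pair<std::string, std::vector<int64_t>>> nodes = {
      {"data", {1, 3, 224, 224}},  // hypothetical input
      {"fc/out", {1, 1000}},       // hypothetical output; '/' becomes '_'
  };
  std::string model_cache_name;
  for (auto node : nodes) {
    std::replace(node.first.begin(), node.first.end(), '/', '_');
    model_cache_name += "_" + node.first;
    for (auto d : node.second) {
      model_cache_name += "_" + std::to_string(d);
    }
  }
  model_cache_name += "_.om";
  std::cout << model_cache_name << std::endl;
  // Prints: _data_1_3_224_224_fc_out_1_1000_.om
  return 0;
}
```

Because the file name encodes only tensor names and shapes, two models whose subgraphs share identical inputs and outputs would collide on the same cache file, and a cached .om is not invalidated when the model's weights change; clearing the cache directory after swapping models is the safe habit.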