From aa05c93e82ee5b4e1127c21aac60cc71f49e1825 Mon Sep 17 00:00:00 2001
From: zhupengyang <1165938320@qq.com>
Date: Wed, 13 May 2020 16:36:23 +0800
Subject: [PATCH] [NPU] save subgraph model cache (#3589)

---
 lite/api/light_api_impl.cc                   |  5 ++
 lite/api/paddle_api.h                        | 15 ++++-
 lite/backends/npu/device.cc                  | 67 ++++++++++++++++----
 lite/backends/npu/device.h                   |  9 +--
 lite/core/context.cc                         |  4 ++
 lite/core/context.h                          | 10 +++
 lite/core/mir/subgraph/subgraph_pass_test.cc |  1 +
 lite/kernels/npu/bridges/engine.h            |  7 +-
 lite/kernels/npu/subgraph_compute.cc         | 38 ++++++++++-
 lite/kernels/npu/subgraph_compute.h          | 14 +++-
 10 files changed, 145 insertions(+), 25 deletions(-)

diff --git a/lite/api/light_api_impl.cc b/lite/api/light_api_impl.cc
index cdf5b7fb06..e76e89af43 100644
--- a/lite/api/light_api_impl.cc
+++ b/lite/api/light_api_impl.cc
@@ -36,6 +36,11 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   }
   mode_ = config.power_mode();
   threads_ = config.threads();
+
+#ifdef LITE_WITH_NPU
+  Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
+      config.subgraph_model_cache_dir());
+#endif
 }
 
 std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index f4c7bae753..593a2eb702 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -118,18 +118,27 @@ class LITE_API ConfigBase {
   std::string model_dir_;
   int threads_{1};
   PowerMode mode_{LITE_POWER_NO_BIND};
+  // to save subgraph model for npu/xpu/...
+  std::string subgraph_model_cache_dir_{""};
 
  public:
   explicit ConfigBase(PowerMode mode = LITE_POWER_NO_BIND, int threads = 1);
   // set Model_dir
   void set_model_dir(const std::string& x) { model_dir_ = x; }
   const std::string& model_dir() const { return model_dir_; }
-  // set Power_mode
-  void set_power_mode(PowerMode mode);
-  PowerMode power_mode() const { return mode_; }
   // set Thread
   void set_threads(int threads);
   int threads() const { return threads_; }
+  // set Power_mode
+  void set_power_mode(PowerMode mode);
+  PowerMode power_mode() const { return mode_; }
+  // set subgraph_model_dir
+  void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  const std::string& subgraph_model_cache_dir() const {
+    return subgraph_model_cache_dir_;
+  }
 };
 
 /// CxxConfig is the config for the Full feature predictor.
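The new option travels from `MobileConfig` into the NPU context when the predictor is initialized (see `light_api_impl.cc` above). A minimal caller-side sketch of the intended usage (the model path and cache directory are placeholders, not part of the patch):

```cpp
#include "paddle_api.h"  // public Paddle-Lite API used by MobileConfig

int main() {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("/data/local/tmp/mobilenet_v1.nb");
  // First run: the NPU subgraph is compiled and the resulting .om blob is
  // written into this directory. Later runs: the blob is loaded from disk
  // instead of being rebuilt, which skips the HiAI IR build step.
  config.set_subgraph_model_cache_dir("/data/local/tmp");
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  return 0;
}
```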
diff --git a/lite/backends/npu/device.cc b/lite/backends/npu/device.cc
index 345b239c32..f9803aa881 100644
--- a/lite/backends/npu/device.cc
+++ b/lite/backends/npu/device.cc
@@ -14,15 +14,50 @@
 #include "lite/backends/npu/device.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/io.h"
 
 namespace paddle {
 namespace lite {
 namespace npu {
 
+bool WriteToOMFile(const domi::ModelBufferData& om_model_buff,
+                   std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "wb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  uint32_t write_size =
+      (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
+  CHECK_EQ(write_size, om_model_buff.length) << "write om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
+bool ReadFromOMFile(domi::ModelBufferData* om_model_buff,
+                    std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "rb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  fseek(fp, 0, SEEK_END);
+  uint32_t model_length = (uint32_t)ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+  om_model_buff->data = malloc(model_length);
+  om_model_buff->length = model_length;
+  uint32_t read_size =
+      (uint32_t)fread(om_model_buff->data, 1, model_length, fp);
+  CHECK_EQ(read_size, model_length) << "read om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
 std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
-    const std::string model_name,            // NOLINT
-    std::vector<ge::Operator>& input_nodes,  // NOLINT
-    std::vector<ge::Operator>& output_nodes  // NOLINT
+    const std::string model_name,                // NOLINT
+    std::vector<ge::Operator>& input_nodes,      // NOLINT
+    std::vector<ge::Operator>& output_nodes,     // NOLINT
+    const std::string model_cache_full_dir = ""  // NOLINT
     ) {
   VLOG(3) << "[NPU] Build model";
   // Build the HiAI IR graph to the HiAI om model
@@ -32,14 +67,24 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
   om_model.SetGraph(ir_graph);
   domi::HiaiIrBuild ir_build;
   domi::ModelBufferData om_model_buf;
-  if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
-    LOG(WARNING) << "[NPU] CreateModelBuff failed!";
-    return nullptr;
-  }
-  if (!ir_build.BuildIRModel(om_model, om_model_buf)) {
-    LOG(WARNING) << "[NPU] BuildIRModel failed!";
-    ir_build.ReleaseModelBuff(om_model_buf);
-    return nullptr;
+
+  if (!model_cache_full_dir.empty() && IsFileExists(model_cache_full_dir)) {
+    VLOG(3) << "Will read om model from " << model_cache_full_dir;
+    ReadFromOMFile(&om_model_buf, model_cache_full_dir);
+  } else {
+    if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
+      LOG(WARNING) << "[NPU] CreateModelBuff failed!";
+      return nullptr;
+    }
+    if (!ir_build.BuildIRModel(om_model, om_model_buf)) {
+      LOG(WARNING) << "[NPU] BuildIRModel failed!";
+      ir_build.ReleaseModelBuff(om_model_buf);
+      return nullptr;
+    }
+    if (!model_cache_full_dir.empty()) {
+      VLOG(3) << "Will write om model to " << model_cache_full_dir;
+      WriteToOMFile(om_model_buf, model_cache_full_dir);
+    }
   }
 
   // Create a HiAI model manager client to load the HiAI om model
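The two helpers above are a plain stdio round trip: `WriteToOMFile` dumps `length` bytes starting at `data`, and `ReadFromOMFile` sizes its buffer from the file itself. A self-contained sketch of that byte-level contract, with `OMBuffer` standing in for `domi::ModelBufferData` and a placeholder path:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

struct OMBuffer {  // stand-in for domi::ModelBufferData
  void* data{nullptr};
  uint32_t length{0};
};

int main() {
  const std::string path = "/tmp/demo.om";  // placeholder cache file

  // Write side: dump `length` bytes from `data`, as WriteToOMFile does.
  OMBuffer out;
  out.length = 5;
  out.data = malloc(out.length);
  memcpy(out.data, "HIAI!", out.length);
  FILE* fp = fopen(path.c_str(), "wb");
  if (fp == nullptr) return 1;
  fwrite(out.data, 1, out.length, fp);
  fclose(fp);

  // Read side: size the buffer from the file length, as ReadFromOMFile does.
  OMBuffer in;
  fp = fopen(path.c_str(), "rb");
  if (fp == nullptr) return 1;
  fseek(fp, 0, SEEK_END);
  in.length = static_cast<uint32_t>(ftell(fp));
  fseek(fp, 0, SEEK_SET);
  in.data = malloc(in.length);
  size_t read_size = fread(in.data, 1, in.length, fp);
  fclose(fp);

  bool ok = read_size == in.length && in.length == out.length &&
            memcmp(in.data, out.data, in.length) == 0;
  free(out.data);
  free(in.data);
  return ok ? 0 : 1;
}
```

Note that the real helpers `CHECK`-fail (abort) on any I/O error rather than falling back to a rebuild, and an existing cache file is loaded as-is; nothing verifies that it still matches the current graph.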
diff --git a/lite/backends/npu/device.h b/lite/backends/npu/device.h
index 6733a7f6df..fa8469bf2e 100644
--- a/lite/backends/npu/device.h
+++ b/lite/backends/npu/device.h
@@ -41,10 +41,11 @@ class Device {
   // Build the HiAI IR graph to om model, return HiAI model manager client to
   // load om model and run inference.
   std::shared_ptr<hiai::AiModelMngerClient> Build(
-      const std::string model_name,            // NOLINT
-      std::vector<ge::Operator>& input_nodes,  // NOLINT
-      std::vector<ge::Operator>& output_nodes  // NOLINT
-      );                                       // NOLINT
+      const std::string model_name,             // NOLINT
+      std::vector<ge::Operator>& input_nodes,   // NOLINT
+      std::vector<ge::Operator>& output_nodes,  // NOLINT
+      const std::string model_cache_name        // NOLINT
+      );                                        // NOLINT
 
  private:
   int freq_level_{3};
diff --git a/lite/core/context.cc b/lite/core/context.cc
index 66d0c39463..eb8f90d7fa 100644
--- a/lite/core/context.cc
+++ b/lite/core/context.cc
@@ -17,6 +17,10 @@
 namespace paddle {
 namespace lite {
 
+#ifdef LITE_WITH_NPU
+std::string Context<TargetType::kNPU>::subgraph_model_cache_dir_{""};  // NOLINT
+#endif
+
 #ifdef LITE_WITH_XPU
 std::string Context<TargetType::kXPU>::_multi_encoder_precision;  // NOLINT
 thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr};
diff --git a/lite/core/context.h b/lite/core/context.h
index 324b5552ac..f8013ac500 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -85,6 +85,16 @@ class Context<TargetType::kNPU> {
   NPUContext& operator=(const NPUContext& ctx) {}
   std::string name() const { return "NPUContext"; }
+
+  static void SetSubgraphModelCacheDir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  static std::string SubgraphModelCacheDir() {
+    return subgraph_model_cache_dir_;
+  }
+
+ private:
+  static std::string subgraph_model_cache_dir_;
 };
 #endif
diff --git a/lite/core/mir/subgraph/subgraph_pass_test.cc b/lite/core/mir/subgraph/subgraph_pass_test.cc
index ee2d67e918..0fa69df8b5 100644
--- a/lite/core/mir/subgraph/subgraph_pass_test.cc
+++ b/lite/core/mir/subgraph/subgraph_pass_test.cc
@@ -132,6 +132,7 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
   mobile_config.set_model_from_file(optimized_model_dir + ".nb");
   mobile_config.set_power_mode(lite_api::PowerMode::LITE_POWER_HIGH);
   mobile_config.set_threads(1);
+  // mobile_config.set_subgraph_model_cache_dir("/data/local/tmp");
   predictor = lite_api::CreatePaddlePredictor(mobile_config);
   FillInputTensors(predictor, input_tensor_shape, input_tensor_type, 1);
   // Run optimized model
diff --git a/lite/kernels/npu/bridges/engine.h b/lite/kernels/npu/bridges/engine.h
index 34ec923889..9f90277be8 100644
--- a/lite/kernels/npu/bridges/engine.h
+++ b/lite/kernels/npu/bridges/engine.h
@@ -33,13 +33,15 @@ class Engine {
          cpp::BlockDesc *block_desc,
          const std::vector<std::string> &input_names,
          const std::vector<std::string> &output_names,
-         lite::Scope *scope)
+         lite::Scope *scope,
+         std::string model_cache_dir = "")
       : ctx_(ctx),
         block_idx_(block_idx),
         block_desc_(block_desc),
         input_names_(input_names),
        output_names_(output_names),
-        scope_(scope) {}
+        scope_(scope),
+        model_cache_dir_(model_cache_dir) {}
   virtual ~Engine() = default;
 
   virtual int Build();
@@ -73,6 +75,7 @@ class Engine {
   std::vector<Tensor *> origin_itensors_;
   std::vector<Tensor *> origin_otensors_;
   std::vector<Instruction> origin_program_;
+  std::string model_cache_dir_{""};
 };
 
 }  // namespace subgraph
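The cache directory is plumbed through as a process-wide static on the NPU context rather than as a per-predictor field. A minimal reproduction of that pattern (class and member names here are stand-ins for the real `Context<TargetType::kNPU>`):

```cpp
#include <iostream>
#include <string>

// One static string: written once at predictor init, read later by every
// NPU subgraph kernel in the same process.
class NpuCtx {
 public:
  static void SetSubgraphModelCacheDir(std::string dir) { cache_dir_ = dir; }
  static std::string SubgraphModelCacheDir() { return cache_dir_; }

 private:
  static std::string cache_dir_;
};
std::string NpuCtx::cache_dir_{""};  // single definition, as in context.cc

int main() {
  // What LightPredictorImpl::Init does with config.subgraph_model_cache_dir():
  NpuCtx::SetSubgraphModelCacheDir("/data/local/tmp");
  // What SubgraphCompute::PrepareForRun reads when constructing the engine:
  std::cout << NpuCtx::SubgraphModelCacheDir() << std::endl;
  return 0;
}
```

One consequence of the static: a single cache directory applies to every predictor in the process, and the last `MobileConfig` to initialize wins.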
diff --git a/lite/kernels/npu/subgraph_compute.cc b/lite/kernels/npu/subgraph_compute.cc
index da2fd3ead2..d2609ff61e 100644
--- a/lite/kernels/npu/subgraph_compute.cc
+++ b/lite/kernels/npu/subgraph_compute.cc
@@ -15,6 +15,7 @@
 #include "lite/kernels/npu/subgraph_compute.h"
 #include <sys/time.h>
 #include <time.h>
+#include <algorithm>
 #include "hiai_ir_build.h"  // NOLINT
 #include "lite/backends/npu/device.h"
@@ -22,12 +23,41 @@
 #include "lite/kernels/npu/bridges/graph.h"
 #include "lite/kernels/npu/bridges/paddle_use_bridges.h"
 #include "lite/kernels/npu/bridges/utility.h"
+#include "lite/utils/io.h"
 
 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace npu {
 
+std::string SubgraphEngine::GenerateModelCacheName() const {
+  auto inames = device_inames_;
+  auto onames = device_onames_;
+  std::sort(inames.begin(), inames.end());
+  std::sort(onames.begin(), onames.end());
+
+  std::string model_cache_name = "";
+  for (auto iname : inames) {
+    auto itensor = scope_->FindTensor(iname);
+    std::replace(iname.begin(), iname.end(), '/', '_');
+    model_cache_name += "_" + iname;
+    for (auto i : itensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  for (auto oname : onames) {
+    auto otensor = scope_->FindTensor(oname);
+    std::replace(oname.begin(), oname.end(), '/', '_');
+    model_cache_name += "_" + oname;
+    for (auto i : otensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  model_cache_name += "_.om";
+
+  return model_cache_name;
+}
+
 int SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the NPU
@@ -88,8 +118,11 @@
   if (device_program_map_.count(inputs_shape_) > 0) {
     return status;
   }
+  std::string model_cache_full_dir =
+      model_cache_dir_.empty() ? "" : model_cache_dir_ + "/" +
+                                          GenerateModelCacheName();
   auto device_client = lite::npu::Device::Global().Build(
-      model_name_, device_inodes, device_onodes);
+      model_name_, device_inodes, device_onodes, model_cache_full_dir);
   if (device_client == nullptr) {
     LOG(WARNING) << "[NPU] Build model failed!";
     return subgraph::FAILED;
   }
@@ -280,7 +313,8 @@ void SubgraphCompute::PrepareForRun() {
                                param.sub_block_desc,
                                param.input_data_names,
                                param.output_data_names,
-                               param.scope));
+                               param.scope,
+                               NPUContext::SubgraphModelCacheDir()));
   CHECK(engine_);
   engine_->Build();
 }
diff --git a/lite/kernels/npu/subgraph_compute.h b/lite/kernels/npu/subgraph_compute.h
index db84fc1883..9f0b5a9441 100644
--- a/lite/kernels/npu/subgraph_compute.h
+++ b/lite/kernels/npu/subgraph_compute.h
@@ -35,9 +35,15 @@ class SubgraphEngine : public subgraph::Engine {
                 cpp::BlockDesc *block_desc,
                 const std::vector<std::string> &input_names,
                 const std::vector<std::string> &output_names,
-                Scope *scope)
-      : subgraph::Engine(
-            ctx, block_idx, block_desc, input_names, output_names, scope) {}
+                Scope *scope,
+                std::string model_cache_dir = "")
+      : subgraph::Engine(ctx,
+                         block_idx,
+                         block_desc,
+                         input_names,
+                         output_names,
+                         scope,
+                         model_cache_dir) {}
 
   struct device_program_t {
     explicit device_program_t(std::shared_ptr<hiai::AiModelMngerClient> _client)
@@ -58,6 +64,8 @@ class SubgraphEngine : public subgraph::Engine {
   void InitDeviceTensor() override;
   bool InputShapeChanged() override;
 
+  std::string GenerateModelCacheName() const;
+
   std::string model_name_{"model.om"};
   std::vector<std::vector<int64_t>> inputs_shape_{};
   std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<device_program_t>>
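To make the naming scheme concrete, here is a standalone program that reproduces the string construction of `GenerateModelCacheName` for hypothetical tensor names and shapes (the scope lookup is replaced by fixed data):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (name, dims) pairs standing in for the sorted input/output tensors.
  std::vector<std::pair<std::string, std::vector<int64_t>>> nodes = {
      {"data", {1, 3, 224, 224}},  // hypothetical input
      {"fc/out", {1, 1000}},       // hypothetical output; '/' becomes '_'
  };
  std::string model_cache_name;
  for (auto node : nodes) {
    std::replace(node.first.begin(), node.first.end(), '/', '_');
    model_cache_name += "_" + node.first;
    for (auto d : node.second) {
      model_cache_name += "_" + std::to_string(d);
    }
  }
  model_cache_name += "_.om";
  std::cout << model_cache_name << std::endl;
  // Prints: _data_1_3_224_224_fc_out_1_1000_.om
  return 0;
}
```

Because the file name encodes only tensor names and shapes, two models whose subgraphs share identical inputs and outputs would collide on the same cache file, and a cached .om is not invalidated when the model's weights change; clearing the cache directory after swapping models is the safe habit.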