From 5c39519e779b6a061250aff2979df9548ddf893d Mon Sep 17 00:00:00 2001
From: huzhiqiang <912790387@qq.com>
Date: Mon, 21 Sep 2020 21:58:15 +0800
Subject: [PATCH] [Framework] Add method for specifying initial size of
 `workspace_` (#4378)

---
 lite/api/paddle_api.cc      |  8 ++++++++
 lite/api/paddle_api.h       | 13 ++++++++++++
 lite/api/paddle_api_test.cc |  5 ++++-
 lite/core/device_info.h     | 40 +++++++++++++++++++++++++++++++++++--
 4 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index a3d29dff93..d37657206d 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -356,5 +356,13 @@ void MobileConfig::set_model_buffer(const char *model_buffer,
   model_from_memory_ = true;
 }
 
+// Forwards the cache-size policy to DeviceInfo; no-op without LITE_WITH_ARM.
+void MobileConfig::SetArmL3CacheSize(L3CacheSetMethod method,
+                                     int absolute_val) {
+#ifdef LITE_WITH_ARM
+  lite::DeviceInfo::Global().SetArmL3CacheSize(method, absolute_val);
+#endif
+}
+
 }  // namespace lite_api
 }  // namespace paddle
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index 42a4b2228b..7df7f7889a 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -32,6 +32,14 @@ using shape_t = std::vector<int64_t>;
 using lod_t = std::vector<std::vector<uint64_t>>;
 
 enum class LiteModelType { kProtobuf = 0, kNaiveBuffer, UNK };
+// Strategies for sizing the cache-based workspace on Arm platforms.
+enum class L3CacheSetMethod {
+  kDeviceL3Cache = 0,  // Use device L3 size (L2 if no L3), best performance.
+  kDeviceL2Cache = 1,  // Use the device L2 cache size, trading performance
+                       // for lower memory consumption.
+  kAbsolute = 2,       // Use the caller-supplied `absolute_val`.
+  // kAutoGrow = 3,   // Not supported yet, least memory consumption.
+};
 
 // return true if current device supports OpenCL model
 LITE_API bool IsOpenCLBackendValid();
@@ -294,6 +302,11 @@ class LITE_API MobileConfig : public ConfigBase {
 
   // NOTE: This is a deprecated API and will be removed in latter release.
   const std::string& param_buffer() const { return param_buffer_; }
+
+  // Chooses how the Arm workspace size is derived (see L3CacheSetMethod).
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1);  // only consulted when method is kAbsolute
 };
 
 template <typename ConfigT>
diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc
index 9176ce0eb1..41799bdc2c 100644
--- a/lite/api/paddle_api_test.cc
+++ b/lite/api/paddle_api_test.cc
@@ -107,7 +107,8 @@ TEST(CxxApi, share_external_data) {
 TEST(LightApi, run) {
   lite_api::MobileConfig config;
   config.set_model_from_file(FLAGS_model_dir + ".opt2.naive.nb");
-
+  // size workspace_ from the L2 cache rather than the L3 cache
+  config.SetArmL3CacheSize(L3CacheSetMethod::kDeviceL2Cache);
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   auto inputs = predictor->GetInputNames();
@@ -148,6 +149,8 @@ TEST(MobileConfig, LoadfromMemory) {
   // set model buffer and run model
   lite_api::MobileConfig config;
   config.set_model_from_buffer(model_buffer);
+  // request 1024 * 1024 (1 MiB) as the initial size of workspace_
+  config.SetArmL3CacheSize(L3CacheSetMethod::kAbsolute, 1024 * 1024);
 
   auto predictor = lite_api::CreatePaddlePredictor(config);
   auto input_tensor = predictor->GetInput(0);
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index c95f285e14..53d22ef90e 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -17,6 +17,7 @@
 #include <cstdarg>
 #include <string>
 #include <vector>
+#include "lite/api/paddle_api.h"
 #include "lite/core/tensor.h"
 #include "lite/utils/cp_logging.h"
 #ifdef LITE_WITH_MLU
@@ -27,6 +28,7 @@
 namespace paddle {
 namespace lite {
 
+using L3CacheSetMethod = lite_api::L3CacheSetMethod;  // from paddle_api.h
 #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
 
 typedef enum {
@@ -65,11 +67,41 @@ class DeviceInfo {
   int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
   int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
   int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
+  // Stores the sizing policy and reallocates workspace_ to llc_size() bytes.
+  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1) {
+    l3_cache_method_ = method;
+    absolute_l3cache_size_ = absolute_val;
+    // Reallocate the sgemm workspace so it matches the new llc_size().
+    workspace_.clear();
+    workspace_.Resize({llc_size()});
+    workspace_.mutable_data<int8_t>();
+  }
+
   int llc_size() const {
-    auto size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
-                                              : L2_cache_[active_ids_[0]];
+    auto size = absolute_l3cache_size_;  // kept as-is only for kAbsolute
+    switch (l3_cache_method_) {
+      // kDeviceL3Cache: L3 size for active_ids_[0]; L2 when L3 is not > 0.
+      case L3CacheSetMethod::kDeviceL3Cache:
+        size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
+                                             : L2_cache_[active_ids_[0]];
+        break;
+      // kDeviceL2Cache: always the L2 size for active_ids_[0], trading
+      // performance for less memory consumption.
+      case L3CacheSetMethod::kDeviceL2Cache:
+        size = L2_cache_[active_ids_[0]];
+        break;
+      // kAbsolute: keep the absolute_l3cache_size_ already held in `size`.
+      case L3CacheSetMethod::kAbsolute:
+        break;
+      default:
+        LOG(FATAL) << "Error: unknown l3_cache_method_ !";
+    }
     return size > 0 ? size : 512 * 1024;
   }
+
   bool has_dot() const { return dot_[active_ids_[0]]; }
   bool has_fp16() const { return fp16_[active_ids_[0]]; }
 
@@ -121,6 +153,10 @@ class DeviceInfo {
   void RequestPowerRandHighMode(int shift_num, int thread_num);
   void RequestPowerRandLowMode(int shift_num, int thread_num);
 
+  // Sizing policy and explicit size read by llc_size(); both are written by
+  // SetArmL3CacheSize(). L3CacheSetMethod comes from `lite/api/paddle_api.h`.
+  L3CacheSetMethod l3_cache_method_{L3CacheSetMethod::kDeviceL3Cache};
+  int absolute_l3cache_size_{-1};
   DeviceInfo() = default;
 };
 #endif  // LITE_WITH_ARM
-- 
GitLab