From 5c39519e779b6a061250aff2979df9548ddf893d Mon Sep 17 00:00:00 2001
From: huzhiqiang <912790387@qq.com>
Date: Mon, 21 Sep 2020 21:58:15 +0800
Subject: [PATCH] [Framework] Add method for specifying initial size of `workspace_` (#4378)

---
 lite/api/paddle_api.cc      |  8 ++++++++
 lite/api/paddle_api.h       | 13 ++++++++++++
 lite/api/paddle_api_test.cc |  5 ++++-
 lite/core/device_info.h     | 40 +++++++++++++++++++++++++++++++++++--
 4 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index a3d29dff93..d37657206d 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -356,5 +356,13 @@ void MobileConfig::set_model_buffer(const char *model_buffer,
   model_from_memory_ = true;
 }
 
+// This is the method for allocating workspace_size according to L3Cache size
+void MobileConfig::SetArmL3CacheSize(L3CacheSetMethod method,
+                                     int absolute_val) {
+#ifdef LITE_WITH_ARM
+  lite::DeviceInfo::Global().SetArmL3CacheSize(method, absolute_val);
+#endif
+}
+
 }  // namespace lite_api
 }  // namespace paddle
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index 42a4b2228b..7df7f7889a 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -32,6 +32,14 @@ using shape_t = std::vector;
 using lod_t = std::vector>;
 
 enum class LiteModelType { kProtobuf = 0, kNaiveBuffer, UNK };
+// Methods for allocating L3Cache on Arm platform
+enum class L3CacheSetMethod {
+  kDeviceL3Cache = 0,  // Use the system L3 Cache size, best performance.
+  kDeviceL2Cache = 1,  // Use the system L2 Cache size, trade off performance
+                       // with less memory consumption.
+  kAbsolute = 2,       // Use the external setting.
+  // kAutoGrow = 3,   // Not supported yet, least memory consumption.
+};
 
 // return true if current device supports OpenCL model
 LITE_API bool IsOpenCLBackendValid();
@@ -294,6 +302,11 @@ class LITE_API MobileConfig : public ConfigBase {
 
   // NOTE: This is a deprecated API and will be removed in latter release.
   const std::string& param_buffer() const { return param_buffer_; }
+
+  // This is the method for allocating workspace_size according to L3Cache size
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1);
 };
 
 template
diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc
index 9176ce0eb1..41799bdc2c 100644
--- a/lite/api/paddle_api_test.cc
+++ b/lite/api/paddle_api_test.cc
@@ -107,7 +107,8 @@ TEST(CxxApi, share_external_data) {
 TEST(LightApi, run) {
   lite_api::MobileConfig config;
   config.set_model_from_file(FLAGS_model_dir + ".opt2.naive.nb");
-
+  // disable L3 cache on workspace_ allocating
+  config.SetArmL3CacheSize(L3CacheSetMethod::kDeviceL2Cache);
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   auto inputs = predictor->GetInputNames();
@@ -148,6 +149,8 @@ TEST(MobileConfig, LoadfromMemory) {
   // set model buffer and run model
   lite_api::MobileConfig config;
   config.set_model_from_buffer(model_buffer);
+  // allocate 1M initial space for workspace_
+  config.SetArmL3CacheSize(L3CacheSetMethod::kAbsolute, 1024 * 1024);
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   auto input_tensor = predictor->GetInput(0);
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index c95f285e14..53d22ef90e 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include "lite/api/paddle_api.h"
 #include "lite/core/tensor.h"
 #include "lite/utils/cp_logging.h"
 #ifdef LITE_WITH_MLU
@@ -27,6 +28,7 @@
 namespace paddle {
 namespace lite {
 
+using L3CacheSetMethod = lite_api::L3CacheSetMethod;
 #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
 
 typedef enum {
@@ -65,11 +67,41 @@ class DeviceInfo {
   int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
   int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
   int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
+  // Methods for allocating L3Cache on Arm platform
+  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1) {
+    l3_cache_method_ = method;
+    absolute_l3cache_size_ = absolute_val;
+    // Realloc memory for sgemm in this context.
+    workspace_.clear();
+    workspace_.Resize({llc_size()});
+    workspace_.mutable_data();
+  }
+
   int llc_size() const {
-    auto size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
-                                              : L2_cache_[active_ids_[0]];
+    auto size = absolute_l3cache_size_;
+    switch (l3_cache_method_) {
+      // kDeviceL3Cache = 0, use the system L3 Cache size, best performance.
+      case L3CacheSetMethod::kDeviceL3Cache:
+        size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
+                                             : L2_cache_[active_ids_[0]];
+        break;
+      // kDeviceL2Cache = 1, use the system L2 Cache size, trade off performance
+      // with less memory consumption.
+      case L3CacheSetMethod::kDeviceL2Cache:
+        size = L2_cache_[active_ids_[0]];
+        break;
+      // kAbsolute = 2, use the external setting.
+      case L3CacheSetMethod::kAbsolute:
+        break;
+      default:
+        LOG(FATAL) << "Error: unknown l3_cache_method_ !";
+    }
     return size > 0 ? size : 512 * 1024;
   }
+
   bool has_dot() const { return dot_[active_ids_[0]]; }
   bool has_fp16() const { return fp16_[active_ids_[0]]; }
 
@@ -121,6 +153,10 @@ class DeviceInfo {
   void RequestPowerRandHighMode(int shift_num, int thread_num);
   void RequestPowerRandLowMode(int shift_num, int thread_num);
 
+  // Methods for allocating L3Cache on Arm platform
+  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
+  L3CacheSetMethod l3_cache_method_{L3CacheSetMethod::kDeviceL3Cache};
+  int absolute_l3cache_size_{-1};
   DeviceInfo() = default;
 };
 #endif  // LITE_WITH_ARM
-- 
GitLab