diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index 5fd4183061ad92b0066d10eef3d819e3fc1b3ac3..a93990a612ec1172739dce70e29f9cab9493b38a 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -333,5 +333,15 @@ void MobileConfig::set_model_buffer(const char *model_buffer,
   model_from_memory_ = true;
 }
 
+// Forwards the workspace sizing policy to lite::DeviceInfo::Global().
+// `absolute_val` is the size used with kAbsolute. Nothing happens when
+// LITE_WITH_ARM is not defined.
+void MobileConfig::SetArmL3CacheSize(L3CacheSetMethod method,
+                                     int absolute_val) {
+#ifdef LITE_WITH_ARM
+  lite::DeviceInfo::Global().SetArmL3CacheSize(method, absolute_val);
+#endif
+}
+
 }  // namespace lite_api
 }  // namespace paddle
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index 3dc07a5ded96fa782513ca5e6a242c906fee65de..e593719fc6a78c6421d2263a7e55144df43e6140 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -31,6 +31,14 @@ using shape_t = std::vector<int64_t>;
 using lod_t = std::vector<std::vector<uint64_t>>;
 
 enum class LiteModelType { kProtobuf = 0, kNaiveBuffer, UNK };
+// Policies for choosing the size that stands in for the L3 cache on Arm.
+enum class L3CacheSetMethod {
+  kDeviceL3Cache = 0,  // Use the system L3 Cache size, best performance.
+  kDeviceL2Cache = 1,  // Use the system L2 Cache size, trade off performance
+                       // with less memory consumption.
+  kAbsolute = 2,       // Use the size passed in by the caller.
+  // kAutoGrow = 3,  // Not supported yet, least memory consumption.
+};
 
 // return true if current device supports OpenCL model
 LITE_API bool IsOpenCLBackendValid();
@@ -283,6 +291,13 @@ class LITE_API MobileConfig : public ConfigBase {
 
   // NOTE: This is a deprecated API and will be removed in latter release.
   const std::string& param_buffer() const { return param_buffer_; }
+
+  // Selects how the Arm workspace size is derived (see L3CacheSetMethod).
+  // `absolute_val` is the requested size and is only used with kAbsolute.
+  // Has no effect unless the library is built with LITE_WITH_ARM.
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1);
 };
 
 template <typename ConfigT>
diff --git a/lite/api/paddle_api_test.cc b/lite/api/paddle_api_test.cc
index 832867df079efa1baebf08da4c0d8e37958460f1..1b90b97c126e2e11c6ff689deff5cc25e40276f9 100644
--- a/lite/api/paddle_api_test.cc
+++ b/lite/api/paddle_api_test.cc
@@ -70,7 +70,8 @@ TEST(CxxApi, run) {
 TEST(LightApi, run) {
   lite_api::MobileConfig config;
   config.set_model_from_file(FLAGS_model_dir + ".opt2.naive.nb");
-
+  // size workspace_ from the L2 cache instead of the L3 cache
+  config.SetArmL3CacheSize(L3CacheSetMethod::kDeviceL2Cache);
   auto predictor = lite_api::CreatePaddlePredictor(config);
 
   auto inputs = predictor->GetInputNames();
@@ -111,6 +112,8 @@ TEST(MobileConfig, LoadfromMemory) {
   // set model buffer and run model
   lite_api::MobileConfig config;
   config.set_model_from_buffer(model_buffer);
+  // allocate 1M initial space for workspace_
+  config.SetArmL3CacheSize(L3CacheSetMethod::kAbsolute, 1024 * 1024);
 
   auto predictor = lite_api::CreatePaddlePredictor(config);
   auto input_tensor = predictor->GetInput(0);
diff --git a/lite/core/device_info.h b/lite/core/device_info.h
index d35efebf7c9a0703b6d8acc0ad3960890fcfe607..288a034f2f621745b5f977f63c623348bae0d12a 100644
--- a/lite/core/device_info.h
+++ b/lite/core/device_info.h
@@ -17,6 +17,7 @@
 #include <cstdarg>
 #include <string>
 #include <vector>
+#include "lite/api/paddle_api.h"
 #include "lite/core/tensor.h"
 #include "lite/utils/cp_logging.h"
 #ifdef LITE_WITH_MLU
@@ -27,6 +28,7 @@
 namespace paddle {
 namespace lite {
 
+using L3CacheSetMethod = lite_api::L3CacheSetMethod;
 #if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
 
 typedef enum {
@@ -79,11 +81,44 @@ class DeviceInfo {
   int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
   int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
   int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
+  // Chooses how llc_size() is computed and re-creates workspace_ with the new
+  // size. `absolute_val` only takes effect when `method` is kAbsolute.
+  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
+  void SetArmL3CacheSize(
+      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
+      int absolute_val = -1) {
+    l3_cache_method_ = method;
+    absolute_l3cache_size_ = absolute_val;
+    // Realloc memory for sgemm in this context.
+    workspace_.clear();
+    workspace_.Resize({llc_size()});
+    workspace_.mutable_data<int8_t>();
+  }
+
+  // Size used for workspace_ under the current policy. When the selected
+  // value is not positive the result is 512 * 1024.
   int llc_size() const {
-    auto size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
-                                              : L2_cache_[active_ids_[0]];
+    auto size = absolute_l3cache_size_;
+    switch (l3_cache_method_) {
+      // kDeviceL3Cache = 0, use the system L3 Cache size, best performance.
+      case L3CacheSetMethod::kDeviceL3Cache:
+        size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
+                                             : L2_cache_[active_ids_[0]];
+        break;
+      // kDeviceL2Cache = 1, use the system L2 Cache size, trade off performance
+      // with less memory consumption.
+      case L3CacheSetMethod::kDeviceL2Cache:
+        size = L2_cache_[active_ids_[0]];
+        break;
+      // kAbsolute = 2, keep the caller-provided absolute_l3cache_size_.
+      case L3CacheSetMethod::kAbsolute:
+        break;
+      default:
+        LOG(FATAL) << "Error: unknown l3_cache_method_ !";
+    }
     return size > 0 ? size : 512 * 1024;
   }
+
   bool has_dot() const { return dot_[active_ids_[0]]; }
   bool has_fp16() const { return fp16_[active_ids_[0]]; }
 
@@ -144,6 +179,10 @@ class DeviceInfo {
   void RequestPowerRandHighMode(int shift_num, int thread_num);
   void RequestPowerRandLowMode(int shift_num, int thread_num);
 
+  // Workspace sizing policy and the size used by kAbsolute; both are written
+  // by SetArmL3CacheSize(). The enum lives in `lite/api/paddle_api.h`.
+  L3CacheSetMethod l3_cache_method_{L3CacheSetMethod::kDeviceL3Cache};
+  int absolute_l3cache_size_{-1};
   DeviceInfo() = default;
 };
 #endif  // LITE_WITH_ARM