Unverified commit 5c39519e, authored by H huzhiqiang, committed via GitHub

[Framework] Add method for specifying initial size of `workspace_` (#4378)

Parent cda2e2d9
...@@ -356,5 +356,13 @@ void MobileConfig::set_model_buffer(const char *model_buffer,
  model_from_memory_ = true;
}
// This is the method for allocating workspace_size according to L3Cache size
void MobileConfig::SetArmL3CacheSize(L3CacheSetMethod method,
                                     int absolute_val) {
#ifdef LITE_WITH_ARM
  lite::DeviceInfo::Global().SetArmL3CacheSize(method, absolute_val);
#endif
}
}  // namespace lite_api
}  // namespace paddle
...@@ -32,6 +32,14 @@ using shape_t = std::vector<int64_t>;
using lod_t = std::vector<std::vector<uint64_t>>;
enum class LiteModelType { kProtobuf = 0, kNaiveBuffer, UNK };
// Methods for allocating L3Cache on Arm platform
enum class L3CacheSetMethod {
  kDeviceL3Cache = 0,  // Use the system L3 Cache size, best performance.
  kDeviceL2Cache = 1,  // Use the system L2 Cache size, trade off performance
                       // with less memory consumption.
  kAbsolute = 2,       // Use the external setting.
  // kAutoGrow = 3,    // Not supported yet, least memory consumption.
};
// return true if current device supports OpenCL model
LITE_API bool IsOpenCLBackendValid();
...@@ -294,6 +302,11 @@ class LITE_API MobileConfig : public ConfigBase {
  // NOTE: This is a deprecated API and will be removed in a later release.
  const std::string& param_buffer() const { return param_buffer_; }
  // This is the method for allocating workspace_size according to L3Cache size
  void SetArmL3CacheSize(
      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
      int absolute_val = -1);
};
template <typename ConfigT>
......
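
Taken together, the new enum and setter give applications three ways to size workspace_. Below is a minimal usage sketch; the model path and the helper function name are illustrative and not part of this commit, and on non-ARM builds the setter compiles to a no-op:

#include "lite/api/paddle_api.h"

// Hypothetical helper showing the three L3CacheSetMethod options.
void ConfigureWorkspaceSketch() {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("model.opt.nb");  // illustrative model path
  // Default: equivalent to SetArmL3CacheSize(kDeviceL3Cache, -1),
  // i.e. size workspace_ from the device L3 cache (best performance).
  config.SetArmL3CacheSize();
  // Trade some performance for memory by using the L2 cache size instead.
  config.SetArmL3CacheSize(paddle::lite_api::L3CacheSetMethod::kDeviceL2Cache);
  // Or pin workspace_ to an explicit byte count, e.g. 1 MB.
  config.SetArmL3CacheSize(paddle::lite_api::L3CacheSetMethod::kAbsolute,
                           1024 * 1024);
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  (void)predictor;
}
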
...@@ -107,7 +107,8 @@ TEST(CxxApi, share_external_data) {
TEST(LightApi, run) {
  lite_api::MobileConfig config;
  config.set_model_from_file(FLAGS_model_dir + ".opt2.naive.nb");
  // Do not use the L3 cache size when allocating workspace_ (use the L2 size instead)
  config.SetArmL3CacheSize(L3CacheSetMethod::kDeviceL2Cache);
  auto predictor = lite_api::CreatePaddlePredictor(config);
  auto inputs = predictor->GetInputNames();
...@@ -148,6 +149,8 @@ TEST(MobileConfig, LoadfromMemory) {
  // set model buffer and run model
  lite_api::MobileConfig config;
  config.set_model_from_buffer(model_buffer);
  // Allocate 1 MB of initial space for workspace_
  config.SetArmL3CacheSize(L3CacheSetMethod::kAbsolute, 1024 * 1024);
  auto predictor = lite_api::CreatePaddlePredictor(config);
  auto input_tensor = predictor->GetInput(0);
......
...@@ -17,6 +17,7 @@
#include <cstdarg>
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/core/tensor.h" #include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h" #include "lite/utils/cp_logging.h"
#ifdef LITE_WITH_MLU #ifdef LITE_WITH_MLU
...@@ -27,6 +28,7 @@ ...@@ -27,6 +28,7 @@
namespace paddle { namespace paddle {
namespace lite { namespace lite {
using L3CacheSetMethod = lite_api::L3CacheSetMethod;
#if ((defined LITE_WITH_ARM) || (defined LITE_WITH_MLU))
typedef enum {
...@@ -65,11 +67,41 @@ class DeviceInfo {
  int l1_cache_size() const { return L1_cache_[active_ids_[0]]; }
  int l2_cache_size() const { return L2_cache_[active_ids_[0]]; }
  int l3_cache_size() const { return L3_cache_[active_ids_[0]]; }
  // Methods for allocating L3Cache on Arm platform
  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
  void SetArmL3CacheSize(
      L3CacheSetMethod method = L3CacheSetMethod::kDeviceL3Cache,
      int absolute_val = -1) {
    l3_cache_method_ = method;
    absolute_l3cache_size_ = absolute_val;
    // Realloc memory for sgemm in this context.
    workspace_.clear();
    workspace_.Resize({llc_size()});
    workspace_.mutable_data<int8_t>();
  }
  int llc_size() const {
    auto size = absolute_l3cache_size_;
    switch (l3_cache_method_) {
      // kDeviceL3Cache = 0, use the system L3 Cache size, best performance.
      case L3CacheSetMethod::kDeviceL3Cache:
        size = L3_cache_[active_ids_[0]] > 0 ? L3_cache_[active_ids_[0]]
                                             : L2_cache_[active_ids_[0]];
        break;
      // kDeviceL2Cache = 1, use the system L2 Cache size, trade off performance
      // with less memory consumption.
      case L3CacheSetMethod::kDeviceL2Cache:
        size = L2_cache_[active_ids_[0]];
        break;
      // kAbsolute = 2, use the external setting.
      case L3CacheSetMethod::kAbsolute:
        break;
      default:
        LOG(FATAL) << "Error: unknown l3_cache_method_ !";
    }
    return size > 0 ? size : 512 * 1024;
  }
  bool has_dot() const { return dot_[active_ids_[0]]; }
  bool has_fp16() const { return fp16_[active_ids_[0]]; }
...@@ -121,6 +153,10 @@ class DeviceInfo {
  void RequestPowerRandHighMode(int shift_num, int thread_num);
  void RequestPowerRandLowMode(int shift_num, int thread_num);
  // Methods for allocating L3Cache on Arm platform
  // Enum class L3CacheSetMethod is declared in `lite/api/paddle_api.h`
  L3CacheSetMethod l3_cache_method_{L3CacheSetMethod::kDeviceL3Cache};
  int absolute_l3cache_size_{-1};
  DeviceInfo() = default;
};
#endif  // LITE_WITH_ARM
......
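
The size-resolution rule added to llc_size() can be summarized by the following self-contained sketch; the function and parameter names are illustrative, and the cache sizes are passed in as plain integers here rather than read from the per-core L2_cache_/L3_cache_ tables:

// Standalone mirror of the selection logic in DeviceInfo::llc_size():
//   kDeviceL3Cache -> system L3 size, falling back to L2 if L3 is unknown
//   kDeviceL2Cache -> system L2 size
//   kAbsolute      -> the user-supplied byte count
// Any non-positive result falls back to 512 KB.
enum class L3CacheSetMethod { kDeviceL3Cache = 0, kDeviceL2Cache = 1, kAbsolute = 2 };

int ResolveWorkspaceBytes(L3CacheSetMethod method,
                          int l3_bytes,
                          int l2_bytes,
                          int absolute_val) {
  int size = absolute_val;
  switch (method) {
    case L3CacheSetMethod::kDeviceL3Cache:
      size = l3_bytes > 0 ? l3_bytes : l2_bytes;
      break;
    case L3CacheSetMethod::kDeviceL2Cache:
      size = l2_bytes;
      break;
    case L3CacheSetMethod::kAbsolute:
      break;
  }
  return size > 0 ? size : 512 * 1024;
}

For instance, kAbsolute with a non-positive absolute_val still yields the 512 KB floor, so workspace_ is never resized to zero.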