未验证 提交 1d0f70ae 编写于 作者: Y ysh329 提交者: GitHub

add opencl tune api for user. test=develop (#4016)

上级 9bd739fb
...@@ -244,6 +244,18 @@ ConfigBase::ConfigBase(PowerMode mode, int threads) { ...@@ -244,6 +244,18 @@ ConfigBase::ConfigBase(PowerMode mode, int threads) {
#endif #endif
} }
// Enable or disable OpenCL kernel auto-tuning for this config.
// No-op unless the library was built with OpenCL support
// (LITE_WITH_OPENCL) and a valid OpenCL backend is available at
// runtime; the flag is forwarded to the global CLRuntime singleton.
void ConfigBase::set_opencl_tune(bool enable_tune) {
#ifdef LITE_WITH_OPENCL
  if (paddle::lite_api::IsOpenCLBackendValid()) {
    enable_opencl_tune_ = enable_tune;
    paddle::lite::CLRuntime::Global()->set_auto_tune(enable_opencl_tune_);
// NOTE(review): the original inner guard was `#ifdef LITE_WITH_OPENCL`,
// which is redundant (we are already inside that guard). Every other log
// statement in this change is guarded by LITE_WITH_LOG, so that is used
// here instead.
#ifdef LITE_WITH_LOG
    LOG(INFO) << "auto_tune:" << paddle::lite::CLRuntime::Global()->auto_tune();
#endif
  }
#endif
}
void ConfigBase::set_power_mode(paddle::lite_api::PowerMode mode) { void ConfigBase::set_power_mode(paddle::lite_api::PowerMode mode) {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
lite::DeviceInfo::Global().SetRunMode(mode, threads_); lite::DeviceInfo::Global().SetRunMode(mode, threads_);
......
...@@ -124,6 +124,8 @@ class LITE_API ConfigBase { ...@@ -124,6 +124,8 @@ class LITE_API ConfigBase {
std::string model_dir_; std::string model_dir_;
int threads_{1}; int threads_{1};
PowerMode mode_{LITE_POWER_NO_BIND}; PowerMode mode_{LITE_POWER_NO_BIND};
// gpu
bool enable_opencl_tune_{false};
// to save subgraph model for npu/xpu/... // to save subgraph model for npu/xpu/...
std::string subgraph_model_cache_dir_{""}; std::string subgraph_model_cache_dir_{""};
int device_id_{0}; int device_id_{0};
...@@ -139,6 +141,9 @@ class LITE_API ConfigBase { ...@@ -139,6 +141,9 @@ class LITE_API ConfigBase {
// set Power_mode // set Power_mode
void set_power_mode(PowerMode mode); void set_power_mode(PowerMode mode);
PowerMode power_mode() const { return mode_; } PowerMode power_mode() const { return mode_; }
// set GPU opencl tune
void set_opencl_tune(bool enable_tune);
bool opencl_tune() const { return enable_opencl_tune_; }
// set subgraph_model_dir // set subgraph_model_dir
void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) { void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
subgraph_model_cache_dir_ = subgraph_model_cache_dir; subgraph_model_cache_dir_ = subgraph_model_cache_dir;
......
...@@ -65,9 +65,11 @@ class CLContext { ...@@ -65,9 +65,11 @@ class CLContext {
cl::NDRange LocalWorkSizeTune(cl::NDRange global_work_size, cl::NDRange LocalWorkSizeTune(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divitor = 2); int divitor = 2);
cl::NDRange LocalWorkSizeTuneReverse(cl::NDRange global_work_size, cl::NDRange LocalWorkSizeTuneReverse(cl::NDRange global_work_size,
size_t max_work_size, size_t max_work_size,
int divitor = 2); int divitor = 2);
bool IsArmMali(); bool IsArmMali();
// cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size, // cl::NDRange LocalWorkSizeConv1x1(cl::NDRange global_work_size,
// size_t max_work_size); // size_t max_work_size);
......
...@@ -91,6 +91,10 @@ class CLRuntime { ...@@ -91,6 +91,10 @@ class CLRuntime {
return is_device_avaliable_for_opencl_; return is_device_avaliable_for_opencl_;
} }
// Record whether OpenCL kernel auto-tuning should be used.
void set_auto_tune(bool enable_tune) {
  auto_tune_ = enable_tune;
}
// Returns true when OpenCL kernel auto-tuning has been enabled.
bool auto_tune() {
  return auto_tune_;
}
bool Init(); bool Init();
cl::Platform& platform(); cl::Platform& platform();
...@@ -195,6 +199,8 @@ class CLRuntime { ...@@ -195,6 +199,8 @@ class CLRuntime {
bool is_cl_runtime_initialized_{false}; bool is_cl_runtime_initialized_{false};
bool is_platform_device_init_success_{false}; bool is_platform_device_init_success_{false};
bool auto_tune_{false};
}; };
} // namespace lite } // namespace lite
......
...@@ -92,6 +92,7 @@ void RunModel(std::string model_dir, ...@@ -92,6 +92,7 @@ void RunModel(std::string model_dir,
if (is_opencl_backend_valid) { if (is_opencl_backend_valid) {
// give opencl nb model dir // give opencl nb model dir
config.set_model_from_file(model_dir); config.set_model_from_file(model_dir);
config.set_opencl_tune(false); // default is false
} else { } else {
std::cout << "Unsupport opencl nb model." << std::endl; std::cout << "Unsupport opencl nb model." << std::endl;
exit(1); exit(1);
......
...@@ -32,16 +32,24 @@ namespace opencl { ...@@ -32,16 +32,24 @@ namespace opencl {
void ConvImageCompute::PrepareForRun() { void ConvImageCompute::PrepareForRun() {
ReInitWhenNeeded(); ReInitWhenNeeded();
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
const bool is_mali = context.cl_context()->IsArmMali();
use_tune_ = CLRuntime::Global()->auto_tune();
if (!is_mali) {
use_tune_ = false;
}
#ifdef LITE_WITH_LOG
LOG(INFO) << "use_tune_" << use_tune_;
#endif
auto filter_dims = conv_param_->filter->dims(); auto filter_dims = conv_param_->filter->dims();
filter_tensor_n_ = filter_dims[0]; filter_tensor_n_ = filter_dims[0];
filter_tensor_c_ = filter_dims[1]; filter_tensor_c_ = filter_dims[1];
filter_tensor_h_ = filter_dims[2]; filter_tensor_h_ = filter_dims[2];
filter_tensor_w_ = filter_dims[3]; filter_tensor_w_ = filter_dims[3];
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
const bool is_mali = context.cl_context()->IsArmMali();
auto paddings = *conv_param_->paddings; auto paddings = *conv_param_->paddings;
pad_up_ = paddings[0]; pad_up_ = paddings[0];
pad_down_ = paddings[1]; pad_down_ = paddings[1];
...@@ -65,6 +73,7 @@ void ConvImageCompute::PrepareForRun() { ...@@ -65,6 +73,7 @@ void ConvImageCompute::PrepareForRun() {
bool stride_equal = stride_h_ == stride_w_; bool stride_equal = stride_h_ == stride_w_;
bool dilation_equal = dilation_h_ == dilation_w_; bool dilation_equal = dilation_h_ == dilation_w_;
#ifdef LITE_WITH_LOG
VLOG(3) << "Is arm mali / " << (is_mali ? "Yes" : "No"); VLOG(3) << "Is arm mali / " << (is_mali ? "Yes" : "No");
VLOG(3) << "Is relu fused? / " << (relu_fused_ ? "Yes" : "No"); VLOG(3) << "Is relu fused? / " << (relu_fused_ ? "Yes" : "No");
VLOG(3) << "groups:" << groups_ << " stride_h_:" << stride_h_ VLOG(3) << "groups:" << groups_ << " stride_h_:" << stride_h_
...@@ -83,6 +92,8 @@ void ConvImageCompute::PrepareForRun() { ...@@ -83,6 +92,8 @@ void ConvImageCompute::PrepareForRun() {
VLOG(3) << "dilation_equal:" << dilation_equal; VLOG(3) << "dilation_equal:" << dilation_equal;
VLOG(3) << "padding :" << pad_up_ << " " << pad_down_ << " " << pad_left_ VLOG(3) << "padding :" << pad_up_ << " " << pad_down_ << " " << pad_left_
<< " " << pad_right_; << " " << pad_right_;
#endif
CHECK(pad_equal && stride_equal && dilation_equal); CHECK(pad_equal && stride_equal && dilation_equal);
CHECK_GE(conv_param_->dilations->size(), 2); CHECK_GE(conv_param_->dilations->size(), 2);
CHECK(dilation_h_ == dilation_w_); CHECK(dilation_h_ == dilation_w_);
...@@ -91,10 +102,6 @@ void ConvImageCompute::PrepareForRun() { ...@@ -91,10 +102,6 @@ void ConvImageCompute::PrepareForRun() {
CHECK_GE(conv_param_->strides.size(), 2); CHECK_GE(conv_param_->strides.size(), 2);
CHECK(stride_h_ == stride_w_); CHECK(stride_h_ == stride_w_);
if (!is_mali) {
use_tune_ = false;
}
/********************************************* /*********************************************
* Upload filter, bias to opencl device * Upload filter, bias to opencl device
*********************************************/ *********************************************/
......
...@@ -152,7 +152,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL), ...@@ -152,7 +152,7 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
cl::NDRange local_work_size_ = cl::NDRange{ cl::NDRange local_work_size_ = cl::NDRange{
static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)}; static_cast<size_t>(1), static_cast<size_t>(1), static_cast<size_t>(1)};
bool use_lws_{true}; bool use_lws_{true};
bool use_tune_{true}; bool use_tune_{false};
}; };
} // namespace opencl } // namespace opencl
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册