From dd3150a4c89e8b65f4a7ff71b3c4cf038f91c7e7 Mon Sep 17 00:00:00 2001
From: ysh329
Date: Thu, 30 Jul 2020 01:45:56 -0500
Subject: [PATCH] fix conflict and cherry pick 1d0f70a: add opencl tune api.
 test=develop (#4020)

---
 lite/api/paddle_api.cc                        | 12 ++++++++++
 lite/api/paddle_api.h                         |  5 ++++
 lite/backends/opencl/cl_context.h             |  1 +
 lite/backends/opencl/cl_runtime.h             |  6 +++++
 .../cxx/mobile_light/mobilenetv1_light_api.cc |  1 +
 lite/kernels/opencl/conv_image_compute.cc     | 23 ++++++++++++-------
 6 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index 45f2b3aff6..faed7cb943 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -215,6 +215,18 @@ ConfigBase::ConfigBase(PowerMode mode, int threads) {
 #endif
 }
 
+void ConfigBase::set_opencl_tune(bool enable_tune) {
+#ifdef LITE_WITH_OPENCL
+  if (paddle::lite_api::IsOpenCLBackendValid()) {
+    enable_opencl_tune_ = enable_tune;
+    paddle::lite::CLRuntime::Global()->set_auto_tune(enable_opencl_tune_);
+#ifdef LITE_WITH_OPENCL
+    LOG(INFO) << "auto_tune:" << paddle::lite::CLRuntime::Global()->auto_tune();
+#endif
+  }
+#endif
+}
+
 void ConfigBase::set_power_mode(paddle::lite_api::PowerMode mode) {
 #ifdef LITE_WITH_ARM
   lite::DeviceInfo::Global().SetRunMode(mode, threads_);
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index c11a60a197..3dc07a5ded 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -121,6 +121,8 @@ class LITE_API ConfigBase {
   std::string model_dir_;
   int threads_{1};
   PowerMode mode_{LITE_POWER_NO_BIND};
+  // gpu
+  bool enable_opencl_tune_{false};
   // to save subgraph model for npu/xpu/...
   std::string subgraph_model_cache_dir_{""};
 
@@ -135,6 +137,9 @@ class LITE_API ConfigBase {
   // set Thread
   void set_threads(int threads);
   int threads() const { return threads_; }
+  // set GPU opencl tune
+  void set_opencl_tune(bool enable_tune);
+  bool opencl_tune() const { return enable_opencl_tune_; }
   // set subgraph_model_dir
   void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
     subgraph_model_cache_dir_ = subgraph_model_cache_dir;
diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h
index ce2a600e43..23292eef1e 100644
--- a/lite/backends/opencl/cl_context.h
+++ b/lite/backends/opencl/cl_context.h
@@ -70,6 +70,7 @@ class CLContext {
   cl::NDRange LocalWorkSizeTuneReverse(cl::NDRange global_work_size,
                                        size_t max_work_size,
                                        int divitor = 2);
+  bool IsArmMali();
 
  private:
diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h
index 7e28130e15..7a48989e37 100644
--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -91,6 +91,10 @@ class CLRuntime {
     return is_device_avaliable_for_opencl_;
   }
 
+  void set_auto_tune(bool enable_tune) { auto_tune_ = enable_tune; }
+
+  bool auto_tune() { return auto_tune_; }
+
   bool Init();
 
   cl::Platform& platform();
@@ -195,6 +199,8 @@ class CLRuntime {
   bool is_cl_runtime_initialized_{false};
 
   bool is_platform_device_init_success_{false};
+
+  bool auto_tune_{false};
 };
 
 }  // namespace lite
diff --git a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
index 2604f104e7..6427f4c46d 100644
--- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
+++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
@@ -92,6 +92,7 @@ void RunModel(std::string model_dir,
   if (is_opencl_backend_valid) {
     // give opencl nb model dir
     config.set_model_from_file(model_dir);
+    config.set_opencl_tune(false);  // default is false
   } else {
     std::cout << "Unsupport opencl nb model." << std::endl;
     exit(1);
diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc
index 083f72134e..f53c464e99 100644
--- a/lite/kernels/opencl/conv_image_compute.cc
+++ b/lite/kernels/opencl/conv_image_compute.cc
@@ -32,16 +32,24 @@ namespace opencl {
 
 void ConvImageCompute::PrepareForRun() {
   ReInitWhenNeeded();
+  auto& context = ctx_->As<OpenCLContext>();
+  CHECK(context.cl_context() != nullptr);
+  const bool is_mali = context.cl_context()->IsArmMali();
+
+  use_tune_ = CLRuntime::Global()->auto_tune();
+  if (!is_mali) {
+    use_tune_ = false;
+  }
+#ifdef LITE_WITH_LOG
+  LOG(INFO) << "use_tune_" << use_tune_;
+#endif
+
   auto filter_dims = conv_param_->filter->dims();
   filter_tensor_n_ = filter_dims[0];
   filter_tensor_c_ = filter_dims[1];
   filter_tensor_h_ = filter_dims[2];
   filter_tensor_w_ = filter_dims[3];
 
-  auto& context = ctx_->As<OpenCLContext>();
-  CHECK(context.cl_context() != nullptr);
-  const bool is_mali = context.cl_context()->IsArmMali();
-
   auto paddings = *conv_param_->paddings;
   pad_up_ = paddings[0];
   pad_down_ = paddings[1];
@@ -65,6 +73,7 @@ void ConvImageCompute::PrepareForRun() {
   bool stride_equal = stride_h_ == stride_w_;
   bool dilation_equal = dilation_h_ == dilation_w_;
 
+#ifdef LITE_WITH_LOG
   VLOG(3) << "Is arm mali / " << (is_mali ? "Yes" : "No");
   VLOG(3) << "Is relu fused? / " << (relu_fused_ ? "Yes" : "No");
   VLOG(3) << "groups:" << groups_ << " stride_h_:" << stride_h_
@@ -83,6 +92,8 @@ void ConvImageCompute::PrepareForRun() {
   VLOG(3) << "dilation_equal:" << dilation_equal;
   VLOG(3) << "padding :" << pad_up_ << " " << pad_down_ << " " << pad_left_
           << " " << pad_right_;
+#endif
+
   CHECK(pad_equal && stride_equal && dilation_equal);
   CHECK_GE(conv_param_->dilations->size(), 2);
   CHECK(dilation_h_ == dilation_w_);
@@ -91,10 +102,6 @@ void ConvImageCompute::PrepareForRun() {
   CHECK_GE(conv_param_->strides.size(), 2);
   CHECK(stride_h_ == stride_w_);
 
-  if (!is_mali) {
-    use_tune_ = false;
-  }
-
   /*********************************************
    * Upload filter, bias to opencl device
    *********************************************/
--
GitLab
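Usage note: the sketch below is a minimal example (not part of the diff above) of how the
new ConfigBase::set_opencl_tune() API might be called through the light API, modeled on the
demo change to mobilenetv1_light_api.cc in this patch. The model path
"mobilenet_v1_opencl.nb" is a placeholder; the other calls appear in the patch or in the
existing demo.

// Sketch only: enable OpenCL kernel auto-tuning via the light API.
#include <iostream>

#include "paddle_api.h"  // NOLINT

using namespace paddle::lite_api;  // NOLINT

int main() {
  if (!IsOpenCLBackendValid()) {
    std::cout << "OpenCL backend is not available on this device." << std::endl;
    return 1;
  }
  MobileConfig config;
  config.set_model_from_file("mobilenet_v1_opencl.nb");  // placeholder .nb model path
  // Turn on the OpenCL auto-tune added by this patch (default is false).
  // The setter is a no-op when Lite is built without LITE_WITH_OPENCL.
  config.set_opencl_tune(true);
  auto predictor = CreatePaddlePredictor<MobileConfig>(config);
  // Fill inputs and call predictor->Run() as in mobilenetv1_light_api.cc.
  return 0;
}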