From dd3150a4c89e8b65f4a7ff71b3c4cf038f91c7e7 Mon Sep 17 00:00:00 2001
From: ysh329
Date: Thu, 30 Jul 2020 01:45:56 -0500
Subject: [PATCH] fix conflict and cherry pick 1d0f70a: add opencl tune api.
 test=develop (#4020)

---
 lite/api/paddle_api.cc                        | 12 ++++++++++
 lite/api/paddle_api.h                         |  5 ++++
 lite/backends/opencl/cl_context.h             |  1 +
 lite/backends/opencl/cl_runtime.h             |  6 +++++
 .../cxx/mobile_light/mobilenetv1_light_api.cc |  1 +
 lite/kernels/opencl/conv_image_compute.cc     | 23 ++++++++++++-------
 6 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/lite/api/paddle_api.cc b/lite/api/paddle_api.cc
index 45f2b3aff6..faed7cb943 100644
--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -215,6 +215,18 @@ ConfigBase::ConfigBase(PowerMode mode, int threads) {
 #endif
 }
 
+void ConfigBase::set_opencl_tune(bool enable_tune) {
+#ifdef LITE_WITH_OPENCL
+  if (paddle::lite_api::IsOpenCLBackendValid()) {
+    enable_opencl_tune_ = enable_tune;
+    paddle::lite::CLRuntime::Global()->set_auto_tune(enable_opencl_tune_);
+#ifdef LITE_WITH_OPENCL
+    LOG(INFO) << "auto_tune:" << paddle::lite::CLRuntime::Global()->auto_tune();
+#endif
+  }
+#endif
+}
+
 void ConfigBase::set_power_mode(paddle::lite_api::PowerMode mode) {
 #ifdef LITE_WITH_ARM
   lite::DeviceInfo::Global().SetRunMode(mode, threads_);
diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h
index c11a60a197..3dc07a5ded 100644
--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -121,6 +121,8 @@ class LITE_API ConfigBase {
   std::string model_dir_;
   int threads_{1};
   PowerMode mode_{LITE_POWER_NO_BIND};
+  // gpu
+  bool enable_opencl_tune_{false};
   // to save subgraph model for npu/xpu/...
   std::string subgraph_model_cache_dir_{""};
 
@@ -135,6 +137,9 @@ class LITE_API ConfigBase {
   // set Thread
   void set_threads(int threads);
   int threads() const { return threads_; }
+  // set GPU opencl tune
+  void set_opencl_tune(bool enable_tune);
+  bool opencl_tune() const { return enable_opencl_tune_; }
   // set subgraph_model_dir
   void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
     subgraph_model_cache_dir_ = subgraph_model_cache_dir;
diff --git a/lite/backends/opencl/cl_context.h b/lite/backends/opencl/cl_context.h
index ce2a600e43..23292eef1e 100644
--- a/lite/backends/opencl/cl_context.h
+++ b/lite/backends/opencl/cl_context.h
@@ -70,6 +70,7 @@ class CLContext {
   cl::NDRange LocalWorkSizeTuneReverse(cl::NDRange global_work_size,
                                        size_t max_work_size,
                                        int divitor = 2);
+  bool IsArmMali();
 
  private:
diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h
index 7e28130e15..7a48989e37 100644
--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -91,6 +91,10 @@ class CLRuntime {
     return is_device_avaliable_for_opencl_;
   }
 
+  void set_auto_tune(bool enable_tune) { auto_tune_ = enable_tune; }
+
+  bool auto_tune() { return auto_tune_; }
+
   bool Init();
 
   cl::Platform& platform();
@@ -195,6 +199,8 @@ class CLRuntime {
   bool is_cl_runtime_initialized_{false};
 
   bool is_platform_device_init_success_{false};
+
+  bool auto_tune_{false};
 };
 
 }  // namespace lite
diff --git a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
index 2604f104e7..6427f4c46d 100644
--- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
+++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
@@ -92,6 +92,7 @@ void RunModel(std::string model_dir,
   if (is_opencl_backend_valid) {
     // give opencl nb model dir
     config.set_model_from_file(model_dir);
+    config.set_opencl_tune(false);  // default is false
   } else {
     std::cout << "Unsupport opencl nb model." << std::endl;
     exit(1);
diff --git a/lite/kernels/opencl/conv_image_compute.cc b/lite/kernels/opencl/conv_image_compute.cc
index 083f72134e..f53c464e99 100644
--- a/lite/kernels/opencl/conv_image_compute.cc
+++ b/lite/kernels/opencl/conv_image_compute.cc
@@ -32,16 +32,24 @@ namespace opencl {
 
 void ConvImageCompute::PrepareForRun() {
   ReInitWhenNeeded();
+  auto& context = ctx_->As<OpenCLContext>();
+  CHECK(context.cl_context() != nullptr);
+  const bool is_mali = context.cl_context()->IsArmMali();
+
+  use_tune_ = CLRuntime::Global()->auto_tune();
+  if (!is_mali) {
+    use_tune_ = false;
+  }
+#ifdef LITE_WITH_LOG
+  LOG(INFO) << "use_tune_" << use_tune_;
+#endif
+
   auto filter_dims = conv_param_->filter->dims();
   filter_tensor_n_ = filter_dims[0];
   filter_tensor_c_ = filter_dims[1];
   filter_tensor_h_ = filter_dims[2];
   filter_tensor_w_ = filter_dims[3];
 
-  auto& context = ctx_->As<OpenCLContext>();
-  CHECK(context.cl_context() != nullptr);
-  const bool is_mali = context.cl_context()->IsArmMali();
-
   auto paddings = *conv_param_->paddings;
   pad_up_ = paddings[0];
   pad_down_ = paddings[1];
@@ -65,6 +73,7 @@ void ConvImageCompute::PrepareForRun() {
   bool stride_equal = stride_h_ == stride_w_;
   bool dilation_equal = dilation_h_ == dilation_w_;
 
+#ifdef LITE_WITH_LOG
   VLOG(3) << "Is arm mali / " << (is_mali ? "Yes" : "No");
   VLOG(3) << "Is relu fused? / " << (relu_fused_ ? "Yes" : "No");
   VLOG(3) << "groups:" << groups_ << " stride_h_:" << stride_h_
@@ -83,6 +92,8 @@ void ConvImageCompute::PrepareForRun() {
   VLOG(3) << "dilation_equal:" << dilation_equal;
   VLOG(3) << "padding :" << pad_up_ << " " << pad_down_ << " " << pad_left_
           << " " << pad_right_;
+#endif
+
   CHECK(pad_equal && stride_equal && dilation_equal);
   CHECK_GE(conv_param_->dilations->size(), 2);
   CHECK(dilation_h_ == dilation_w_);
@@ -91,10 +102,6 @@ void ConvImageCompute::PrepareForRun() {
   CHECK_GE(conv_param_->strides.size(), 2);
   CHECK(stride_h_ == stride_w_);
 
-  if (!is_mali) {
-    use_tune_ = false;
-  }
-
   /*********************************************
    * Upload filter, bias to opencl device
    *********************************************/
--
GitLab
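Usage note: the sketch below is a minimal example (not part of the diff above) of how the
new ConfigBase::set_opencl_tune() API might be called through the light API, modeled on the
demo change to mobilenetv1_light_api.cc in this patch. The model path
"mobilenet_v1_opencl.nb" is a placeholder; the other calls appear in the patch or in the
existing demo.

// Sketch only: enable OpenCL kernel auto-tuning via the light API.
#include <iostream>

#include "paddle_api.h"  // NOLINT

using namespace paddle::lite_api;  // NOLINT

int main() {
  if (!IsOpenCLBackendValid()) {
    std::cout << "OpenCL backend is not available on this device." << std::endl;
    return 1;
  }
  MobileConfig config;
  config.set_model_from_file("mobilenet_v1_opencl.nb");  // placeholder .nb model path
  // Turn on the OpenCL auto-tune added by this patch (default is false).
  // The setter is a no-op when Lite is built without LITE_WITH_OPENCL.
  config.set_opencl_tune(true);
  auto predictor = CreatePaddlePredictor<MobileConfig>(config);
  // Fill inputs and call predictor->Run() as in mobilenetv1_light_api.cc.
  return 0;
}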