[cherry-pick][OPENCL][API] add opencl valid api for device. test=develop (#3951) (#3960)

* [OPENCL][API] add opencl valid api for device. test=develop (#3951)

[cherry-pick][OPENCL][API] add opencl valid api for device. test=develop (#3951) (#3960)
* [OPENCL][API] add opencl valid api for device. test=develop (#3951)
37a01383 · ysh329 · GitHub · 273b1fa2 · 37a01383 · 37a01383
10 changed file
--- a/docs/demo_guides/opencl.md
+++ b/docs/demo_guides/opencl.md
@@ -37,14 +37,25 @@ rm ./lite/api/paddle_use_kernels.h
 rm ./lite/api/paddle_use_ops.h

 # 设置编译参数并开始编译
+# android-armv7:cpu+gpu+cv+extra
 ./lite/tools/build_android.sh \
  --arch=armv7 \
  --toolchain=clang \
-  --with_cv=OFF \
  --with_log=OFF \
-  --with_extra=OFF \
+  --with_extra=ON \
+  --with_cv=ON \
  --with_opencl=ON

+# android-armv8:cpu+gpu+cv+extra
+./lite/tools/build_android.sh \
+  --arch=armv8 \
+  --toolchain=clang \
+  --with_log=OFF \
+  --with_extra=ON \
+  --with_cv=ON \
+  --with_opencl=ON
+
+
 # 注：编译帮助请执行: ./lite/tools/build_android.sh help
 ```

@@ -206,7 +217,7 @@ adb shell "export GLOG_v=4; \

 ## 3. 如何在Code中使用

-即编译产物`demo/cxx/mobile_light`目录下的代码，在线版参考GitHub仓库[./lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc);
+即编译产物`demo/cxx/mobile_light`目录下的代码，在线版参考GitHub仓库[./lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc)，其中也包括判断当前设备是否支持OpenCL的方法;

 注：这里给出的链接会跳转到线上最新develop分支的代码，很可能与您本地的代码存在差异，建议参考自己本地位于`lite/demo/cxx/`目录的代码，查看如何使用。


--- a/lite/api/paddle_api.cc
+++ b/lite/api/paddle_api.cc
@@ -22,9 +22,22 @@
 #include "lite/backends/cuda/target_wrapper.h"
 #endif

+#ifdef LITE_WITH_OPENCL
+#include "lite/backends/opencl/cl_runtime.h"
+#endif
+
 namespace paddle {
 namespace lite_api {

+bool IsOpenCLBackendValid() {
+  bool opencl_valid = false;
+#ifdef LITE_WITH_OPENCL
+  opencl_valid = paddle::lite::CLRuntime::Global()->OpenCLAvaliableForDevice();
+#endif
+  LOG(INFO) << "opencl_valid:" << opencl_valid;
+  return opencl_valid;
+}
+
 Tensor::Tensor(void *raw) : raw_tensor_(raw) {}

 // TODO(Superjomn) refine this by using another `const void* const_raw`;

--- a/lite/api/paddle_api.h
+++ b/lite/api/paddle_api.h
@@ -32,6 +32,9 @@ using lod_t = std::vector<std::vector<uint64_t>>;

 enum class LiteModelType { kProtobuf = 0, kNaiveBuffer, UNK };

+// return true if current device supports OpenCL model
+LITE_API bool IsOpenCLBackendValid();
+
 struct LITE_API Tensor {
  explicit Tensor(void* raw);
  explicit Tensor(const void* raw);

--- a/lite/backends/opencl/cl_runtime.cc
+++ b/lite/backends/opencl/cl_runtime.cc
@@ -38,17 +38,20 @@ CLRuntime::~CLRuntime() {
 }

 bool CLRuntime::Init() {
-  if (initialized_) {
+  if (is_cl_runtime_initialized_) {
    return true;
  }
  bool is_platform_init = InitializePlatform();
  bool is_device_init = InitializeDevice();
-  is_init_success_ = is_platform_init && is_device_init;
-  initialized_ = true;
-
+  LOG(INFO) << "is_platform_init:" << is_platform_init;
+  LOG(INFO) << "is_device_init:" << is_device_init;
+  if ((is_platform_init == true) && (is_device_init == true)) {
+    is_platform_device_init_success_ = true;
    context_ = CreateContext();
    command_queue_ = CreateCommandQueue(context());
-  return initialized_;
+    is_cl_runtime_initialized_ = true;
+  }
+  return is_cl_runtime_initialized_;
 }

 cl::Platform& CLRuntime::platform() {
@@ -64,7 +67,9 @@ cl::Context& CLRuntime::context() {
 }

 cl::Device& CLRuntime::device() {
-  CHECK(device_ != nullptr) << "device_ is not initialized!";
+  if (device_ == nullptr) {
+    LOG(ERROR) << "device_ is not initialized!";
+  }
  return *device_;
 }

@@ -150,6 +155,14 @@ GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) {
 }

 bool CLRuntime::InitializeDevice() {
+  VLOG(3) << "device_info_.size():" << device_info_.size();
+  for (auto i : device_info_) {
+    VLOG(3) << ">>> " << i.first << " " << i.second;
+  }
+  if (device_info_.size() > 0 && device_info_.size() <= 2) {
+    return false;
+  }
+  device_info_["PLACEHOLDER"] = 1;
  // ===================== BASIC =====================
  // CL_DEVICE_TYPE_GPU
  // CL_DEVICE_NAME
@@ -160,7 +173,7 @@ bool CLRuntime::InitializeDevice() {
  status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
  CL_CHECK_ERROR(status_);
  if (all_devices.empty()) {
-    LOG(FATAL) << "No OpenCL GPU device found!";
+    LOG(ERROR) << "No available OpenCL GPU device found!";
    return false;
  }
  device_ = std::make_shared<cl::Device>();
@@ -313,9 +326,6 @@ bool CLRuntime::InitializeDevice() {
 }

 std::map<std::string, size_t>& CLRuntime::GetDeviceInfo() {
-  if (0 != device_info_.size()) {
-    return device_info_;
-  }
  InitializeDevice();
  return device_info_;
 }

--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <vector>
 #include "lite/backends/opencl/cl_include.h"
 #include "lite/backends/opencl/cl_utility.h"
+#include "lite/backends/opencl/cl_wrapper.h"

 typedef enum {
  UNKNOWN = 0,
@@ -68,6 +69,28 @@ class CLRuntime {
 public:
  static CLRuntime* Global();

+  bool OpenCLAvaliableForDevice() {
+    bool opencl_lib_found = paddle::lite::CLWrapper::Global()->OpenclLibFound();
+    LOG(INFO) << "opencl_lib_found:" << opencl_lib_found;
+    if (opencl_lib_found == false) return false;
+
+    bool dlsym_success = paddle::lite::CLWrapper::Global()->DlsymSuccess();
+    LOG(INFO) << "dlsym_success:" << dlsym_success;
+    if (opencl_lib_found == false) return false;
+
+    InitializeDevice();
+    bool support_fp16 =
+        static_cast<bool>(device_info_["CL_DEVICE_EXTENSIONS_FP16"]);
+    LOG(INFO) << "support_fp16:" << support_fp16;
+    if (support_fp16 == false) return false;
+
+    is_device_avaliable_for_opencl_ =
+        dlsym_success && opencl_lib_found && support_fp16;
+    LOG(INFO) << "is_device_avaliable_for_opencl_:"
+              << is_device_avaliable_for_opencl_;
+    return is_device_avaliable_for_opencl_;
+  }
+
  bool Init();

  cl::Platform& platform();
@@ -85,7 +108,7 @@ class CLRuntime {

  bool BuildProgram(cl::Program* program, const std::string& options = "");

-  bool IsInitSuccess() { return is_init_success_; }
+  bool IsInitSuccess() { return is_platform_device_init_success_; }

  std::string cl_path() { return cl_path_; }

@@ -167,9 +190,11 @@ class CLRuntime {

  cl_int status_{CL_SUCCESS};

-  bool initialized_{false};
+  bool is_device_avaliable_for_opencl_{false};
+
+  bool is_cl_runtime_initialized_{false};

-  bool is_init_success_{false};
+  bool is_platform_device_init_success_{false};
 };

 }  // namespace lite

--- a/lite/backends/opencl/cl_wrapper.cc
+++ b/lite/backends/opencl/cl_wrapper.cc
@@ -19,14 +19,16 @@ limitations under the License. */

 namespace paddle {
 namespace lite {
+
 CLWrapper *CLWrapper::Global() {
  static CLWrapper wrapper;
  return &wrapper;
 }

 CLWrapper::CLWrapper() {
-  CHECK(InitHandle()) << "Fail to initialize the OpenCL library!";
-  InitFunctions();
+  opencl_lib_found_ = InitHandle();
+  CHECK(opencl_lib_found_) << "Fail to initialize the OpenCL library!";
+  dlsym_success_ = InitFunctions();
 }

 bool CLWrapper::InitHandle() {
@@ -68,15 +70,17 @@ bool CLWrapper::InitHandle() {
  }
 }

-void CLWrapper::InitFunctions() {
+bool CLWrapper::InitFunctions() {
  CHECK(handle_ != nullptr) << "The library handle can't be null!";
+  bool dlsym_success = true;

 #define PADDLE_DLSYM(cl_func)                                        \
  do {                                                               \
    cl_func##_ = (cl_func##Type)dlsym(handle_, #cl_func);            \
    if (cl_func##_ == nullptr) {                                     \
-      LOG(FATAL) << "Cannot find the " << #cl_func                   \
+      LOG(ERROR) << "Cannot find the " << #cl_func                   \
                 << " symbol in libOpenCL.so!";                      \
+      dlsym_success = false;                                         \
      break;                                                         \
    }                                                                \
    VLOG(4) << "Loaded the " << #cl_func << " symbol successfully."; \
@@ -137,6 +141,7 @@ void CLWrapper::InitFunctions() {
  PADDLE_DLSYM(clEnqueueCopyImage);

 #undef PADDLE_DLSYM
+  return dlsym_success;
 }

 }  // namespace lite

--- a/lite/backends/opencl/cl_wrapper.h
+++ b/lite/backends/opencl/cl_wrapper.h
@@ -508,13 +508,20 @@ class CLWrapper final {
    return clEnqueueCopyImage_;
  }

+  bool OpenclLibFound() { return opencl_lib_found_; }
+
+  bool DlsymSuccess() { return dlsym_success_; }
+
 private:
  CLWrapper();
  CLWrapper(const CLWrapper &) = delete;
  CLWrapper &operator=(const CLWrapper &) = delete;
  bool InitHandle();
-  void InitFunctions();
+  bool InitFunctions();
+  bool opencl_lib_found_{true};
+  bool dlsym_success_{true};
  void *handle_{nullptr};
+
  clGetPlatformIDsType clGetPlatformIDs_{nullptr};
  clGetPlatformInfoType clGetPlatformInfo_{nullptr};
  clBuildProgramType clBuildProgram_{nullptr};

--- a/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
+++ b/lite/demo/cxx/mobile_light/mobilenetv1_light_api.cc
@@ -78,6 +78,28 @@ void RunModel(std::string model_dir,
  // 1. Set MobileConfig
  MobileConfig config;
  config.set_model_from_file(model_dir);
+
+  // NOTE: Use android gpu with opencl, you should ensure:
+  //  first, [compile **cpu+opencl** paddlelite
+  //    lib](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/opencl.md);
+  //  second, [convert and use opencl nb
+  //    model](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/user_guides/opt/opt_bin.md).
+  //
+  /*  Uncomment code below to enable OpenCL
+  bool is_opencl_backend_valid = ::IsOpenCLBackendValid();
+  std::cout << "is_opencl_backend_valid:" << is_opencl_backend_valid <<
+  std::endl;
+  if (is_opencl_backend_valid) {
+    // give opencl nb model dir
+    config.set_model_from_file(model_dir);
+  } else {
+    std::cout << "Unsupport opencl nb model." << std::endl;
+    exit(1);
+    // you can give backup cpu nb model instead
+    // config.set_model_from_file(cpu_nb_model_dir);
+  }
+  */
+
  // NOTE: To load model transformed by model_optimize_tool before
  // release/v2.3.0, plese use `set_model_dir` API as listed below.
  // config.set_model_dir(model_dir);

--- a/lite/tools/build.sh
+++ b/lite/tools/build.sh
@@ -40,8 +40,8 @@ readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/t
 readonly workspace=$PWD

 # if operating in mac env, we should expand the maximum file num
-os_nmae=`uname -s`
-if [ ${os_nmae} == "Darwin" ]; then
+os_name=`uname -s`
+if [ ${os_name} == "Darwin" ]; then
   ulimit -n 1024
 fi


--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -19,8 +19,8 @@ NUM_CORES_FOR_COMPILE=${LITE_BUILD_THREADS:-8}
 USE_ADB_EMULATOR=ON

 # if operating in mac env, we should expand the maximum file num
-os_nmae=`uname -s`
-if [ ${os_nmae} == "Darwin" ]; then
+os_name=`uname -s`
+if [ ${os_name} == "Darwin" ]; then
   ulimit -n 1024
 fi