From 01e326423d38eb89404a80caccd08c5d0642c6f3 Mon Sep 17 00:00:00 2001
From: Yanzhan Yang
Date: Thu, 29 Aug 2019 17:35:09 +0800
Subject: [PATCH] refine toolchain test=develop (#1904)

* refine toolchain test=develop

* fix wrap compilation error

* fix yolov3 armv8 compilation test=develop

* revert to armv7 as default test=develop

* fix fpga compilation test=develop
---
 mobile/CMakeLists.txt                         |  9 ++-
 mobile/src/io/paddle_mobile_wrap.cpp          | 38 +++++++++
 .../src/operators/kernel/cl/relu6_kernel.cpp  | 10 ++-
 mobile/test/CMakeLists.txt                    |  5 ++
 mobile/tools/build.sh                         | 13 +--
 mobile/tools/docker_build_fpga.sh             |  2 +-
 mobile/tools/python/fluidtools/run.py         | 80 +++++++++++++++++--
 mobile/tools/shell/change_mobile_namespace.sh | 10 +--
 8 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/mobile/CMakeLists.txt b/mobile/CMakeLists.txt
index d34e9738a5..00a53035a1 100644
--- a/mobile/CMakeLists.txt
+++ b/mobile/CMakeLists.txt
@@ -14,7 +14,7 @@ option(WITH_TEST "build with unit tests" ON)
 
 # select platform: CPU, GPU_CL, FPGA
 option(CPU "build with arm CPU support" ON)
-option(GPU_CL "build with OpenCL support" OFF)
+option(GPU_CL "build with OpenCL support" ON)
 option(FPGA "build with FPGA support" OFF)
 if(FPGA)
   option(FPGAV1 "build with fpga v1 support" ON)
@@ -91,7 +91,12 @@ if (GPU_CL)
 
   # opencl version
   add_definitions(-DCL_TARGET_OPENCL_VERSION=220)
-  link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL.so)
+  if (ANDROID_ABI STREQUAL "arm64-v8a")
+    link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL-64.so)
+  else ()
+    link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL.so)
+  endif ()
+
   include_directories(third_party/opencl/OpenCL-Headers)
 else()
   file(GLOB_RECURSE _tmp_list src/framework/cl/*.cpp src/operators/kernel/cl/*.cpp)
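For context: the hunk above keeps `add_definitions(-DCL_TARGET_OPENCL_VERSION=220)` and only switches which OpenCL stub library is linked per ABI. The define matters because the OpenCL-Headers emit a warning (and default the target version) unless the target version is declared before cl.h is included. A minimal standalone sketch, not part of this patch, illustrating both points:

// Standalone sketch (not from this repo): declare the OpenCL target version
// before including cl.h, exactly what the add_definitions() line injects.
#define CL_TARGET_OPENCL_VERSION 220
#include <CL/cl.h>
#include <cstdio>

int main() {
  cl_uint num_platforms = 0;
  // clGetPlatformIDs is the usual smoke test that the stub library
  // (libOpenCL.so, or libOpenCL-64.so on arm64-v8a) resolved at runtime.
  cl_int status = clGetPlatformIDs(0, NULL, &num_platforms);
  std::printf("OpenCL platforms: %u (status %d)\n", num_platforms, status);
  return status == CL_SUCCESS ? 0 : 1;
}

Compile with the stub library on the link line (e.g. -lOpenCL); on device the loader then resolves the vendor driver.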
diff --git a/mobile/src/io/paddle_mobile_wrap.cpp b/mobile/src/io/paddle_mobile_wrap.cpp
index 4cf70ff52c..b8fd3097e2 100644
--- a/mobile/src/io/paddle_mobile_wrap.cpp
+++ b/mobile/src/io/paddle_mobile_wrap.cpp
@@ -72,11 +72,13 @@ void Net::SetThreadNum(int threads) {
 }
 
 void Net::SetCLPath(std::string path) {
+#ifdef PADDLE_MOBILE_CL
   if (this->device_ == kGPU_CL) {
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     engine->SetCLPath(path);
   }
+#endif
 }
 
 bool Net::Load(const std::string &dirname, const bool optimize,
@@ -91,6 +93,7 @@ bool Net::Load(const std::string &dirname, const bool optimize,
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -98,6 +101,9 @@ bool Net::Load(const std::string &dirname, const bool optimize,
           engine->Load(dirname, optimize, quantification, batch_size, lod_mode);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
 
 bool Net::Load(const std::string &model_path, const std::string &para_path,
@@ -115,6 +121,7 @@ bool Net::Load(const std::string &model_path, const std::string &para_path,
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -123,6 +130,9 @@ bool Net::Load(const std::string &model_path, const std::string &para_path,
                        batch_size, lod_mode);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -142,6 +152,7 @@ bool Net::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
       return status;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -150,6 +161,9 @@ bool Net::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
                                      optimize, quantification, batch_size, lod_mode);
       return status;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -164,12 +178,16 @@ std::vector<float> Net::Predict(const std::vector<float> &input,
       return result;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
       auto result = engine->Predict(input, dims);
       return result;
     }
+#else
+    return std::vector<float>();
+#endif
   }
   return std::vector<float>();
 }
@@ -183,12 +201,16 @@ bool Net::Predict() {
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
    auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
       paddle_mobile::PMStatus status = engine->Predict();
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -208,6 +230,7 @@ bool Net::Predict(const Tensor &input) {
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -220,6 +243,9 @@ bool Net::Predict(const Tensor &input) {
       paddle_mobile::PMStatus status = engine->Predict(input_inner);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -238,6 +264,7 @@ void Net::Feed(const std::string &var_name, const Tensor &input) {
       engine->Feed(var_name, input_inner);
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -249,6 +276,9 @@ void Net::Feed(const std::string &var_name, const Tensor &input) {
           paddle_mobile::framework::make_ddim(input_dims_as_vector));
       engine->Feed(var_name, input_inner);
     }
+#else
+    return;
+#endif
   }
 }
@@ -269,6 +299,7 @@ std::shared_ptr<Tensor> Net::Fetch(const std::string &var_name) {
       return ptr;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -283,6 +314,9 @@ std::shared_ptr<Tensor> Net::Fetch(const std::string &var_name) {
       std::shared_ptr<Tensor> ptr(new Tensor(output_data, ddim));
       return ptr;
     }
+#else
+    return nullptr;
+#endif
   }
   return nullptr;
 }
@@ -295,8 +329,10 @@ Net::Net(DeviceTypeEnum device) {
       this->engine_ =
           new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>(config);
     } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
       this->engine_ =
          new paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float>(config);
+#endif
     }
   }
 }
@@ -309,10 +345,12 @@ Net::~Net() {
       delete engine;
       this->engine_ = nullptr;
     } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
       auto engine =
           (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
       delete engine;
       this->engine_ = nullptr;
+#endif
    }
  }
 }
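The effect of these guards is that every kGPU_CL branch compiles out of a CPU-only build, with Load/Predict degrading to `return false` instead of producing link errors against the OpenCL engine. A hedged usage sketch of the wrapper from the caller's side; the header path, namespace, and the kCPU enumerator are assumptions based on the symbols visible in this diff, not confirmed API:

// Hedged sketch, assuming paddle_mobile_wrap.h exposes Net, kCPU and kGPU_CL.
#include "io/paddle_mobile_wrap.h"

#include <string>

bool RunOnBestDevice(const std::string &model_dir) {
  // Try the GPU engine first; in a build without PADDLE_MOBILE_CL the
  // guarded branches above make Load() a plain `return false`.
  Net gpu_net(kGPU_CL);
  gpu_net.SetCLPath("/data/local/tmp");  // no-op when CL is compiled out
  if (gpu_net.Load(model_dir, true /* optimize */, false /* quantification */,
                   1 /* batch_size */, false /* lod_mode */)) {
    return gpu_net.Predict();
  }
  // Fall back to the CPU engine, which is always compiled in.
  Net cpu_net(kCPU);
  return cpu_net.Load(model_dir, true, false, 1, false) && cpu_net.Predict();
}

Returning false rather than failing at link time is what lets one wrapper binary serve both CPU-only and GPU_CL builds.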
diff --git a/mobile/src/operators/kernel/cl/relu6_kernel.cpp b/mobile/src/operators/kernel/cl/relu6_kernel.cpp
index 06167e8075..20a6d9815b 100644
--- a/mobile/src/operators/kernel/cl/relu6_kernel.cpp
+++ b/mobile/src/operators/kernel/cl/relu6_kernel.cpp
@@ -33,9 +33,13 @@ void Relu6Kernel<GPU_CL, float>::Compute(const Relu6Param<GPU_CL, float>& param) {
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
   auto inputImage = input->GetCLImage();
   auto outputImage = output->GetCLImage();
-  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
-  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
-  clSetKernelArg(kernel, 2, sizeof(cl_mem), &threshold);
+  cl_int status;
+  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
+  CL_CHECK_ERRORS(status);
+  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
+  CL_CHECK_ERRORS(status);
+  status = clSetKernelArg(kernel, 2, sizeof(float), &threshold);
+  CL_CHECK_ERRORS(status);
   const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
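Two bugs are fixed here at once. The threshold is a float scalar, so its argument size is sizeof(float), not sizeof(cl_mem); per the OpenCL spec, a mismatched size makes clSetKernelArg fail with CL_INVALID_ARG_SIZE, and because the old code discarded the return value, that failure was silent. The patch therefore also checks each status with CL_CHECK_ERRORS, which is defined elsewhere in this repo. A hedged approximation of what such a macro typically looks like (the repo's actual definition may differ):

// Hedged approximation of a CL_CHECK_ERRORS-style macro, not the repo's
// verbatim definition: log the error code and source location on failure.
#include <CL/cl.h>
#include <cstdio>

#define CL_CHECK_ERRORS(ERR)                                       \
  do {                                                             \
    if ((ERR) != CL_SUCCESS) {                                     \
      std::fprintf(stderr, "OpenCL error %d at %s:%d\n", (ERR),    \
                   __FILE__, __LINE__);                            \
    }                                                              \
  } while (0)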
diff --git a/mobile/test/CMakeLists.txt b/mobile/test/CMakeLists.txt
index 1b6675f43e..056ede3fb9 100644
--- a/mobile/test/CMakeLists.txt
+++ b/mobile/test/CMakeLists.txt
@@ -3,6 +3,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
 set(FOUND_MATCH OFF)
 set(ENABLE_ALL_TEST ON)
 
+if (ANDROID_ABI STREQUAL "arm64-v8a")
+    message("using google's linker to link armv8 binary")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
+endif ()
+
 set(CON -1)
 
 message(STATUS "nets :${NET}")
diff --git a/mobile/tools/build.sh b/mobile/tools/build.sh
index 8f3a17ef7b..877791ff7b 100755
--- a/mobile/tools/build.sh
+++ b/mobile/tools/build.sh
@@ -3,10 +3,13 @@ NETS=""
 declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd" "nlp" "mobilenetfssd" "genet" "super" "op")
 
 # merge cl to so
-merge_cl_to_so=0
-rm ../src/operators/kernel/cl/opencl_kernels.cpp
+merge_cl_to_so=1
+opencl_kernels="opencl_kernels.cpp"
 cd ../src/operators/kernel/cl
-python gen_code.py $merge_cl_to_so > opencl_kernels.cpp
+if [[ -f "${opencl_kernels}" ]]; then
+    rm "${opencl_kernels}"
+fi
+python gen_code.py "${merge_cl_to_so}" > "${opencl_kernels}"
 cd -
 
 build_for_mac() {
@@ -40,7 +43,7 @@ build_for_mac() {
 }
 
 build_for_android() {
-    #rm -rf "../build"
+    # rm -rf "../build"
     if [ -z "${NDK_ROOT}" ]; then
         echo "NDK_ROOT not found!"
         exit -1
@@ -48,7 +51,7 @@ build_for_android() {
     if [ -z "$PLATFORM" ]; then
         PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
-# PLATFORM="arm-v8a"
+        # PLATFORM="arm-v8a"
     fi
 
     if [ "${PLATFORM}" = "arm-v7a" ]; then
diff --git a/mobile/tools/docker_build_fpga.sh b/mobile/tools/docker_build_fpga.sh
index e8239318a4..9ca9406f43 100644
--- a/mobile/tools/docker_build_fpga.sh
+++ b/mobile/tools/docker_build_fpga.sh
@@ -4,4 +4,4 @@ apt-get update
 apt-get install -y gcc g++ cmake
 
 cd /workspace && mkdir build
-cd build && cmake .. -DCPU=OFF -DFPGA=ON && make -j4
+cd build && cmake .. -DCPU=OFF -DGPU_CL=OFF -DFPGA=ON && make -j4
diff --git a/mobile/tools/python/fluidtools/run.py b/mobile/tools/python/fluidtools/run.py
index 06ffdd11b9..5da371f8d9 100644
--- a/mobile/tools/python/fluidtools/run.py
+++ b/mobile/tools/python/fluidtools/run.py
@@ -10,7 +10,7 @@ model_path = "model"
 checked_model_path = "checked_model"
 feed_path = "feeds"
 output_path = "outputs"
-diff_threshold = 0.01
+diff_threshold = 0.1
 is_lod = False
 mobile_model_path = ""
 fast_check = False
@@ -22,6 +22,8 @@ checked_encrypt_model_path = "checked_encrypt_model"
 output_var_filter = []
 output_key_filter = {}
 check_shape = False
+architecture = "arm-v7a"
+# architecture = "arm-v8a"
 
 np.set_printoptions(linewidth=150)
@@ -437,6 +439,8 @@ def check_mobile_results(args, fuse, mem_opt):
             continue
         if not op_output_var_name in mobile_var_cache:
             continue
+        if op_output_var_name not in fetch_names:
+            continue
         values1 = output_var_cache[op_output_var_name]
         values2 = mobile_var_cache[op_output_var_name]
         shape = get_var_shape(op_output_var_name) if check_shape else []
@@ -473,12 +477,78 @@ def check_mobile_results(args, fuse, mem_opt):
         error_values1 = np.array(error_values1)
         error_values2 = np.array(error_values2)
         # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}")
-        pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format(
-            error_index,op_cache[error_index][1].type,op_output_var_name), 1)
+        pp_red("outputs are incorrect", 1)
         pp_red("fluid results are : ", 1)
         pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1)
         pp_yellow("paddle mobile results are : ", 1)
         pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
+    if not fuse and not mem_opt:
+        error_index = None
+        error_values1 = None
+        error_values2 = None
+        checked_names = []
+        fetch_names = []
+        for fetch in fetches:
+            fetch_names.append(fetch.name)
+        for index in op_cache:
+            op_output_var_name, op = op_cache[index]
+            if mem_opt:
+                found_in_fetch = False
+                for fetch in fetches:
+                    if op_output_var_name == fetch.name:
+                        found_in_fetch = True
+                        break
+                if not found_in_fetch:
+                    continue
+            if not op_output_var_name in output_var_cache:
+                continue
+            if not op_output_var_name in mobile_var_cache:
+                continue
+            if fuse or mem_opt:
+                if op_output_var_name not in fetch_names:
+                    continue
+            values1 = output_var_cache[op_output_var_name]
+            values2 = mobile_var_cache[op_output_var_name]
+            shape = get_var_shape(op_output_var_name) if check_shape else []
+            if len(values1) + len(shape) != len(values2):
+                error_index = index
+            for i in range(len(shape)):
+                v1 = shape[i]
+                v2 = values2[i]
+                if v1 != v2:
+                    error_index = index
+                    break
+            if error_index == None:
+                for i in range(len(values1)):
+                    v1 = values1[i]
+                    v2 = values2[len(shape) + i]
+                    if abs(v1 - v2) > diff_threshold:
+                        error_index = index
+                        break
+            checked_names.append(op_output_var_name)
+            if error_index != None:
+                error_values1 = values1
+                error_values2 = values2
+                break
+        if error_index == None:
+            for name in fetch_names:
+                if name not in checked_names:
+                    error_index = -1
+                    break
+        if error_index == None:
+            pp_green("outputs are all correct", 1)
+        elif error_index == -1:
+            pp_red("outputs are missing")
+        else:
+            error_values1 = np.array(error_values1)
+            error_values2 = np.array(error_values2)
+            # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}")
+            pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format(
+                error_index,op_cache[error_index][1].type,op_output_var_name), 1)
+            pp_red("fluid results are : ", 1)
+            pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1)
+            pp_yellow("paddle mobile results are : ", 1)
+            pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
 
     # print(output_var_cache)
     # print(mobile_var_cache)
@@ -535,8 +605,8 @@ def main():
     pp_yellow(dot + " start inspecting paddle mobile correctness & performance")
     push(checked_model_path)
     push(feed_path + "/" + last_feed_file_name, "input.txt")
-    push(mobile_src_root + "/build/release/arm-v7a/build/libpaddle-mobile.so")
-    push(mobile_src_root + "/build/release/arm-v7a/build/cl_kernel")
+    push(mobile_src_root + "/build/release/{}/build/libpaddle-mobile.so".format(architecture))
+    push(mobile_src_root + "/build/release/{}/build/cl_kernel".format(architecture))
     push(mobile_src_root + "/test/build/test-net")
     last_feed_var_shape = get_feed_var_shape(last_feed_var_name)
     args = str(len(last_feed_var_shape))
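For context: run.py validates each output variable element-wise against the fluid reference, flagging a mismatch when any absolute difference exceeds diff_threshold (loosened here from 0.01 to 0.1). The same rule expressed as a hedged C++ sketch; outputs_match and its parameter names are hypothetical, not part of the patch:

// Hedged sketch of run.py's comparison rule; names are hypothetical.
#include <cmath>
#include <cstddef>
#include <vector>

bool outputs_match(const std::vector<float> &fluid_vals,
                   const std::vector<float> &mobile_vals,
                   double diff_threshold /* run.py now uses 0.1 */) {
  // A length mismatch is itself an error (run.py compares lengths first).
  if (fluid_vals.size() != mobile_vals.size()) return false;
  for (std::size_t i = 0; i < fluid_vals.size(); ++i) {
    // Mirrors `abs(v1 - v2) > diff_threshold` in check_mobile_results.
    if (std::fabs(fluid_vals[i] - mobile_vals[i]) > diff_threshold) {
      return false;
    }
  }
  return true;
}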
: ", 1) + pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1) + pp_yellow("paddle mobile results are : ", 1) + pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) # print(output_var_cache) # print(mobile_var_cache) @@ -535,8 +605,8 @@ def main(): pp_yellow(dot + " start inspecting paddle mobile correctness & performance") push(checked_model_path) push(feed_path + "/" + last_feed_file_name, "input.txt") - push(mobile_src_root + "/build/release/arm-v7a/build/libpaddle-mobile.so") - push(mobile_src_root + "/build/release/arm-v7a/build/cl_kernel") + push(mobile_src_root + "/build/release/{}/build/libpaddle-mobile.so".format(architecture)) + push(mobile_src_root + "/build/release/{}/build/cl_kernel".format(architecture)) push(mobile_src_root + "/test/build/test-net") last_feed_var_shape = get_feed_var_shape(last_feed_var_name) args = str(len(last_feed_var_shape)) diff --git a/mobile/tools/shell/change_mobile_namespace.sh b/mobile/tools/shell/change_mobile_namespace.sh index 5de8744844..aaad6ac193 100755 --- a/mobile/tools/shell/change_mobile_namespace.sh +++ b/mobile/tools/shell/change_mobile_namespace.sh @@ -31,9 +31,9 @@ else echo "converting ${file}" convert $extension $file done - # for file in $(find test -name "*\.*") - # do - # echo "converting ${file}" - # convert $extension $file - # done + for file in $(find test -name "*\.*") + do + echo "converting ${file}" + convert $extension $file + done fi -- GitLab