From 01e326423d38eb89404a80caccd08c5d0642c6f3 Mon Sep 17 00:00:00 2001
From: Yanzhan Yang
Date: Thu, 29 Aug 2019 17:35:09 +0800
Subject: [PATCH] refine toolchain test=develop (#1904)

* refine toolchain test=develop

* fix wrap compilation error

* fix yolov3 armv8 compilation test=develop

* revert to armv7 as default test=develop

* fix fpga compilation test=develop
---
 mobile/CMakeLists.txt                         |  9 ++-
 mobile/src/io/paddle_mobile_wrap.cpp          | 38 +++++++++
 .../src/operators/kernel/cl/relu6_kernel.cpp  | 10 ++-
 mobile/test/CMakeLists.txt                    |  5 ++
 mobile/tools/build.sh                         | 13 +--
 mobile/tools/docker_build_fpga.sh             |  2 +-
 mobile/tools/python/fluidtools/run.py         | 80 +++++++++++++++++--
 mobile/tools/shell/change_mobile_namespace.sh | 10 +--
 8 files changed, 146 insertions(+), 21 deletions(-)

diff --git a/mobile/CMakeLists.txt b/mobile/CMakeLists.txt
index d34e9738a5..00a53035a1 100644
--- a/mobile/CMakeLists.txt
+++ b/mobile/CMakeLists.txt
@@ -14,7 +14,7 @@ option(WITH_TEST "build with unit tests" ON)
 
 # select platform: CPU, GPU_CL, FPGA
 option(CPU "build with arm CPU support" ON)
-option(GPU_CL "build with OpenCL support" OFF)
+option(GPU_CL "build with OpenCL support" ON)
 option(FPGA "build with FPGA support" OFF)
 if(FPGA)
   option(FPGAV1 "build with fpga v1 support" ON)
@@ -91,7 +91,12 @@ if (GPU_CL)
 
   # opencl version
   add_definitions(-DCL_TARGET_OPENCL_VERSION=220)
-  link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL.so)
+  if (ANDROID_ABI STREQUAL "arm64-v8a")
+    link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL-64.so)
+  else ()
+    link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL.so)
+  endif ()
+
   include_directories(third_party/opencl/OpenCL-Headers)
 else()
   file(GLOB_RECURSE _tmp_list src/framework/cl/*.cpp src/operators/kernel/cl/*.cpp)
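For context: the hunk above keeps `add_definitions(-DCL_TARGET_OPENCL_VERSION=220)` and only switches which OpenCL stub library is linked per ABI. The define matters because the OpenCL-Headers emit a warning (and default the target version) unless the target version is declared before cl.h is included. A minimal standalone sketch, not part of this patch, illustrating both points:

// Standalone sketch (not from this repo): declare the OpenCL target version
// before including cl.h, exactly what the add_definitions() line injects.
#define CL_TARGET_OPENCL_VERSION 220
#include <CL/cl.h>
#include <cstdio>

int main() {
  cl_uint num_platforms = 0;
  // clGetPlatformIDs is the usual smoke test that the stub library
  // (libOpenCL.so, or libOpenCL-64.so on arm64-v8a) resolved at runtime.
  cl_int status = clGetPlatformIDs(0, NULL, &num_platforms);
  std::printf("OpenCL platforms: %u (status %d)\n", num_platforms, status);
  return status == CL_SUCCESS ? 0 : 1;
}

Compile with the stub library on the link line (e.g. -lOpenCL); on device the loader then resolves the vendor driver.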
diff --git a/mobile/src/io/paddle_mobile_wrap.cpp b/mobile/src/io/paddle_mobile_wrap.cpp
index 4cf70ff52c..b8fd3097e2 100644
--- a/mobile/src/io/paddle_mobile_wrap.cpp
+++ b/mobile/src/io/paddle_mobile_wrap.cpp
@@ -72,11 +72,13 @@ void Net::SetThreadNum(int threads) {
 }
 
 void Net::SetCLPath(std::string path) {
+#ifdef PADDLE_MOBILE_CL
   if (this->device_ == kGPU_CL) {
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     engine->SetCLPath(path);
   }
+#endif
 }
 
 bool Net::Load(const std::string &dirname, const bool optimize,
@@ -91,6 +93,7 @@ bool Net::Load(const std::string &dirname, const bool optimize,
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -98,6 +101,9 @@ bool Net::Load(const std::string &dirname, const bool optimize,
           engine->Load(dirname, optimize, quantification, batch_size, lod_mode);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
 
 bool Net::Load(const std::string &model_path, const std::string &para_path,
@@ -115,6 +121,7 @@ bool Net::Load(const std::string &model_path, const std::string &para_path,
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -123,6 +130,9 @@ bool Net::Load(const std::string &model_path, const std::string &para_path,
                        batch_size, lod_mode);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -142,6 +152,7 @@ bool Net::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
       return status;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -150,6 +161,9 @@ bool Net::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
                                      optimize, quantification, batch_size, lod_mode);
       return status;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -164,12 +178,16 @@ std::vector<float> Net::Predict(const std::vector<float> &input,
       return result;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
       auto result = engine->Predict(input, dims);
       return result;
     }
+#else
+    return std::vector<float>();
+#endif
   }
   return std::vector<float>();
 }
@@ -183,12 +201,16 @@ bool Net::Predict() {
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
    auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
       paddle_mobile::PMStatus status = engine->Predict();
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -208,6 +230,7 @@ bool Net::Predict(const Tensor &input) {
       return status == paddle_mobile::PMSuccess;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -220,6 +243,9 @@ bool Net::Predict(const Tensor &input) {
       paddle_mobile::PMStatus status = engine->Predict(input_inner);
       return status == paddle_mobile::PMSuccess;
     }
+#else
+    return false;
+#endif
   }
   return false;
 }
@@ -238,6 +264,7 @@ void Net::Feed(const std::string &var_name, const Tensor &input) {
       engine->Feed(var_name, input_inner);
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -249,6 +276,9 @@ void Net::Feed(const std::string &var_name, const Tensor &input) {
           paddle_mobile::framework::make_ddim(input_dims_as_vector));
       engine->Feed(var_name, input_inner);
     }
+#else
+    return;
+#endif
   }
 }
@@ -269,6 +299,7 @@ std::shared_ptr<Tensor> Net::Fetch(const std::string &var_name) {
       return ptr;
     }
   } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
     auto engine =
         (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
     if (engine != nullptr) {
@@ -283,6 +314,9 @@ std::shared_ptr<Tensor> Net::Fetch(const std::string &var_name) {
       std::shared_ptr<Tensor> ptr(new Tensor(output_data, ddim));
       return ptr;
     }
+#else
+    return nullptr;
+#endif
   }
   return nullptr;
 }
@@ -295,8 +329,10 @@ Net::Net(DeviceTypeEnum device) {
       this->engine_ =
           new paddle_mobile::PaddleMobile<paddle_mobile::CPU, float>(config);
     } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
       this->engine_ =
          new paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float>(config);
+#endif
     }
   }
 }
@@ -309,10 +345,12 @@ Net::~Net() {
       delete engine;
       this->engine_ = nullptr;
     } else if (this->device_ == kGPU_CL) {
+#ifdef PADDLE_MOBILE_CL
       auto engine =
           (paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL, float> *)this->engine_;
       delete engine;
       this->engine_ = nullptr;
+#endif
    }
  }
 }
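The effect of these guards is that every kGPU_CL branch compiles out of a CPU-only build, with Load/Predict degrading to `return false` instead of producing link errors against the OpenCL engine. A hedged usage sketch of the wrapper from the caller's side; the header path, namespace, and the kCPU enumerator are assumptions based on the symbols visible in this diff, not confirmed API:

// Hedged sketch, assuming paddle_mobile_wrap.h exposes Net, kCPU and kGPU_CL.
#include "io/paddle_mobile_wrap.h"

#include <string>

bool RunOnBestDevice(const std::string &model_dir) {
  // Try the GPU engine first; in a build without PADDLE_MOBILE_CL the
  // guarded branches above make Load() a plain `return false`.
  Net gpu_net(kGPU_CL);
  gpu_net.SetCLPath("/data/local/tmp");  // no-op when CL is compiled out
  if (gpu_net.Load(model_dir, true /* optimize */, false /* quantification */,
                   1 /* batch_size */, false /* lod_mode */)) {
    return gpu_net.Predict();
  }
  // Fall back to the CPU engine, which is always compiled in.
  Net cpu_net(kCPU);
  return cpu_net.Load(model_dir, true, false, 1, false) && cpu_net.Predict();
}

Returning false rather than failing at link time is what lets one wrapper binary serve both CPU-only and GPU_CL builds.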
diff --git a/mobile/src/operators/kernel/cl/relu6_kernel.cpp b/mobile/src/operators/kernel/cl/relu6_kernel.cpp
index 06167e8075..20a6d9815b 100644
--- a/mobile/src/operators/kernel/cl/relu6_kernel.cpp
+++ b/mobile/src/operators/kernel/cl/relu6_kernel.cpp
@@ -33,9 +33,13 @@ void Relu6Kernel<GPU_CL, float>::Compute(const Relu6Param<GPU_CL, float>& param) {
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
   auto inputImage = input->GetCLImage();
   auto outputImage = output->GetCLImage();
-  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
-  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
-  clSetKernelArg(kernel, 2, sizeof(cl_mem), &threshold);
+  cl_int status;
+  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
+  CL_CHECK_ERRORS(status);
+  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
+  CL_CHECK_ERRORS(status);
+  status = clSetKernelArg(kernel, 2, sizeof(float), &threshold);
+  CL_CHECK_ERRORS(status);
   const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
 
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
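Two bugs are fixed here at once. The threshold is a float scalar, so its argument size is sizeof(float), not sizeof(cl_mem); per the OpenCL spec, a mismatched size makes clSetKernelArg fail with CL_INVALID_ARG_SIZE, and because the old code discarded the return value, that failure was silent. The patch therefore also checks each status with CL_CHECK_ERRORS, which is defined elsewhere in this repo. A hedged approximation of what such a macro typically looks like (the repo's actual definition may differ):

// Hedged approximation of a CL_CHECK_ERRORS-style macro, not the repo's
// verbatim definition: log the error code and source location on failure.
#include <CL/cl.h>
#include <cstdio>

#define CL_CHECK_ERRORS(ERR)                                       \
  do {                                                             \
    if ((ERR) != CL_SUCCESS) {                                     \
      std::fprintf(stderr, "OpenCL error %d at %s:%d\n", (ERR),    \
                   __FILE__, __LINE__);                            \
    }                                                              \
  } while (0)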
diff --git a/mobile/test/CMakeLists.txt b/mobile/test/CMakeLists.txt
index 1b6675f43e..056ede3fb9 100644
--- a/mobile/test/CMakeLists.txt
+++ b/mobile/test/CMakeLists.txt
@@ -3,6 +3,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
 set(FOUND_MATCH OFF)
 set(ENABLE_ALL_TEST ON)
 
+if (ANDROID_ABI STREQUAL "arm64-v8a")
+    message("using google's linker to link armv8 binary")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
+endif ()
+
 set(CON -1)
 
 message(STATUS "nets :${NET}")
diff --git a/mobile/tools/build.sh b/mobile/tools/build.sh
index 8f3a17ef7b..877791ff7b 100755
--- a/mobile/tools/build.sh
+++ b/mobile/tools/build.sh
@@ -3,10 +3,13 @@ NETS=""
 declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd" "nlp" "mobilenetfssd" "genet" "super" "op")
 
 # merge cl to so
-merge_cl_to_so=0
-rm ../src/operators/kernel/cl/opencl_kernels.cpp
+merge_cl_to_so=1
+opencl_kernels="opencl_kernels.cpp"
 cd ../src/operators/kernel/cl
-python gen_code.py $merge_cl_to_so > opencl_kernels.cpp
+if [[ -f "${opencl_kernels}" ]]; then
+    rm "${opencl_kernels}"
+fi
+python gen_code.py "${merge_cl_to_so}" > "${opencl_kernels}"
 cd -
 
 build_for_mac() {
@@ -40,7 +43,7 @@ build_for_mac() {
 }
 
 build_for_android() {
-    #rm -rf "../build"
+    # rm -rf "../build"
     if [ -z "${NDK_ROOT}" ]; then
         echo "NDK_ROOT not found!"
         exit -1
@@ -48,7 +51,7 @@ build_for_android() {
     if [ -z "$PLATFORM" ]; then
         PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform.
-# PLATFORM="arm-v8a"
+        # PLATFORM="arm-v8a"
     fi
 
     if [ "${PLATFORM}" = "arm-v7a" ]; then
diff --git a/mobile/tools/docker_build_fpga.sh b/mobile/tools/docker_build_fpga.sh
index e8239318a4..9ca9406f43 100644
--- a/mobile/tools/docker_build_fpga.sh
+++ b/mobile/tools/docker_build_fpga.sh
@@ -4,4 +4,4 @@ apt-get update
 apt-get install -y gcc g++ cmake
 
 cd /workspace && mkdir build
-cd build && cmake .. -DCPU=OFF -DFPGA=ON && make -j4
+cd build && cmake .. -DCPU=OFF -DGPU_CL=OFF -DFPGA=ON && make -j4
diff --git a/mobile/tools/python/fluidtools/run.py b/mobile/tools/python/fluidtools/run.py
index 06ffdd11b9..5da371f8d9 100644
--- a/mobile/tools/python/fluidtools/run.py
+++ b/mobile/tools/python/fluidtools/run.py
@@ -10,7 +10,7 @@ model_path = "model"
 checked_model_path = "checked_model"
 feed_path = "feeds"
 output_path = "outputs"
-diff_threshold = 0.01
+diff_threshold = 0.1
 is_lod = False
 mobile_model_path = ""
 fast_check = False
@@ -22,6 +22,8 @@ checked_encrypt_model_path = "checked_encrypt_model"
 output_var_filter = []
 output_key_filter = {}
 check_shape = False
+architecture = "arm-v7a"
+# architecture = "arm-v8a"
 
 np.set_printoptions(linewidth=150)
@@ -437,6 +439,8 @@ def check_mobile_results(args, fuse, mem_opt):
             continue
         if not op_output_var_name in mobile_var_cache:
             continue
+        if op_output_var_name not in fetch_names:
+            continue
         values1 = output_var_cache[op_output_var_name]
         values2 = mobile_var_cache[op_output_var_name]
         shape = get_var_shape(op_output_var_name) if check_shape else []
@@ -473,12 +477,78 @@ def check_mobile_results(args, fuse, mem_opt):
         error_values1 = np.array(error_values1)
         error_values2 = np.array(error_values2)
         # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}")
-        pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format(
-            error_index,op_cache[error_index][1].type,op_output_var_name), 1)
+        pp_red("outputs are incorrect", 1)
         pp_red("fluid results are : ", 1)
         pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1)
         pp_yellow("paddle mobile results are : ", 1)
         pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
+    if not fuse and not mem_opt:
+        error_index = None
+        error_values1 = None
+        error_values2 = None
+        checked_names = []
+        fetch_names = []
+        for fetch in fetches:
+            fetch_names.append(fetch.name)
+        for index in op_cache:
+            op_output_var_name, op = op_cache[index]
+            if mem_opt:
+                found_in_fetch = False
+                for fetch in fetches:
+                    if op_output_var_name == fetch.name:
+                        found_in_fetch = True
+                        break
+                if not found_in_fetch:
+                    continue
+            if not op_output_var_name in output_var_cache:
+                continue
+            if not op_output_var_name in mobile_var_cache:
+                continue
+            if fuse or mem_opt:
+                if op_output_var_name not in fetch_names:
+                    continue
+            values1 = output_var_cache[op_output_var_name]
+            values2 = mobile_var_cache[op_output_var_name]
+            shape = get_var_shape(op_output_var_name) if check_shape else []
+            if len(values1) + len(shape) != len(values2):
+                error_index = index
+            for i in range(len(shape)):
+                v1 = shape[i]
+                v2 = values2[i]
+                if v1 != v2:
+                    error_index = index
+                    break
+            if error_index == None:
+                for i in range(len(values1)):
+                    v1 = values1[i]
+                    v2 = values2[len(shape) + i]
+                    if abs(v1 - v2) > diff_threshold:
+                        error_index = index
+                        break
+            checked_names.append(op_output_var_name)
+            if error_index != None:
+                error_values1 = values1
+                error_values2 = values2
+                break
+        if error_index == None:
+            for name in fetch_names:
+                if name not in checked_names:
+                    error_index = -1
+                    break
+        if error_index == None:
+            pp_green("outputs are all correct", 1)
+        elif error_index == -1:
+            pp_red("outputs are missing")
+        else:
+            error_values1 = np.array(error_values1)
+            error_values2 = np.array(error_values2)
+            # pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}")
+            pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format(
+                error_index,op_cache[error_index][1].type,op_output_var_name), 1)
+            pp_red("fluid results are : ", 1)
+            pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1)
+            pp_yellow("paddle mobile results are : ", 1)
+            pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
 
     # print(output_var_cache)
     # print(mobile_var_cache)
@@ -535,8 +605,8 @@ def main():
     pp_yellow(dot + " start inspecting paddle mobile correctness & performance")
     push(checked_model_path)
     push(feed_path + "/" + last_feed_file_name, "input.txt")
-    push(mobile_src_root + "/build/release/arm-v7a/build/libpaddle-mobile.so")
-    push(mobile_src_root + "/build/release/arm-v7a/build/cl_kernel")
+    push(mobile_src_root + "/build/release/{}/build/libpaddle-mobile.so".format(architecture))
+    push(mobile_src_root + "/build/release/{}/build/cl_kernel".format(architecture))
     push(mobile_src_root + "/test/build/test-net")
     last_feed_var_shape = get_feed_var_shape(last_feed_var_name)
     args = str(len(last_feed_var_shape))
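For context: run.py validates each output variable element-wise against the fluid reference, flagging a mismatch when any absolute difference exceeds diff_threshold (loosened here from 0.01 to 0.1). The same rule expressed as a hedged C++ sketch; outputs_match and its parameter names are hypothetical, not part of the patch:

// Hedged sketch of run.py's comparison rule; names are hypothetical.
#include <cmath>
#include <cstddef>
#include <vector>

bool outputs_match(const std::vector<float> &fluid_vals,
                   const std::vector<float> &mobile_vals,
                   double diff_threshold /* run.py now uses 0.1 */) {
  // A length mismatch is itself an error (run.py compares lengths first).
  if (fluid_vals.size() != mobile_vals.size()) return false;
  for (std::size_t i = 0; i < fluid_vals.size(); ++i) {
    // Mirrors `abs(v1 - v2) > diff_threshold` in check_mobile_results.
    if (std::fabs(fluid_vals[i] - mobile_vals[i]) > diff_threshold) {
      return false;
    }
  }
  return true;
}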
: ", 1) + pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1) + pp_yellow("paddle mobile results are : ", 1) + pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1) # print(output_var_cache) # print(mobile_var_cache) @@ -535,8 +605,8 @@ def main(): pp_yellow(dot + " start inspecting paddle mobile correctness & performance") push(checked_model_path) push(feed_path + "/" + last_feed_file_name, "input.txt") - push(mobile_src_root + "/build/release/arm-v7a/build/libpaddle-mobile.so") - push(mobile_src_root + "/build/release/arm-v7a/build/cl_kernel") + push(mobile_src_root + "/build/release/{}/build/libpaddle-mobile.so".format(architecture)) + push(mobile_src_root + "/build/release/{}/build/cl_kernel".format(architecture)) push(mobile_src_root + "/test/build/test-net") last_feed_var_shape = get_feed_var_shape(last_feed_var_name) args = str(len(last_feed_var_shape)) diff --git a/mobile/tools/shell/change_mobile_namespace.sh b/mobile/tools/shell/change_mobile_namespace.sh index 5de8744844..aaad6ac193 100755 --- a/mobile/tools/shell/change_mobile_namespace.sh +++ b/mobile/tools/shell/change_mobile_namespace.sh @@ -31,9 +31,9 @@ else echo "converting ${file}" convert $extension $file done - # for file in $(find test -name "*\.*") - # do - # echo "converting ${file}" - # convert $extension $file - # done + for file in $(find test -name "*\.*") + do + echo "converting ${file}" + convert $extension $file + done fi -- GitLab