From 9ba8fd8e7446982ef617cd526183315f8dd15590 Mon Sep 17 00:00:00 2001 From: hong19860320 <9973393+hong19860320@users.noreply.github.com> Date: Mon, 16 Mar 2020 11:41:09 +0800 Subject: [PATCH] [XPU][NPU] Support XPU on AArch64 for FT+Kylin (#3184) --- CMakeLists.txt | 5 ++- cmake/{cross_compiling => device}/npu.cmake | 27 ++++++------- cmake/{ => device}/xpu.cmake | 43 ++++++++++----------- lite/CMakeLists.txt | 3 ++ lite/core/mir/type_precision_cast_pass.cc | 17 ++++---- lite/kernels/xpu/subgraph_compute.cc | 8 ++-- lite/kernels/xpu/subgraph_compute.h | 2 +- lite/tools/build.sh | 36 ++++++++++++++++- 8 files changed, 91 insertions(+), 50 deletions(-) rename cmake/{cross_compiling => device}/npu.cmake (83%) rename cmake/{ => device}/xpu.cmake (74%) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf1d35bc51..377e58d3ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,7 +132,8 @@ endif() if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) message(STATUS "Building the mobile framework") include(cross_compiling/postproject) - include(cross_compiling/npu) # check and prepare NPU DDK + include(device/npu) # check and prepare NPU DDK + include(device/xpu) # check and prepare XPU SDK # We compile the mobile deployment library when LITE_ON_TINY_PUBLISH=ON # So the following third party dependencies are not needed. 
@@ -173,7 +174,7 @@ endif() ######################################################################################## if(LITE_WITH_XPU) - include(xpu) + include(device/xpu) endif() include(external/mklml) # download mklml package diff --git a/cmake/cross_compiling/npu.cmake b/cmake/device/npu.cmake similarity index 83% rename from cmake/cross_compiling/npu.cmake rename to cmake/device/npu.cmake index c22bb1db4f..88598f4690 100644 --- a/cmake/cross_compiling/npu.cmake +++ b/cmake/device/npu.cmake @@ -17,15 +17,16 @@ if(NOT LITE_WITH_NPU) endif() if(NOT DEFINED NPU_DDK_ROOT) - set(NPU_DDK_ROOT $ENV{NPU_DDK_ROOT}) - if(NOT NPU_DDK_ROOT) - message(FATAL_ERROR "Must set NPU_DDK_ROOT or env NPU_DDK_ROOT when LITE_WITH_NPU=ON") - endif() + set(NPU_DDK_ROOT $ENV{NPU_DDK_ROOT}) + if(NOT NPU_DDK_ROOT) + message(FATAL_ERROR "Must set NPU_DDK_ROOT or env NPU_DDK_ROOT when LITE_WITH_NPU=ON") + endif() endif() message(STATUS "NPU_DDK_ROOT: ${NPU_DDK_ROOT}") find_path(NPU_DDK_INC NAMES HiAiModelManagerService.h - PATHS ${NPU_DDK_ROOT}/include NO_DEFAULT_PATH) + PATHS ${NPU_DDK_ROOT}/include + NO_DEFAULT_PATH) if(NOT NPU_DDK_INC) message(FATAL_ERROR "Can not find HiAiModelManagerService.h in ${NPU_DDK_ROOT}/include") endif() @@ -34,21 +35,24 @@ include_directories("${NPU_DDK_ROOT}/include") set(NPU_SUB_LIB_PATH "lib64") if(ARM_TARGET_ARCH_ABI STREQUAL "armv8") - set(NPU_SUB_LIB_PATH "lib64") + set(NPU_SUB_LIB_PATH "lib64") endif() if(ARM_TARGET_ARCH_ABI STREQUAL "armv7") - set(NPU_SUB_LIB_PATH "lib") + set(NPU_SUB_LIB_PATH "lib") endif() find_library(NPU_DDK_HIAI_FILE NAMES hiai - PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH}) + PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH} + NO_DEFAULT_PATH) find_library(NPU_DDK_IR_FILE NAMES hiai_ir - PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH}) + PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH} + NO_DEFAULT_PATH) find_library(NPU_DDK_IR_BUILD_FILE NAMES hiai_ir_build - PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH}) + PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH} + 
NO_DEFAULT_PATH) if(NOT NPU_DDK_HIAI_FILE) message(FATAL_ERROR "Can not find NPU_DDK_HIAI_FILE in ${NPU_DDK_ROOT}") @@ -76,6 +80,3 @@ endif() set(npu_runtime_libs npu_ddk_hiai CACHE INTERNAL "npu ddk runtime libs") set(npu_builder_libs npu_ddk_ir npu_ddk_ir_build CACHE INTERNAL "npu ddk builder libs") - - - diff --git a/cmake/xpu.cmake b/cmake/device/xpu.cmake similarity index 74% rename from cmake/xpu.cmake rename to cmake/device/xpu.cmake index 2112f6b658..099833ee4c 100644 --- a/cmake/xpu.cmake +++ b/cmake/device/xpu.cmake @@ -17,15 +17,16 @@ if(NOT LITE_WITH_XPU) endif() if(NOT DEFINED XPU_SDK_ROOT) - set(XPU_SDK_ROOT $ENV{XPU_SDK_ROOT}) - if(NOT XPU_SDK_ROOT) - message(FATAL_ERROR "Must set XPU_SDK_ROOT or env XPU_SDK_ROOT when LITE_WITH_XPU=ON") - endif() + set(XPU_SDK_ROOT $ENV{XPU_SDK_ROOT}) + if(NOT XPU_SDK_ROOT) + message(FATAL_ERROR "Must set XPU_SDK_ROOT or env XPU_SDK_ROOT when LITE_WITH_XPU=ON") + endif() endif() message(STATUS "XPU_SDK_ROOT: ${XPU_SDK_ROOT}") find_path(XPU_SDK_INC NAMES xtcl.h - PATHS ${XPU_SDK_ROOT}/XTCL/include/xtcl NO_DEFAULT_PATH) + PATHS ${XPU_SDK_ROOT}/XTCL/include/xtcl + NO_DEFAULT_PATH) if(NOT XPU_SDK_INC) message(FATAL_ERROR "Can not find xtcl.h in ${XPU_SDK_ROOT}/include") endif() @@ -34,7 +35,8 @@ include_directories("${XPU_SDK_ROOT}/XTCL/include") include_directories("${XPU_SDK_ROOT}/XTDK/include") find_library(XPU_SDK_XTCL_FILE NAMES xtcl - PATHS ${XPU_SDK_ROOT}/XTCL/so) + PATHS ${XPU_SDK_ROOT}/XTCL/so + NO_DEFAULT_PATH) if(NOT XPU_SDK_XTCL_FILE) message(FATAL_ERROR "Can not find XPU XTCL Library in ${XPU_SDK_ROOT}") @@ -45,7 +47,8 @@ else() endif() find_library(XPU_SDK_TVM_FILE NAMES tvm - PATHS ${XPU_SDK_ROOT}/XTCL/so) + PATHS ${XPU_SDK_ROOT}/XTCL/so + NO_DEFAULT_PATH) if(NOT XPU_SDK_TVM_FILE) message(FATAL_ERROR "Can not find XPU TVM Library in ${XPU_SDK_ROOT}") @@ -56,7 +59,8 @@ else() endif() find_library(XPU_SDK_XPU_API_FILE NAMES xpuapi - PATHS ${XPU_SDK_ROOT}/XTDK/shlib) + PATHS ${XPU_SDK_ROOT}/XTDK/shlib + 
NO_DEFAULT_PATH) if(NOT XPU_SDK_XPU_API_FILE) message(FATAL_ERROR "Can not find XPU API Library in ${XPU_SDK_ROOT}") @@ -67,7 +71,8 @@ else() endif() find_library(XPU_SDK_XPU_RT_FILE NAMES xpurt - PATHS ${XPU_SDK_ROOT}/XTDK/shlib) + PATHS ${XPU_SDK_ROOT}/XTDK/shlib + NO_DEFAULT_PATH) if(NOT XPU_SDK_XPU_RT_FILE) message(FATAL_ERROR "Can not find XPU RT Library in ${XPU_SDK_ROOT}") @@ -78,18 +83,12 @@ else() endif() find_library(XPU_SDK_XPU_JITC_FILE NAMES xpujitc - PATHS ${XPU_SDK_ROOT}/XTDK/shlib) - -if(NOT XPU_SDK_XPU_JITC_FILE) - message(FATAL_ERROR "Can not find XPU JITC Library in ${XPU_SDK_ROOT}") -else() - message(STATUS "Found XPU JITC Library: ${XPU_SDK_XPU_JITC_FILE}") - add_library(xpu_sdk_xpu_jitc SHARED IMPORTED GLOBAL) - set_property(TARGET xpu_sdk_xpu_jitc PROPERTY IMPORTED_LOCATION ${XPU_SDK_XPU_JITC_FILE}) -endif() + PATHS ${XPU_SDK_ROOT}/XTDK/shlib + NO_DEFAULT_PATH) find_library(XPU_SDK_LLVM_FILE NAMES LLVM-8 - PATHS ${XPU_SDK_ROOT}/XTDK/shlib) + PATHS ${XPU_SDK_ROOT}/XTDK/shlib + NO_DEFAULT_PATH) if(NOT XPU_SDK_LLVM_FILE) message(FATAL_ERROR "Can not find LLVM Library in ${XPU_SDK_ROOT}") @@ -99,7 +98,7 @@ else() set_property(TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_LLVM_FILE}) endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=1 -D_GLIBCXX_USE_CXX11_ABI=0") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=0") -set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs") -set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu builder libs") +set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs") +set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL "xpu builder libs") diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index 
f5a5f96cf3..b78c90fbc9 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -65,6 +65,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) if (LITE_WITH_NPU) set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.npu") endif(LITE_WITH_NPU) + if (LITE_WITH_XPU) + set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.xpu") + endif(LITE_WITH_XPU) if (LITE_WITH_FPGA) set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.fpga") endif(LITE_WITH_FPGA) diff --git a/lite/core/mir/type_precision_cast_pass.cc b/lite/core/mir/type_precision_cast_pass.cc index 86eb43be59..25b367e73c 100644 --- a/lite/core/mir/type_precision_cast_pass.cc +++ b/lite/core/mir/type_precision_cast_pass.cc @@ -71,21 +71,22 @@ static bool InferScaleFromSubgraph(std::string var_name, const OpInfo* op_info, float* scale, bool reverse = false) { - bool found = false; - auto input_or_output_names = op_info->GetAttr>( - reverse ? "output_data_names" : "input_data_names"); - auto input_or_output_scales = op_info->GetAttr>( - reverse ? "output_data_scales" : "input_data_scales"); + std::string attr_name = reverse ? "output_data_names" : "input_data_names"; + if (!op_info->HasAttr(attr_name)) return false; + auto input_or_output_names = + op_info->GetAttr>(attr_name); + attr_name = reverse ? 
"output_data_scales" : "input_data_scales"; + if (!op_info->HasAttr(attr_name)) return false; + auto input_or_output_scales = op_info->GetAttr>(attr_name); auto size = input_or_output_names.size(); CHECK(size == input_or_output_scales.size()); for (int i = 0; i < size; i++) { if (input_or_output_names[i] == var_name) { *scale = input_or_output_scales[i]; - found = true; - break; + return true; } } - return found; + return false; } // Infer the scale value for the new calib op from the input_scale of the diff --git a/lite/kernels/xpu/subgraph_compute.cc b/lite/kernels/xpu/subgraph_compute.cc index c4d170f673..1b6d374f73 100644 --- a/lite/kernels/xpu/subgraph_compute.cc +++ b/lite/kernels/xpu/subgraph_compute.cc @@ -220,10 +220,12 @@ void SubgraphCompute::Run() { REGISTER_LITE_KERNEL(subgraph, kXPU, - kFloat, + kAny, kNCHW, paddle::lite::kernels::xpu::SubgraphCompute, def) - .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))}) - .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))}) + .BindInput("Inputs", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))}) + .BindOutput("Outputs", + {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kAny))}) .Finalize(); diff --git a/lite/kernels/xpu/subgraph_compute.h b/lite/kernels/xpu/subgraph_compute.h index c21a1b7b05..1faada3978 100644 --- a/lite/kernels/xpu/subgraph_compute.h +++ b/lite/kernels/xpu/subgraph_compute.h @@ -49,7 +49,7 @@ class SubgraphEngine : public subgraph::Engine { std::unique_ptr device_program_{nullptr}; }; -class SubgraphCompute : public KernelLite { +class SubgraphCompute : public KernelLite { public: using param_t = operators::SubgraphParam; diff --git a/lite/tools/build.sh b/lite/tools/build.sh index 8eeb0dc32a..93bc95fa4a 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -21,6 +21,10 @@ OPTMODEL_DIR="" BUILD_TAILOR=OFF BUILD_CV=OFF SHUTDOWN_LOG=ON +BUILD_NPU=OFF +NPU_DDK_ROOT="$(pwd)/ai_ddk_lib/" # Download HiAI DDK from https://developer.huawei.com/consumer/cn/hiai/ 
+BUILD_XPU=OFF +XPU_SDK_ROOT="$(pwd)/xpu_sdk_lib/" LITE_WITH_ARM_LANG=OFF readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz @@ -130,6 +134,10 @@ function make_tiny_publish_so { -DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \ -DLITE_BUILD_TAILOR=$BUILD_TAILOR \ -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \ + -DLITE_WITH_NPU=$BUILD_NPU \ + -DNPU_DDK_ROOT=$NPU_DDK_ROOT \ + -DLITE_WITH_XPU=$BUILD_XPU \ + -DXPU_SDK_ROOT=$XPU_SDK_ROOT \ -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} make publish_inference -j$NUM_PROC @@ -214,6 +222,10 @@ function make_full_publish_so { -DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \ -DLITE_BUILD_TAILOR=$BUILD_TAILOR \ -DLITE_OPTMODEL_DIR=$OPTMODEL_DIR \ + -DLITE_WITH_NPU=$BUILD_NPU \ + -DNPU_DDK_ROOT=$NPU_DDK_ROOT \ + -DLITE_WITH_XPU=$BUILD_XPU \ + -DXPU_SDK_ROOT=$XPU_SDK_ROOT \ -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} make publish_inference -j$NUM_PROC @@ -243,6 +255,10 @@ function make_all_tests { -DLITE_BUILD_EXTRA=$BUILD_EXTRA \ -DLITE_WITH_CV=$BUILD_CV \ -DLITE_WITH_ARM_LANG=$LITE_WITH_ARM_LANG \ + -DLITE_WITH_NPU=$BUILD_NPU \ + -DNPU_DDK_ROOT=$NPU_DDK_ROOT \ + -DLITE_WITH_XPU=$BUILD_XPU \ + -DXPU_SDK_ROOT=$XPU_SDK_ROOT \ -DARM_TARGET_OS=${os} -DARM_TARGET_ARCH_ABI=${abi} -DARM_TARGET_LANG=${lang} make lite_compile_deps -j$NUM_PROC @@ -339,7 +355,9 @@ function make_x86 { -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF \ -DLITE_WITH_ARM=OFF \ -DWITH_GPU=OFF \ - -DLITE_BUILD_EXTRA=ON + -DLITE_BUILD_EXTRA=ON \ + -DLITE_WITH_XPU=$BUILD_XPU \ + -DXPU_SDK_ROOT=$XPU_SDK_ROOT make publish_inference -j$NUM_PROC cd - @@ -441,6 +459,22 @@ function main { SHUTDOWN_LOG="${i#*=}" shift ;; + --build_npu=*) + BUILD_NPU="${i#*=}" + shift + ;; + --npu_ddk_root=*) + NPU_DDK_ROOT="${i#*=}" + shift + ;; + --build_xpu=*) + BUILD_XPU="${i#*=}" + shift + ;; + --xpu_sdk_root=*) + XPU_SDK_ROOT="${i#*=}" + shift + ;; tiny_publish) make_tiny_publish_so $ARM_OS $ARM_ABI
$ARM_LANG $ANDROID_STL shift -- GitLab