提交 c4a87224 编写于 作者: myq406450149's avatar myq406450149 提交者: GitHub

Support Bitmain (BM) backend,test=develop (#2761)

* Support Bitmain (BM) backend
上级 13945aed
...@@ -60,6 +60,7 @@ lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON) ...@@ -60,6 +60,7 @@ lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF) lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF)
lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF) lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF)
lite_option(LITE_WITH_BM "Enable BM in lite mode" OFF)
lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON) lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
lite_option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF) lite_option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
lite_option(LITE_WITH_FPGA "Enable FPGA support in lite" OFF) lite_option(LITE_WITH_FPGA "Enable FPGA support in lite" OFF)
...@@ -192,6 +193,9 @@ if(LITE_WITH_CUDA) ...@@ -192,6 +193,9 @@ if(LITE_WITH_CUDA)
include(cuda) include(cuda)
endif() endif()
if(LITE_WITH_BM)
include(bm)
endif()
include(generic) # simplify cmake module include(generic) # simplify cmake module
include(ccache) # set ccache for compilation include(ccache) # set ccache for compilation
include(util) # set unittest and link libs include(util) # set unittest and link libs
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LITE_WITH_BM)
  return()
endif()

# Locate the Bitmain (BM) SDK. The root comes from the BM_SDK_ROOT cache
# variable, falling back to the environment variable of the same name
# (read at configure time only).
if(NOT DEFINED BM_SDK_ROOT)
  set(BM_SDK_ROOT $ENV{BM_SDK_ROOT})
  if(NOT BM_SDK_ROOT)
    message(FATAL_ERROR "Must set BM_SDK_ROOT or env BM_SDK_ROOT when LITE_WITH_BM=ON")
  endif()
endif()

message(STATUS "BM_SDK_ROOT: ${BM_SDK_ROOT}")

# Sanity-check the SDK layout before wiring up include paths.
find_path(BM_SDK_INC NAMES bmruntime_interface.h
  PATHS ${BM_SDK_ROOT}/include/bmruntime NO_DEFAULT_PATH)
if(NOT BM_SDK_INC)
  # Report the directory that was actually searched (include/bmruntime,
  # not plain include/).
  message(FATAL_ERROR "Can not find bmruntime_interface.h in ${BM_SDK_ROOT}/include/bmruntime")
endif()

include_directories("${BM_SDK_ROOT}/include/bmruntime")
include_directories("${BM_SDK_ROOT}/include/bmlib")
include_directories("${BM_SDK_ROOT}/include/bmcompiler")
include_directories("${BM_SDK_ROOT}/include/bmcpu")
include_directories("${BM_SDK_ROOT}/include/bmlog")

# bm_import_lib(<cache_var> <target> <lib_name> <search_dir>)
# Finds <lib_name> under <search_dir> (caching the result in <cache_var>,
# keeping the variable names the rest of the build may reference) and
# exposes it as a GLOBAL imported shared library named <target>.
# Fails the configure step if the library cannot be found.
function(bm_import_lib cache_var target lib_name search_dir)
  find_library(${cache_var} NAMES ${lib_name} PATHS ${search_dir})
  if(NOT ${cache_var})
    message(FATAL_ERROR "Can not find ${lib_name} Library in ${BM_SDK_ROOT}")
  endif()
  message(STATUS "Found ${lib_name} Library: ${${cache_var}}")
  add_library(${target} SHARED IMPORTED GLOBAL)
  set_property(TARGET ${target} PROPERTY IMPORTED_LOCATION ${${cache_var}})
endfunction()

bm_import_lib(BM_SDK_RT_LIB bmrt bmrt ${BM_SDK_ROOT}/lib/bmnn/pcie)
bm_import_lib(BM_SDK_BM_LIB bmlib bmlib ${BM_SDK_ROOT}/lib/bmnn/pcie)
bm_import_lib(BM_SDK_COMPILER_LIB bmcompiler bmcompiler ${BM_SDK_ROOT}/lib/bmcompiler)
bm_import_lib(BM_SDK_CPU_LIB bmcpu bmcpu ${BM_SDK_ROOT}/lib/bmnn/pcie)

# Link-interface lists consumed by lite_cc_library()/lite_deps() as BM_DEPS.
set(bm_runtime_libs bmrt bmlib bmcompiler bmcpu CACHE INTERNAL "bm runtime libs")
set(bm_builder_libs bmrt bmlib bmcompiler bmcpu CACHE INTERNAL "bm builder libs")
...@@ -143,6 +143,10 @@ if (LITE_WITH_FPGA) ...@@ -143,6 +143,10 @@ if (LITE_WITH_FPGA)
add_definitions("-DLITE_WITH_FPGA") add_definitions("-DLITE_WITH_FPGA")
endif() endif()
if (LITE_WITH_BM)
add_definitions("-DLITE_WITH_BM")
endif()
if (LITE_WITH_PROFILE) if (LITE_WITH_PROFILE)
add_definitions("-DLITE_WITH_PROFILE") add_definitions("-DLITE_WITH_PROFILE")
if (LITE_WITH_PRECISION_PROFILE) if (LITE_WITH_PRECISION_PROFILE)
......
...@@ -22,7 +22,7 @@ endfunction() ...@@ -22,7 +22,7 @@ endfunction()
function (lite_deps TARGET) function (lite_deps TARGET)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS CV_DEPS ARGS) set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS CV_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps ${lite_deps_DEPS}) set(deps ${lite_deps_DEPS})
...@@ -94,6 +94,12 @@ function (lite_deps TARGET) ...@@ -94,6 +94,12 @@ function (lite_deps TARGET)
endforeach(var) endforeach(var)
endif() endif()
if (LITE_WITH_BM)
foreach(var ${lite_deps_BM_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
set(${TARGET} ${deps} PARENT_SCOPE) set(${TARGET} ${deps} PARENT_SCOPE)
endfunction() endfunction()
...@@ -119,7 +125,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean ...@@ -119,7 +125,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
function(lite_cc_library TARGET) function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module) set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS CV_DEPS PROFILE_DEPS LIGHT_DEPS set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS CV_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS) HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -129,6 +135,7 @@ function(lite_cc_library TARGET) ...@@ -129,6 +135,7 @@ function(lite_cc_library TARGET)
X86_DEPS ${args_X86_DEPS} X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS} CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS} CL_DEPS ${args_CL_DEPS}
BM_DEPS ${args_BM_DEPS}
ARM_DEPS ${args_ARM_DEPS} ARM_DEPS ${args_ARM_DEPS}
CV_DEPS ${args_CV_DEPS} CV_DEPS ${args_CV_DEPS}
FPGA_DEPS ${args_FPGA_DEPS} FPGA_DEPS ${args_FPGA_DEPS}
...@@ -163,7 +170,7 @@ function(lite_cc_binary TARGET) ...@@ -163,7 +170,7 @@ function(lite_cc_binary TARGET)
set(options " -g ") set(options " -g ")
endif() endif()
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS ARGS) LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -177,6 +184,7 @@ function(lite_cc_binary TARGET) ...@@ -177,6 +184,7 @@ function(lite_cc_binary TARGET)
FPGA_DEPS ${args_FPGA_DEPS} FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS} NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS} XPU_DEPS ${args_XPU_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS} LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
...@@ -210,7 +218,7 @@ function(lite_cc_test TARGET) ...@@ -210,7 +218,7 @@ function(lite_cc_test TARGET)
endif() endif()
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS
ARGS ARGS
COMPILE_LEVEL # (basic|extra) COMPILE_LEVEL # (basic|extra)
...@@ -232,6 +240,7 @@ function(lite_cc_test TARGET) ...@@ -232,6 +240,7 @@ function(lite_cc_test TARGET)
FPGA_DEPS ${args_FPGA_DEPS} FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS} NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS} XPU_DEPS ${args_XPU_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS} LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
...@@ -260,6 +269,7 @@ set(cuda_kernels CACHE INTERNAL "cuda kernels") ...@@ -260,6 +269,7 @@ set(cuda_kernels CACHE INTERNAL "cuda kernels")
set(fpga_kernels CACHE INTERNAL "fpga kernels") set(fpga_kernels CACHE INTERNAL "fpga kernels")
set(npu_kernels CACHE INTERNAL "npu kernels") set(npu_kernels CACHE INTERNAL "npu kernels")
set(xpu_kernels CACHE INTERNAL "xpu kernels") set(xpu_kernels CACHE INTERNAL "xpu kernels")
set(bm_kernels CACHE INTERNAL "bm kernels")
set(opencl_kernels CACHE INTERNAL "opencl kernels") set(opencl_kernels CACHE INTERNAL "opencl kernels")
set(host_kernels CACHE INTERNAL "host kernels") set(host_kernels CACHE INTERNAL "host kernels")
...@@ -270,12 +280,12 @@ if(LITE_BUILD_TAILOR) ...@@ -270,12 +280,12 @@ if(LITE_BUILD_TAILOR)
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list) file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
endif() endif()
# add a kernel for some specific device # add a kernel for some specific device
# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA) # device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA, BM)
# level: one of (basic, extra) # level: one of (basic, extra)
function(add_kernel TARGET device level) function(add_kernel TARGET device level)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS) ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -341,6 +351,12 @@ function(add_kernel TARGET device level) ...@@ -341,6 +351,12 @@ function(add_kernel TARGET device level)
endif() endif()
set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "") set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "")
endif() endif()
if ("${device}" STREQUAL "BM")
if (NOT LITE_WITH_BM)
return()
endif()
set(bm_kernels "${bm_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "OPENCL") if ("${device}" STREQUAL "OPENCL")
if (NOT LITE_WITH_OPENCL) if (NOT LITE_WITH_OPENCL)
return() return()
...@@ -374,6 +390,7 @@ function(add_kernel TARGET device level) ...@@ -374,6 +390,7 @@ function(add_kernel TARGET device level)
FPGA_DEPS ${args_FPGA_DEPS} FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS} NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS} XPU_DEPS ${args_XPU_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS} LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
...@@ -392,7 +409,7 @@ endif() ...@@ -392,7 +409,7 @@ endif()
function(add_operator TARGET level) function(add_operator TARGET level)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS) ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -424,6 +441,7 @@ function(add_operator TARGET level) ...@@ -424,6 +441,7 @@ function(add_operator TARGET level)
FPGA_DEPS ${args_FPGA_DEPS} FPGA_DEPS ${args_FPGA_DEPS}
NPU_DEPS ${args_NPU_DEPS} NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS} XPU_DEPS ${args_XPU_DEPS}
BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS} PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS} LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS} HVY_DEPS ${args_HVY_DEPS}
......
...@@ -9,6 +9,7 @@ message(STATUS "LITE_WITH_OPENCL:\t${LITE_WITH_OPENCL}") ...@@ -9,6 +9,7 @@ message(STATUS "LITE_WITH_OPENCL:\t${LITE_WITH_OPENCL}")
message(STATUS "LITE_WITH_NPU:\t${LITE_WITH_NPU}") message(STATUS "LITE_WITH_NPU:\t${LITE_WITH_NPU}")
message(STATUS "LITE_WITH_XPU:\t${LITE_WITH_XPU}") message(STATUS "LITE_WITH_XPU:\t${LITE_WITH_XPU}")
message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}") message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}") message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}") message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}")
...@@ -66,6 +67,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) ...@@ -66,6 +67,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
if (LITE_WITH_FPGA) if (LITE_WITH_FPGA)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.fpga") set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.fpga")
endif(LITE_WITH_FPGA) endif(LITE_WITH_FPGA)
if (LITE_WITH_BM)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.bm")
endif(LITE_WITH_BM)
else() else()
set(INFER_LITE_PUBLISH_ROOT "${CMAKE_BINARY_DIR}/inference_lite_lib") set(INFER_LITE_PUBLISH_ROOT "${CMAKE_BINARY_DIR}/inference_lite_lib")
endif() endif()
......
...@@ -61,13 +61,19 @@ if (WITH_TESTING) ...@@ -61,13 +61,19 @@ if (WITH_TESTING)
${ops} ${host_kernels} ${ops} ${host_kernels}
CUDA_DEPS ${cuda_kernels} CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels}) XPU_DEPS ${xpu_kernels}
BM_DEPS ${bm_kernels})
endif() endif()
if(LITE_WITH_FPGA) if(LITE_WITH_FPGA)
set(light_api_deps ${light_api_deps} ${fpga_deps}) set(light_api_deps ${light_api_deps} ${fpga_deps})
set(cxx_api_deps ${cxx_api_deps} ${fpga_deps}) set(cxx_api_deps ${cxx_api_deps} ${fpga_deps})
endif() endif()
if(LITE_WITH_BM)
set(light_api_deps ${light_api_deps} ${bm_deps})
set(cxx_api_deps ${cxx_api_deps} ${bm_deps})
endif()
message(STATUS "get ops ${ops}") message(STATUS "get ops ${ops}")
message(STATUS "get X86 kernels ${x86_kernels}") message(STATUS "get X86 kernels ${x86_kernels}")
message(STATUS "get CUDA kernels ${cuda_kernels}") message(STATUS "get CUDA kernels ${cuda_kernels}")
...@@ -76,6 +82,7 @@ message(STATUS "get ARM kernels ${arm_kernels}") ...@@ -76,6 +82,7 @@ message(STATUS "get ARM kernels ${arm_kernels}")
message(STATUS "get NPU kernels ${npu_kernels}") message(STATUS "get NPU kernels ${npu_kernels}")
message(STATUS "get XPU kernels ${xpu_kernels}") message(STATUS "get XPU kernels ${xpu_kernels}")
message(STATUS "get FPGA kernels ${fpga_kernels}") message(STATUS "get FPGA kernels ${fpga_kernels}")
message(STATUS "get BM kernels ${bm_kernels}")
# for full api # for full api
if (NOT LITE_ON_TINY_PUBLISH) if (NOT LITE_ON_TINY_PUBLISH)
...@@ -90,6 +97,7 @@ if (NOT LITE_ON_TINY_PUBLISH) ...@@ -90,6 +97,7 @@ if (NOT LITE_ON_TINY_PUBLISH)
CV_DEPS paddle_cv_arm CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
BM_DEPS ${bm_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}) FPGA_DEPS ${fpga_kernels})
endif() endif()
...@@ -111,7 +119,8 @@ lite_cc_library(light_api SRCS light_api.cc ...@@ -111,7 +119,8 @@ lite_cc_library(light_api SRCS light_api.cc
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}) FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels})
include(ExternalProject) include(ExternalProject)
set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
...@@ -129,6 +138,7 @@ if(WITH_TESTING) ...@@ -129,6 +138,7 @@ if(WITH_TESTING)
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
EXCLUDE_COMPILE_DEPS "ON" EXCLUDE_COMPILE_DEPS "ON"
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
...@@ -164,6 +174,12 @@ if(WITH_TESTING) ...@@ -164,6 +174,12 @@ if(WITH_TESTING)
${ops} ${host_kernels} ${x86_kernels} ${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn) ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn)
add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz) add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz)
if(LITE_WITH_BM)
lite_cc_test(test_resnet50_lite_bm SRCS test_resnet50_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${bm_kernels} ${bm_bridges}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
endif()
endif() endif()
endif() endif()
...@@ -254,6 +270,7 @@ lite_cc_test(test_light_api SRCS light_api_test.cc ...@@ -254,6 +270,7 @@ lite_cc_test(test_light_api SRCS light_api_test.cc
DEPS light_api program mir_passes paddle_api_light DEPS light_api program mir_passes paddle_api_light
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_test(test_apis SRCS apis_test.cc lite_cc_test(test_apis SRCS apis_test.cc
...@@ -262,6 +279,7 @@ lite_cc_test(test_apis SRCS apis_test.cc ...@@ -262,6 +279,7 @@ lite_cc_test(test_apis SRCS apis_test.cc
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
...@@ -293,6 +311,7 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle ...@@ -293,6 +311,7 @@ lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL) ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
if (WITH_TESTING) if (WITH_TESTING)
add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz) add_dependencies(test_paddle_api extern_lite_download_lite_naive_model_tar_gz)
...@@ -307,6 +326,7 @@ if(NOT IOS) ...@@ -307,6 +326,7 @@ if(NOT IOS)
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}) CUDA_DEPS ${cuda_kernels})
...@@ -328,6 +348,7 @@ if(NOT IOS) ...@@ -328,6 +348,7 @@ if(NOT IOS)
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}) CUDA_DEPS ${cuda_kernels})
......
...@@ -55,7 +55,8 @@ const std::string& TargetToStr(TargetType target) { ...@@ -55,7 +55,8 @@ const std::string& TargetToStr(TargetType target) {
"any", "any",
"fpga", "fpga",
"npu", "npu",
"xpu"}; "xpu",
"bm"};
auto x = static_cast<int>(target); auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM))); CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x]; return target2string[x];
...@@ -94,7 +95,8 @@ const std::string& TargetRepr(TargetType target) { ...@@ -94,7 +95,8 @@ const std::string& TargetRepr(TargetType target) {
"kAny", "kAny",
"kFPGA", "kFPGA",
"kNPU", "kNPU",
"kXPU"}; "kXPU",
"kBM"};
auto x = static_cast<int>(target); auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM))); CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x]; return target2string[x];
...@@ -135,6 +137,7 @@ std::set<TargetType> ExpandValidTargets(TargetType target) { ...@@ -135,6 +137,7 @@ std::set<TargetType> ExpandValidTargets(TargetType target) {
TARGET(kOpenCL), TARGET(kOpenCL),
TARGET(kNPU), TARGET(kNPU),
TARGET(kXPU), TARGET(kXPU),
TARGET(kBM),
TARGET(kFPGA)}); TARGET(kFPGA)});
if (target == TARGET(kAny)) { if (target == TARGET(kAny)) {
return valid_set; return valid_set;
......
...@@ -52,8 +52,9 @@ enum class TargetType : int { ...@@ -52,8 +52,9 @@ enum class TargetType : int {
kFPGA = 7, kFPGA = 7,
kNPU = 8, kNPU = 8,
kXPU = 9, kXPU = 9,
kBM = 10,
kAny = 6, // any target kAny = 6, // any target
NUM = 10, // number of fields. NUM = 11, // number of fields.
}; };
enum class PrecisionType : int { enum class PrecisionType : int {
kUnk = 0, kUnk = 0,
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
DEFINE_string(input_img_txt_path,
"",
"if set input_img_txt_path, read the img filename as input.");
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places) {
lite::Predictor predictor;
std::vector<std::string> passes;
passes.push_back("bm_subgraph_pass");
predictor.Build(FLAGS_model_dir, "", "", valid_places, passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
if (FLAGS_input_img_txt_path.empty()) {
for (int i = 0; i < item_size; i++) {
data[i] = 1;
}
} else {
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size; i++) {
fs >> data[i];
}
}
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
auto* out = predictor.GetOutput(0);
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
ASSERT_EQ(out->dims()[1], 1000);
auto* out_data = out->data<float>();
FILE* fp = fopen("result.txt", "wb");
for (int i = 0; i < out->numel(); i++) {
fprintf(fp, "%f\n", out_data[i]);
}
fclose(fp);
}
// Runs the ResNet-50 model with BM as the preferred target and x86 as the
// fallback for any ops the BM subgraph pass cannot offload.
TEST(ResNet50, test_bm) {
  std::vector<Place> valid_places({Place{TARGET(kBM), PRECISION(kFloat)},
                                   Place{TARGET(kX86), PRECISION(kFloat)}});
  TestModel(valid_places);
}
} // namespace lite
} // namespace paddle
...@@ -6,3 +6,4 @@ add_subdirectory(fpga) ...@@ -6,3 +6,4 @@ add_subdirectory(fpga)
add_subdirectory(host) add_subdirectory(host)
add_subdirectory(npu) add_subdirectory(npu)
add_subdirectory(xpu) add_subdirectory(xpu)
add_subdirectory(bm)
# Build the BM (Bitmain) backend only when the BM target is enabled.
if (NOT LITE_WITH_BM)
  return()
endif()

# Device wrapper for the BM runtime; links against the imported SDK
# libraries collected in cmake/bm.cmake.
lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc DEPS ${bm_runtime_libs})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/bm/target_wrapper.h"
#include <bmcompiler_if.h>
#include <bmlib_runtime.h>
#include <utility>
namespace paddle {
namespace lite {
int TargetWrapperBM::device_id_ = 0;
std::map<int, void*> TargetWrapperBM::bm_hds_;
// Number of BM devices visible to the runtime, per bm_dev_getcount().
size_t TargetWrapperBM::num_devices() {
  int device_count = 0;
  bm_dev_getcount(&device_count);
  return static_cast<size_t>(device_count);
}
// Selects the active BM device and, on first use of an id, requests a
// runtime handle for it. Handles are cached in bm_hds_ keyed by device id
// and reused by Malloc/Free/MemcpySync/GetHandle.
void TargetWrapperBM::SetDevice(int id) {
  /*
  if (id < 0 || (size_t)id >= num_devices()) {
    LOG(FATAL) << "Failed with invalid device id " << id;
  }
  */
  // NOTE(review): the range validation above is deliberately disabled —
  // confirm whether out-of-range ids should be rejected here.
  device_id_ = id;
  if (bm_hds_.find(id) == bm_hds_.end()) {
    bm_handle_t bm_handle;
    // Acquire a handle from the BM driver; abort on failure.
    bm_status_t ret = bm_dev_request(&bm_handle, id);
    CHECK_EQ(ret, BM_SUCCESS) << "Failed with error code: "
                              << static_cast<int>(ret);
    // bm_handle_t is stored as void* in the map (see bm_hds_ declaration).
    bm_hds_.insert(std::pair<int, bm_handle_t>(id, bm_handle));
  }
  return;
}
// Returns the cached runtime handle for the currently selected device.
// Aborts if SetDevice() has not been called for that device yet.
void* TargetWrapperBM::GetHandle() {
  auto it = bm_hds_.find(device_id_);
  if (it == bm_hds_.end()) {
    LOG(FATAL) << "device not initialized " << device_id_;
  }
  return it->second;
}
void* TargetWrapperBM::Malloc(size_t size) {
void* ptr{};
if (bm_hds_.find(device_id_) == bm_hds_.end()) {
SetDevice(device_id_);
}
bm_handle_t bm_handle = static_cast<bm_handle_t>(bm_hds_.at(device_id_));
bm_device_mem_t* p_mem =
reinterpret_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
bm_malloc_device_byte(bm_handle, p_mem, size);
ptr = reinterpret_cast<void*>(p_mem);
return ptr;
}
// Releases memory allocated by Malloc(): frees the device-side allocation
// first, then the host-side bm_device_mem_t descriptor. NULL is a no-op.
void TargetWrapperBM::Free(void* ptr) {
  if (ptr == NULL) {
    return;
  }
  bm_handle_t bm_handle = static_cast<bm_handle_t>(bm_hds_.at(device_id_));
  bm_device_mem_t* mem = static_cast<bm_device_mem_t*>(ptr);
  bm_free_device(bm_handle, *mem);
  free(ptr);
}
// Synchronous copy of `size` bytes between host and BM device memory.
// The device-side endpoint (dst for HtoD, src for DtoH) is a
// bm_device_mem_t descriptor as returned by Malloc(); the host-side
// endpoint is a plain buffer.
// NOTE(review): silently does nothing when the current device was never
// initialized — confirm whether this should be a hard error instead.
void TargetWrapperBM::MemcpySync(void* dst,
                                 const void* src,
                                 size_t size,
                                 IoDirection dir) {
  if (bm_hds_.find(device_id_) == bm_hds_.end()) {
    return;
  }
  bm_handle_t bm_handle = static_cast<bm_handle_t>(bm_hds_.at(device_id_));
  bm_device_mem_t* pmem{};
  const bm_device_mem_t* pcst_mem{};
  switch (dir) {
    case IoDirection::HtoD:
      // Host -> device: dst is the device descriptor.
      pmem = static_cast<bm_device_mem_t*>(dst);
      bm_memcpy_s2d_partial_offset(
          bm_handle, *pmem, const_cast<void*>(src), size, 0);
      break;
    case IoDirection::DtoH:
      // Device -> host: src is the device descriptor.
      pcst_mem = static_cast<const bm_device_mem_t*>(src);
      bm_memcpy_d2s_partial_offset(
          bm_handle, reinterpret_cast<void*>(dst), *pcst_mem, size, 0);
      break;
    default:
      // DtoD / HtoH are not implemented for this backend.
      LOG(FATAL) << "Unsupported IoDirection " << static_cast<int>(dir);
      break;
  }
  return;
}
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include "lite/core/target_wrapper.h"
namespace paddle {
namespace lite {
// Convenience alias for the BM specialization declared below.
using TargetWrapperBM = TargetWrapper<TARGET(kBM)>;

// TargetWrapper specialization for the Bitmain (BM) accelerator.
// This backend exposes no stream or event primitives, so the related
// operations are no-ops over int placeholder types; only device selection,
// memory management, and synchronous copies are implemented (see
// lite/backends/bm/target_wrapper.cc).
template <>
class TargetWrapper<TARGET(kBM)> {
 public:
  using stream_t = int;  // placeholder: no stream concept on BM
  using event_t = int;   // placeholder: no event concept on BM
  // Number of BM devices visible to the runtime.
  static size_t num_devices();
  static size_t maximum_stream() { return 0; }
  // Selects the active device, requesting a handle on first use.
  static void SetDevice(int id);
  static void CreateStream(stream_t* stream) {}
  static void DestroyStream(const stream_t& stream) {}
  static void CreateEvent(event_t* event) {}
  static void DestroyEvent(const event_t& event) {}
  static void RecordEvent(const event_t& event) {}
  static void SyncEvent(const event_t& event) {}
  static void StreamSync(const stream_t& stream) {}
  // Allocates device memory; returns an opaque descriptor freed by Free().
  static void* Malloc(size_t size);
  static void Free(void* ptr);
  // Raw runtime handle for the currently selected device.
  static void* GetHandle();
  // Synchronous host<->device copy; the device endpoint is a descriptor
  // returned by Malloc().
  static void MemcpySync(void* dst,
                         const void* src,
                         size_t size,
                         IoDirection dir);
  static void MemcpyAsync(void* dst,
                          const void* src,
                          size_t size,
                          IoDirection dir,
                          const stream_t& stream) {}
  static void MemsetSync(void* devPtr, int value, size_t count) {}
  static void MemsetAsync(void* devPtr,
                          int value,
                          size_t count,
                          const stream_t& stream) {}
 private:
  static int device_id_;                  // currently selected device id
  static std::map<int, void*> bm_hds_;    // device id -> runtime handle
};
} // namespace lite
} // namespace paddle
...@@ -6,7 +6,8 @@ lite_cc_library(target_wrapper SRCS target_wrapper.cc ...@@ -6,7 +6,8 @@ lite_cc_library(target_wrapper SRCS target_wrapper.cc
X86_DEPS target_wrapper_x86 X86_DEPS target_wrapper_x86
CUDA_DEPS target_wrapper_cuda CUDA_DEPS target_wrapper_cuda
CL_DEPS cl_target_wrapper CL_DEPS cl_target_wrapper
FPGA_DEPS fpga_target_wrapper) FPGA_DEPS fpga_target_wrapper
BM_DEPS target_wrapper_bm)
lite_cc_library(memory SRCS memory.cc DEPS target_wrapper CL_DEPS cl_target_wrapper) lite_cc_library(memory SRCS memory.cc DEPS target_wrapper CL_DEPS cl_target_wrapper)
......
...@@ -6,5 +6,5 @@ endif() ...@@ -6,5 +6,5 @@ endif()
lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest) lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest)
if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM)) if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${bm_kernels} ${npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif() endif()
...@@ -55,6 +55,7 @@ using NPUContext = Context<TargetType::kNPU>; ...@@ -55,6 +55,7 @@ using NPUContext = Context<TargetType::kNPU>;
using XPUContext = Context<TargetType::kXPU>; using XPUContext = Context<TargetType::kXPU>;
using OpenCLContext = Context<TargetType::kOpenCL>; using OpenCLContext = Context<TargetType::kOpenCL>;
using FPGAContext = Context<TargetType::kFPGA>; using FPGAContext = Context<TargetType::kFPGA>;
using BMContext = Context<TargetType::kBM>;
template <> template <>
class Context<TargetType::kHost> { class Context<TargetType::kHost> {
...@@ -82,6 +83,23 @@ class Context<TargetType::kNPU> { ...@@ -82,6 +83,23 @@ class Context<TargetType::kNPU> {
}; };
#endif #endif
#ifdef LITE_WITH_BM
// Device context for the Bitmain (BM) backend. Thin wrapper over
// TargetWrapperBM: it selects the active device and exposes the raw
// runtime handle; it carries no per-context state of its own.
template <>
class Context<TargetType::kBM> {
 public:
  Context() {}
  explicit Context(const BMContext& ctx);
  // NOTE: InitOnce should only be used by ContextScheduler
  void InitOnce() { Init(0); }
  // Bind this context to the BM device with id `dev_id`.
  void Init(int dev_id) { TargetWrapperBM::SetDevice(dev_id); }
  // Nothing to share: all BM state lives in the static TargetWrapperBM.
  void CopySharedTo(BMContext* ctx) {}
  // Raw handle of the BM runtime for the currently selected device.
  void* GetHandle() { return TargetWrapperBM::GetHandle(); }
  std::string name() const { return "BMContext"; }
};
#endif
#ifdef LITE_WITH_XPU #ifdef LITE_WITH_XPU
template <> template <>
class Context<TargetType::kXPU> { class Context<TargetType::kXPU> {
...@@ -374,6 +392,12 @@ class ContextScheduler { ...@@ -374,6 +392,12 @@ class ContextScheduler {
kernel_contexts_[TargetType::kFPGA].As<FPGAContext>().CopySharedTo( kernel_contexts_[TargetType::kFPGA].As<FPGAContext>().CopySharedTo(
&ctx->As<FPGAContext>()); &ctx->As<FPGAContext>());
break; break;
#endif
#ifdef LITE_WITH_BM
case TARGET(kBM):
kernel_contexts_[TargetType::kBM].As<BMContext>().CopySharedTo(
&ctx->As<BMContext>());
break;
#endif #endif
default: default:
#ifndef LITE_ON_MODEL_OPTIMIZE_TOOL #ifndef LITE_ON_MODEL_OPTIMIZE_TOOL
...@@ -412,6 +436,9 @@ class ContextScheduler { ...@@ -412,6 +436,9 @@ class ContextScheduler {
#endif #endif
#ifdef LITE_WITH_XPU #ifdef LITE_WITH_XPU
InitContext<TargetType::kXPU, XPUContext>(); InitContext<TargetType::kXPU, XPUContext>();
#endif
#ifdef LITE_WITH_BM
InitContext<TargetType::kBM, BMContext>();
#endif #endif
} }
......
...@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) { ...@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
data = TargetWrapper<TARGET(kFPGA)>::Malloc(size); data = TargetWrapper<TARGET(kFPGA)>::Malloc(size);
break; break;
#endif // LITE_WITH_OPENCL #endif // LITE_WITH_OPENCL
#ifdef LITE_WITH_BM
case TargetType::kBM:
data = TargetWrapper<TARGET(kBM)>::Malloc(size);
break;
#endif
default: default:
LOG(FATAL) << "Unknown supported target " << TargetToStr(target); LOG(FATAL) << "Unknown supported target " << TargetToStr(target);
} }
...@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) { ...@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
TargetWrapper<TARGET(kFPGA)>::Free(data); TargetWrapper<TARGET(kFPGA)>::Free(data);
break; break;
#endif // LITE_WITH_CUDA #endif // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
case TargetType::kBM:
TargetWrapper<TARGET(kBM)>::Free(data);
break;
#endif
default: default:
LOG(FATAL) << "Unknown type"; LOG(FATAL) << "Unknown type";
} }
...@@ -95,6 +105,11 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) { ...@@ -95,6 +105,11 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
dst, src, size, IoDirection::DtoD); dst, src, size, IoDirection::DtoD);
break; break;
#endif #endif
#ifdef LITE_WITH_BM
case TargetType::kBM:
TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, IoDirection::DtoD);
break;
#endif
#ifdef LITE_WITH_OPENCL #ifdef LITE_WITH_OPENCL
case TargetType::kOpenCL: case TargetType::kOpenCL:
TargetWrapperCL::MemcpySync(dst, src, size, IoDirection::DtoD); TargetWrapperCL::MemcpySync(dst, src, size, IoDirection::DtoD);
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
#include "lite/backends/cuda/target_wrapper.h" #include "lite/backends/cuda/target_wrapper.h"
#endif // LITE_WITH_CUDA #endif // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
#include "lite/backends/bm/target_wrapper.h"
#endif // LITE_WITH_BM
namespace paddle { namespace paddle {
namespace lite { namespace lite {
...@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) { ...@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
case TARGET(kFPGA): case TARGET(kFPGA):
TargetWrapper<TARGET(kFPGA)>::MemcpySync(dst, src, size, dir); TargetWrapper<TARGET(kFPGA)>::MemcpySync(dst, src, size, dir);
break; break;
#endif
#ifdef LITE_WITH_BM
case TARGET(kBM):
TargetWrapper<TARGET(kBM)>::MemcpySync(dst, src, size, dir);
break;
#endif #endif
} }
} }
......
...@@ -46,4 +46,4 @@ void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -46,4 +46,4 @@ void ConvBNFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
REGISTER_MIR_PASS(lite_conv_bn_fuse_pass, paddle::lite::mir::ConvBNFusePass) REGISTER_MIR_PASS(lite_conv_bn_fuse_pass, paddle::lite::mir::ConvBNFusePass)
.BindTargets({TARGET(kAny)}) .BindTargets({TARGET(kAny)})
.ExcludeTargets({TARGET(kX86), TARGET(kXPU)}); .ExcludeTargets({TARGET(kX86), TARGET(kXPU), TARGET(kBM)});
...@@ -47,4 +47,4 @@ void ConvElementwiseFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -47,4 +47,4 @@ void ConvElementwiseFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
REGISTER_MIR_PASS(lite_conv_elementwise_fuse_pass, REGISTER_MIR_PASS(lite_conv_elementwise_fuse_pass,
paddle::lite::mir::ConvElementwiseFusePass) paddle::lite::mir::ConvElementwiseFusePass)
.BindTargets({TARGET(kAny)}) .BindTargets({TARGET(kAny)})
.ExcludeTargets({TARGET(kXPU)}); .ExcludeTargets({TARGET(kXPU), TARGET(kBM)});
...@@ -36,5 +36,6 @@ REGISTER_MIR_PASS(lite_elementwise_add_activation_fuse_pass, ...@@ -36,5 +36,6 @@ REGISTER_MIR_PASS(lite_elementwise_add_activation_fuse_pass,
paddle::lite::mir::ElementwiseAddActivationFusePass) paddle::lite::mir::ElementwiseAddActivationFusePass)
.BindTargets({TARGET(kAny)}) .BindTargets({TARGET(kAny)})
.ExcludeTargets({TARGET(kXPU)}) .ExcludeTargets({TARGET(kXPU)})
.ExcludeTargets({TARGET(kBM)})
.ExcludeTargets({TARGET(kX86)}) .ExcludeTargets({TARGET(kX86)})
.BindKernel("fusion_elementwise_add_activation"); .BindKernel("fusion_elementwise_add_activation");
...@@ -39,5 +39,6 @@ void FcFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -39,5 +39,6 @@ void FcFusePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
REGISTER_MIR_PASS(lite_fc_fuse_pass, paddle::lite::mir::FcFusePass) REGISTER_MIR_PASS(lite_fc_fuse_pass, paddle::lite::mir::FcFusePass)
.BindTargets({TARGET(kAny)}) .BindTargets({TARGET(kAny)})
.ExcludeTargets({TARGET(kXPU)}) .ExcludeTargets({TARGET(kXPU)})
.ExcludeTargets({TARGET(kBM)})
.ExcludeTargets({TARGET(kCUDA)}) .ExcludeTargets({TARGET(kCUDA)})
.BindKernel("fc"); .BindKernel("fc");
...@@ -256,4 +256,4 @@ void MemoryOptimizePass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -256,4 +256,4 @@ void MemoryOptimizePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
REGISTER_MIR_PASS(memory_optimize_pass, paddle::lite::mir::MemoryOptimizePass) REGISTER_MIR_PASS(memory_optimize_pass, paddle::lite::mir::MemoryOptimizePass)
.BindTargets({TARGET(kARM)}) .BindTargets({TARGET(kARM)})
.ExcludeTargets({TARGET(kOpenCL), TARGET(kNPU), TARGET(kXPU)}); .ExcludeTargets({TARGET(kOpenCL), TARGET(kNPU), TARGET(kXPU), TARGET(kBM)});
...@@ -53,6 +53,20 @@ void XPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) { ...@@ -53,6 +53,20 @@ void XPUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fuser(); fuser();
} }
// Fuses runs of ops that have a registered BM bridge into subgraph ops so
// each fused region can be compiled and executed on the BM backend.
void BMSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
  std::unordered_set<std::string> supported_lists;
// Re-purpose the registration macro: expanding paddle_use_bridges.h under
// this definition collects every op type with a BM bridge into the set.
#define USE_SUBGRAPH_BRIDGE(op_type, target) supported_lists.insert(#op_type);
#include "lite/kernels/bm/bridges/paddle_use_bridges.h"
#undef USE_SUBGRAPH_BRIDGE
  // A statement node is fusable iff its op type has a registered BM bridge.
  auto teller = [&](Node* node) {
    if (!node->IsStmt()) return false;
    auto& stmt = node->AsStmt();
    return supported_lists.count(stmt.op_type()) != 0;
  };
  // min_subgraph_size = 1: even a single supported op becomes a subgraph.
  SubgraphFuser fuser(graph.get(), teller, 1 /* min_subgraph_size */);
  fuser();
}
} // namespace mir } // namespace mir
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -61,3 +75,5 @@ REGISTER_MIR_PASS(npu_subgraph_pass, paddle::lite::mir::NPUSubgraphPass) ...@@ -61,3 +75,5 @@ REGISTER_MIR_PASS(npu_subgraph_pass, paddle::lite::mir::NPUSubgraphPass)
.BindTargets({TARGET(kNPU)}); .BindTargets({TARGET(kNPU)});
REGISTER_MIR_PASS(xpu_subgraph_pass, paddle::lite::mir::XPUSubgraphPass) REGISTER_MIR_PASS(xpu_subgraph_pass, paddle::lite::mir::XPUSubgraphPass)
.BindTargets({TARGET(kXPU)}); .BindTargets({TARGET(kXPU)});
REGISTER_MIR_PASS(bm_subgraph_pass, paddle::lite::mir::BMSubgraphPass)
.BindTargets({TARGET(kBM)});
...@@ -32,6 +32,11 @@ class XPUSubgraphPass : public ProgramPass { ...@@ -32,6 +32,11 @@ class XPUSubgraphPass : public ProgramPass {
void Apply(const std::unique_ptr<SSAGraph>& graph) override; void Apply(const std::unique_ptr<SSAGraph>& graph) override;
}; };
// Partitions the SSA graph into subgraphs runnable on the Bitmain (BM)
// backend; op support is determined by the registered BM bridges.
class BMSubgraphPass : public ProgramPass {
 public:
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;
};
} // namespace mir } // namespace mir
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -100,6 +100,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create( ...@@ -100,6 +100,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
case TARGET(kFPGA): { case TARGET(kFPGA): {
CREATE_KERNEL(kFPGA); CREATE_KERNEL(kFPGA);
} break; } break;
case TARGET(kBM): {
CREATE_KERNEL(kBM);
} break;
default: default:
CHECK(false) << "not supported kernel target " << TargetToStr(target); CHECK(false) << "not supported kernel target " << TargetToStr(target);
} }
...@@ -186,6 +189,11 @@ KernelRegistry::KernelRegistry() ...@@ -186,6 +189,11 @@ KernelRegistry::KernelRegistry()
INIT_FOR(kFPGA, kFloat, kNHWC); INIT_FOR(kFPGA, kFloat, kNHWC);
INIT_FOR(kFPGA, kAny, kNHWC); INIT_FOR(kFPGA, kAny, kNHWC);
INIT_FOR(kFPGA, kAny, kAny); INIT_FOR(kFPGA, kAny, kAny);
INIT_FOR(kBM, kFloat, kNCHW);
INIT_FOR(kBM, kInt8, kNCHW);
INIT_FOR(kBM, kAny, kNCHW);
INIT_FOR(kBM, kAny, kAny);
#undef INIT_FOR #undef INIT_FOR
} }
......
...@@ -230,6 +230,16 @@ class KernelRegistry final { ...@@ -230,6 +230,16 @@ class KernelRegistry final {
PRECISION(kInt8), PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, // DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kAny),
DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kBM),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kFPGA), KernelRegistryForTarget<TARGET(kFPGA),
PRECISION(kFloat), PRECISION(kFloat),
DATALAYOUT(kNCHW)> *, // DATALAYOUT(kNCHW)> *, //
......
...@@ -10,3 +10,4 @@ add_subdirectory(opencl) ...@@ -10,3 +10,4 @@ add_subdirectory(opencl)
add_subdirectory(fpga) add_subdirectory(fpga)
add_subdirectory(npu) add_subdirectory(npu)
add_subdirectory(xpu) add_subdirectory(xpu)
add_subdirectory(bm)
# BM kernels are built only when the Bitmain backend is enabled; otherwise
# skip this whole directory (including the bridges subdirectory).
if(NOT LITE_WITH_BM)
  return()
endif()

add_subdirectory(bridges)

# The BM subgraph kernel drives the bridges collected in bm_subgraph_bridges.
add_kernel(subgraph_compute_bm BM basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} ${bm_subgraph_bridges})
# BM op bridges are built only when the Bitmain backend is enabled.
if(NOT LITE_WITH_BM)
  return()
endif()

lite_cc_library(subgraph_bridge_utility_bm SRCS utility.cc DEPS)
lite_cc_library(subgraph_bridge_graph_bm SRCS graph.cc DEPS subgraph_bridge_utility_bm)

# Common dependencies shared by every BM op bridge below.
set(bm_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_bm subgraph_bridge_graph_bm)

lite_cc_library(subgraph_bridge_act_op_bm SRCS act_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_conv_op_bm SRCS conv_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_elementwise_ops_bm SRCS elementwise_ops.cc DEPS ${bm_subgraph_bridge_deps})
# Fix: these two previously expanded the undefined variable
# `subgraph_bridge_deps_bm`, silently dropping their dependencies.
lite_cc_library(subgraph_bridge_pool_op_bm SRCS pool_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_softmax_op_bm SRCS softmax_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_mul_op_bm SRCS mul_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_batch_norm_op_bm SRCS batch_norm_op.cc DEPS ${bm_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_scale_op_bm SRCS scale_op.cc DEPS ${bm_subgraph_bridge_deps})

# Full list of BM bridge targets, consumed by the parent directory's
# subgraph_compute_bm kernel. Cached INTERNAL so sibling scopes can read it.
set(bm_subgraph_bridges
    subgraph_bridge_registry
    subgraph_bridge_engine
    subgraph_bridge_graph_bm
    subgraph_bridge_act_op_bm
    subgraph_bridge_conv_op_bm
    subgraph_bridge_elementwise_ops_bm
    subgraph_bridge_pool_op_bm
    subgraph_bridge_softmax_op_bm
    subgraph_bridge_mul_op_bm
    subgraph_bridge_batch_norm_op_bm
    subgraph_bridge_scale_op_bm
    CACHE INTERNAL "bm_subgraph_bridges")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts a Paddle activation op (currently only "relu") into a BM
// compiler relu layer. `ctx` is the conversion Graph, `op` the op being
// converted; `kernel` is unused here. Returns SUCCESS when done.
int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  // Input tensor "X" and its shape.
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  // Output tensor "Out" and its shape.
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
  // The bmcompiler API takes int32 shapes; narrow the int64 dims.
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
  }
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  // Only relu is registered for this converter (see the bridge registration
  // at the bottom of this file).
  CHECK_EQ(op_type, "relu");
  // Trailing 0.f / -1.f — presumably negative-slope and upper-limit for a
  // plain (unclipped) relu; confirm against bmcompiler_if.h.
  add_relu_layer(graph->GetCompilerHandle(),
                 const_cast<const int*>(&i_x_shape_data[0]),
                 x_dims.size(),
                 static_cast<const char*>(x_var_name.c_str()),
                 const_cast<const int*>(&i_output_shape_data[0]),
                 output_dims.size(),
                 static_cast<const char*>(output_var_name.c_str()),
                 0.f,
                 -1.f);
  // Record the output so later converters know it is produced in-graph.
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(relu, kBM, paddle::lite::subgraph::bm::ActConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts a Paddle batch_norm op into a BM compiler scale layer by folding
// mean/variance/epsilon into per-channel scale and bias coefficients.
// Returns SUCCESS when done; `kernel` is unused.
int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
  // input
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
  // bmcompiler takes int32 shapes; narrow the int64 dims.
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
  }
  // Channel axis is dim 1 — assumes NCHW input layout.
  int channel_size = x_dims[1];
  auto scale_var_name = op_info->Input("Scale").front();
  auto scale = scope->FindVar(scale_var_name)->GetMutable<lite::Tensor>();
  auto bias_var_name = op_info->Input("Bias").front();
  auto bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
  auto mean_var_name = op_info->Input("Mean").front();
  auto mean = scope->FindVar(mean_var_name)->GetMutable<lite::Tensor>();
  auto variance_var_name = op_info->Input("Variance").front();
  auto variance = scope->FindVar(variance_var_name)->GetMutable<lite::Tensor>();
  // output
  auto output_var_name = op_info->Output("Y").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  auto epsilon = op_info->GetAttr<float>("epsilon");
  auto unique_bn_out_name = lite::subgraph::bm::UniqueName("batch_norm_out");
  auto* scale_data = scale->mutable_data<float>();
  auto* bias_data = bias->mutable_data<float>();
  auto* mean_data = mean->mutable_data<float>();
  auto* variance_data = variance->mutable_data<float>();
  // Fold BN into y = scale' * x + bias':
  //   scale' = scale / sqrt(var + eps)
  //   bias'  = bias - scale' * mean
  // NOTE(review): this mutates the Scale/Bias weight tensors IN PLACE; if
  // this converter ever ran twice on the same scope the weights would be
  // double-folded — confirm converters run exactly once per op.
  for (int c = 0; c < channel_size; c++) {
    float inv_scale = 1.f / (std::sqrt(variance_data[c] + epsilon));
    bias_data[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
    scale_data[c] = inv_scale * scale_data[c];
  }
  // add_scale_layer takes arrays of per-input shapes/dims/names; batch_norm
  // has a single data input.
  const int input_num = 1;
  int** shape = new int*[input_num];
  int* dim = new int[input_num];
  const char** name = new const char*[input_num];
  name[0] = static_cast<const char*>(x_var_name.c_str());
  dim[0] = x_dims.size();
  shape[0] = &i_x_shape_data[0];
  // Trailing 1, 1, 1 flags — semantics defined by bmcompiler_if.h
  // (presumably axis/has_scale/has_bias); confirm against the header.
  add_scale_layer(graph->GetCompilerHandle(),
                  input_num,
                  shape,
                  dim,
                  name,
                  const_cast<const int*>(&i_output_shape_data[0]),
                  output_dims.size(),
                  static_cast<const char*>(output_var_name.c_str()),
                  static_cast<const char*>(unique_op_name.c_str()),
                  static_cast<const float*>(scale->mutable_data<float>()),
                  static_cast<const float*>(bias->mutable_data<float>()),
                  1,
                  1,
                  1);
  delete[] shape;
  delete[] name;
  delete[] dim;
  // Record the output so later converters know it is produced in-graph.
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(batch_norm,
kBM,
paddle::lite::subgraph::bm::BatchNormConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/conv_op.h"
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts a Paddle conv2d op into a BM compiler convolution layer.
// Supports an optional Bias input. Returns SUCCESS; `kernel` is unused.
int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
  // Input / output / filter tensors and their shapes.
  auto input_var_name = op_info->Input("Input").front();
  auto input = scope->FindVar(input_var_name)->GetMutable<lite::Tensor>();
  auto input_dims = input->dims();
  auto output_var_name = op_info->Output("Output").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  auto filter_var_name = op_info->Input("Filter").front();
  auto filter = scope->FindVar(filter_var_name)->GetMutable<lite::Tensor>();
  auto filter_dims = filter->dims();
  // Only plain 4-D (NCHW / OIHW) conv2d is handled.
  CHECK_EQ(input_dims.size(), 4);
  CHECK_EQ(output_dims.size(), 4);
  CHECK_EQ(filter_dims.size(), 4);
  bool has_bias = lite::subgraph::bm::HasInputArg(op_info, scope, "Bias");
  float* bias_data = nullptr;
  if (has_bias) {
    auto bias_var_name = op_info->Input("Bias").front();
    auto* bias = scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
    bias_data = static_cast<float*>(bias->mutable_data<float>());
  }
  const int64_t* input_shape_data =
      const_cast<const int64_t*>(&input_dims.data()[0]);
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
  // bmcompiler takes int32 shapes; narrow the int64 dims.
  std::vector<int32_t> i_input_shape_data(input_dims.size());
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < input_dims.size(); i++) {
    i_input_shape_data[i] = static_cast<int>(input_shape_data[i]);
  }
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  const float* filter_data =
      const_cast<const float*>(filter->mutable_data<float>());
  auto groups = op_info->GetAttr<int>("groups");
  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
  // filter_dims[2]/[3] are kernel height/width (OIHW layout).
  // NOTE(review): paddings[0]/paddings[1] are each passed twice, i.e. this
  // assumes symmetric [pad_h, pad_w] padding — a 4-element asymmetric
  // paddings attribute would be mishandled; confirm upstream guarantees.
  add_conv_layer(graph->GetCompilerHandle(),
                 const_cast<const int*>(&i_input_shape_data[0]),
                 input_dims.size(),
                 static_cast<const char*>(input_var_name.c_str()),
                 const_cast<const int*>(&i_output_shape_data[0]),
                 output_dims.size(),
                 static_cast<const char*>(output_var_name.c_str()),
                 static_cast<const char*>(unique_op_name.c_str()),
                 filter_data,
                 bias_data,
                 filter_dims.data()[2],
                 filter_dims.data()[3],
                 groups,
                 paddings[0],
                 paddings[0],
                 paddings[1],
                 paddings[1],
                 strides[0],
                 strides[1],
                 dilations[0],
                 dilations[1],
                 static_cast<int>(has_bias));
  // Record the output so later converters know it is produced in-graph.
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(conv2d,
kBM,
paddle::lite::subgraph::bm::ConvConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include <bmcompiler_if_lite.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts a Paddle elementwise op (add/sub/mul) into a BM compiler layer.
// When Y is a graph-produced tensor an eltwise layer is emitted; when Y is
// a constant (weight) tensor, a const-tensor + binary layer is emitted
// (only elementwise_add supported in that case). Returns SUCCESS.
int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  // input
  // add_eltwise_layer takes parallel arrays of shapes/dims/names, one entry
  // per input; elementwise ops always have exactly two inputs (X, Y).
  const int input_num = 2;
  int** shape = new int*[input_num];
  int* dim = new int[input_num];
  const char** name = new const char*[input_num];
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  name[0] = static_cast<const char*>(x_var_name.c_str());
  dim[0] = x_dims.size();
  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
  // bmcompiler takes int32 shapes; narrow the int64 dims.
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
  }
  shape[0] = &i_x_shape_data[0];
  auto y_var_name = op_info->Input("Y").front();
  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
  auto y_dims = y->dims();
  name[1] = static_cast<const char*>(y_var_name.c_str());
  dim[1] = y_dims.size();
  const int64_t* y_shape_data = const_cast<const int64_t*>(&y_dims.data()[0]);
  std::vector<int32_t> i_y_shape_data(y_dims.size());
  for (size_t i = 0; i < y_dims.size(); i++) {
    i_y_shape_data[i] = static_cast<int>(y_shape_data[i]);
  }
  shape[1] = &i_y_shape_data[0];
  // Y is constant iff it was not produced by an earlier converted op.
  bool y_is_const = !graph->HasNode(y_var_name);
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  // The const-Y path below only implements addition.
  if (y_is_const) {
    CHECK_EQ(op_type, "elementwise_add");
  }
  // Map op type onto bmcompiler eltwise op codes: 0 = mul, 1 = add;
  // subtraction is expressed as add with coeff[1] = -1.
  int op_code{-1};
  float coeff[2] = {1.f, 1.f};
  if (op_type == "elementwise_mul") {
    op_code = 0;
  } else if (op_type == "elementwise_add") {
    op_code = 1;
  } else if (op_type == "elementwise_sub") {
    op_code = 1;
    coeff[1] = -1.f;
  } else {
    LOG(FATAL) << "UNSUPPORTED ELTWISE OPERATION: " << op_type;
  }
  if (!y_is_const) {
    add_eltwise_layer(graph->GetCompilerHandle(),
                      input_num,
                      shape,
                      dim,
                      name,
                      const_cast<const int*>(&i_output_shape_data[0]),
                      output_dims.size(),
                      static_cast<const char*>(output_var_name.c_str()),
                      op_code,
                      coeff);
  } else {
    const float* y_data = const_cast<const float*>(y->mutable_data<float>());
    const float* x_data = const_cast<const float*>(x->mutable_data<float>());
    // NOTE(review): both calls below pass shape[0]/dim[0] (X's shape) for
    // the Y operand instead of shape[1]/dim[1] — presumably Y must match
    // X's shape here, but this looks suspicious; confirm against
    // bmcompiler_if_lite.h and the broadcast cases.
    bm_add_const_tensor(graph->GetCompilerHandle(),
                        name[1],
                        shape[0],
                        dim[0],
                        static_cast<bm_data_type_t>(DTYPE_FP32),
                        static_cast<const void*>(y_data));
    add_binary_layer_v2(graph->GetCompilerHandle(),
                        name[0],
                        shape[0],
                        dim[0],
                        0,
                        static_cast<const float*>(x_data),
                        name[1],
                        shape[0],
                        dim[0],
                        0,
                        static_cast<const float*>(y_data),
                        static_cast<const char*>(output_var_name.c_str()),
                        0);
  }
  delete[] shape;
  delete[] name;
  delete[] dim;
  // Record the output so later converters know it is produced in-graph.
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(elementwise_add,
kBM,
paddle::lite::subgraph::bm::ElementwiseConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/graph.h"
#include <bmcompiler_if.h>
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Record `name` as a tensor produced inside the converted BM subgraph.
void Graph::AddNode(const std::string& name) {
  nodes_.emplace(name, name);
}
// Create a bmcompiler instance targeting the BM1684 chip; the stored handle
// is what every add_*_layer conversion call operates on.
void Graph::CreateCompilerHandle() {
  compiler_handle_ = create_bmcompiler("BM1684");
  CHECK(compiler_handle_ != nullptr);
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Graph to collect all of converted BM IR nodes
class Graph {
 public:
  // Record `name` as a tensor produced inside the BM subgraph.
  void AddNode(const std::string& name);
  // Whether `name` was produced by a previously converted op (as opposed
  // to being a constant/weight coming from outside the subgraph).
  bool HasNode(const std::string& name) {
    return nodes_.find(name) != nodes_.end();
  }
  // Create the underlying bmcompiler handle; must be called before any
  // converter uses GetCompilerHandle().
  void CreateCompilerHandle();
  // Raw bmcompiler handle; nullptr until CreateCompilerHandle() succeeds.
  void* GetCompilerHandle() { return compiler_handle_; }

 private:
  std::unordered_map<std::string, std::string> nodes_;
  // Fix: was left uninitialized, so GetCompilerHandle() read indeterminate
  // memory if queried before CreateCompilerHandle().
  void* compiler_handle_{nullptr};
};
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
// Converts a Paddle mul op (matmul with constant weights) into a BM
// compiler reshape + fully-connected layer. Returns SUCCESS.
int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
  // only support y is const
  // input
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
  // bmcompiler takes int32 shapes; narrow the int64 dims.
  std::vector<int> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
  }
  // add reshape layer
  // Flatten X to 2-D before the FC layer; reshape_param {0, -1} keeps dim 0
  // and folds the remaining dims into dim 1.
  // NOTE(review): i_x_reshape_shape_data copies x_dims[0..1] verbatim, so
  // for x with more than 2 dims the recorded width (x_dims[1]) would not
  // match the folded width — presumably only 2-D X reaches here; confirm.
  int i_x_reshape_shape_data[2];
  for (size_t i = 0; i < 2; i++) {
    i_x_reshape_shape_data[i] = static_cast<int>(x_shape_data[i]);
  }
  int reshape_param[] = {0, -1};
  auto unique_op_reshape_name =
      lite::subgraph::bm::UniqueName(op_type + "_reshape");
  add_reshape_layer(graph->GetCompilerHandle(),
                    const_cast<const int*>(&i_x_shape_data[0]),
                    x_dims.size(),
                    static_cast<const char*>(x_var_name.c_str()),
                    const_cast<const int*>(&i_x_reshape_shape_data[0]),
                    2,
                    static_cast<const char*>(unique_op_reshape_name.c_str()),
                    const_cast<const int*>(reshape_param));
  auto y_var_name = op_info->Input("Y").front();
  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
  auto y_dims = y->dims();
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
  // FC: in_features = reshaped width, out_features = output width; Y holds
  // the constant weights, no bias (nullptr, 0). Trailing flags' semantics
  // are defined by bmcompiler_if.h.
  add_fc_layer(graph->GetCompilerHandle(),
               const_cast<const int*>(&i_x_reshape_shape_data[0]),
               2,
               static_cast<const char*>(unique_op_reshape_name.c_str()),
               const_cast<const int*>(&i_output_shape_data[0]),
               output_dims.size(),
               static_cast<const char*>(output_var_name.c_str()),
               static_cast<const char*>(unique_op_name.c_str()),
               i_x_reshape_shape_data[1],
               i_output_shape_data[1],
               static_cast<const float*>(y->mutable_data<float>()),
               nullptr,
               0,
               0);
  // Record the output so later converters know it is produced in-graph.
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(mul, kBM, paddle::lite::subgraph::bm::MulConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Declares every op bridge registered for the BM (Bitmain) backend.
// Presumably these macros force-link the bridges' registration symbols
// into the final binary -- see USE_SUBGRAPH_BRIDGE in the npu registry.
USE_SUBGRAPH_BRIDGE(relu, kBM);
USE_SUBGRAPH_BRIDGE(conv2d, kBM);
USE_SUBGRAPH_BRIDGE(elementwise_add, kBM);
USE_SUBGRAPH_BRIDGE(pool2d, kBM);
USE_SUBGRAPH_BRIDGE(softmax, kBM);
USE_SUBGRAPH_BRIDGE(mul, kBM);
USE_SUBGRAPH_BRIDGE(batch_norm, kBM);
USE_SUBGRAPH_BRIDGE(scale, kBM);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Maps a paddle `pool2d` op onto the BM pooling layer.
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
  // input: widen the int64 shape to int32 for the BM compiler
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> x_shape_i32(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); ++i) {
    x_shape_i32[i] = static_cast<int32_t>(x_dims.data()[i]);
  }
  // output: the BM API takes parallel arrays of shapes / ranks / names
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  std::vector<int32_t> output_shape_i32(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); ++i) {
    output_shape_i32[i] = static_cast<int32_t>(output_dims.data()[i]);
  }
  int32_t* out_shapes[1] = {&output_shape_i32[0]};
  int32_t out_ranks[1] = {static_cast<int32_t>(output_dims.size())};
  const char* out_names[1] = {
      static_cast<const char*>(output_var_name.c_str())};
  // pooling attributes
  auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
  CHECK(pooling_type == "max" || pooling_type == "avg");
  auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  auto global_pooling = op_info->GetAttr<bool>("global_pooling");
  auto ceil_mode = op_info->GetAttr<bool>("ceil_mode");
  bool average_exclusive = false;
  if (pooling_type == "avg") {
    average_exclusive = op_info->GetAttr<bool>("exclusive");
  }
  // NOTE(review): max pooling with a 1x1 kernel is passed to the BM layer
  // as average pooling (flag 1) -- looks intentional, but confirm.
  const bool use_max_pooling =
      (ksize[0] > 1 && ksize[1] > 1) && pooling_type == "max";
  add_pooling_layer(
      graph->GetCompilerHandle(),
      const_cast<const int*>(&x_shape_i32[0]),
      x_dims.size(),
      static_cast<const char*>(x_var_name.c_str()),
      1,
      out_shapes,
      out_ranks,
      out_names,
      ksize[0],
      ksize[1],
      paddings[0],
      paddings[0],
      paddings[1],
      paddings[1],
      strides[0],
      strides[1],
      use_max_pooling ? 0 : 1,
      static_cast<int>(average_exclusive),
      static_cast<int>(global_pooling),
      static_cast<int>(ceil_mode),
      static_cast<const char*>(unique_op_name.c_str()),
      nullptr);
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(pool2d,
kBM,
paddle::lite::subgraph::bm::PoolConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Implements paddle `scale` (out = scale * x + bias) as two BM
  // const-binary layers: an elementwise multiply followed by an add.
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
  // input: widen the int64 shape to int32 for the BM compiler
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> x_shape_i32(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); ++i) {
    x_shape_i32[i] = static_cast<int32_t>(x_dims.data()[i]);
  }
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto scale = op_info->GetAttr<float>("scale");
  auto bias = op_info->GetAttr<float>("bias");
  auto bias_after_scale = op_info->GetAttr<bool>("bias_after_scale");
  // When bias is applied before scaling, scale*(x+bias) == scale*x +
  // scale*bias, so folding the scale into the bias keeps the
  // mul-then-add layering below correct.
  if (!bias_after_scale) {
    bias *= scale;
  }
  auto unique_op_scale_name = lite::subgraph::bm::UniqueName(op_type);
  // step 1: tmp = x * scale
  add_const_binary_layer(graph->GetCompilerHandle(),
                         static_cast<const char*>(x_var_name.c_str()),
                         const_cast<const int*>(&x_shape_i32[0]),
                         x_dims.size(),
                         scale,
                         static_cast<const char*>(unique_op_scale_name.c_str()),
                         BINARY_MUL,
                         0);
  // step 2: out = tmp + bias
  add_const_binary_layer(graph->GetCompilerHandle(),
                         static_cast<const char*>(unique_op_scale_name.c_str()),
                         const_cast<const int*>(&x_shape_i32[0]),
                         x_dims.size(),
                         bias,
                         static_cast<const char*>(output_var_name.c_str()),
                         BINARY_ADD,
                         0);
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(scale,
kBM,
paddle::lite::subgraph::bm::ScaleConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  // Maps paddle `softmax` (along `axis`) onto the BM softmax layer, which
  // expects the input factored as outer_num x channel x inner_num.
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  // input: widen the int64 shape to int32 for the BM compiler
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> x_shape_i32(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); ++i) {
    x_shape_i32[i] = static_cast<int32_t>(x_dims.data()[i]);
  }
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
  std::vector<int32_t> output_shape_i32(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); ++i) {
    output_shape_i32[i] = static_cast<int32_t>(output_dims.data()[i]);
  }
  // normalize a negative axis into [0, rank)
  auto axis = op_info->GetAttr<int>("axis");
  if (axis < 0) {
    axis += x_dims.size();
  }
  // factor the input shape around the softmax axis
  int outer_num = x_dims.Slice(0, axis).production();
  int inner_num = x_dims.Slice(axis + 1, x_dims.size()).production();
  add_softmax_layer(graph->GetCompilerHandle(),
                    const_cast<const int*>(&x_shape_i32[0]),
                    x_dims.size(),
                    static_cast<const char*>(x_var_name.c_str()),
                    const_cast<const int*>(&output_shape_i32[0]),
                    output_dims.size(),
                    static_cast<const char*>(output_var_name.c_str()),
                    inner_num,
                    outer_num,
                    x_dims[axis]);
  graph->AddNode(output_var_name);
  return SUCCESS;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(softmax,
kBM,
paddle::lite::subgraph::bm::SoftmaxConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/bridges/utility.h"
#include <mutex> //NOLINT
#include <unordered_map>
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
std::string UniqueName(const std::string& prefix) {
  // Produces "<prefix>_<n>", where n counts how many times this function
  // has been called with the same prefix (starting at 1). Thread-safe.
  static std::mutex counter_mtx;
  static std::unordered_map<std::string, int> counter_map;
  std::lock_guard<std::mutex> lock(counter_mtx);
  // operator[] value-initializes the counter to 0 on first use, so the
  // first name for a prefix ends in "_1", matching the original scheme.
  int& count = counter_map[prefix];
  ++count;
  return prefix + "_" + std::to_string(count);
}
bool HasInputArg(const OpInfo* op_info,
                 const Scope* scope,
                 const std::string& argname) {
  // True iff `argname` is a declared input slot of the op, the slot is
  // bound to at least one variable, and that variable exists in `scope`.
  auto arg_names = op_info->input_argnames();
  const bool declared = std::find(arg_names.begin(), arg_names.end(),
                                  argname) != arg_names.end();
  if (!declared) {
    return false;
  }
  auto inputs = op_info->Input(argname);
  if (inputs.empty()) {
    return false;
  }
  return scope->FindVar(inputs.front()) != nullptr;
}
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace bm {
std::string UniqueName(const std::string& prefix);
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
const std::string& argname);
} // namespace bm
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <string>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/paddle_use_bridges.h"
#include "lite/kernels/bm/bridges/utility.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
int SubgraphEngine::BuildDeviceProgram() {
int status = 0;
subgraph::bm::Graph graph;
const auto& bridges = subgraph::Registry::Instance();
graph.CreateCompilerHandle();
auto& ctx = this->ctx_->template As<BMContext>();
for (auto& inst : origin_program_) {
auto op = inst.op();
CHECK(op);
op->CheckShape();
op->InferShape();
std::string op_type = op->op_info()->Type();
if (!bridges.Exists(op_type, TARGET(kBM))) {
return subgraph::FAILED;
}
auto kernel = inst.kernel();
status |=
bridges.Select(op_type, TARGET(kBM))(reinterpret_cast<void*>(&graph),
const_cast<OpLite*>(op),
const_cast<KernelBase*>(kernel));
if (subgraph::CHECK_FAILED(status)) {
return subgraph::FAILED;
}
}
std::string net_name = "paddle_bitmain";
__bmcompile_opt(
graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 2);
void* bmodel_data = nullptr;
unsigned int data_size = 0;
bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
bmrt_hd_ = bmrt_create(bm_hd_);
if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
return subgraph::FAILED;
}
bmrt_get_network_names(bmrt_hd_, &net_names_);
net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
auto& stage = net_info_->stages[0];
// input
origin_idims_.resize(input_names_.size());
origin_itensors_.resize(input_names_.size());
device_inputs_.resize(input_names_.size());
for (size_t i = 0; i < input_names_.size(); i++) {
origin_itensors_[i] = scope_->FindMutableTensor(input_names_[i]);
CHECK(origin_itensors_[i]);
origin_idims_[i] = origin_itensors_[i]->dims();
bm_device_mem_t* p_mem =
static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
CHECK(p_mem != nullptr);
CHECK_EQ(bm_malloc_device_byte(
bm_hd_, p_mem, origin_itensors_[i]->memory_size()),
BM_SUCCESS);
bmrt_tensor_with_device(&device_inputs_[i],
*p_mem,
net_info_->input_dtypes[i],
stage.input_shapes[i]);
}
// output
origin_odims_.resize(output_names_.size());
origin_otensors_.resize(output_names_.size());
device_outputs_.resize(output_names_.size());
for (size_t i = 0; i < output_names_.size(); i++) {
origin_otensors_[i] = scope_->FindMutableTensor(output_names_[i]);
CHECK(origin_otensors_[i]);
origin_odims_[i] = origin_otensors_[i]->dims();
output_map_.insert(std::pair<std::string, int>(output_names_[i], i));
origin_otensors_[i]->mutable_data<float>();
}
for (size_t i = 0; i < output_names_.size(); i++) {
int mapping_index = output_map_.at(net_info_->output_names[i]);
bm_device_mem_t* p_mem =
static_cast<bm_device_mem_t*>(malloc(sizeof(bm_device_mem_t)));
CHECK(p_mem != nullptr);
CHECK_EQ(bm_malloc_device_byte(
bm_hd_, p_mem, origin_otensors_[mapping_index]->memory_size()),
BM_SUCCESS);
bmrt_tensor_with_device(&device_outputs_[i],
*p_mem,
net_info_->output_dtypes[i],
stage.output_shapes[i]);
}
return status;
}
// Copies inputs host->device, launches the compiled BM network, waits for
// completion, then copies outputs device->host. Always returns 0.
int SubgraphEngine::LaunchDeviceProgram() {
  // host -> device: one transfer per graph input
  for (size_t i = 0; i < device_inputs_.size(); i++) {
    bm_memcpy_s2d(bm_hd_,
                  device_inputs_[i].device_mem,
                  const_cast<void*>(origin_itensors_[i]->raw_data()));
  }
  bmrt_launch_tensor_ex(bmrt_hd_,
                        net_names_[0],
                        static_cast<const bm_tensor_t*>(&device_inputs_[0]),
                        net_info_->input_num,
                        static_cast<bm_tensor_t*>(&device_outputs_[0]),
                        net_info_->output_num,
                        true,
                        false);
  // block until the launched network has finished on the device
  bm_thread_sync(bm_hd_);
  // device -> host: one transfer per graph output
  for (size_t i = 0; i < device_outputs_.size(); i++) {
    bm_memcpy_d2s(bm_hd_,
                  const_cast<void*>(origin_otensors_[i]->raw_data()),
                  device_outputs_[i].device_mem);
  }
  return 0;
}
void SubgraphCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
engine_.reset(new SubgraphEngine(ctx_.get(),
param.sub_block_idx,
param.sub_block_desc,
param.input_data_names,
param.output_data_names,
param.scope));
CHECK(engine_);
engine_->Build();
}
// Executes the subgraph via the engine built in PrepareForRun.
void SubgraphCompute::Run() {
  CHECK(engine_);
  engine_->Launch();
}
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(subgraph,
kBM,
kFloat,
kNCHW,
paddle::lite::kernels::bm::SubgraphCompute,
def)
.BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <bmcompiler_if.h>
#include <bmruntime_interface.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/program.h"
#include "lite/core/types.h"
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
// Builds and launches a paddle sub-block on a BM (Bitmain) device via the
// bmcompiler / bmruntime APIs. Lifecycle: BuildDeviceProgram() compiles the
// block and allocates device buffers; LaunchDeviceProgram() runs it.
class SubgraphEngine : public subgraph::Engine {
 public:
  SubgraphEngine(KernelContext *ctx,
                 int block_idx,
                 cpp::BlockDesc *block_desc,
                 const std::vector<std::string> &input_names,
                 const std::vector<std::string> &output_names,
                 Scope *scope)
      : subgraph::Engine(
            ctx, block_idx, block_desc, input_names, output_names, scope) {}

 protected:
  int BuildDeviceProgram() override;
  int LaunchDeviceProgram() override;

 private:
  // BM runtime handle created by bmrt_create in BuildDeviceProgram.
  // Members are default-initialized so the engine is safe to destroy even
  // if BuildDeviceProgram was never run (the original left them garbage).
  void *bmrt_hd_{nullptr};
  std::vector<bm_tensor_t> device_inputs_;
  std::vector<bm_tensor_t> device_outputs_;
  // Maps a graph output variable name to its index in the origin tensors.
  std::map<std::string, int> output_map_;
  // Network-name array filled by bmrt_get_network_names.
  const char **net_names_{nullptr};
  const bm_net_info_t *net_info_{nullptr};
  // Device handle obtained from BMContext; not owned by this class.
  bm_handle_t bm_hd_{};
  // TODO(review): nothing releases bmrt_hd_ / net_names_ / device buffers
  // -- confirm whether a destructor calling the bmruntime free routines
  // is required to avoid leaking the runtime across engine rebuilds.
};
// BM subgraph kernel: delegates execution of the whole sub-block to an
// owned SubgraphEngine.
class SubgraphCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::SubgraphParam;
  // Creates the engine and calls its Build() once before the first Run().
  void PrepareForRun() override;
  // Launches the engine built in PrepareForRun.
  void Run() override;
  virtual ~SubgraphCompute() = default;
 private:
  // Owned engine; constructed in PrepareForRun.
  std::unique_ptr<SubgraphEngine> engine_;
};
} // namespace bm
} // namespace kernels
} // namespace lite
} // namespace paddle
if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XPU) if(NOT LITE_WITH_NPU AND NOT LITE_WITH_XPU AND NOT LITE_WITH_BM)
return() return()
endif() endif()
......
if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH_ARM)) if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
...@@ -36,36 +36,36 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH ...@@ -36,36 +36,36 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
if(LITE_BUILD_EXTRA) if(LITE_BUILD_EXTRA)
lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS ${bm_kernels} arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# NOTE(review): the scraped diff fused old+new versions of each line; keep the
# committed (new) side, which adds ${bm_kernels} to every test's DEPS list.
lite_cc_test(test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
# NOTE(review): the scraped diff fused old+new versions of each line; keep the
# committed (new) side, which adds ${bm_kernels} to every test's DEPS list.
lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_negative_compute SRCS negative_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_bilinear_interp_compute SRCS bilinear_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${bm_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_slice_compute SRCS slice_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${bm_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
#!/bin/bash
# Build helper for the Bitmain (BM) backend of Paddle-Lite.
# -e: abort on first failing command; -x: trace commands for CI logs.
set -ex
# Global configuration with defaults; overridable via the flags parsed in main().
BM_SDK_ROOT="$(pwd)/../BM_SDK" # BM SDK install directory (--bm_sdk_root=)
TARGET_NAME="BM1682" # default target chip (--target_name=)
BUILD_EXTRA=OFF # ON(with sequence ops)/OFF
WITH_TESTING=ON # ON/OFF
# Print the command-line help text for this script.
function print_usage {
    cat <<'USAGE_EOF'

USAGE:

----------------------------------------
--bm_sdk_root=<bm sdk directory>
--target_name=<target name>
----------------------------------------

USAGE_EOF
}
# readonly variables with default value
readonly CMAKE_COMMON_OPTIONS="-DWITH_LITE=ON \
                            -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF \
                            -DWITH_PYTHON=OFF \
                            -DLITE_WITH_ARM=OFF"
# FIX: the variable name was garbled ("LITE_BUILD_THRLITE_BUILD_THREADSEADS"),
# so the LITE_BUILD_THREADS env var was never honored and builds always ran -j1.
readonly NUM_CORES_FOR_COMPILE=${LITE_BUILD_THREADS:-1}
readonly THIRDPARTY_TAR=https://paddle-inference-dist.bj.bcebos.com/PaddleLite/third-party-05b862.tar.gz
readonly workspace=$(pwd)
# Fetch the prebuilt third-party bundle, or fall back to git submodules.
# Re-extracts when the third-party dir is missing OR the tarball is already
# present locally; otherwise syncs submodules. Downloads/extracts into the
# current working directory (callers cd into the build dir first).
function prepare_thirdparty {
    if [ -d "$workspace/third-party" ] && [ ! -f "$workspace/third-party-05b862.tar.gz" ]; then
        git submodule update --init --recursive
    else
        rm -rf "$workspace/third-party"
        if [ ! -f "$workspace/third-party-05b862.tar.gz" ]; then
            wget $THIRDPARTY_TAR
        fi
        tar xzf third-party-05b862.tar.gz
    fi
}
# For code gen, a source file is generated after a test but is depended on by
# some cmake targets; fake an empty file here so the first configure succeeds.
function prepare_workspace {
    # Runs from inside the build directory.
    # 1. placeholder for the generated-code source
    GEN_CODE_PATH_PREFIX=lite/gen_code
    mkdir -p "./${GEN_CODE_PATH_PREFIX}"
    touch "./${GEN_CODE_PATH_PREFIX}/__generated_code__.cc"
    # 2. copy the debug tooling next to the build tree
    DEBUG_TOOL_PATH_PREFIX=lite/tools/debug
    mkdir -p "./${DEBUG_TOOL_PATH_PREFIX}"
    cp "../${DEBUG_TOOL_PATH_PREFIX}/analysis_tool.py" "./${DEBUG_TOOL_PATH_PREFIX}/"
    # fetch third-party deps (tarball or git submodules)
    prepare_thirdparty
}
# Configure and build Paddle-Lite with the BM backend enabled.
# Reads: workspace, CMAKE_COMMON_OPTIONS, WITH_TESTING, BM_SDK_ROOT,
# NUM_CORES_FOR_COMPILE. Builds in ${workspace}/build.lite.bm.
function build_bm {
    local build_dir="${workspace}/build.lite.bm"
    mkdir -p "$build_dir"
    cd "$build_dir"
    prepare_workspace
    # CMAKE_COMMON_OPTIONS is intentionally unquoted: it is a space-separated
    # list of -D flags that must word-split into separate arguments.
    cmake .. \
        ${CMAKE_COMMON_OPTIONS} \
        -DWITH_GPU=OFF \
        -DWITH_MKLDNN=OFF \
        -DLITE_WITH_X86=ON \
        -DWITH_MKL=ON \
        -DLITE_BUILD_EXTRA=ON \
        -DLITE_WITH_XPU=OFF \
        -DLITE_WITH_BM=ON \
        -DWITH_TESTING=${WITH_TESTING} \
        -DBM_SDK_ROOT=${BM_SDK_ROOT}
    make -j$NUM_CORES_FOR_COMPILE
    cd -
    echo "Done"
}
# Parse command-line arguments and dispatch.
#   --target_name=<name>   select the BM target chip (stored, used by cmake)
#   --bm_sdk_root=<dir>    point at the BM SDK installation
#   bm                     run the build
# Any other argument prints usage and exits non-zero.
function main {
    local arg
    for arg in "$@"; do
        case "$arg" in
            --target_name=*)
                TARGET_NAME="${arg#*=}"
                shift
                ;;
            --bm_sdk_root=*)
                BM_SDK_ROOT="${arg#*=}"
                shift
                ;;
            bm)
                build_bm
                shift
                ;;
            *)
                # unknown option
                print_usage
                exit 1
                ;;
        esac
    done
}
main $@
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册