Commit d77b32fb authored by Jiansong Wang

NNA integration on develop branch

1. Remove a paragraph at the end of lite/CMakeLists.txt; otherwise the build fails on the original develop branch on Ubuntu 18.04.
2. Add NNA support; the light library builds successfully.
3. Full build: the library builds, but apps such as benchmark_bin still fail.
Parent 1a6880d6
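For context, a minimal sketch (not part of this commit) of how an application could request the new target once the library is built with LITE_WITH_NNA=ON. It assumes the existing Paddle-Lite C++ API (paddle_api.h, CxxConfig, set_valid_places, CreatePaddlePredictor) and a placeholder model path; the kNNA places mirror the ones added to the opt tool in this diff.

#include "paddle_api.h"  // existing Paddle-Lite C++ API header (assumed entry point)

int main() {
  using namespace paddle::lite_api;  // NOLINT
  CxxConfig config;
  config.set_model_dir("./model_dir");  // placeholder model path
  // Prefer the new Imagination NNA target, falling back to ARM.
  config.set_valid_places({
      Place{TARGET(kNNA), PRECISION(kInt8), DATALAYOUT(kNCHW)},
      Place{TARGET(kARM), PRECISION(kFloat)},
  });
  auto predictor = CreatePaddlePredictor<CxxConfig>(config);
  // Feed inputs and read outputs through the predictor as usual.
  return 0;
}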
......@@ -87,6 +87,7 @@ lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF)
lite_option(LITE_WITH_RKNPU "Enable RKNPU in lite mode" OFF)
lite_option(LITE_WITH_MLU "Enable MLU in lite mode" OFF)
lite_option(LITE_WITH_HUAWEI_ASCEND_NPU "Enable HUAWEI_ASCEND_NPU in lite mode" OFF)
lite_option(LITE_WITH_NNA "Enable Imagination NNA in lite mode" OFF)
lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF)
lite_option(LITE_WITH_XTCL "Enable XPU via XTCL" OFF IF LITE_WITH_XPU)
lite_option(LITE_WITH_BM "Enable BM in lite mode" OFF)
......@@ -171,6 +172,10 @@ if(LITE_WITH_RKNPU)
include(device/rknpu)
endif()
if(LITE_WITH_NNA)
include(device/nna)
endif()
include(external/flatbuffers)
# for mobile
......
......@@ -175,6 +175,10 @@ if (LITE_WITH_MLU)
add_definitions("-DLITE_WITH_MLU")
endif()
if (LITE_WITH_NNA)
add_definitions("-DLITE_WITH_NNA")
endif()
if (LITE_WITH_HUAWEI_ASCEND_NPU)
add_definitions("-DLITE_WITH_HUAWEI_ASCEND_NPU")
endif()
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LITE_WITH_NNA)
return()
endif()
if(NOT DEFINED IMGNNA_DDK_ROOT)
set(IMGNNA_DDK_ROOT $ENV{IMGNNA_DDK_ROOT})
if(NOT IMGNNA_DDK_ROOT)
message(FATAL_ERROR "Must set IMGNNA_DDK_ROOT or env IMGNNA_DDK_ROOT when LITE_WITH_IMGNNA=ON")
endif()
endif()
message(STATUS "IMGNNA_DDK_ROOT: ${IMGNNA_DDK_ROOT}")
find_path(IMGNNA_DDK_INC NAMES imgdnn.h
PATHS ${IMGNNA_DDK_ROOT}/include/imgdnn NO_DEFAULT_PATH)
if(NOT IMGNNA_DDK_INC)
message(FATAL_ERROR "Can not find imgdnn.h in ${IMGNNA_DDK_ROOT}/include")
endif()
#include_directories("${IMGNNA_DDK_ROOT}/include")
include_directories(${IMGNNA_DDK_INC})
#set(IMGNNA_SUB_LIB_PATH "lib64")
#if(ARM_TARGET_ARCH_ABI STREQUAL "armv8")
# set(IMGNNA_SUB_LIB_PATH "lib64")
#endif()
#if(ARM_TARGET_ARCH_ABI STREQUAL "armv7")
# set(IMGNNA_SUB_LIB_PATH "lib")
#endif()
set(IMGNNA_LIB_PATH "lib")
find_library(IMGNNA_DDK_IMGDNN_FILE NAMES imgdnn
PATHS ${IMGNNA_DDK_ROOT}/${IMGNNA_LIB_PATH})
if(NOT IMGNNA_DDK_IMGDNN_FILE)
message(FATAL_ERROR "Can not find IMGNNA_DDK_IMGDNN_FILE in ${IMGNNA_DDK_ROOT}")
else()
message(STATUS "Found IMGNNA_DDK IMGDNN Library: ${IMGNNA_DDK_IMGDNN_FILE}")
add_library(nna_ddk_imgdnn SHARED IMPORTED GLOBAL)
set_property(TARGET nna_ddk_imgdnn PROPERTY IMPORTED_LOCATION ${IMGNNA_DDK_IMGDNN_FILE})
endif()
find_library(IMGNNA_DDK_RUNTIME_FILE NAMES nnasession
PATHS ${IMGNNA_DDK_ROOT}/${IMGNNA_LIB_PATH})
if(NOT IMGNNA_DDK_RUNTIME_FILE)
message(FATAL_ERROR "Can not find IMGNNA_DDK_RUNTIME_FILE in ${IMGNNA_DDK_ROOT}")
else()
message(STATUS "Found IMGNNA_DDK RUNTIME Library: ${IMGNNA_DDK_RUNTIME_FILE}")
add_library(nna_ddk_runtime SHARED IMPORTED GLOBAL)
set_property(TARGET nna_ddk_runtime PROPERTY IMPORTED_LOCATION ${IMGNNA_DDK_RUNTIME_FILE})
endif()
set(nna_runtime_libs nna_ddk_runtime CACHE INTERNAL "imgnna ddk runtime libs")
set(nna_builder_libs nna_ddk_imgdnn CACHE INTERNAL "imgnna ddk builder libs")
......@@ -118,6 +118,12 @@ function (lite_deps TARGET)
endforeach(var)
endif()
if (LITE_WITH_NNA)
foreach(var ${lite_deps_NNA_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
if (LITE_WITH_HUAWEI_ASCEND_NPU)
foreach(var ${lite_deps_HUAWEI_ASCEND_NPU_DEPS})
set(deps ${deps} ${var})
......@@ -149,7 +155,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS CV_DEPS PROFILE_DEPS LIGHT_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NNA_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS CV_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -160,6 +166,7 @@ function(lite_cc_library TARGET)
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
BM_DEPS ${args_BM_DEPS}
NNA_DEPS ${args_NNA_DEPS}
RKNPU_DEPS ${args_RKNPU_DEPS}
ARM_DEPS ${args_ARM_DEPS}
CV_DEPS ${args_CV_DEPS}
......@@ -200,7 +207,7 @@ function(lite_cc_binary TARGET)
set(options " -g ")
endif()
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS RKNPU NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NNA_DEPS RKNPU NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -217,6 +224,7 @@ function(lite_cc_binary TARGET)
XPU_DEPS ${args_XPU_DEPS}
RKNPU_DEPS ${args_RKNPU_DEPS}
BM_DEPS ${args_BM_DEPS}
NNA_DEPS ${args_NNA_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -254,7 +262,7 @@ function(lite_cc_test TARGET)
endif()
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NNA_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS
ARGS
COMPILE_LEVEL # (basic|extra)
......@@ -279,6 +287,7 @@ function(lite_cc_test TARGET)
XPU_DEPS ${args_XPU_DEPS}
RKNPU_DEPS ${args_RKNPU_DEPS}
BM_DEPS ${args_BM_DEPS}
NNA_DEPS ${args_NNA_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......@@ -315,6 +324,7 @@ set(xpu_kernels CACHE INTERNAL "xpu kernels")
set(mlu_kernels CACHE INTERNAL "mlu kernels")
set(huawei_ascend_npu_kernels CACHE INTERNAL "huawei_ascend_npu kernels")
set(bm_kernels CACHE INTERNAL "bm kernels")
set(nna_kernels CACHE INTERNAL "nna kernels")
set(rknpu_kernels CACHE INTERNAL "rknpu kernels")
set(opencl_kernels CACHE INTERNAL "opencl kernels")
set(host_kernels CACHE INTERNAL "host kernels")
......@@ -331,12 +341,12 @@ if(LITE_BUILD_TAILOR)
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
endif()
# add a kernel for some specific device
# device: one of (Host, ARM, X86, NPU, MLU, HUAWEI_ASCEND_NPU, APU, FPGA, OPENCL, CUDA, BM, RKNPU)
# device: one of (Host, ARM, X86, NPU, MLU, HUAWEI_ASCEND_NPU, APU, FPGA, OPENCL, CUDA, BM, RKNPU, NNA)
# level: one of (basic, extra)
function(add_kernel TARGET device level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NNA_DEPS RKNPU_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -448,6 +458,16 @@ function(add_kernel TARGET device level)
endif()
set(mlu_kernels "${mlu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "NNA")
if (NOT LITE_WITH_NNA)
foreach(src ${args_SRCS})
file(APPEND ${fake_kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
set(nna_kernels "${nna_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "HUAWEI_ASCEND_NPU")
if (NOT LITE_WITH_HUAWEI_ASCEND_NPU)
foreach(src ${args_SRCS})
......@@ -500,6 +520,7 @@ function(add_kernel TARGET device level)
RKNPU_DEPS ${args_RKNPU_DEPS}
BM_DEPS ${args_BM_DEPS}
MLU_DEPS ${args_MLU_DEPS}
NNA_DEPS ${args_NNA_DEPS}
HUAWEI_ASCEND_NPU_DEPS ${args_HUAWEI_ASCEND_NPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
......@@ -519,7 +540,7 @@ endif()
function(add_operator TARGET level)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NNA_DEPS NPU_DEPS XPU_DEPS MLU_DEPS HUAWEI_ASCEND_NPU_DEPS APU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -557,6 +578,7 @@ function(add_operator TARGET level)
RKNPU_DEPS ${args_RKNPU_DEPS}
BM_DEPS ${args_BM_DEPS}
MLU_DEPS ${args_MLU_DEPS}
NNA_DEPS ${args_NNA_DEPS}
HUAWEI_ASCEND_NPU_DEPS ${args_HUAWEI_ASCEND_NPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
......
......@@ -14,6 +14,7 @@ message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
message(STATUS "LITE_WITH_MLU:\t${LITE_WITH_MLU}")
message(STATUS "LITE_WITH_HUAWEI_ASCEND_NPU:\t${LITE_WITH_HUAWEI_ASCEND_NPU}")
message(STATUS "LITE_WITH_BM:\t${LITE_WITH_BM}")
message(STATUS "LITE_WITH_NNA:\t${LITE_WITH_NNA}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
message(STATUS "LITE_WITH_CV:\t${LITE_WITH_CV}")
......@@ -93,6 +94,9 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
if (LITE_WITH_RKNPU)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.rknpu")
endif(LITE_WITH_RKNPU)
if (LITE_WITH_NNA)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.nna")
endif(LITE_WITH_NNA)
else()
set(INFER_LITE_PUBLISH_ROOT "${CMAKE_BINARY_DIR}/inference_lite_lib")
endif()
......
......@@ -40,6 +40,7 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH
NPU_DEPS ${npu_kernels}
APU_DEPS ${apu_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
)
......@@ -85,7 +86,10 @@ else()
# Need to add RKNPU runtime libs dependency
target_link_libraries(paddle_light_api_shared ${rknpu_builder_libs} ${rknpu_runtime_libs})
endif()
if (LITE_WITH_NNA)
# Need to add Imagination NNA runtime libs dependency
#target_link_libraries(paddle_light_api_shared ${nna_builder_libs} ${nna_runtime_libs})
endif()
endif()
endif()
......@@ -118,6 +122,11 @@ if(LITE_WITH_RKNPU)
set(cxx_api_deps ${cxx_api_deps} ${rknpu_deps})
endif()
if(LITE_WITH_NNA)
set(light_api_deps ${light_api_deps} ${nna_deps})
set(cxx_api_deps ${cxx_api_deps} ${nna_deps})
endif()
if(LITE_WITH_HUAWEI_ASCEND_NPU)
set(light_api_deps ${light_api_deps} ${huawei_ascend_npu_deps})
set(cxx_api_deps ${cxx_api_deps} ${huawei_ascend_npu_deps})
......@@ -137,6 +146,7 @@ list(LENGTH fpga_kernels num_fpga_kernels)
list(LENGTH bm_kernels num_bm_kernels)
list(LENGTH mlu_kernels num_mlu_kernels)
list(LENGTH huawei_ascend_npu_kernels num_huawei_ascend_npu_kernels)
list(LENGTH nna_kernels num_nna_kernels)
message(STATUS "Collected ${num_ops} ops")
message(STATUS "Collected ${num_x86_kernels} X86 kernels")
......@@ -152,6 +162,7 @@ message(STATUS "Collected ${num_fpga_kernels} FPGA kernels")
message(STATUS "Collected ${num_bm_kernels} BM kernels")
message(STATUS "Collected ${num_mlu_kernels} MLU kernels")
message(STATUS "Collected ${num_huawei_ascend_npu_kernels} HUAWEI_ASCEND_NPU kernels")
message(STATUS "Collected ${num_imagination_nna_kernels} IMAGINATION_NNA kernels")
# for full api
if (NOT LITE_ON_TINY_PUBLISH)
......@@ -169,6 +180,7 @@ if (NOT LITE_ON_TINY_PUBLISH)
APU_DEPS ${apu_kernels}
RKNPU_DEPS ${rknpu_kernels}
BM_DEPS ${bm_kernels}
NNA_DEPS ${nna_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels})
......@@ -195,6 +207,7 @@ lite_cc_library(light_api SRCS light_api.cc
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
NNA_DEPS ${nna_kernels}
MLU_DEPS ${mlu_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels})
......@@ -219,6 +232,7 @@ if(WITH_TESTING)
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
MLU_DEPS ${mlu_kernels}
NNA_DEPS ${nna_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
EXCLUDE_COMPILE_DEPS "ON"
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
......@@ -351,6 +365,7 @@ if (NOT LITE_ON_TINY_PUBLISH)
APU_DEPS ${apu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
NNA_DEPS ${nna_kernels}
BM_DEPS ${bm_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels})
# The final inference library for just MobileConfig.
......@@ -382,6 +397,7 @@ if(NOT WITH_COVERAGE)
RKNPU_DEPS ${rknpu_kernels}
BM_DEPS ${bm_kernels}
MLU_DEPS ${mlu_kernels}
NNA_DEPS ${nna_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
endif()
......@@ -424,6 +440,7 @@ if(NOT WITH_COVERAGE)
FPGA_DEPS ${fpga_kernels}
BM_DEPS ${bm_kernels}
MLU_DEPS ${mlu_kernels}
NNA_DEPS ${nna_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model SERIAL)
if (WITH_TESTING)
......@@ -444,6 +461,7 @@ if(NOT IOS)
CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}
......@@ -460,6 +478,7 @@ if(NOT IOS)
CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}
......@@ -476,6 +495,7 @@ if(NOT IOS)
CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}
......@@ -486,6 +506,7 @@ if(NOT IOS)
ARM_DEPS ${arm_kernels}
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
NNA_DEPS ${nna_kernels}
XPU_DEPS ${xpu_kernels}
RKNPU_DEPS ${rknpu_kernels}
MLU_DEPS ${mlu_kernels}
......@@ -504,6 +525,7 @@ if(NOT IOS)
APU_DEPS ${apu_kernels}
XPU_DEPS ${xpu_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
MLU_DEPS ${mlu_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
CL_DEPS ${opencl_kernels}
......@@ -518,6 +540,7 @@ if(NOT IOS)
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
RKNPU_DEPS ${npu_kernels}
NNA_DEPS ${nna_kernels}
XPU_DEPS ${xpu_kernels}
APU_DEPS ${apu_kernels}
CL_DEPS ${opencl_kernels}
......
......@@ -125,6 +125,10 @@ std::vector<Place> ParserValidPlaces() {
} else if (target_repr == "apu") {
valid_places.emplace_back(
Place{TARGET(kAPU), PRECISION(kInt8), DATALAYOUT(kNCHW)});
} else if (target_repr == "nna") {
valid_places.emplace_back(TARGET(kNNA));
valid_places.emplace_back(
Place{TARGET(kNNA), PRECISION(kInt8), DATALAYOUT(kNCHW)});
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
......@@ -204,6 +208,7 @@ void PrintOpsInfo(std::set<std::string> valid_ops = {}) {
"kRKNPU",
"kAPU",
"kHuaweiAscendNPU",
"kNNA",
"kAny",
"kUnk"};
int maximum_optype_length = 0;
......@@ -269,16 +274,19 @@ void PrintHelpInfo() {
" `--optimize_out_type=(protobuf|naive_buffer)`\n"
" `--optimize_out=<output_optimize_model_dir>`\n"
" "
"`--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu)`\n"
"`--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu|"
"nna)`\n"
" `--record_tailoring_info=(true|false)`\n"
" Arguments of model checking and ops information:\n"
" `--print_all_ops=true` Display all the valid operators of "
"Paddle-Lite\n"
" `--print_supported_ops=true "
"--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu)`"
"--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu|nna)"
"`"
" Display valid operators of input targets\n"
" `--print_model_ops=true --model_dir=<model_param_dir> "
"--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu)`"
"--valid_targets=(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu|nna)"
"`"
" Display operators in the input model\n";
std::cout << "opt version:" << opt_version << std::endl
<< help_info << std::endl;
......
......@@ -84,6 +84,10 @@ void OptBase::SetValidPlaces(const std::string& valid_places) {
} else if (target_repr == "apu") {
valid_places_.emplace_back(
Place{TARGET(kAPU), PRECISION(kInt8), DATALAYOUT(kNCHW)});
} else if (target_repr == "nna") {
valid_places_.emplace_back(TARGET(kNNA));
valid_places_.emplace_back(
Place{TARGET(kNNA), PRECISION(kInt8), DATALAYOUT(kNCHW)});
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
......@@ -240,7 +244,8 @@ void OptBase::PrintHelpInfo() {
"default\n"
" `set_lite_out(output_optimize_model_dir)`\n"
" "
"`set_valid_places(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu)`\n"
"`set_valid_places(arm|opencl|x86|npu|xpu|rknpu|apu|huawei_ascend_npu|"
"nna)`\n"
" `record_model_info(false|true)`: refer to whether to record ops "
"info for striping lib, false by default`\n"
" `run() : start model transformation`\n"
......@@ -277,16 +282,17 @@ void OptBase::PrintExecutableBinHelpInfo() {
" `--param_file=<param_path>`\n"
" `--optimize_out_type=(protobuf|naive_buffer)`\n"
" `--optimize_out=<output_optimize_model_dir>`\n"
" `--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu)`\n"
" "
"`--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu|nna)`\n"
" `--record_tailoring_info=(true|false)`\n"
" Arguments of model checking and ops information:\n"
" `--print_all_ops=true` Display all the valid operators of "
"Paddle-Lite\n"
" `--print_supported_ops=true "
"--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu)`"
"--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu|nna)`"
" Display valid operators of input targets\n"
" `--print_model_ops=true --model_dir=<model_param_dir> "
"--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu)`"
"--valid_targets=(arm|opencl|x86|npu|xpu|huawei_ascend_npu|nna)`"
" Display operators in the input model\n";
std::cout << "paddlelite opt version:" << opt_version << std::endl
<< help_info << std::endl;
......@@ -305,6 +311,7 @@ void OptBase::PrintOpsInfo(const std::set<std::string>& valid_ops) {
"kRKNPU",
"kAPU",
"kHuaweiAscendNPU",
"kNNA",
"kAny",
"kUnk"};
// Get the lengh of the first column: maximum length of the op_type
......
......@@ -81,7 +81,8 @@ const std::string& TargetToStr(TargetType target) {
"mlu",
"rknpu",
"apu",
"huawei_ascend_npu"};
"huawei_ascend_npu",
"nna"};
auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x];
......@@ -125,7 +126,8 @@ const std::string& TargetRepr(TargetType target) {
"kMLU",
"kRKNPU",
"kAPU",
"kHuaweiAscendNPU"};
"kHuaweiAscendNPU",
"kNNA"};
auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x];
......@@ -171,7 +173,8 @@ std::set<TargetType> ExpandValidTargets(TargetType target) {
TARGET(kAPU),
TARGET(kRKNPU),
TARGET(kFPGA),
TARGET(kHuaweiAscendNPU)});
TARGET(kHuaweiAscendNPU),
TARGET(kNNA)});
if (target == TARGET(kAny)) {
return valid_set;
}
......
......@@ -58,7 +58,8 @@ enum class TargetType : int {
kRKNPU = 12,
kAPU = 13,
kHuaweiAscendNPU = 14,
NUM = 15, // number of fields.
kNNA = 15,
NUM = 16, // number of fields.
};
enum class PrecisionType : int {
kUnk = 0,
......
......@@ -53,6 +53,7 @@ USE_MIR_PASS(multi_stream_analysis_pass);
USE_MIR_PASS(elementwise_mul_constant_eliminate_pass)
USE_MIR_PASS(npu_subgraph_pass);
USE_MIR_PASS(huawei_ascend_npu_subgraph_pass);
USE_MIR_PASS(nna_subgraph_pass);
USE_MIR_PASS(xpu_subgraph_pass);
USE_MIR_PASS(mlu_subgraph_pass);
USE_MIR_PASS(mlu_postprocess_pass);
......
......@@ -39,17 +39,17 @@ namespace paddle {
namespace lite {
namespace pybind {
using lite_api::Tensor;
using lite::LightPredictorImpl;
using lite_api::CxxConfig;
using lite_api::MobileConfig;
using lite_api::PowerMode;
using lite_api::TargetType;
using lite_api::PrecisionType;
using lite_api::DataLayoutType;
using lite_api::Place;
using lite_api::MLUCoreVersion;
using lite::LightPredictorImpl;
using lite_api::MobileConfig;
using lite_api::OptBase;
using lite_api::Place;
using lite_api::PowerMode;
using lite_api::PrecisionType;
using lite_api::TargetType;
using lite_api::Tensor;
#ifndef LITE_ON_TINY_PUBLISH
using lite::CxxPaddleApiImpl;
......@@ -192,6 +192,7 @@ void BindLitePlace(py::module *m) {
.value("RKNPU", TargetType::kRKNPU)
.value("APU", TargetType::kAPU)
.value("HUAWEI_ASCEND_NPU", TargetType::kHuaweiAscendNPU)
.value("NNA", TargetType::kNNA)
.value("Any", TargetType::kAny);
// PrecisionType
......
......@@ -11,3 +11,4 @@ add_subdirectory(bm)
add_subdirectory(apu)
add_subdirectory(rknpu)
add_subdirectory(huawei_ascend_npu)
add_subdirectory(nna)
if(NOT LITE_WITH_NNA)
return()
endif()
lite_cc_library(device_nna SRCS imgdnn_manager.cc DEPS ${nna_builder_libs} ${nna_runtime_libs})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "imgdnn_manager.h" // NOLINT
#include <utility>
namespace paddle {
namespace lite {
namespace nna {
static void err_callback(imgdnn_report_flags flags,
const char **tensor_names,
int num_tensor_names,
imgdnn_err_code error_code,
const char *error_message) {
std::string msg_prefix;
switch (flags) {
case imgdnn_report_flags::IMGDNN_REPORT_ERROR:
msg_prefix = "ERROR";
break;
case imgdnn_report_flags::IMGDNN_REPORT_VERBOSE:
msg_prefix = "VERBOSE";
break;
case imgdnn_report_flags::IMGDNN_REPORT_INFO:
msg_prefix = "INFO";
break;
case imgdnn_report_flags::IMGDNN_REPORT_WARNING:
msg_prefix = "WARNING";
break;
default:
std::cerr << "unknown report flag in error callback" << std::endl;
}
std::cerr << msg_prefix << ": " << error_message << std::endl;
}
ImgdnnManager::ImgdnnManager() {
err_ = imgdnnSetErrorHandler(err_callback);
net_ = imgdnnCreateNetwork(&err_);
ASSERT(err_ != IMGDNN_SUCCESS, "CreateNetwork failed!");
unsigned int num_devices;
err_ = imgdnnGetDevices(
IMGDNN_DEVICE_TYPE_ACCELERATOR, 1, &device_, &num_devices);
ASSERT(err_ != IMGDNN_SUCCESS, "GetDevices failed!");
context_ = imgdnnCreateContext(num_devices, &device_, 0, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "CreateContext failed!");
binding_ = imgdnnCreateBinding(&err_);
ASSERT(err_ != IMGDNN_SUCCESS, "CreateBinding failed!");
}
imgdnn_tensor ImgdnnManager::createConvolutionLayer(
imgdnn_tensor input_tensor,
imgdnn_tensor weights_tensor,
imgdnn_tensor bias_tensor,
imgdnn_quant_param dst_quant_param,
unsigned int stride[2],
unsigned int pad_begin[2],
unsigned int pad_end[2],
unsigned int dilation[2],
bool use_dwconv) {
imgdnn_tensor convw_tensor;
if (use_dwconv) {
// transpose weight
int order[4] = {1, 0, 2, 3};
imgdnn_tensor transport_weights =
imgdnnNetworkTransposeOp(net_, weights_tensor, order, &err_);
convw_tensor = imgdnnNetworkDepthConvolution2dOp_v2(net_,
input_tensor,
transport_weights,
stride,
pad_begin,
pad_end,
dilation,
&err_);
} else {
convw_tensor = imgdnnNetworkConvolution2dOp_v2(net_,
input_tensor,
weights_tensor,
stride,
pad_begin,
pad_end,
dilation,
&err_);
}
// debug
imgdnn_tensor_descriptor desc_1;
imgdnnGetTensorDescriptor(input_tensor, &desc_1);
imgdnnGetTensorDescriptor(weights_tensor, &desc_1);
imgdnnGetTensorDescriptor(convw_tensor, &desc_1);
imgdnn_tensor conv2d_tensor;
if (bias_tensor) {
imgdnn_tensor convw_int_tensor = imgdnnNetworkCastOp(
net_, convw_tensor, IMGDNN_TYPE_I32, nullptr, &err_);
imgdnn_tensor_descriptor bias_desc;
imgdnnGetTensorDescriptor(convw_tensor, &bias_desc);
imgdnn_tensor broadcast2_tensor;
broadcast2_tensor = imgdnnNetworkBroadcastOp(
net_, bias_tensor, 2, bias_desc.size[2], &err_);
imgdnn_tensor broadcast3_tensor;
broadcast3_tensor = imgdnnNetworkBroadcastOp(
net_, broadcast2_tensor, 3, bias_desc.size[3], &err_);
conv2d_tensor = imgdnnNetworkBinaryOp(
net_, convw_int_tensor, broadcast3_tensor, IMGDNN_OPERATION_ADD, &err_);
} else {
conv2d_tensor = convw_tensor;
}
imgdnn_tensor conv2d_out_tensor = conv2d_tensor;  // keep the un-cast result for non-quantized types
imgdnn_tensor_descriptor desc;
imgdnnGetTensorDescriptor(input_tensor, &desc);
if (desc.type == IMGDNN_TYPE_Q_I8 || desc.type == IMGDNN_TYPE_Q_U8) {
conv2d_out_tensor = imgdnnNetworkCastOp(
net_, conv2d_tensor, desc.type, &dst_quant_param, &err_);
}
return conv2d_out_tensor;
}
imgdnn_tensor ImgdnnManager::createBatchNormLayer(imgdnn_tensor input_tensor,
const void *const avg_in,
const void *const var_in,
const float eps) {
imgdnn_tensor bna_tensor;
imgdnn_tensor average_tensor;
imgdnn_tensor_descriptor av_desc;
imgdnn_tensor broadcast2_tensor;
imgdnn_tensor broadcast3_tensor;
unsigned int buffer_size;
imgdnn_tensor_descriptor in_desc;
imgdnnGetTensorDescriptor(input_tensor, &in_desc);
av_desc.dimensions = 2;
av_desc.type = in_desc.type;
av_desc.size[0] = in_desc.size[0];
av_desc.size[1] = in_desc.size[1];
average_tensor = createFixedInputTensor(&av_desc, avg_in, true);
broadcast2_tensor =
imgdnnNetworkBroadcastOp(net_, average_tensor, 2, in_desc.size[2], &err_);
broadcast3_tensor = imgdnnNetworkBroadcastOp(
net_, broadcast2_tensor, 3, in_desc.size[3], &err_);
bna_tensor = imgdnnNetworkBinaryOp(
net_, input_tensor, broadcast3_tensor, IMGDNN_OPERATION_SUB, &err_);
imgdnn_tensor variance_tensor;
imgdnn_tensor_descriptor va_desc;
va_desc.dimensions = 2;
va_desc.type = in_desc.type;
va_desc.size[0] = in_desc.size[0];
va_desc.size[1] = in_desc.size[1];
buffer_size = imgdnnGetDescriptorSize(&va_desc, &err_);
float *variance = reinterpret_cast<float *>(GetBufromPool(buffer_size));
memcpy(variance, var_in, buffer_size);
// Perform 1/sqrt(var+eps) and Update var_data.
buffer_size /= sizeof(float);
for (size_t i = 0; i < buffer_size; i++) {
variance[i] = 1.0 / (sqrt(variance[i] + eps));
}
variance_tensor = createFixedInputTensor(&va_desc, variance, false);
broadcast2_tensor = imgdnnNetworkBroadcastOp(
net_, variance_tensor, 2, in_desc.size[2], &err_);
broadcast3_tensor = imgdnnNetworkBroadcastOp(
net_, broadcast2_tensor, 3, in_desc.size[3], &err_);
imgdnn_tensor bn_tensor;
bn_tensor = imgdnnNetworkBinaryOp(
net_, bna_tensor, broadcast3_tensor, IMGDNN_OPERATION_MUL, &err_);
return bn_tensor;
}
imgdnn_tensor ImgdnnManager::createPoolingLayer(
imgdnn_tensor in_tensor,
imgdnn_quant_param dst_quant_param,
const unsigned int size[2],
const unsigned int stride[2],
const unsigned int pad_to_begin[2],
const unsigned int pad_to_end[2],
imgdnn_pooling_type type) {
// debug
imgdnn_tensor_descriptor desc_1;
imgdnnGetTensorDescriptor(in_tensor, &desc_1);
imgdnn_tensor pool_tensor = imgdnnNetworkPooling2dOp_v2(
net_, in_tensor, size, stride, pad_to_begin, pad_to_end, type, &err_);
// debug
imgdnnGetTensorDescriptor(pool_tensor, &desc_1);
imgdnn_tensor_descriptor desc;
imgdnnGetTensorDescriptor(in_tensor, &desc);
if (desc.type == IMGDNN_TYPE_Q_I8 || desc.type == IMGDNN_TYPE_Q_U8) {
pool_tensor = imgdnnNetworkCastOp(
net_, pool_tensor, desc.type, &dst_quant_param, &err_);
}
return pool_tensor;
}
imgdnn_tensor ImgdnnManager::createFullyConnectedLayer(
imgdnn_tensor input_tensor,
imgdnn_tensor weights_tensor,
imgdnn_tensor bias_tensor,
imgdnn_quant_param dst_quant_param) {
imgdnn_tensor fcw_tensor;
imgdnn_tensor fcb_tensor;
imgdnn_tensor_descriptor in_desc;
imgdnnGetTensorDescriptor(input_tensor, &in_desc);
// int flatten_dim = 1
for (unsigned i = 2; i < in_desc.dimensions; ++i)
in_desc.size[1] *= in_desc.size[i];
in_desc.dimensions = 2;
auto reshaped_input =
imgdnnNetworkReshapeOp(net_, input_tensor, &in_desc, &err_);
// debug
imgdnn_tensor_descriptor desc_1;
imgdnnGetTensorDescriptor(reshaped_input, &desc_1);
imgdnn_tensor_descriptor desc_2;
imgdnnGetTensorDescriptor(weights_tensor, &desc_2);
imgdnn_tensor_descriptor desc_3;
imgdnnGetTensorDescriptor(bias_tensor, &desc_3);
// handle weights [num_units, input_size] tensor
/* const int order[] = { 1, 0 };
auto isnu_weights_tensor = imgdnnNetworkTransposeOp(net,
weights_tensor,
order,
&err_);*/
fcw_tensor = imgdnnNetworkBinaryOp(
net_, reshaped_input, weights_tensor, IMGDNN_OPERATION_MATMUL, &err_);
if (bias_tensor) {
imgdnn_tensor fcw_int_tensor =
imgdnnNetworkCastOp(net_, fcw_tensor, IMGDNN_TYPE_I32, nullptr, &err_);
imgdnn_tensor_descriptor desc_4;
imgdnnGetTensorDescriptor(fcw_int_tensor, &desc_4);
fcb_tensor = imgdnnNetworkBinaryOp(
net_, fcw_int_tensor, bias_tensor, IMGDNN_OPERATION_ADD, &err_);
} else {
fcb_tensor = fcw_tensor;
}
imgdnn_tensor_descriptor desc;
imgdnnGetTensorDescriptor(input_tensor, &desc);
if (desc.type == IMGDNN_TYPE_Q_I8 || desc.type == IMGDNN_TYPE_Q_U8) {
fcb_tensor = imgdnnNetworkCastOp(
net_, fcb_tensor, desc.type, &dst_quant_param, &err_);
}
return fcb_tensor;
}
imgdnn_tensor ImgdnnManager::createSoftmaxLayer(
imgdnn_tensor input_tensor,
float beta,
unsigned int axis,
imgdnn_quant_param dst_quant_param) {
// debug
imgdnn_tensor_descriptor desc_1;
imgdnnGetTensorDescriptor(input_tensor, &desc_1);
imgdnn_tensor softmax_tensor =
imgdnnNetworkSoftmaxOp(net_, input_tensor, beta, axis, &err_);
imgdnn_tensor_descriptor desc;
imgdnnGetTensorDescriptor(input_tensor, &desc);
if (desc.type == IMGDNN_TYPE_Q_I8 || desc.type == IMGDNN_TYPE_Q_U8) {
softmax_tensor = imgdnnNetworkCastOp(
net_, softmax_tensor, desc.type, &dst_quant_param, &err_);
}
imgdnn_tensor_descriptor desc_2;
imgdnnGetTensorDescriptor(softmax_tensor, &desc_2);
return softmax_tensor;
}
imgdnn_tensor ImgdnnManager::createScaleLayer(imgdnn_tensor input_tensor,
bool with_biasscale,
const void *const scale,
const void *const bias) {
imgdnn_tensor sc_tensor;
imgdnn_tensor scale_tensor;
imgdnn_tensor_descriptor sc_desc;
imgdnn_tensor broadcast2_tensor;
imgdnn_tensor broadcast3_tensor;
unsigned int buffer_size;
imgdnn_tensor_descriptor in_desc;
imgdnnGetTensorDescriptor(input_tensor, &in_desc);
sc_desc.dimensions = 2;
sc_desc.type = in_desc.type;
sc_desc.size[0] = in_desc.size[0];
sc_desc.size[1] = in_desc.size[1];
scale_tensor = createFixedInputTensor(&sc_desc, scale, true);
broadcast2_tensor =
imgdnnNetworkBroadcastOp(net_, scale_tensor, 2, in_desc.size[2], &err_);
broadcast3_tensor = imgdnnNetworkBroadcastOp(
net_, broadcast2_tensor, 3, in_desc.size[3], &err_);
sc_tensor = imgdnnNetworkBinaryOp(
net_, input_tensor, broadcast3_tensor, IMGDNN_OPERATION_MUL, &err_);
if (with_biasscale) {
imgdnn_tensor bsc_tensor;
imgdnn_tensor biasscale_tensor;
biasscale_tensor = createFixedInputTensor(&sc_desc, bias, true);
broadcast2_tensor = imgdnnNetworkBroadcastOp(
net_, biasscale_tensor, 2, in_desc.size[2], &err_);
broadcast3_tensor = imgdnnNetworkBroadcastOp(
net_, broadcast2_tensor, 3, in_desc.size[3], &err_);
bsc_tensor = imgdnnNetworkBinaryOp(
net_, sc_tensor, broadcast3_tensor, IMGDNN_OPERATION_ADD, &err_);
return bsc_tensor;
} else {
return sc_tensor;
}
}
imgdnn_network_object ImgdnnManager::createNetworkObject(
unsigned int num_inputs,
imgdnn_tensor *inputs,
unsigned int num_outputs,
imgdnn_tensor *outputs) {
const imgdnn_network_object_flags flags = 0;
std::string options_str;
std::string ddk_root{"/home/jasonwang/imgtools/ndk/main/"};
std::string hwconfig =
ddk_root + "nna-tools/config/mirage_hw_config06_23_2_6500_301.json";
std::string mapconfig = ddk_root + "nna-tools/config/mapconfig_q8a.json";
options_str += "-h " + hwconfig;
options_str += " -m " + mapconfig;
// options_str += " --dump_debug_binaries enabled";
net_obj_ = imgdnnCreateNetworkObject(device_,
context_,
net_,
num_inputs,
inputs,
num_outputs,
outputs,
flags,
options_str.c_str(),
&err_);
ASSERT(err_ != IMGDNN_SUCCESS, "CreateNetworkObject failed!");
return net_obj_;
}
} // namespace nna
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cmath>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "imgdnn.h" // NOLINT
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace nna {
static inline void CheckAndPrint(bool cond,
const char *msg,
int line,
const char *filename) {
if (cond) {
std::stringstream err_msg;
err_msg << "ERROR: " << msg << "\n";
err_msg << "Violated condition at line " << line << " in " << filename;
std::cerr << err_msg.str() << "\n";
exit(EXIT_FAILURE);
}
}
#define ASSERT(statement, msg) \
lite::nna::CheckAndPrint(statement, msg, __LINE__, __FILE__)
class ImgdnnManager {
imgdnn_err_code err_;
imgdnn_device device_;
imgdnn_network net_{nullptr};
imgdnn_context context_{nullptr};
imgdnn_binding binding_{nullptr};
imgdnn_network_object net_obj_{nullptr};
std::vector<uint8_t *> coef_pool;
public:
ImgdnnManager();
virtual ~ImgdnnManager() {
std::cout << "~ImgdnnManager called" << std::endl;
if (net_obj_) err_ = imgdnnNetworkObjectDestroy(net_obj_);
if (context_) err_ = imgdnnContextDestroy(context_);
if (binding_) err_ = imgdnnBindingDestroy(binding_);
if (net_) err_ = imgdnnNetworkDestroy(net_);
for (auto buf : coef_pool) delete[] buf;
}
uint8_t *GetBufromPool(size_t size) {
uint8_t *buf = new uint8_t[size];
coef_pool.push_back(buf);
return buf;
}
imgdnn_network GetNetwork() { return net_; }
imgdnn_tensor createInputTensor(imgdnn_tensor_descriptor *desc) {
return imgdnnNetworkInput(net_, desc, &err_);
}
imgdnn_tensor createFixedInputTensor(imgdnn_tensor_descriptor *desc,
const void *const fixed_data,
bool mem_copy) {
imgdnn_tensor fixed_input;
if (mem_copy) {
size_t buffer_size = imgdnnGetDescriptorSize(desc, &err_);
void *buf = GetBufromPool(buffer_size);
memcpy(buf, fixed_data, buffer_size);
fixed_input = imgdnnNetworkFixedInput(net_, desc, buf, &err_);
} else {
fixed_input = imgdnnNetworkFixedInput(net_, desc, fixed_data, &err_);
}
return fixed_input;
}
imgdnn_tensor createConvolutionLayer(imgdnn_tensor input_tensor,
imgdnn_tensor weights_tensor,
imgdnn_tensor bias_tensor,
imgdnn_quant_param dst_quant_param,
unsigned int stride[2],
unsigned int pad_begin[2],
unsigned int pad_end[2],
unsigned int dilation[2],
bool use_dwconv = false);
imgdnn_tensor createBatchNormLayer(imgdnn_tensor input_tensor,
const void *const avg_in,
const void *const var_in,
const float eps);
imgdnn_tensor createPoolingLayer(imgdnn_tensor in_tensor,
imgdnn_quant_param dst_quant_param,
const unsigned int size[2],
const unsigned int stride[2],
const unsigned int pad_to_begin[2],
const unsigned int pad_to_end[2],
imgdnn_pooling_type type);
imgdnn_tensor createFullyConnectedLayer(imgdnn_tensor input_tensor,
imgdnn_tensor weights_tensor,
imgdnn_tensor bias_tensor,
imgdnn_quant_param dst_quant_param);
imgdnn_tensor createSoftmaxLayer(imgdnn_tensor in_tensor,
float beta,
unsigned int axis,
imgdnn_quant_param dst_quant_param);
imgdnn_tensor createScaleLayer(imgdnn_tensor input_tensor,
bool with_biasscale,
const void *const scale,
const void *const bias);
imgdnn_tensor createReLULayer(imgdnn_tensor in_tensor,
bool has_min_clamp,
float min_clamp,
bool has_max_clamp,
float max_clamp,
float negative_slope) {
imgdnn_tensor relu_tensor = imgdnnNetworkReLUOp(net_,
in_tensor,
has_min_clamp,
min_clamp,
has_max_clamp,
max_clamp,
negative_slope,
&err_);
ASSERT(err_ != IMGDNN_SUCCESS, "ReLU OP fails");
imgdnn_tensor_descriptor in_desc, relu_desc;
imgdnnGetTensorDescriptor(in_tensor, &in_desc);
imgdnnGetTensorDescriptor(relu_tensor, &relu_desc);
if (relu_desc.type != in_desc.type) {
relu_tensor = imgdnnNetworkCastOp(
net_, relu_tensor, in_desc.type, &in_desc.quant_param, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "ReLU cast fails");
}
return relu_tensor;
}
imgdnn_network_object createNetworkObject(unsigned int num_inputs,
imgdnn_tensor *inputs,
unsigned int num_outputs,
imgdnn_tensor *outputs);
imgdnn_memory importMemory(
void *memory,
size_t size,
imgdnn_import_mem_type import_mem_type = IMGDNN_IMPORT_MEM_TYPE_CPU) {
imgdnn_memory mem =
imgdnnImportMemory(context_, memory, size, import_mem_type, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "ImportMemory fails");
return mem;
}
imgdnn_memory allocateMemory(size_t size) {
imgdnn_memory mem = imgdnnAllocateMemory(context_, size, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "AllocateMemory fails");
return mem;
}
void destroyMemory(imgdnn_memory memory) {
err_ = imgdnnMemoryDestroy(memory);
ASSERT(err_ != IMGDNN_SUCCESS, "MemoryDestroy fails");
}
void *lockMemory(imgdnn_memory memory, imgdnn_lock_access lock_access) {
void *mem = imgdnnMemoryLock(memory, lock_access, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "MemoryLock fails");
return mem;
}
void unlockMemory(imgdnn_memory memory) {
err_ = imgdnnMemoryUnlock(memory);
ASSERT(err_ != IMGDNN_SUCCESS, "MemoryUnLock fails");
}
void getNetworkObjectInputs(unsigned int max_inputs,
imgdnn_input inputs[],
unsigned int *num_inputs) {
ASSERT(net_obj_ == nullptr, "NetworkObject NULL when get its inputs");
err_ =
imgdnnNetworkObjectGetInputs(net_obj_, max_inputs, inputs, num_inputs);
ASSERT(err_ != IMGDNN_SUCCESS, "NetworkObjectGetInputs failed!");
}
void getNetworkObjectOutputs(unsigned int max_outputs,
imgdnn_output outputs[],
unsigned int *num_outputs) {
ASSERT(net_obj_ == nullptr, "NetworkObject NULL when get its outputs");
err_ = imgdnnNetworkObjectGetOutputs(
net_obj_, max_outputs, outputs, num_outputs);
ASSERT(err_ != IMGDNN_SUCCESS, "NetworkObjectGetOutputs failed!");
}
imgdnn_tensor_descriptor getInputDescriptor(imgdnn_input input) {
imgdnn_tensor_descriptor desc = imgdnnGetInputDescriptor(input, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "GetInputDescriptors failed!");
return desc;
}
imgdnn_tensor_descriptor getOutputDescriptor(imgdnn_output output) {
imgdnn_tensor_descriptor desc = imgdnnGetOutputDescriptor(output, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "GetOutputDescriptors failed!");
return desc;
}
size_t getDescriptorSize(const imgdnn_tensor_descriptor *const descriptor) {
size_t size = imgdnnGetDescriptorSize(descriptor, &err_);
ASSERT(err_ != IMGDNN_SUCCESS, "GetDescriptorSize failed!");
return size;
}
void addBindingInput(imgdnn_input input, imgdnn_memory memory) {
err_ = imgdnnBindingAddInput(binding_, input, memory);
ASSERT(err_ != IMGDNN_SUCCESS, "BindingAddInput failed!");
}
void addBindingOutput(imgdnn_output output, imgdnn_memory memory) {
err_ = imgdnnBindingAddOutput(binding_, output, memory);
ASSERT(err_ != IMGDNN_SUCCESS, "BindingAddOutput failed!");
}
void executeNetworkObject(bool blocking_execute,
unsigned int num_events_in_wait_list,
const imgdnn_event event_wait_list[],
imgdnn_event *event) {
err_ = imgdnnNetworkObjectExecute(net_obj_,
binding_,
blocking_execute,
num_events_in_wait_list,
event_wait_list,
event);
ASSERT(err_ != IMGDNN_SUCCESS, "NetworkObjectExecute failed!");
}
};
} // namespace nna
} // namespace lite
} // namespace paddle
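To make the intended flow of this helper concrete, below is a minimal usage sketch assembled only from the methods declared in the class above; it is an illustration, not code from this commit. The descriptor contents, weight data, and quantization parameters are assumed to be supplied by the caller, as the NNA subgraph bridges further down do, and imgdnn_tensor handles are assumed to be nullable.

// Hypothetical usage sketch of ImgdnnManager (illustration only).
#include "imgdnn_manager.h"

void RunTinyConvNet(imgdnn_tensor_descriptor in_desc,
                    imgdnn_tensor_descriptor w_desc,
                    const void *weights,
                    imgdnn_quant_param out_quant) {
  paddle::lite::nna::ImgdnnManager mgr;

  // 1. Describe the graph: input -> conv -> relu.
  imgdnn_tensor in = mgr.createInputTensor(&in_desc);
  imgdnn_tensor w = mgr.createFixedInputTensor(&w_desc, weights, true);
  unsigned int stride[2] = {1, 1};
  unsigned int pad_begin[2] = {0, 0};
  unsigned int pad_end[2] = {0, 0};
  unsigned int dilation[2] = {1, 1};
  imgdnn_tensor conv = mgr.createConvolutionLayer(
      in, w, nullptr /* no bias */, out_quant,
      stride, pad_begin, pad_end, dilation, false);
  imgdnn_tensor out = mgr.createReLULayer(conv, true, 0.f, false, 0.f, 0.f);

  // 2. Compile the graph into an executable network object.
  mgr.createNetworkObject(1, &in, 1, &out);

  // 3. Bind memory to the compiled inputs/outputs and execute.
  imgdnn_input inputs[1];
  imgdnn_output outputs[1];
  unsigned int num_in = 0, num_out = 0;
  mgr.getNetworkObjectInputs(1, inputs, &num_in);
  mgr.getNetworkObjectOutputs(1, outputs, &num_out);
  imgdnn_tensor_descriptor real_in = mgr.getInputDescriptor(inputs[0]);
  imgdnn_tensor_descriptor real_out = mgr.getOutputDescriptor(outputs[0]);
  imgdnn_memory in_mem = mgr.allocateMemory(mgr.getDescriptorSize(&real_in));
  imgdnn_memory out_mem = mgr.allocateMemory(mgr.getDescriptorSize(&real_out));
  mgr.addBindingInput(inputs[0], in_mem);
  mgr.addBindingOutput(outputs[0], out_mem);
  // Input data would be written through lockMemory()/unlockMemory() before
  // this call, and results read back the same way afterwards.
  mgr.executeNetworkObject(true, 0, nullptr, nullptr);  // blocking run
  mgr.destroyMemory(in_mem);
  mgr.destroyMemory(out_mem);
}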
......@@ -6,5 +6,5 @@ endif()
lite_cc_library(arena_framework SRCS framework.cc DEPS program gtest)
if((NOT LITE_WITH_OPENCL) AND (LITE_WITH_X86 OR LITE_WITH_ARM))
lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_arena_framework SRCS framework_test.cc DEPS arena_framework ${rknpu_kernels} ${mlu_kernels} ${bm_kernels} ${npu_kernels} ${huawei_ascend_npu_kernels} ${nna_kernels} ${xpu_kernels} ${x86_kernels} ${cuda_kernels} ${fpga_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
......@@ -64,6 +64,7 @@ using BMContext = Context<TargetType::kBM>;
using MLUContext = Context<TargetType::kMLU>;
using RKNPUContext = Context<TargetType::kRKNPU>;
using HuaweiAscendNPUContext = Context<TargetType::kHuaweiAscendNPU>;
using NNAContext = Context<TargetType::kNNA>;
template <>
class Context<TargetType::kHost> {
......@@ -173,6 +174,21 @@ class Context<TargetType::kRKNPU> {
};
#endif
#ifdef LITE_WITH_NNA
template <>
class Context<TargetType::kNNA> {
public:
Context() {}
explicit Context(const NNAContext& ctx);
// NOTE: InitOnce should only be used by ContextScheduler
void InitOnce() {}
void CopySharedTo(NNAContext* ctx) {}
// NNAContext& operator=(const NNAContext& ctx) {}
std::string name() const { return "NNAContext"; }
};
#endif
#ifdef LITE_WITH_XPU
template <>
class Context<TargetType::kXPU> {
......@@ -471,6 +487,12 @@ class ContextScheduler {
&ctx->As<BMContext>());
break;
#endif
#ifdef LITE_WITH_NNA
case TARGET(kNNA):
kernel_contexts_[TargetType::kNNA].As<NNAContext>().CopySharedTo(
&ctx->As<NNAContext>());
break;
#endif
#ifdef LITE_WITH_MLU
case TARGET(kMLU): {
int dev_id = TargetWrapper<TargetType::kMLU>::GetCurDevice();
......@@ -533,6 +555,9 @@ class ContextScheduler {
#endif
#ifdef LITE_WITH_MLU
InitContext<TargetType::kMLU, MLUContext>();
#endif
#ifdef LITE_WITH_NNA
InitContext<TargetType::kNNA, NNAContext>();
#endif
}
......
......@@ -52,21 +52,21 @@ void MemoryOptimizePass::CollectLifeCycleByDevice(
"feed",
"fetch"};
auto insert_invalid_op_nodes_for_specific_target = [&](
std::set<std::string> op_node_set, TargetType specific_target) {
std::set<std::string> invalid_op_nodes_opencl = {"layout", "fc"};
for (auto& op_node : graph->StmtTopologicalOrder()) {
if (!op_node->IsStmt()) continue;
TargetType op_target_type = op_node->AsStmt().place().target;
if (op_target_type == specific_target &&
specific_target == TARGET(kOpenCL)) {
invalid_op_nodes.insert(invalid_op_nodes_opencl.begin(),
invalid_op_nodes_opencl.end());
break;
}
// else if // you can add more targets
}
};
auto insert_invalid_op_nodes_for_specific_target =
[&](std::set<std::string> op_node_set, TargetType specific_target) {
std::set<std::string> invalid_op_nodes_opencl = {"layout", "fc"};
for (auto& op_node : graph->StmtTopologicalOrder()) {
if (!op_node->IsStmt()) continue;
TargetType op_target_type = op_node->AsStmt().place().target;
if (op_target_type == specific_target &&
specific_target == TARGET(kOpenCL)) {
invalid_op_nodes.insert(invalid_op_nodes_opencl.begin(),
invalid_op_nodes_opencl.end());
break;
}
// else if // you can add more targets
}
};
VLOG(4) << "invalid_op_nodes.size();" << invalid_op_nodes.size();
insert_invalid_op_nodes_for_specific_target(invalid_op_nodes,
......@@ -315,4 +315,5 @@ REGISTER_MIR_PASS(memory_optimize_pass, paddle::lite::mir::MemoryOptimizePass)
TARGET(kRKNPU),
TARGET(kAPU),
TARGET(kMLU),
TARGET(kHuaweiAscendNPU)});
TARGET(kHuaweiAscendNPU),
TARGET(kNNA)});
......@@ -128,6 +128,20 @@ void MLUSubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
fuser();
}
void NNASubgraphPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
std::set<std::string> supported_lists;
#define USE_SUBGRAPH_BRIDGE(op_type, target) supported_lists.insert(#op_type);
#include "lite/kernels/nna/bridges/paddle_use_bridges.h"
#undef USE_SUBGRAPH_BRIDGE
auto teller = [&](Node* node) {
if (!node->IsStmt()) return false;
auto& stmt = node->AsStmt();
return supported_lists.count(stmt.op_type()) != 0;
};
SubgraphFuser fuser(graph.get(), teller, 1 /* min_subgraph_size */);
fuser();
}
} // namespace mir
} // namespace lite
} // namespace paddle
......@@ -147,3 +161,5 @@ REGISTER_MIR_PASS(rknpu_subgraph_pass, paddle::lite::mir::RKNPUSubgraphPass)
.BindTargets({TARGET(kRKNPU)});
REGISTER_MIR_PASS(mlu_subgraph_pass, paddle::lite::mir::MLUSubgraphPass)
.BindTargets({TARGET(kMLU)});
REGISTER_MIR_PASS(nna_subgraph_pass, paddle::lite::mir::NNASubgraphPass)
.BindTargets({TARGET(kNNA)});
......@@ -57,6 +57,11 @@ class MLUSubgraphPass : public ProgramPass {
void Apply(const std::unique_ptr<SSAGraph>& graph) override;
};
class NNASubgraphPass : public ProgramPass {
public:
void Apply(const std::unique_ptr<SSAGraph>& graph) override;
};
} // namespace mir
} // namespace lite
} // namespace paddle
......@@ -126,6 +126,7 @@ class Optimizer {
// of the quantized ops.
"npu_subgraph_pass",
"huawei_ascend_npu_subgraph_pass",
"imagination_nna_subgraph_pass",
"xpu_subgraph_pass",
"bm_subgraph_pass",
"apu_subgraph_pass",
......
......@@ -17,6 +17,7 @@ lite_cc_test(test_gen_code SRCS gen_code_test.cc
NPU_DEPS ${npu_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
......@@ -47,6 +48,7 @@ lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_co
NPU_DEPS ${npu_kernels}
HUAWEI_ASCEND_NPU_DEPS ${huawei_ascend_npu_kernels}
RKNPU_DEPS ${rknpu_kernels}
NNA_DEPS ${nna_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
......
......@@ -15,3 +15,4 @@ add_subdirectory(apu)
add_subdirectory(bm)
add_subdirectory(rknpu)
add_subdirectory(huawei_ascend_npu)
add_subdirectory(nna)
add_subdirectory(bridges)
add_kernel(subgraph_compute_nna NNA basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_nna subgraph_bridge_engine ${nna_subgraph_bridges})
if(NOT LITE_WITH_NNA)
return()
endif()
lite_cc_library(subgraph_bridge_utility_nna SRCS utility.cc DEPS ${nna_builder_libs} ${nna_runtime_libs} tensor)
lite_cc_library(subgraph_bridge_graph_nna SRCS graph.cc DEPS subgraph_bridge_utility_nna)
set(nna_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_utility_nna subgraph_bridge_graph_nna)
lite_cc_library(subgraph_bridge_fc_op_nna SRCS fc_op.cc DEPS ${nna_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_conv_op_nna SRCS conv_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_matmul_op_nna SRCS matmul_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_mul_op_nna SRCS mul_op.cc DEPS ${nna_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_act_op_nna SRCS act_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_scale_op_nna SRCS scale_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_softmax_op_nna SRCS softmax_op.cc DEPS ${nna_subgraph_bridge_deps})
lite_cc_library(subgraph_bridge_pool_op_nna SRCS pool_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_batch_norm_op_nna SRCS batch_norm_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_elementwise_ops_nna SRCS elementwise_ops.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_reshape_op_nna SRCS reshape_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_conv_transpose_op_nna SRCS conv_transpose_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_interpolate_op_nna SRCS interpolate_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_transpose_op_nna SRCS transpose_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_split_op_nna SRCS split_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_concat_op_nna SRCS concat_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_shuffle_channel_op_nna SRCS shuffle_channel_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_pad2d_op_nna SRCS pad2d_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_reduce_mean_op_nna SRCS reduce_mean_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_unsqueeze_op_nna SRCS unsqueeze_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_gather_op_nna SRCS gather_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_lookup_table_op_nna SRCS lookup_table_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_argmax_op_nna SRCS argmax_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_instance_norm_op_nna SRCS instance_norm_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_dropout_op_nna SRCS dropout_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_topk_op_nna SRCS topk_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_layer_norm_op_nna SRCS layer_norm_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_fill_constant_op_nna SRCS fill_constant_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_fill_constant_batch_size_like_op_nna SRCS fill_constant_batch_size_like_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_increment_op_nna SRCS increment_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_compare_op_nna SRCS compare_op.cc DEPS ${nna_subgraph_bridge_deps})
#lite_cc_library(subgraph_bridge_shape_op_nna SRCS shape_op.cc DEPS ${nna_subgraph_bridge_deps})
set(nna_subgraph_bridges
subgraph_bridge_registry
subgraph_bridge_utility_nna
subgraph_bridge_graph_nna
subgraph_bridge_fc_op_nna
subgraph_bridge_conv_op_nna
#subgraph_bridge_matmul_op_nna
#subgraph_bridge_mul_op_nna
subgraph_bridge_act_op_nna
#subgraph_bridge_scale_op_nna
#subgraph_bridge_softmax_op_nna
subgraph_bridge_pool_op_nna
#subgraph_bridge_batch_norm_op_nna
#subgraph_bridge_elementwise_ops_nna
#subgraph_bridge_reshape_op_nna
#subgraph_bridge_conv_transpose_op_nna
#subgraph_bridge_interpolate_op_nna
#subgraph_bridge_transpose_op_nna
#subgraph_bridge_split_op_nna
#subgraph_bridge_concat_op_nna
#subgraph_bridge_shuffle_channel_op_nna
#subgraph_bridge_pad2d_op_nna
#subgraph_bridge_reduce_mean_op_nna
#subgraph_bridge_unsqueeze_op_nna
#subgraph_bridge_gather_op_nna
#subgraph_bridge_lookup_table_op_nna
#subgraph_bridge_argmax_op_nna
#subgraph_bridge_instance_norm_op_nna
#subgraph_bridge_dropout_op_nna
#subgraph_bridge_topk_op_nna
#subgraph_bridge_layer_norm_op_nna
#subgraph_bridge_fill_constant_op_nna
#subgraph_bridge_fill_constant_batch_size_like_op_nna
#subgraph_bridge_increment_op_nna
#subgraph_bridge_compare_op_nna
CACHE INTERNAL "nna_subgraph_bridges")
message(STATUS "+++++ nna_subgraph_bridges: ${nna_subgraph_bridges}")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
// template <typename ActType>
int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x_type = kernel->GetInputDeclType("X");
CHECK(x_type->precision() == PRECISION(kFloat));
CHECK(x_type->layout() == DATALAYOUT(kNCHW));
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
auto out_type = kernel->GetOutputDeclType("Out");
CHECK(out_type->precision() == PRECISION(kFloat));
CHECK(out_type->layout() == DATALAYOUT(kNCHW));
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
// x_node = graph->Add(x_name, *x);
LOG(WARNING) << "ActConverter:x_node not in graph";
}
imgdnn_tensor relu_output = graph->GetBuilder()->createReLULayer(
x_node->data(), true, 0.0, false, 0.0, 0.0);
imgdnn_tensor_descriptor desc;
imgdnn_err_code err = imgdnnGetTensorDescriptor(relu_output, &desc);
CHECK(err == IMGDNN_SUCCESS) << "fail get tensor description(RELU)";
graph->Add(out_name, relu_output, desc.type);
return SUCCESS;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
#if 0
REGISTER_SUBGRAPH_BRIDGE(
sigmoid,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
#endif
REGISTER_SUBGRAPH_BRIDGE(relu, kNNA, paddle::lite::subgraph::nna::ActConverter);
#if 0
REGISTER_SUBGRAPH_BRIDGE(
tanh, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
relu_clipped,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
relu6, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
leaky_relu,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
abs, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
softsign,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
softplus,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
hard_sigmoid,
kNNA,
paddle::lite::subgraph::nna::ActConverter<ge::op::Activation>);
REGISTER_SUBGRAPH_BRIDGE(
log, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Log>);
REGISTER_SUBGRAPH_BRIDGE(
square, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Square>);
REGISTER_SUBGRAPH_BRIDGE(
sqrt, kNNA, paddle::lite::subgraph::nna::ActConverter<ge::op::Sqrt>);
#endif
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/registry.h"
#include "lite/kernels/nna/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto scale_name = op_info->Input("Scale").front();
auto scale = scope->FindMutableTensor(scale_name);
auto bias_name = op_info->Input("Bias").front();
auto bias = scope->FindMutableTensor(bias_name);
auto mean_name = op_info->Input("Mean").front();
auto mean = scope->FindMutableTensor(mean_name);
auto variance_name = op_info->Input("Variance").front();
auto variance = scope->FindMutableTensor(variance_name);
auto y_name = op_info->Output("Y").front();
// float momentum = op_info->GetAttr<float>("momentum");
float epsilon = op_info->GetAttr<float>("epsilon");
// int mode = 1; // bnScale, bnBias tensor dims are 1xCx1x1
/*
bool use_global_stats = !op_info->HasAttr("use_global_stats") ||
op_info->GetAttr<bool>("use_global_stats");
if (!use_global_stats) {
LOG(WARNING) << "[NNA] Only use_global_stats=true is supported by DDK";
}
*/
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
  x_node = graph->Get(x_name);
} else {
  // The input node should already have been created by a preceding bridge.
  LOG(WARNING) << "[NNA] BatchNorm input node " << x_name
               << " not found in graph";
  return FAILED;
}
ConvNetBuilder& builder = graph->GetBuilder();
auto bn_out = builder.createBatchNormLayer(x_node->data(),
mean->mutable_data<float>(),
variance->mutable_data<float>(),
epsilon);
bn_out = builder.createScaleLayer(
bn_out, true, scale->mutable_data<float>(), bias->mutable_data<float>());
// PrecisionType precision = x->precision();
imgdnn_tensor_descriptor desc;
imgdnn_err_code err = imgdnnGetTensorDescriptor(bn_out, &desc);
CHECK(err == IMGDNN_SUCCESS) << "failed to get tensor descriptor (BN)";
graph->Add(y_name, bn_out, desc.type);
return SUCCESS;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(batch_norm,
kNNA,
paddle::lite::subgraph::nna::BatchNormConverter);
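The batch_norm bridge above lowers the op into an imgdnn batch-norm layer followed by a scale layer. For reference, those two layers together implement the usual batch-norm inference formula; the following standalone sketch of that arithmetic is illustrative only (it is not part of the patch or the imgdnn DDK) and assumes NCHW data with per-channel parameters of length C.
#include <cmath>
#include <cstddef>
#include <vector>
// y[n][c][h][w] = scale[c] * (x[n][c][h][w] - mean[c]) / sqrt(var[c] + eps) + bias[c]
static void BatchNormScaleReference(const std::vector<float>& x,
                                    const std::vector<float>& mean,
                                    const std::vector<float>& variance,
                                    const std::vector<float>& scale,
                                    const std::vector<float>& bias,
                                    float epsilon,
                                    std::size_t n, std::size_t c, std::size_t hw,
                                    std::vector<float>* y) {
  y->resize(n * c * hw);
  for (std::size_t i = 0; i < n; ++i) {
    for (std::size_t j = 0; j < c; ++j) {
      // normalize with the running statistics, then apply scale/bias
      const float inv_std = 1.0f / std::sqrt(variance[j] + epsilon);
      for (std::size_t k = 0; k < hw; ++k) {
        const std::size_t idx = (i * c + j) * hw + k;
        (*y)[idx] = scale[j] * (x[idx] - mean[j]) * inv_std + bias[j];
      }
    }
  }
}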
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/conv_op.h"
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
int ConvConverter(void *ctx, OpLite *op, KernelBase *kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph *>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " << op_type << "... ";
// Get input and output vars and op attributes
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
auto filter_name = op_info->Input("Filter").front();
auto filter = scope->FindMutableTensor(filter_name);
auto filter_dims = filter->dims();
auto output_name = op_info->Output("Output").front();
auto output = scope->FindMutableTensor(output_name);
auto output_dims = output->dims();
auto bs = input_dims[0];
auto ic = input_dims[1];
auto oc = filter_dims[0];
CHECK_EQ(input_dims.size(), 4L);
CHECK_EQ(output_dims.size(), 4L);
CHECK_EQ(filter_dims.size(), 4L);
CHECK_EQ(output_dims[0], bs);
CHECK_EQ(output_dims[1], oc);
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
auto groups = op_info->GetAttr<int>("groups");
auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
bool with_act =
op_info->HasAttr("with_act") && op_info->GetAttr<bool>("with_act");
std::string act_type =
with_act ? op_info->GetAttr<std::string>("act_type") : "";
float leaky_relu_alpha = act_type == "leaky_relu"
? op_info->GetAttr<float>("leaky_relu_alpha")
: 0.f;
CHECK_EQ(strides.size(), 2L);
CHECK_EQ(dilations.size(), 2L);
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
// Input node
std::shared_ptr<Node> input_node = nullptr;
imgdnn_tensor in_tensor;
if (graph->Has(input_name)) {
input_node = graph->Get(input_name);
in_tensor = input_node->data();
} else {
TensorInfoReset(&qnt);
if (enable_int8)
qnt.type = IMGDNN_TYPE_Q_U8;
else
qnt.type = IMGDNN_TYPE_F32;
qnt.scales.push_back(input_scale);
qnt.zero_points.push_back(128);
input_node = graph->Add(input_name, *input, qnt, Node::Role::kInput);
in_tensor = input_node->data();
}
if (paddings.size() == 2L) {
for (size_t i = 0; i < strides.size(); ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
}
CHECK_EQ(paddings.size(), 4L)
<< "[NNA] Paddings size should be the same or twice as the input size.";
std::string padding_algorithm("");
if (op_info->HasAttr("padding_algorithm")) {
padding_algorithm = op_info->GetAttr<std::string>("padding_algorithm");
}
operators::UpdatePaddingAndDilation(&paddings,
&dilations,
strides,
padding_algorithm,
input_dims,
filter_dims);
// Check depthwise mode, and decide whether use ConvolutionDepthwise Op
bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
// Filter node
std::shared_ptr<Node> filter_node = nullptr;
imgdnn_tensor filter_tensor;
bool per_channel = isScalesPerChannel(weight_scale);
TensorInfoReset(&qnt);
uint8_t *weights_u8 =
graph->GetBuilder()->GetBufromPool(filter_dims.production());
if (enable_int8) {
char *weight_src = static_cast<char *>(filter->raw_data());
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
qnt.scales.assign(weight_scale.begin(), weight_scale.end());
qnt.zero_points.assign(weight_scale.size(), 128);
qnt.count = oc;
qnt.axis = 1;
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
for (int i = 0; i < filter_dims.production(); i++) {
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
}
filter_node = graph->Add(filter_name,
weights_u8,
filter_dims.Vectorize(),
qnt,
Node::Role::kConst);
filter_tensor = filter_node->data();
} else {
qnt.type = IMGDNN_TYPE_F32;
filter_node = graph->Add(filter_name, *filter, qnt, Node::Role::kConst);
}
// Add bias node if exists bias
// Supports the bias nodes with the following dimensions
// 0: {oc}
// 1: {1, oc, oh, ow}
// 2: {n, oc, oh, ow}
std::shared_ptr<Node> bias_node = nullptr;
imgdnn_tensor bias_tensor = nullptr;
if (HasInputArg(op_info, scope, "Bias")) {
auto bias_name = op_info->Input("Bias").front();
if (graph->Has(bias_name)) {
bias_node = graph->Get(bias_name);
} else {
auto bias = scope->FindMutableTensor(bias_name);
auto bias_dims = bias->dims();
auto bias_data_size = bias_dims.production();
auto output_data_size = output_dims.production();
std::vector<int64_t> bias_shape;
if (bias_data_size == oc) {
// 0: {oc}
bias_shape = {1, oc, 1, 1};
} else if (bias_data_size == output_data_size / bs) {
// 1: {1, oc, oh, ow}
bias_shape = {1, output_dims[1], output_dims[2], output_dims[3]};
} else if (bias_data_size == output_data_size) {
// 2: {n, oc, oh, ow}
bias_shape = output_dims.Vectorize();
} else {
LOG(WARNING)
<< "[NNA] Bias dimension " << bias_dims
<< " isn't supported in conv2d Op when output dimension is "
<< output_dims;
return FAILED;
}
TensorInfoReset(&qnt);
std::vector<int64_t> shapes{1, oc};
auto bias_data = bias->data<float, float>();
if (enable_int8) {
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(bias_data_size);
for (int i = 0; i < bias_data_size; i++)
qnt.scales[i] = input_scale * weight_scale[i];
qnt.zero_points.assign(bias_data_size, 0);
qnt.count = 2;
qnt.axis = 1;
} else {
qnt.scales.push_back(input_scale * weight_scale[0]);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1LL << (quant_bits - 1)) - 1);  // avoid signed overflow at 1 << 31
auto dtype_min = static_cast<int>(0 - dtype_max);
int32_t *bias_qnt_data =
reinterpret_cast<int32_t *>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < bias_data_size; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
}
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
} else {
qnt.type = IMGDNN_TYPE_F32;
std::vector<float> bias_float_data(bias_data,
bias_data + bias_data_size);
bias_node = graph->Add(
bias_name, bias_float_data.data(), shapes, qnt, Node::Role::kConst);
}
bias_tensor = bias_node->data();
}
}
unsigned int img_stride[2] = {(unsigned int)strides[0],
(unsigned int)strides[1]};
unsigned int pad_to_begin[2] = {(unsigned int)paddings[0],
(unsigned int)paddings[2]}; // top,left
unsigned int pad_to_end[2] = {(unsigned int)paddings[1],
(unsigned int)paddings[3]}; // bottom,right
unsigned int img_dilation[2] = {(unsigned int)dilations[0],
(unsigned int)dilations[1]};
imgdnn_quant_param output_quant_param;
output_quant_param.scale = output_scale;
output_quant_param.zero_point = 128;
imgdnn_tensor conv_out =
graph->GetBuilder()->createConvolutionLayer(in_tensor,
filter_tensor,
bias_tensor,
output_quant_param,
img_stride,
pad_to_begin,
pad_to_end,
img_dilation,
is_depthwise_mode);
if (!act_type.empty()) {
imgdnn_tensor act_out = conv_out;  // fall back to the conv output if act_type is unhandled
if (act_type == "leaky_relu") {
act_out = graph->GetBuilder()->createReLULayer(
conv_out, false, 0.0, false, 0.0, leaky_relu_alpha);
} else if (act_type == "relu6") {
act_out = graph->GetBuilder()->createReLULayer(
conv_out, true, 0.0, true, 6.0, false);
} else if (act_type == "relu") {
act_out = graph->GetBuilder()->createReLULayer(
conv_out, true, 0.0, false, 0.0, false);
} else {
VLOG(3) << "act_type: " << act_type << " Not handled";
}
graph->Add(output_name, act_out, IMGDNN_TYPE_Q_U8);
} else {
graph->Add(output_name, conv_out, IMGDNN_TYPE_Q_U8);
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(conv2d,
kNNA,
paddle::lite::subgraph::nna::ConvConverter);
REGISTER_SUBGRAPH_BRIDGE(depthwise_conv2d,
kNNA,
paddle::lite::subgraph::nna::ConvConverter);
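Both conv2d paths above re-encode Paddle's symmetric int8 weights into the unsigned 8-bit layout with a fixed zero point of 128, and quantize the float bias to int32 with scale = input_scale * weight_scale. The helpers below are an illustrative standalone sketch of that arithmetic (they are not DDK API and not part of the patch).
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>
// Symmetric int8 -> unsigned 8-bit with zero point 128.
static std::vector<uint8_t> ShiftInt8ToUint8(const std::vector<int8_t>& w) {
  std::vector<uint8_t> out(w.size());
  for (std::size_t i = 0; i < w.size(); ++i)
    out[i] = static_cast<uint8_t>(static_cast<int16_t>(w[i]) + 128);
  return out;
}
// Float bias -> int32 with scale = input_scale * weight_scale, clamped to int32 range.
static std::vector<int32_t> QuantizeBias(const std::vector<float>& bias,
                                         float input_scale,
                                         float weight_scale) {
  const int32_t qmax = 2147483647;
  const int32_t qmin = -qmax;
  const float scale = input_scale * weight_scale;
  std::vector<int32_t> out(bias.size());
  for (std::size_t i = 0; i < bias.size(); ++i)
    out[i] = std::min(std::max(static_cast<int32_t>(bias[i] / scale), qmin), qmax);
  return out;
}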
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "imgdnn.h" // NOLINT
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
auto input_name = op_info->Input("Input").front();
auto input = scope->FindTensor(input_name);
auto input_dims = input->dims();
auto weight_name = op_info->Input("W").front();
auto weights = scope->FindTensor(weight_name);
auto w_dims = weights->dims();
CHECK_EQ(w_dims.size(), 2UL);
auto out_name = op_info->Output("Out").front();
auto out = scope->FindTensor(out_name);
auto out_dims = out->dims();
// Notes: m = number of input rows, k = number of input cols,
//        n = number of weight cols.
// e.g. input_dims = {1, 1024, 1, 1}, in_num_col_dims = 1,
//      w_dims = {1024, 1000}  =>  m = 1, k = 1024, n = 1000
int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
int m = input_dims.Slice(0, in_num_col_dims).production();
int k = input_dims.Slice(in_num_col_dims, input_dims.size()).production();
int n = w_dims[1];
CHECK_EQ(k * n, w_dims.production());
VLOG(3) << "[NNA] input dims: " << input_dims << " w dims: " << w_dims
<< " m: " << m << " k: " << k << " n: " << n;
// for quantization
bool enable_int8 = false;
float input_scale = 1.0;
float output_scale = 1.0;
std::vector<float> weight_scale;
TensorInfo qnt;
if (op_info->HasAttr("enable_int8")) {
enable_int8 = op_info->GetAttr<bool>("enable_int8");
input_scale = op_info->GetAttr<float>("input_scale");
output_scale = op_info->GetAttr<float>("output_scale");
weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
}
// Create input node and reshape it to (m, k, 1, 1)
std::shared_ptr<Node> input_node = nullptr;
if (graph->Has(input_name)) {
input_node = graph->Get(input_name);
} else {
LOG(FATAL) << "[NNA] input node: " << input_name << ", could not be found";
}
// weight tensor
std::shared_ptr<Node> weight_node = nullptr;
bool per_channel = isScalesPerChannel(weight_scale);
uint8_t* weights_u8 = graph->GetBuilder()->GetBufromPool(w_dims.production());
if (enable_int8) {
qnt.type = IMGDNN_TYPE_Q_U8;
if (per_channel) {
LOG(FATAL)
<< "[NNA] FC per-channel quantization is not supported for Mirage";
} else {
qnt.scales.push_back(weight_scale.at(0));
qnt.zero_points.push_back(128);
}
const char* weight_src = static_cast<const char*>(weights->raw_data());
for (int i = 0; i < w_dims.production(); i++)
weights_u8[i] = static_cast<uint8_t>(weight_src[i] + 128);
} else {
LOG(FATAL) << "[NNA] PaddleLite Only 8-bits quantization.";
}
weight_node = graph->Add(
weight_name, weights_u8, w_dims.Vectorize(), qnt, Node::Role::kConst);
// Add bias node if bias tensor exists
imgdnn_tensor bias_tensor = nullptr;
if (HasInputArg(op_info, scope, "Bias")) {
std::shared_ptr<Node> bias_node = nullptr;
auto bias_name = op_info->Input("Bias").front();
if (graph->Has(bias_name)) {
bias_node = graph->Get(bias_name);
} else {
auto bias = scope->FindTensor(bias_name);
auto bias_dims = bias->dims();
CHECK_EQ(bias_dims.production(), n);
if (enable_int8 && bias->precision() == PRECISION(kFloat)) {
TensorInfoReset(&qnt);
qnt.type = IMGDNN_TYPE_I32;
if (per_channel) {
qnt.scales.resize(weight_scale.size());
qnt.count = bias_dims.size();
qnt.axis = 0;
for (int i = 0; i < weight_scale.size(); i++) {
qnt.scales[i] = input_scale * weight_scale[i];
}
LOG(FATAL)
<< "[NNA] per-channel quantization is not supported for FC";
} else {
qnt.scales.push_back(weight_scale.at(0) * input_scale);
qnt.zero_points.push_back(0);
}
int quant_bits = 32;
auto dtype_max = static_cast<int>((1LL << (quant_bits - 1)) - 1);  // avoid signed overflow at 1 << 31
auto dtype_min = static_cast<int>(0 - dtype_max);
auto* bias_data = bias->data<float, float>();
int32_t* bias_qnt_data =
reinterpret_cast<int32_t*>(graph->GetBuilder()->GetBufromPool(
bias_dims.production() * sizeof(int32_t)));
for (int i = 0; i < n; i++) {
float current_scale = per_channel ? qnt.scales[i] : qnt.scales[0];
bias_qnt_data[i] =
std::min(std::max(static_cast<int>(bias_data[i] / current_scale),
dtype_min),
dtype_max);
}
std::vector<int64_t> shapes{1};
bias_node = graph->Add(
bias_name, bias_qnt_data, shapes, qnt, Node::Role::kConst);
} else {
qnt.type = IMGDNN_TYPE_F32;
bias_node = graph->Add(bias_name, *bias, qnt, Node::Role::kConst);
}
}
bias_tensor = bias_node->data();
}
imgdnn_quant_param output_quant_param;
output_quant_param.scale = output_scale;
output_quant_param.zero_point = 128;
imgdnn_tensor fc_out_tensor = graph->GetBuilder()->createFullyConnectedLayer(
input_node->data(), weight_node->data(), bias_tensor, output_quant_param);
imgdnn_tensor_descriptor desc;
imgdnn_err_code err = imgdnnGetTensorDescriptor(fc_out_tensor, &desc);
CHECK(err == IMGDNN_SUCCESS) << "failed to get tensor descriptor (FC)";
graph->Add(out_name, fc_out_tensor, desc.type);
// reshape to out_dims
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(fc, kNNA, paddle::lite::subgraph::nna::FCConverter);
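The m/k/n computation in the FC bridge flattens the input shape around the in_num_col_dims attribute: every dimension before it multiplies into the row count m, every dimension from it onward into the column count k (which must match the weight's first dimension). A small sketch of the same reduction, illustrative only:
#include <cstddef>
#include <cstdint>
#include <vector>
static void FlattenToMK(const std::vector<int64_t>& dims,
                        int in_num_col_dims,
                        int64_t* m, int64_t* k) {
  *m = 1;
  *k = 1;
  for (std::size_t i = 0; i < dims.size(); ++i) {
    if (static_cast<int>(i) < in_num_col_dims)
      *m *= dims[i];  // rows: leading dims
    else
      *k *= dims[i];  // cols: trailing dims, must equal the weight's row count
  }
}
// e.g. dims = {1, 1024, 1, 1}, in_num_col_dims = 1  =>  m = 1, k = 1024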
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/bridges/graph.h"
#include <utility>
#include "lite/kernels/nna/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
// Add 1
int Graph::Add(const std::string& name, std::shared_ptr<Node> node) {
auto it = nodes_.find(name);
if (it != nodes_.end()) {
// Only intermediate node can be shared with the same name
if (!node->is_data() || !it->second.back()->is_data()) {
LOG(FATAL) << "[NNA] Const or Input node " << name << " is redefined.";
return -1;
}
} else {
auto ret = nodes_.insert(
std::make_pair(name, std::vector<std::shared_ptr<Node>>()));
CHECK(ret.second);
it = ret.first;
}
it->second.push_back(node);
return it->second.size();
}
// Add 2
std::shared_ptr<Node> Graph::Add(const std::string& name,
const void* const const_data,
std::vector<int64_t> shape,
const TensorInfo& qnt,
Node::Role role /* = Node::Role::kData*/) {
auto node = std::make_shared<Node>(qnt.type, qnt.layout, role);
auto idx = Add(name, node);
CHECK_GE(idx, 1);
imgdnn_tensor_descriptor desc;
desc.type = qnt.type;
desc.dimensions = (unsigned)shape.size();
for (uint32_t i = 0; i < shape.size(); ++i) desc.size[i] = shape[i];
switch (qnt.type) {
case IMGDNN_TYPE_F32:
case IMGDNN_TYPE_I32:
break;
case IMGDNN_TYPE_Q_I8:
case IMGDNN_TYPE_Q_U8:
desc.quant_param.scale = qnt.scales[0];
desc.quant_param.zero_point = qnt.zero_points[0];
break;
case IMGDNN_TYPE_QPA_I8:
case IMGDNN_TYPE_QPA_U8:
desc.quant_param.per_axis = imgdnnCreatePerAxisQuantParam(
qnt.axis, qnt.count, qnt.scales.data(), qnt.zero_points.data());
CHECK(desc.quant_param.per_axis != nullptr);
break;
default:
LOG(FATAL) << "[NNA] invalid tensor type set in node: " << name;
return nullptr;
}
imgdnn_tensor out_tensor;
if (role == Node::Role::kConst) {
out_tensor = pImgdnnMgr->createFixedInputTensor(&desc, const_data, true);
} else {
LOG(INFO) << "[NNA] invald role set in this path: " << name;
}
if ((desc.type == IMGDNN_TYPE_QPA_I8 || desc.type == IMGDNN_TYPE_QPA_U8) &&
desc.quant_param.per_axis != nullptr)
imgdnnDestroyPerAxisQuantParam(desc.quant_param.per_axis);
node->set_data(out_tensor);
return node;
}
// Add 3
std::shared_ptr<Node> Graph::Add(const std::string& name,
const Tensor& tensor,
std::vector<int64_t> shape,
const TensorInfo& qnt,
Node::Role role) {
auto node = std::make_shared<Node>(qnt.type, qnt.layout, role);
auto idx = Add(name, node);
CHECK_GE(idx, 1);
imgdnn_tensor_descriptor desc;
desc.type = qnt.type;
desc.dimensions = (unsigned)shape.size();
for (uint32_t i = 0; i < shape.size(); ++i) desc.size[i] = shape[i];
switch (qnt.type) {
case IMGDNN_TYPE_F32:
case IMGDNN_TYPE_I32:
break;
case IMGDNN_TYPE_Q_I8:
case IMGDNN_TYPE_Q_U8:
desc.quant_param.scale = qnt.scales[0];
desc.quant_param.zero_point = qnt.zero_points[0];
break;
case IMGDNN_TYPE_QPA_I8:
case IMGDNN_TYPE_QPA_U8:
desc.quant_param.per_axis = imgdnnCreatePerAxisQuantParam(
qnt.axis, qnt.count, qnt.scales.data(), qnt.zero_points.data());
CHECK(desc.quant_param.per_axis != nullptr);
break;
default:
LOG(FATAL) << "[NNA] invalid tensor type set in node: " << name;
return nullptr;
}
imgdnn_tensor out_tensor;
if (role == Node::Role::kInput) {
out_tensor = pImgdnnMgr->createInputTensor(&desc);
} else if (role == Node::Role::kConst) {
const void* const_data = tensor.raw_data();
out_tensor = pImgdnnMgr->createFixedInputTensor(&desc, const_data, false);
} else {
LOG(INFO) << "[NNA] invald role set in this path: " << name;
}
if ((desc.type == IMGDNN_TYPE_QPA_I8 || desc.type == IMGDNN_TYPE_QPA_U8) &&
desc.quant_param.per_axis != nullptr)
imgdnnDestroyPerAxisQuantParam(desc.quant_param.per_axis);
node->set_data(out_tensor);
return node;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "imgdnn.h" // NOLINT
#include "lite/backends/nna/imgdnn_manager.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
#include "utility.h" // NOLINT
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
#define NNA_UNUSED(var) \
do { \
(void)(var); \
} while (0)
// Graph and Node are defined to collect all of the converted IMGDNN IR nodes
class Node {
public:
enum class Role {
kInput = 0,
kConst,
kData,
};
Node(imgdnn_tensor data, imgdnn_type type, DataLayoutType layout, Role role)
: data_(data), type_(type), layout_(layout), role_(role) {}
Node(imgdnn_type type, DataLayoutType layout, Role role)
: type_(type), layout_(layout), role_(role) {}
void set_data(imgdnn_tensor data) { data_ = data; }
void set_type(imgdnn_type type) { type_ = type; }
void set_layout(DataLayoutType layout) { layout_ = layout; }
void set_role(Role role) { role_ = role; }
template <typename T>
std::shared_ptr<T> data() {
return std::static_pointer_cast<T>(data_);
}
imgdnn_tensor data() { return data_; }
imgdnn_type type() const { return type_; }
DataLayoutType layout() const { return layout_; }
bool is_input() const { return role_ == Role::kInput; }
bool is_const() const { return role_ == Role::kConst; }
bool is_data() const { return role_ == Role::kData; }
private:
imgdnn_tensor data_{nullptr};
imgdnn_type type_{IMGDNN_TYPE_MAX};
DataLayoutType layout_{DATALAYOUT(kNCHW)};
Role role_{Role::kData};
};
class Graph {
public:
explicit Graph(lite::nna::ImgdnnManager* pMgr) : pImgdnnMgr(pMgr) {
  VLOG(5) << "[NNA] Graph created";
}
~Graph() { VLOG(5) << "[NNA] Graph destroyed"; }
// Add 1
int Add(const std::string& name, std::shared_ptr<Node> node);
// Add 2, weights,bias
std::shared_ptr<Node> Add(const std::string& name,
const void* const const_data,
std::vector<int64_t> shape,
const TensorInfo& qnt,
Node::Role role /* = Node::Role::kData*/);
// Add 3
std::shared_ptr<Node> Add(const std::string& name,
const Tensor& tensor,
std::vector<int64_t> shape,
const TensorInfo& qnt,
Node::Role role);
// Add 4
std::shared_ptr<Node> Add(const std::string& name,
const Tensor& tensor,
const TensorInfo& qnt,
Node::Role role) {
return Add(name, tensor, tensor.dims().Vectorize(), qnt, role);
}
// Used to add intermediate tensor
// Add 5
int Add(const std::string& name,
imgdnn_tensor tensor,
imgdnn_type type,
DataLayoutType layout = DATALAYOUT(kNCHW)) {
Node::Role role = Node::Role::kData;
auto node = std::make_shared<Node>(type, layout, role);
node->set_data(tensor);
return Add(name, node); // call Add 1
}
std::shared_ptr<Node> Get(std::string name) {
CHECK(Has(name)) << "[NNA] Node " << name << " not found.";
return nodes_.at(name).back();
}
bool Has(const std::string& name) {
return nodes_.find(name) != nodes_.end();
}
lite::nna::ImgdnnManager* GetBuilder() {
  CHECK(pImgdnnMgr != nullptr) << "[NNA] pImgdnnMgr used before initialization";
  return pImgdnnMgr;
}
private:
std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>> nodes_;
lite::nna::ImgdnnManager* pImgdnnMgr{nullptr};
};
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
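The converters above drive the Add/Get/Has variants roughly as in the commented usage sketch below; x_name, y_name, input_scale and out_tensor are placeholders rather than identifiers from the patch.
// Usage sketch (placeholders only):
//
//   TensorInfo qnt;
//   TensorInfoReset(&qnt);
//   qnt.type = IMGDNN_TYPE_Q_U8;
//   qnt.scales.push_back(input_scale);
//   qnt.zero_points.push_back(128);
//
//   // model input: builds an imgdnn input tensor via Add 3 / Add 4
//   std::shared_ptr<Node> x_node =
//       graph->Has(x_name) ? graph->Get(x_name)
//                          : graph->Add(x_name, *x, qnt, Node::Role::kInput);
//
//   // layer output: records an existing imgdnn tensor as intermediate (Add 5)
//   graph->Add(y_name, out_tensor, IMGDNN_TYPE_Q_U8);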
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
USE_SUBGRAPH_BRIDGE(relu, kNNA);
USE_SUBGRAPH_BRIDGE(conv2d, kNNA);
USE_SUBGRAPH_BRIDGE(depthwise_conv2d, kNNA);
USE_SUBGRAPH_BRIDGE(fc, kNNA);
USE_SUBGRAPH_BRIDGE(pool2d, kNNA);
// USE_SUBGRAPH_BRIDGE(softmax, kNNA);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/pool_op.h"
#include "imgdnn.h" // NOLINT
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto out_name = op_info->Output("Out").front();
auto pooling_type = op_info->GetAttr<std::string>("pooling_type");
auto global_pooling = op_info->GetAttr<bool>("global_pooling");
auto ksize = op_info->GetAttr<std::vector<int>>("ksize");
auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
}
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
  x_node = graph->Get(x_name);
} else {
  LOG(WARNING) << "[NNA] Pooling input not found: " << x_name;
  return FAILED;
}
// pool mode
imgdnn_pooling_type img_pool_type;
if (pooling_type == "max") {
img_pool_type = IMGDNN_POOLING_MAX;
} else if (pooling_type == "avg") {
img_pool_type = IMGDNN_POOLING_AVERAGE;
} else {
LOG(WARNING) << "[NNA] Unsupported pooling type: " << pooling_type;
return FAILED;
}
// pad mode
std::string padding_algorithm("");
if (op_info->HasAttr("padding_algorithm")) {
padding_algorithm = op_info->GetAttr<std::string>("padding_algorithm");
}
// paddings and strides
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
}
CHECK_EQ(paddings.size(), 4L)
<< "[NNA] Paddings size should be the same or twice as the inputs size.";
bool adaptive = false;
if (op_info->HasAttr("adaptive")) {
adaptive = op_info->GetAttr<bool>("adaptive");
}
auto strides = op_info->GetAttr<std::vector<int>>("strides");
lite::operators::UpdatePadding(&paddings,
global_pooling,
adaptive,
padding_algorithm,
x->dims(),
strides,
ksize);
// ceil mode
/* bool ceil_mode =
op_info->HasAttr("ceil_mode") && op_info->GetAttr<bool>("ceil_mode");
*/
unsigned int img_ksize[2] = {(unsigned int)ksize[0], (unsigned int)ksize[1]};
unsigned int img_stride[2] = {(unsigned int)strides[0],
(unsigned int)strides[1]};
unsigned int pad_to_begin[2] = {(unsigned int)paddings[0],
(unsigned int)paddings[2]}; // top,left
unsigned int pad_to_end[2] = {(unsigned int)paddings[1],
(unsigned int)paddings[3]}; // bottom,right
if (global_pooling) {
img_ksize[0] = x_dims[2];
img_ksize[1] = x_dims[3];
}
imgdnn_quant_param output_quant_param;
output_quant_param.scale = output_scale;
output_quant_param.zero_point = 128;
imgdnn_tensor pooling_out =
graph->GetBuilder()->createPoolingLayer(x_node->data(),
output_quant_param,
img_ksize,
img_stride,
pad_to_begin,
pad_to_end,
img_pool_type);
// LOG(INFO) << "pooling op output:" << static_cast<int>(pooling_out);
imgdnn_tensor_descriptor desc;
imgdnn_err_code err = imgdnnGetTensorDescriptor(pooling_out, &desc);
CHECK(err == IMGDNN_SUCCESS) << "failed to get tensor descriptor (POOL)";
graph->Add(out_name, pooling_out, desc.type);
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(pool2d,
kNNA,
paddle::lite::subgraph::nna::PoolConverter);
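As in the conv bridge, the insert-loop above expands a 2-element paddings attribute {pad_h, pad_w} into the 4-element {top, bottom, left, right} form by duplicating each entry. An equivalent standalone sketch, illustrative only:
#include <vector>
static std::vector<int> ExpandPaddings(std::vector<int> paddings) {
  if (paddings.size() == 2) {
    // {pad_h, pad_w} -> {pad_h, pad_h, pad_w, pad_w} = {top, bottom, left, right}
    paddings = {paddings[0], paddings[0], paddings[1], paddings[1]};
  }
  return paddings;  // already in the 4-element form otherwise
}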
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK(ctx != nullptr);
CHECK(op != nullptr);
auto graph = static_cast<Graph*>(ctx);
auto op_info = op->op_info();
auto op_type = op_info->Type();
auto scope = op->scope();
VLOG(3) << "[NNA] Converting " + op_type + "...";
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
auto x_dims = x->dims();
auto x_rank = x_dims.size();
auto out_name = op_info->Output("Out").front();
int axis = op_info->HasAttr("axis") ? op_info->GetAttr<int>("axis") : -1;
if (axis < 0) {
axis += x_rank;
}
// for quantization
float output_scale = 1.0;
if (op_info->HasAttr("enable_int8")) {
output_scale = op_info->GetAttr<float>("output_scale");
// X node
std::shared_ptr<Node> x_node = nullptr;
if (graph->Has(x_name)) {
x_node = graph->Get(x_name);
} else {
LOG(FATAL) << "[NNA] Softmax: Could not find the input tensor.";
}
imgdnn_quant_param output_quant_param;
output_quant_param.scale = output_scale;
output_quant_param.zero_point = 128;
imgdnn_tensor softmax_out_tensor = graph->GetBuilder()->createSoftmaxLayer(
x_node->data(), 1.0, axis, output_quant_param);
graph->Add(out_name, softmax_out_tensor, IMGDNN_TYPE_Q_U8);
} else {
LOG(FATAL) << "[NNA] Softmax: has no enable_int8 attribute.";
}
return REBUILD_WHEN_SHAPE_CHANGED;
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
REGISTER_SUBGRAPH_BRIDGE(softmax,
kNNA,
paddle::lite::subgraph::nna::SoftmaxConverter);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/bridges/utility.h"
#include <utility>
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
const std::string& argname) {
auto iarg_names = op_info->input_argnames();
if (std::find(iarg_names.begin(), iarg_names.end(), argname) !=
iarg_names.end()) {
auto inputs = op_info->Input(argname);
if (inputs.empty()) {
return false;
}
auto var_name = inputs.front();
auto var = scope->FindVar(var_name);
return var != nullptr;
} else {
return false;
}
}
bool isScalesPerChannel(std::vector<float> scales) {
  // Fewer than two scales always means per-tensor quantization and would
  // otherwise make "begin() + 1" invalid.
  if (scales.size() < 2) {
    return false;
  }
  for (auto iter = scales.begin() + 1; iter != scales.end(); ++iter) {
    if (*iter != scales.front()) {
      return true;
    }
  }
  return false;
}
void TensorInfoReset(TensorInfo* qnt) {
qnt->count = 0;
qnt->axis = 0;
qnt->scales.clear();
// qnt.scales.shrink_to_fit();
qnt->zero_points.clear();
// qnt.zero_points.shrink_to_fit();
qnt->layout = DATALAYOUT(kNCHW);
}
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
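For reference, the per-channel check above reports per-channel quantization only when the scale vector holds at least two distinct values; the expected behaviour is sketched below (these are illustrative calls, not tests from the patch).
// isScalesPerChannel({0.5f, 0.5f, 0.5f});  // false: one distinct scale (per-tensor)
// isScalesPerChannel({0.5f, 0.25f});       // true:  differing scales (per-channel)
// isScalesPerChannel({});                  // false: no scales recorded yet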
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "imgdnn.h" // NOLINT
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
namespace paddle {
namespace lite {
namespace subgraph {
namespace nna {
struct TensorInfo {
imgdnn_type type;
std::vector<float> scales;
std::vector<int> zero_points;
DataLayoutType layout;
unsigned count;
unsigned axis;
};
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
const std::string& argname);
bool isScalesPerChannel(std::vector<float> scales);
void TensorInfoReset(TensorInfo* qnt);
} // namespace nna
} // namespace subgraph
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/nna/subgraph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <limits>
#include <utility>
#include "lite/core/op_registry.h"
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/nna/bridges/paddle_use_bridges.h"
#include "lite/kernels/nna/bridges/utility.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace nna {
bool SubgraphEngine::BuildDeviceProgram() {
int status = 0;
// Convert all of the ops with their input vars and weights, and add them
// into the NNA IMGDNN IR graph
subgraph::nna::Graph graph{&imgdnn_mgr_};
const auto& bridges = subgraph::Registry::Instance();
if (!origin_program_) {
BuildOriginProgram();
}
const auto& insts = origin_program_->instructions(kRootBlockIdx);
for (auto& inst : insts) {
auto op = const_cast<OpLite*>(inst.op());
CHECK(op);
op->CheckShape();
op->InferShape();
std::string op_type = op->op_info()->Type();
if (!bridges.Exists(op_type, TARGET(kNNA))) {
// return subgraph::FAILED;
return false;
}
auto kernel = inst.kernel();
status |=
bridges.Select(op_type, TARGET(kNNA))(reinterpret_cast<void*>(&graph),
const_cast<OpLite*>(op),
const_cast<KernelBase*>(kernel));
if (subgraph::CHECK_FAILED(status)) {
// return subgraph::FAILED;
return false;
}
}
// Collect the valid input and output nodes in the IMGDNN IR graph and update
// the input and output names
device_inames_.clear();
std::vector<imgdnn_tensor> device_inodes;
for (auto& input_name : input_names_) {
if (graph.Has(input_name)) {
device_inodes.push_back(graph.Get(input_name)->data());
device_inames_.push_back(input_name);
} else {
LOG(WARNING) << "[NNA] Input node " << input_name
<< " is ignored because it does not exist.";
}
}
device_onames_.clear();
std::vector<imgdnn_tensor> device_onodes;
for (auto& output_name : output_names_) {
if (graph.Has(output_name)) {
device_onodes.push_back(graph.Get(output_name)->data());
device_onames_.push_back(output_name);
} else {
LOG(WARNING) << "[NNA] Output node " << output_name
<< " is ignored because it does not exist.";
}
}
CHECK(!device_inames_.empty())
<< "[NNA] No input nodes found for building NNA model";
CHECK(!device_onames_.empty())
<< "[NNA] No output nodes found for building NNA model";
imgdnn_mgr_.createNetworkObject(device_inodes.size(),
device_inodes.data(),
device_onodes.size(),
device_onodes.data());
// inputs
unsigned int num_inputs, num_outputs;
imgdnn_mgr_.getNetworkObjectInputs(
std::numeric_limits<unsigned int>::max(), nullptr, &num_inputs);
CHECK_EQ(num_inputs, device_inames_.size());
// origin_idims_.resize(num_inputs);
// origin_itensors_.resize(num_inputs);
device_itensors_.resize(num_inputs);
imgdnn_mgr_.getNetworkObjectInputs(
num_inputs, device_itensors_.data(), nullptr);
// show input info
for (int i = 0; i < num_inputs; i++) {
auto node = graph.Get(device_inames_[i]);
auto type = node->type();
auto layout = node->layout();
// origin_itensors_[i] = scope_->FindMutableTensor(device_inames_[i]);
// CHECK(origin_itensors_[i]);
// origin_idims_[i] = origin_itensors_[i]->dims();
VLOG(3) << "[NNA] Inputs[" << i << "] name: " << device_inames_[i]
<< " type: " << type << " layout: " << DataLayoutToStr(layout);
}
// outputs
imgdnn_mgr_.getNetworkObjectOutputs(
std::numeric_limits<unsigned int>::max(), nullptr, &num_outputs);
CHECK_EQ(num_outputs, device_onames_.size());
// origin_odims_.resize(num_outputs);
// origin_otensors_.resize(num_outputs);
device_otensors_.resize(num_outputs);
imgdnn_mgr_.getNetworkObjectOutputs(
num_outputs, device_otensors_.data(), nullptr);
// show output info
for (int i = 0; i < num_outputs; i++) {
auto node = graph.Get(device_onames_[i]);
auto type = node->type();
auto layout = node->layout();
// origin_otensors_[i] = scope_->FindMutableTensor(device_onames_[i]);
// CHECK(origin_otensors_[i]);
// origin_odims_[i] = origin_otensors_[i]->dims();
VLOG(3) << "[NNA] Outputs[" << i << "] name: " << device_onames_[i]
<< " type: " << type << " layout: " << DataLayoutToStr(layout);
// Prepare the device output tensors
switch (type) {
case IMGDNN_TYPE_F32:
origin_otensors_[i]->mutable_data<float>();
break;
case IMGDNN_TYPE_Q_I8:
case IMGDNN_TYPE_Q_U8:
origin_otensors_[i]->mutable_data<int8_t>();
break;
case IMGDNN_TYPE_I16:
origin_otensors_[i]->mutable_data<int16_t>();
break;
case IMGDNN_TYPE_I32:
origin_otensors_[i]->mutable_data<int32_t>();
break;
default:
LOG(FATAL) << "[NNA] " << device_onames_[i]
<< " can't mutable data with precision type " << type;
break;
}
}
return true;
}
bool SubgraphEngine::LaunchDeviceProgram() {
// Set input buffer
for (size_t i = 0; i < origin_itensors_.size(); i++) {
// check input shapes
imgdnn_tensor_descriptor in_desc =
imgdnn_mgr_.getInputDescriptor(device_itensors_[i]);
size_t in_size = imgdnn_mgr_.getDescriptorSize(&in_desc);
CHECK_EQ(in_size, origin_itensors_[i]->memory_size());
auto origin_data = origin_itensors_[i]->mutable_data<int8_t>();
auto converted_data = reinterpret_cast<uint8_t*>(origin_data);
for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
converted_data[j] =
static_cast<uint8_t>(static_cast<int16_t>(origin_data[j]) + 128);
}
imgdnn_memory in_mem = imgdnn_mgr_.importMemory(
static_cast<void*>(converted_data), origin_itensors_[i]->memory_size());
imgdnn_mgr_.addBindingInput(device_itensors_[i], in_mem);
}
// Set output buffer
std::vector<imgdnn_memory> out_mems;
for (size_t i = 0; i < origin_otensors_.size(); i++) {
// check output shapes
imgdnn_tensor_descriptor out_desc =
imgdnn_mgr_.getOutputDescriptor(device_otensors_[i]);
size_t out_size = imgdnn_mgr_.getDescriptorSize(&out_desc);
CHECK_EQ(out_size, origin_otensors_[i]->memory_size());
imgdnn_memory out_mem =
imgdnn_mgr_.allocateMemory(origin_otensors_[i]->memory_size());
imgdnn_mgr_.addBindingOutput(device_otensors_[i], out_mem);
out_mems.push_back(out_mem);
}
// Run the img model by name
imgdnn_mgr_.executeNetworkObject(true, 0, nullptr, nullptr);
// Copy the data of output tensor to the buffer of origin output tensors
for (size_t i = 0; i < out_mems.size(); i++) {
uint8_t* data = static_cast<uint8_t*>(
imgdnn_mgr_.lockMemory(out_mems[i], IMGDNN_LOCK_ACCESS_READ_ONLY));
int8_t* output_data = origin_otensors_[i]->mutable_data<int8_t>();
for (size_t j = 0; j < origin_otensors_[i]->data_size(); j++) {
output_data[j] = data[j] - 128;
}
imgdnn_mgr_.unlockMemory(out_mems[i]);
imgdnn_mgr_.destroyMemory(out_mems[i]);
}
return true;
}
void SubgraphCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
engine_.reset(new SubgraphEngine(ctx_.get(),
param.block_idx,
param.program_desc,
param.exec_scope,
param.input_data_names,
param.output_data_names));
CHECK(engine_);
}
void SubgraphCompute::Run() {
CHECK(engine_);
engine_->Run();
}
} // namespace nna
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(subgraph,
kNNA,
kInt8,
kNCHW,
paddle::lite::kernels::nna::SubgraphCompute,
def)
.BindInput("Inputs",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt8))})
.BindOutput("Outputs",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt8))})
.Finalize();
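LaunchDeviceProgram above bridges Paddle's signed int8 tensors and the device's unsigned 8-bit buffers by shifting the zero point by 128 on the way in and back again on the way out. The helpers below are a standalone sketch of those two conversions (they are illustrative and not part of ImgdnnManager).
#include <cstddef>
#include <cstdint>
// int8 input data -> unsigned 8-bit device buffer (zero point moves from 0 to 128)
static void SignedToUnsigned(const int8_t* src, uint8_t* dst, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    dst[i] = static_cast<uint8_t>(static_cast<int16_t>(src[i]) + 128);
}
// unsigned 8-bit device buffer -> int8 output data (zero point moves back to 0)
static void UnsignedToSigned(const uint8_t* src, int8_t* dst, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    dst[i] = static_cast<int8_t>(static_cast<int16_t>(src[i]) - 128);
}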
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "imgdnn.h" // NOLINT
#include "lite/backends/nna/imgdnn_manager.h"
#include "lite/core/kernel.h"
#include "lite/kernels/nna/bridges/graph.h"
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace nna {
class SubgraphEngine : public subgraph::Engine {
public:
SubgraphEngine(KernelContext* ctx,
int block_idx,
const std::shared_ptr<const cpp::ProgramDesc>& program_desc,
Scope* exec_scope,
const std::vector<std::string>& input_names,
const std::vector<std::string>& output_names)
: subgraph::Engine(ctx,
block_idx,
program_desc,
exec_scope,
input_names,
output_names) {}
~SubgraphEngine() {}
protected:
bool BuildDeviceProgram() override;
bool LaunchDeviceProgram() override;
std::vector<std::string> device_inames_;
std::vector<std::string> device_onames_;
std::vector<imgdnn_input> device_itensors_;
std::vector<imgdnn_output> device_otensors_;
lite::nna::ImgdnnManager imgdnn_mgr_;
};
class SubgraphCompute
: public KernelLite<TARGET(kNNA), PRECISION(kInt8), DATALAYOUT(kNCHW)> {
public:
using param_t = operators::SubgraphParam;
void PrepareForRun() override;
void Run() override;
virtual ~SubgraphCompute() = default;
private:
std::unique_ptr<SubgraphEngine> engine_;
};
} // namespace nna
} // namespace kernels
} // namespace lite
} // namespace paddle
......@@ -56,8 +56,8 @@ const std::vector<std::vector<std::string>> supported_ops_target = {
ops_lines = []
# valid targets and valid_ops
valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU", "kBM", "kMLU", "kRKNPU", "kAPU", "kHuaweiAscendNPU"]
valid_ops = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
valid_targets = ["kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny", "kFPGA", "kNPU", "kXPU", "kBM", "kMLU", "kRKNPU", "kAPU", "kHuaweiAscendNPU", "kNNA"]
valid_ops = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
class TargetType:
kUnk = 0
kHost = 1
......@@ -74,6 +74,7 @@ class TargetType:
kRKNPU = 12
kAPU = 13
kHuaweiAscendNPU = 14
kNNA = 15
# record op_info of valid kernels into `valid_ops` according to different target type
......
#!/bin/bash
set -e
readonly VERSION="3.8"
readonly VERSION="6.0.0"
version=$(clang-format -version)
......