Unverified commit 431afc39, authored by heliqi, committed by GitHub

Inference add ONNXRuntime back-end (#39988)

* add onnxruntime predictor

* Add code comments

* support link paddle2onnx onnxruntime

* support onnxruntime with python

* support onnxruntime with python

* support onnxruntime with windows

* paddle2onnx compile with windows

* support windows compile

* support windows compile with onnxruntime

* support windows compile with paddle2onnx

* support mac compile

* compile with mac

* compile with mac

* add code comments

* fix reminder wording

* code optimization

* add test case

* add test case

* add inference demo_ci test case

* fix compile paddle2onnx with no python

* add inference demo_ci test case

* add inference demo_ci test case

* add inference infer_ut test case

* support c go api and test cases

* add coverage test case

* add coverage test case

* add capi test case

* add capi test case
Parent: bd4dc3be
......@@ -53,6 +53,7 @@ option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
# to develop some acl related functionality on x86
option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
# Note(zhouwei): It uses the options above, so put it here
include(init)
include(generic) # simplify cmake module
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if (NOT WITH_ONNXRUNTIME)
return()
endif ()
if (WITH_ARM)
message(SEND_ERROR "The current onnxruntime backend doesn't support ARM CPUs")
return()
endif ()
INCLUDE(ExternalProject)
add_definitions(-DPADDLE_WITH_ONNXRUNTIME)
SET(ONNXRUNTIME_PROJECT "extern_onnxruntime")
SET(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime)
SET(ONNXRUNTIME_SOURCE_DIR ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT})
SET(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime)
SET(ONNXRUNTIME_INC_DIR "${ONNXRUNTIME_INSTALL_DIR}/include" CACHE PATH "onnxruntime include directory." FORCE)
SET(ONNXRUNTIME_LIB_DIR "${ONNXRUNTIME_INSTALL_DIR}/lib" CACHE PATH "onnxruntime lib directory." FORCE)
SET(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}")
if (WIN32)
SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-win-x64-1.10.0.zip")
elseif (APPLE)
SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-osx-x86_64-1.10.0.tgz")
else ()
SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz")
endif()
INCLUDE_DIRECTORIES(${ONNXRUNTIME_INC_DIR}) # For ONNXRUNTIME code to include internal headers.
if (WIN32)
SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.dll" CACHE FILEPATH "ONNXRUNTIME source library." FORCE)
SET(ONNXRUNTIME_SHARED_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.dll" CACHE FILEPATH "ONNXRUNTIME shared library." FORCE)
SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib" CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
elseif (APPLE)
SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.1.10.0.dylib" CACHE FILEPATH "ONNXRUNTIME source library." FORCE)
SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.1.10.0.dylib" CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
SET(ONNXRUNTIME_SHARED_LIB ${ONNXRUNTIME_LIB} CACHE FILEPATH "ONNXRUNTIME shared library." FORCE)
else ()
SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.so.1.10.0" CACHE FILEPATH "ONNXRUNTIME source library." FORCE)
SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so.1.10.0" CACHE FILEPATH "ONNXRUNTIME static library." FORCE)
SET(ONNXRUNTIME_SHARED_LIB ${ONNXRUNTIME_LIB} CACHE FILEPATH "ONNXRUNTIME shared library." FORCE)
endif ()
if (WIN32)
ExternalProject_Add(
${ONNXRUNTIME_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${ONNXRUNTIME_URL}
PREFIX ${ONNXRUNTIME_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} ${ONNXRUNTIME_SHARED_LIB} &&
${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.lib ${ONNXRUNTIME_LIB} &&
${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include ${ONNXRUNTIME_INC_DIR}
BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}
)
else ()
ExternalProject_Add(
${ONNXRUNTIME_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${ONNXRUNTIME_URL}
PREFIX ${ONNXRUNTIME_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} ${ONNXRUNTIME_LIB} &&
${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include ${ONNXRUNTIME_INC_DIR}
BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}
)
endif()
ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB})
ADD_DEPENDENCIES(onnxruntime ${ONNXRUNTIME_PROJECT})
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_ONNXRUNTIME)
return()
endif()
if (WITH_ARM)
message(SEND_ERROR "The current onnxruntime backend doesn't support ARM CPUs")
return()
endif ()
INCLUDE(ExternalProject)
SET(PADDLE2ONNX_PROJECT "extern_paddle2onnx")
SET(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx)
SET(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx)
SET(PADDLE2ONNX_INC_DIR "${PADDLE2ONNX_INSTALL_DIR}/include" CACHE PATH "paddle2onnx include directory." FORCE)
SET(PADDLE2ONNX_REPOSITORY ${GIT_URL}/PaddlePaddle/Paddle2ONNX.git)
SET(PADDLE2ONNX_TAG cpp)
SET(LIBDIR "lib")
SET(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}")
INCLUDE_DIRECTORIES(${PADDLE2ONNX_INC_DIR}) # For PADDLE2ONNX code to include internal headers.
if(WIN32)
SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.lib" CACHE FILEPATH "paddle2onnx static library." FORCE)
SET(PADDLE2ONNX_SHARED_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.dll" CACHE FILEPATH "paddle2onnx shared library." FORCE)
elseif(APPLE)
SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.dylib" CACHE FILEPATH "PADDLE2ONNX library." FORCE)
else()
SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.so" CACHE FILEPATH "PADDLE2ONNX library." FORCE)
endif(WIN32)
# The protoc path is required to compile onnx.
string(REPLACE "/" ";" PROTOC_BIN_PATH ${PROTOBUF_PROTOC_EXECUTABLE})
list(POP_BACK PROTOC_BIN_PATH)
list(JOIN PROTOC_BIN_PATH "/" PROTOC_BIN_PATH)
set(PADDLE2ONNX_OPTIONAL_ARGS
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
-DWITH_STATIC=OFF
-DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
)
if (WITH_PYTHON)
set(PADDLE2ONNX_OPTIONAL_ARGS ${PADDLE2ONNX_OPTIONAL_ARGS}
-DPYTHON_EXECUTABLE:FILEPATH=${PYTHON_EXECUTABLE}
-DPYTHON_INCLUDE_DIR:PATH=${PYTHON_INCLUDE_DIR}
-DPYTHON_LIBRARY:FILEPATH=${PYTHON_LIBRARY}
)
endif ()
ExternalProject_Add(
${PADDLE2ONNX_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${PADDLE2ONNX_REPOSITORY}
GIT_TAG ${PADDLE2ONNX_TAG}
DEPENDS protobuf
PREFIX ${PADDLE2ONNX_PREFIX_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS ${PADDLE2ONNX_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PADDLE2ONNX_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${PADDLE2ONNX_LIB}
)
ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE2ONNX_LIB})
ADD_DEPENDENCIES(paddle2onnx ${PADDLE2ONNX_PROJECT})
......@@ -198,7 +198,11 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
"-Dprotobuf_MSVC_STATIC_RUNTIME=${MSVC_STATIC_CRT}")
ENDIF()
if(WITH_ASCEND AND NOT WITH_ASCEND_CXX11)
if(WITH_ONNXRUNTIME)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG v3.18.0)
elseif(WITH_ASCEND AND NOT WITH_ASCEND_CXX11)
SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git)
SET(PROTOBUF_TAG v3.8.0)
elseif(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
......@@ -248,7 +252,9 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
)
ENDFUNCTION()
if(WITH_ASCEND OR WITH_ASCEND_CL)
if(WITH_ONNXRUNTIME)
SET(PROTOBUF_VERSION 3.18.0)
elseif(WITH_ASCEND OR WITH_ASCEND_CL)
SET(PROTOBUF_VERSION 3.8.0)
elseif(WITH_IPU)
SET(PROTOBUF_VERSION 3.6.1)
......
......@@ -114,6 +114,24 @@ function(copy_part_of_thrid_party TARGET DST)
endif()
endif()
if (WITH_ONNXRUNTIME)
set(dst_dir "${DST}/third_party/install/onnxruntime")
copy(${TARGET}
SRCS ${ONNXRUNTIME_INC_DIR} ${ONNXRUNTIME_LIB_DIR}
DSTS ${dst_dir} ${dst_dir})
set(dst_dir "${DST}/third_party/install/paddle2onnx")
if(WIN32)
copy(${TARGET}
SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_SHARED_LIB} ${PADDLE2ONNX_LIB}
DSTS ${dst_dir}/include ${dst_dir}/lib ${dst_dir}/lib)
else()
copy(${TARGET}
SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_LIB}
DSTS ${dst_dir}/include ${dst_dir}/lib)
endif()
endif()
set(dst_dir "${DST}/third_party/install/gflags")
copy(${TARGET}
SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
......
......@@ -250,6 +250,12 @@ IF(WITH_TESTING OR WITH_DISTRIBUTE)
list(APPEND third_party_deps extern_gtest)
ENDIF()
if(WITH_ONNXRUNTIME)
include(external/onnxruntime) # download, build, install onnxruntime and paddle2onnx
include(external/paddle2onnx)
list(APPEND third_party_deps extern_onnxruntime extern_paddle2onnx)
endif()
if(WITH_GPU)
if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
include(external/cub) # download cub
......
......@@ -45,6 +45,11 @@ add_subdirectory(api)
set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor
zero_copy_tensor reset_tensor_array
analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg})
if(WITH_ONNXRUNTIME)
set(STATIC_INFERENCE_API ${STATIC_INFERENCE_API} onnxruntime_predictor)
endif()
#TODO(wilber, T8T9): Do we still need to support windows gpu static library?
if(WIN32 AND WITH_GPU)
cc_library(paddle_inference DEPS ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules})
......@@ -91,6 +96,13 @@ if (WITH_PSCORE)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} fleet ps_service)
endif ()
if (WITH_ONNXRUNTIME)
set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc
)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} onnxruntime_predictor)
endif (WITH_ONNXRUNTIME)
# Create shared inference library
cc_library(paddle_inference_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${SHARED_INFERENCE_DEPS})
......
......@@ -49,8 +49,15 @@ if(WITH_GPU AND TENSORRT_FOUND)
set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter)
endif()
cc_library(analysis_predictor SRCS analysis_predictor.cc ${mkldnn_quantizer_src} DEPS ${inference_deps}
if (WITH_ONNXRUNTIME)
cc_library(analysis_predictor SRCS analysis_predictor.cc ${mkldnn_quantizer_src} DEPS ${inference_deps}
zero_copy_tensor ir_pass_manager op_compatible_info infer_io_utils onnxruntime paddle2onnx)
cc_library(onnxruntime_predictor SRCS onnxruntime_predictor.cc DEPS analysis_predictor)
else (WITH_ONNXRUNTIME)
cc_library(analysis_predictor SRCS analysis_predictor.cc ${mkldnn_quantizer_src} DEPS ${inference_deps}
zero_copy_tensor ir_pass_manager op_compatible_info infer_io_utils)
endif (WITH_ONNXRUNTIME)
cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api)
......@@ -75,6 +82,16 @@ elseif (WIN32)
ARGS --dirname=${WORD2VEC_MODEL_DIR})
endif()
if (WITH_ONNXRUNTIME)
if (NOT APPLE AND NOT WIN32)
cc_test(test_onnxruntime_predictor SRCS onnxruntime_predictor_tester.cc DEPS paddle_inference_shared
ARGS --dirname=${MOBILENETV2_MODEL_DIR})
elseif (WIN32)
cc_test(test_onnxruntime_predictor SRCS onnxruntime_predictor_tester.cc DEPS onnxruntime_predictor benchmark ${inference_deps}
ARGS --dirname=${MOBILENETV2_MODEL_DIR})
endif()
endif()
if(WITH_TESTING AND WITH_MKLDNN)
if (NOT APPLE AND NOT WIN32)
cc_test(test_mkldnn_quantizer SRCS mkldnn_quantizer_tester.cc DEPS paddle_inference_shared ARGS --dirname=${WORD2VEC_MODEL_DIR})
......
......@@ -168,6 +168,33 @@ void AnalysisConfig::SetIpuConfig(bool ipu_enable_fp16, int ipu_replica_num,
Update();
}
void AnalysisConfig::EnableONNXRuntime() {
#ifdef PADDLE_WITH_ONNXRUNTIME
use_onnxruntime_ = true;
#else
LOG(ERROR) << "Please compile with onnxruntime to EnableONNXRuntime()";
use_onnxruntime_ = false;
#endif
Update();
}
void AnalysisConfig::DisableONNXRuntime() {
use_onnxruntime_ = false;
Update();
}
void AnalysisConfig::EnableORTOptimization() {
#ifdef PADDLE_WITH_ONNXRUNTIME
enable_ort_optimization_ = true;
#else
LOG(ERROR) << "Please compile with onnxruntime to EnableORTOptimization()";
enable_ort_optimization_ = false;
#endif
Update();
}
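Taken together, these switches can be driven as in the following minimal sketch (illustration only, not part of the patch; it assumes an AnalysisConfig created by the caller):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

void ConfigureOrtBackend(paddle::AnalysisConfig *config) {
  config->EnableONNXRuntime();      // logs an error and stays off if Paddle
                                    // was built without WITH_ONNXRUNTIME
  config->EnableORTOptimization();  // lets ONNXRuntime apply ORT_ENABLE_ALL
  if (!config->use_onnxruntime()) {
    // Built without WITH_ONNXRUNTIME: inference keeps using the native
    // Paddle engine.
  }
  // config->DisableONNXRuntime();  // switch back to the native engine
}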
AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
#define CP_MEMBER(member__) member__ = other.member__;
......
......@@ -65,6 +65,10 @@
#include "paddle/fluid/inference/api/mkldnn_quantizer.h"
#endif
#ifdef PADDLE_WITH_ONNXRUNTIME
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#endif
#if PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/helper.h"
......@@ -1762,6 +1766,27 @@ namespace paddle_infer {
Predictor::Predictor(const Config &config) {
const_cast<Config *>(&config)->SwitchUseFeedFetchOps(false);
// The second parameter indicates that the discard log is not printed
if (config.use_onnxruntime()) {
#ifdef PADDLE_WITH_ONNXRUNTIME
if (config.use_gpu()) {
LOG(WARNING) << "The current ONNXRuntime backend doesn't support GPU,"
"and it falls back to use Paddle Inference.";
} else if (!paddle::CheckConvertToONNX(config)) {
LOG(WARNING)
    << "Paddle2ONNX doesn't support converting this model, falling back to "
       "Paddle Inference.";
} else {
predictor_ = paddle::CreatePaddlePredictor<
Config, paddle::PaddleEngineKind::kONNXRuntime>(config);
return;
}
#else
LOG(WARNING)
    << "The onnxruntime backend isn't enabled; please re-compile Paddle "
       "with the WITH_ONNXRUNTIME option. Falling back to Paddle Inference.";
#endif
}
predictor_ = paddle::CreatePaddlePredictor<
Config, paddle::PaddleEngineKind::kAnalysis>(config);
}
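With the fallback logic above, callers of the high-level API need no changes; a minimal sketch (the model paths below are placeholders, not files shipped with this patch):

paddle_infer::Config config("./model/inference.pdmodel",     // placeholder
                            "./model/inference.pdiparams");  // placeholder
config.EnableONNXRuntime();
config.EnableORTOptimization();
// An ONNXRuntime-backed predictor is returned only when Paddle was built
// with WITH_ONNXRUNTIME, GPU is not requested, and Paddle2ONNX can convert
// the model; otherwise creation silently falls back to the kAnalysis engine.
auto predictor = paddle_infer::CreatePredictor(config);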
......
......@@ -357,6 +357,24 @@ TEST(AnalysisPredictor, set_xpu_device_id) {
}
#endif
TEST(AnalysisPredictor, enable_onnxruntime) {
AnalysisConfig config;
config.EnableONNXRuntime();
#ifdef PADDLE_WITH_ONNXRUNTIME
ASSERT_TRUE(config.use_onnxruntime());
#else
ASSERT_TRUE(!config.use_onnxruntime());
#endif
config.EnableORTOptimization();
#ifdef PADDLE_WITH_ONNXRUNTIME
ASSERT_TRUE(config.ort_optimization_enabled());
#else
ASSERT_TRUE(!config.ort_optimization_enabled());
#endif
config.DisableONNXRuntime();
ASSERT_TRUE(!config.use_onnxruntime());
}
} // namespace paddle
namespace paddle_infer {
......@@ -408,6 +426,14 @@ TEST(Predictor, Run) {
predictor->TryShrinkMemory();
}
TEST(Predictor, EnableONNXRuntime) {
Config config;
config.SetModel(FLAGS_dirname);
config.EnableONNXRuntime();
config.EnableORTOptimization();
auto predictor = CreatePredictor(config);
}
TEST(Tensor, CpuShareExternalData) {
Config config;
config.SetModel(FLAGS_dirname);
......
......@@ -4,6 +4,7 @@ option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL.
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
option(USE_TENSORRT "Compile demo with TensorRT." OFF)
option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF)
if(NOT WITH_STATIC_LIB)
add_definitions("-DPADDLE_WITH_SHARED_LIB")
......@@ -46,6 +47,13 @@ link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib")
link_directories("${PADDLE_LIB}/paddle/lib")
if (WITH_ONNXRUNTIME)
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib")
endif()
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
......@@ -151,6 +159,17 @@ else()
endif()
endif()
if (WITH_ONNXRUNTIME)
if(WIN32)
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx)
elseif(APPLE)
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx)
else()
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx)
endif()
endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
......@@ -213,6 +232,14 @@ if(WIN32)
COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
)
endif()
if(WITH_ONNXRUNTIME)
add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll
${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll
${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
)
endif()
if(NOT WITH_STATIC_LIB)
add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains a demo of MobileNet inference using the ONNXRuntime backend.
*/
#include <glog/logging.h> // use glog instead of CHECK to avoid importing other paddle header files.
#include <vector>
#include "gflags/gflags.h"
#include "utils.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
namespace paddle {
namespace demo {
/*
* Use the ONNXRuntime engine to run inference in this demo.
*/
void Main() {
paddle::AnalysisConfig config;
config.EnableONNXRuntime();
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
auto predictor = paddle_infer::CreatePredictor(config);
// Inference.
std::vector<int> input_shape = {1, 3, 224, 224};
std::vector<float> input_data(1 * 3 * 224 * 224, 1.0);
std::vector<float> out_data;
out_data.resize(1000);
auto input_names = predictor->GetInputNames();
auto output_names = predictor->GetOutputNames();
auto input_tensor = predictor->GetInputHandle(input_names[0]);
input_tensor->Reshape(input_shape);
auto output_tensor = predictor->GetOutputHandle(output_names[0]);
input_tensor->CopyFromCpu(input_data.data());
predictor->Run();
output_tensor->CopyToCpu(out_data.data());
VLOG(3) << "output.size " << out_data.size();
}
} // namespace demo
} // namespace paddle
int main(int argc, char** argv) {
::GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
paddle::demo::Main();
return 0;
}
......@@ -21,7 +21,8 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
DATA_DIR=$4 # dataset
USE_TENSORRT=$5
TENSORRT_ROOT_DIR=$6 # TensorRT root dir, default to /usr
MSVC_STATIC_CRT=$7
WITH_ONNXRUNTIME=$7
MSVC_STATIC_CRT=$8
inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
......@@ -38,6 +39,26 @@ else
use_gpu_list='false'
fi
mkdir -p $DATA_DIR
cd $DATA_DIR
if [ $7 == ON ]; then
ONNXRUNTIME_LIB=${inference_install_dir}/third_party/install/onnxruntime/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${ONNXRUNTIME_LIB}
PADDLE2ONNX_LIB=${inference_install_dir}/third_party/install/paddle2onnx/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE2ONNX_LIB}
#download model
mkdir -p MobileNetV2
cd MobileNetV2
if [[ -e "MobileNetV2.inference.model.tar.gz" ]]; then
echo "MobileNetV2.inference.model.tar.gz has been downloaded."
else
wget -q --no-proxy http://paddle-inference-dist.bj.bcebos.com/MobileNetV2.inference.model.tar.gz
tar xzf *.tar.gz
fi
cd ..
fi
PREFIX=inference-vis-demos%2F
URL_ROOT=http://paddlemodels.bj.bcebos.com/${PREFIX}
......@@ -58,8 +79,7 @@ function download() {
fi
cd ..
}
mkdir -p $DATA_DIR
cd $DATA_DIR
vis_demo_list='se_resnext50 ocr mobilenet'
for vis_demo_name in $vis_demo_list; do
download $vis_demo_name
......@@ -93,7 +113,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln
for use_gpu in $use_gpu_list; do
Release/simple_on_word2vec.exe \
......@@ -112,7 +133,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DDEMO_NAME=vis_demo \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln
for use_gpu in $use_gpu_list; do
for vis_demo_name in $vis_demo_list; do
......@@ -138,7 +160,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln
Release/trt_mobilenet_demo.exe \
--modeldir=$DATA_DIR/mobilenet/model \
......@@ -156,7 +179,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=simple_on_word2vec \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
make -j$(nproc)
word2vec_model=$DATA_DIR'/word2vec/word2vec.inference.model'
if [ -d $word2vec_model ]; then
......@@ -176,7 +200,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=vis_demo \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
make -j$(nproc)
for use_gpu in $use_gpu_list; do
for vis_demo_name in $vis_demo_list; do
......@@ -200,7 +225,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
make -j$(nproc)
./trt_mobilenet_demo \
--modeldir=$DATA_DIR/mobilenet/model \
......@@ -211,6 +237,26 @@ for WITH_STATIC_LIB in ON OFF; do
exit 1
fi
fi
# --------onnxruntime mobilenetv2 on linux/mac------
if [ $WITH_ONNXRUNTIME == ON ]; then
rm -rf *
cmake .. -DPADDLE_LIB=${inference_install_dir} \
-DWITH_MKL=$TURN_ON_MKL \
-DDEMO_NAME=onnxruntime_mobilenet_demo \
-DWITH_GPU=$TEST_GPU_CPU \
-DWITH_STATIC_LIB=$WITH_STATIC_LIB \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
make -j$(nproc)
./onnxruntime_mobilenet_demo \
--modeldir=$DATA_DIR/MobileNetV2/MobileNetV2
if [ $? -ne 0 ]; then
echo "onnxruntime demo onnxruntime_mobilenet_demo runs fail."
exit 1
fi
fi
fi
done
set +x
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#include <glog/logging.h>
#include <algorithm>
#include <fstream>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid//platform/device/gpu/gpu_types.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
framework::proto::VarType::Type ConvertONNXType(
ONNXTensorElementDataType type) {
switch (type) {
case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
return framework::proto::VarType::FP32;
// case ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
// return DataType::FP16;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
return framework::proto::VarType::INT8;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
return framework::proto::VarType::INT32;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
return framework::proto::VarType::INT64;
case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
return framework::proto::VarType::UINT8;
default:
LOG(ERROR) << "unsupported ONNX Tensor Type: " << static_cast<int>(type);
return framework::proto::VarType::FP32;
}
}
bool CheckConvertToONNX(const AnalysisConfig &config) {
if (!config.model_dir().empty()) {
LOG(ERROR) << "Paddle2ONNX not support model_dir config";
// TODO(heliqi jiangjiajun): Paddle2ONNX not support
// config.model_dir() + "/__model__"
// config.model_dir() + var_name
return false;
} else if (config.prog_file().empty() || config.params_file().empty()) {
LOG(ERROR) << string::Sprintf(
"not valid model path '%s' or program path '%s' or params path '%s'.",
config.model_dir(), config.prog_file(), config.params_file());
return false;
}
return paddle2onnx::IsExportable(config.prog_file(), config.params_file(),
config.model_from_memory());
}
bool ONNXRuntimePredictor::Init() {
VLOG(3) << "ONNXRuntime Predictor::init()";
// Currently ONNXRuntime only supports CPU
if (config_.use_gpu()) {
place_ = paddle::platform::CUDAPlace(config_.gpu_device_id());
} else {
place_ = paddle::platform::CPUPlace();
}
scope_.reset(new paddle::framework::Scope());
sub_scope_ = &scope_->NewScope();
std::string onnx_proto;
paddle2onnx::Export(config_.prog_file(), config_.params_file(), &onnx_proto,
config_.model_from_memory());
Ort::SessionOptions session_options;
if (config_.ort_optimization_enabled()) {
session_options.SetGraphOptimizationLevel(
GraphOptimizationLevel::ORT_ENABLE_ALL);
}
// Turn optimization off first, and then turn it on when it's stable
// session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
// session_options.EnableCpuMemArena();
// session_options.EnableMemPattern();
// session_options.SetInterOpNumThreads(config_.cpu_math_library_num_threads());
session_options.SetIntraOpNumThreads(config_.cpu_math_library_num_threads());
VLOG(2) << "ONNXRuntime threads " << config_.cpu_math_library_num_threads();
if (config_.profile_enabled()) {
LOG(WARNING) << "ONNXRuntime Profiler is activated, which might affect the "
"performance";
#if defined(_WIN32)
session_options.EnableProfiling(L"ONNX");
#else
session_options.EnableProfiling("ONNX");
#endif
} else {
VLOG(2) << "ONNXRuntime Profiler is deactivated, and no profiling report "
"will be "
"generated.";
}
session_ = {env_, onnx_proto.data(), onnx_proto.size(), session_options};
auto memory_info =
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Allocator allocator(session_, memory_info);
framework::proto::VarType::Type proto_type =
framework::proto::VarType::LOD_TENSOR;
size_t n_inputs = session_.GetInputCount();
for (size_t i = 0; i < n_inputs; ++i) {
auto input_name = session_.GetInputName(i, allocator);
auto type_info = session_.GetInputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
input_desc_.emplace_back(ONNXDesc{input_name, shape, data_type});
auto *ptr = scope_->Var(input_name);
framework::InitializeVariable(ptr, proto_type);
allocator.Free(input_name);
}
size_t n_outputs = session_.GetOutputCount();
for (size_t i = 0; i < n_outputs; ++i) {
auto output_name = session_.GetOutputName(i, allocator);
auto type_info = session_.GetOutputTypeInfo(i);
std::vector<int64_t> shape =
type_info.GetTensorTypeAndShapeInfo().GetShape();
ONNXTensorElementDataType data_type =
type_info.GetTensorTypeAndShapeInfo().GetElementType();
output_desc_.emplace_back(ONNXDesc{output_name, shape, data_type});
auto *ptr = scope_->Var(output_name);
framework::InitializeVariable(ptr, proto_type);
allocator.Free(output_name);
}
return true;
}
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kONNXRuntime>(
const AnalysisConfig &config) {
if (config.glog_info_disabled()) {
FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
}
PADDLE_ENFORCE_EQ(
config.is_valid(), true,
platform::errors::InvalidArgument(
"Note: Each config can only be used for one predictor."));
VLOG(3) << "create ONNXRuntimePredictor";
std::unique_ptr<PaddlePredictor> predictor(new ONNXRuntimePredictor(config));
// Each config can only be used for one predictor.
config.SetInValid();
auto predictor_p = dynamic_cast<ONNXRuntimePredictor *>(predictor.get());
if (!predictor_p->Init()) {
return nullptr;
}
return predictor;
}
std::vector<std::string> ONNXRuntimePredictor::GetInputNames() {
std::vector<std::string> input_names;
for (auto input_desc : input_desc_) {
input_names.push_back(input_desc.name);
}
return input_names;
}
std::map<std::string, std::vector<int64_t>>
ONNXRuntimePredictor::GetInputTensorShape() {
std::map<std::string, std::vector<int64_t>> input_shapes;
for (auto input_desc : input_desc_) {
input_shapes[input_desc.name] = input_desc.shape;
}
return input_shapes;
}
std::vector<std::string> ONNXRuntimePredictor::GetOutputNames() {
std::vector<std::string> output_names;
for (auto output_desc : output_desc_) {
output_names.push_back(output_desc.name);
}
return output_names;
}
std::unique_ptr<ZeroCopyTensor> ONNXRuntimePredictor::GetInputTensor(
const std::string &name) {
PADDLE_ENFORCE_NOT_NULL(scope_->FindVar(name),
platform::errors::PreconditionNotMet(
"The in variable named %s is not found in the "
"scope of the ONNXPredictor.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(scope_.get())));
res->input_or_output_ = true;
res->SetName(name);
if (platform::is_cpu_place(place_)) {
res->SetPlace(PaddlePlace::kCPU);
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
}
return res;
}
std::unique_ptr<ZeroCopyTensor> ONNXRuntimePredictor::GetOutputTensor(
const std::string &name) {
PADDLE_ENFORCE_NOT_NULL(scope_->FindVar(name),
platform::errors::PreconditionNotMet(
"The out variable named %s is not found in the "
"scope of the ONNXPredictor.",
name));
std::unique_ptr<ZeroCopyTensor> res(
new ZeroCopyTensor(static_cast<void *>(scope_.get())));
res->input_or_output_ = false;
res->SetName(name);
if (platform::is_cpu_place(place_)) {
res->SetPlace(PaddlePlace::kCPU);
} else {
auto gpu_place = place_;
res->SetPlace(PaddlePlace::kGPU, gpu_place.GetDeviceId());
}
return res;
}
Ort::Value ONNXRuntimePredictor::GetOrtValue(const ONNXDesc &desc,
const char *device_name) {
Ort::MemoryInfo memory_info(device_name, OrtDeviceAllocator,
place_.GetDeviceId(), OrtMemTypeDefault);
auto *var = scope_->FindVar(desc.name);
auto *tensor = var->GetMutable<framework::LoDTensor>();
size_t size =
tensor->numel() *
framework::SizeOfType(framework::TransToProtoVarType(tensor->dtype()));
std::vector<int64_t> shape = phi::vectorize<int64_t>(tensor->dims());
return Ort::Value::CreateTensor(memory_info,
static_cast<void *>(tensor->data()), size,
shape.data(), shape.size(), desc.dtype);
}
void ONNXRuntimePredictor::AsTensor(const Ort::Value &value,
const ONNXDesc &desc) {
auto info = value.GetTensorTypeAndShapeInfo();
auto *var = scope_->FindVar(desc.name);
auto *tensor = var->GetMutable<framework::LoDTensor>();
tensor->Resize(phi::make_ddim(info.GetShape()));
auto dtype = ConvertONNXType(info.GetElementType());
auto *ptr = tensor->mutable_data(place_, dtype);
if (platform::is_cpu_place(place_)) {
std::memcpy(ptr, const_cast<void *>(value.GetTensorData<void>()),
tensor->numel() * framework::SizeOfType(dtype));
} else {
auto src_place = place_;
auto dst_place = place_;
memory::Copy(dst_place, ptr, src_place,
const_cast<void *>(value.GetTensorData<void>()),
tensor->numel() * framework::SizeOfType(dtype));
}
}
bool ONNXRuntimePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
LOG(ERROR) << "Not support Run";
return false;
}
bool ONNXRuntimePredictor::ZeroCopyRun() {
try {
Ort::IoBinding binding(session_);
std::vector<Ort::Value> inputs;
std::vector<Ort::Value> outputs;
Ort::RunOptions options;
inputs.reserve(input_desc_.size());
const char *device_name = config_.use_gpu() ? "Cuda" : "Cpu";
for (auto desc : input_desc_) {
inputs.push_back(GetOrtValue(desc, device_name));
binding.BindInput(desc.name.c_str(), inputs.back());
}
// TODO(heliqi): Optimization -- move to Init()
for (auto desc : output_desc_) {
Ort::MemoryInfo memory_info(device_name, OrtDeviceAllocator,
place_.GetDeviceId(), OrtMemTypeDefault);
binding.BindOutput(desc.name.c_str(), memory_info);
}
session_.Run({}, binding);
outputs = binding.GetOutputValues();
for (size_t i = 0; i < output_desc_.size(); ++i) {
AsTensor(outputs[i], output_desc_[i]);
}
} catch (const std::exception &e) {
LOG(ERROR) << e.what();
return false;
}
return true;
}
std::unique_ptr<PaddlePredictor> ONNXRuntimePredictor::Clone() {
LOG(ERROR) << "Not support Clone(), Please create new Predictor";
return nullptr;
}
uint64_t ONNXRuntimePredictor::TryShrinkMemory() {
return paddle::memory::Release(place_);
}
ONNXRuntimePredictor::~ONNXRuntimePredictor() {
if (sub_scope_) {
scope_->DeleteScope(sub_scope_);
}
memory::Release(place_);
}
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/op_compatible_info.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/string/printf.h"
#include "onnxruntime_c_api.h" // NOLINT
#include "onnxruntime_cxx_api.h" // NOLINT
#include "paddle2onnx/converter.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
///
/// \file onnxruntime_predictor.h
///
/// \brief A predictor using ONNXRuntime
///
/// \author heliqi@baidu.com
/// \date 2022-02-14
/// \since 2.3.0
///
namespace paddle {
bool CheckConvertToONNX(const AnalysisConfig &config);
struct ONNXDesc {
std::string name;
std::vector<int64_t> shape;
ONNXTensorElementDataType dtype;
};
///
/// \class ONNXRuntimePredictor
///
/// \brief The ONNXRuntimePredictor using ONNXRuntime for inference
///
/// The predictor has the following typical uses:
///
/// Get predictor
/// \code{cpp}
/// auto predictor = CreatePaddlePredictor(config);
/// \endcode
///
/// Get input or output names
/// \code{cpp}
/// auto input_names = predictor->GetInputNames();
/// auto output_names = predictor->GetOutputNames();
/// \endcode
///
/// Get input or output tensors
/// \code{cpp}
/// auto input_t = predictor->GetInputTensor(input_names[0]);
/// auto output_t = predictor->GetOutputTensor(output_names[0]);
/// \endcode
///
/// Run predictor
/// \code{cpp}
/// predictor->ZeroCopyRun();
/// \endcode
///
class ONNXRuntimePredictor : public PaddlePredictor {
public:
///
/// \brief Construct a new ONNXRuntime Predictor object
///
/// \param[in] AnalysisConfig config
///
explicit ONNXRuntimePredictor(const AnalysisConfig &config)
: config_(config) {
predictor_id_ = inference::GetUniqueId();
env_ = Ort::Env(ORT_LOGGING_LEVEL_INFO, "onnx");
}
///
/// \brief Destroy the ONNXRuntime Predictor object
///
~ONNXRuntimePredictor();
///
/// \brief Initialize predictor
///
/// \return Whether the init function executed successfully
///
bool Init();
///
/// \brief Get the input names
///
/// \return input names
///
std::vector<std::string> GetInputNames();
///
/// \brief Get the output names
///
/// \return output names
///
std::vector<std::string> GetOutputNames();
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
std::unique_ptr<ZeroCopyTensor> GetInputTensor(
const std::string &name) override;
///
/// \brief Get the Output Tensor object
///
/// \param[in] name output name
/// \return output tensor
///
std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
const std::string &name) override;
///
/// \brief Get all input names and their corresponding shapes
///
/// \return the map of input names and shapes
///
std::map<std::string, std::vector<int64_t>> GetInputTensorShape() override;
/// Not supported.
bool Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size = -1) override;
///
/// \brief Run the prediction engine
///
/// \return Whether the function executed successfully
///
bool ZeroCopyRun() override;
///
/// \brief Release all temporary tensors to compress the size of the memory pool.
/// The memory pool is considered to be composed of a list of chunks, if
/// the chunk is not occupied, it can be released.
///
/// \return Number of bytes released. It may be smaller than the actual
/// released memory, because part of the memory is not managed by the
/// MemoryPool.
///
uint64_t TryShrinkMemory() override;
///
/// \brief Clone to get a new predictor. Thread safe.
///
/// \return get a new predictor
///
std::unique_ptr<PaddlePredictor> Clone() override;
std::shared_ptr<framework::Scope> scope_;
private:
///
/// \brief Get the Ort::Value for an input tensor.
///
/// \param[in] desc ONNXDesc (name, shape, dtype)
///
/// \param[in] device_name device name, "cpu" or "gpu"
///
/// \return an Ort::Value
///
Ort::Value GetOrtValue(const ONNXDesc &desc, const char *device_name);
///
/// \brief Convert an Ort::Value (output tensor) into the corresponding
/// Paddle tensor held in the scope.
///
/// \param[in] value Ort::Value (output tensor)
///
/// \param[in] desc ONNXDesc (name, shape, dtype)
///
void AsTensor(const Ort::Value &value, const ONNXDesc &desc);
private:
AnalysisConfig config_;
// ONNXRuntime
Ort::Env env_;
Ort::Session session_{nullptr};
platform::Place place_;
framework::Scope *sub_scope_{nullptr};
std::vector<ONNXDesc> input_desc_;
std::vector<ONNXDesc> output_desc_;
int predictor_id_;
// Some more detailed tests are made friends of the predictor so that
// all of its internals can be tested.
#if PADDLE_WITH_TESTING
FRIEND_TEST(ONNXRuntimePredictor, onnxruntime_on);
#endif
};
} // namespace paddle
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <string>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#include "paddle/fluid/platform/cpu_info.h"
DEFINE_string(dirname, "", "Directory of the test model.");
namespace paddle {
TEST(ONNXRuntimePredictor, onnxruntime_on) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname + "/inference.pdmodel",
FLAGS_dirname + "/inference.pdiparams");
config.EnableONNXRuntime();
config.EnableORTOptimization();
config.SetCpuMathLibraryNumThreads(2);
LOG(INFO) << config.Summary();
auto _predictor =
CreatePaddlePredictor<AnalysisConfig,
paddle::PaddleEngineKind::kONNXRuntime>(config);
ASSERT_TRUE(_predictor);
auto* predictor = static_cast<ONNXRuntimePredictor*>(_predictor.get());
ASSERT_TRUE(predictor);
ASSERT_TRUE(!predictor->Clone());
ASSERT_TRUE(predictor->scope_);
ASSERT_TRUE(predictor->sub_scope_);
ASSERT_EQ(predictor->scope_->parent(), nullptr);
ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get());
// Dummy Input Data
std::vector<int64_t> input_shape = {-1, 3, 224, 224};
std::vector<float> input_data(1 * 3 * 224 * 224, 1.0);
std::vector<float> out_data;
out_data.resize(1000);
// testing all interfaces
auto input_names = predictor->GetInputNames();
auto output_names = predictor->GetOutputNames();
auto get_input_shape = predictor->GetInputTensorShape();
ASSERT_EQ(input_names.size(), 1UL);
ASSERT_EQ(output_names.size(), 1UL);
ASSERT_EQ(input_names[0], "inputs");
ASSERT_EQ(output_names[0], "save_infer_model/scale_0.tmp_1");
ASSERT_EQ(get_input_shape["inputs"], input_shape);
auto input_tensor = predictor->GetInputTensor(input_names[0]);
input_tensor->Reshape({1, 3, 224, 224});
auto output_tensor = predictor->GetOutputTensor(output_names[0]);
input_tensor->CopyFromCpu(input_data.data());
ASSERT_TRUE(predictor->ZeroCopyRun());
output_tensor->CopyToCpu(out_data.data());
predictor->TryShrinkMemory();
}
} // namespace paddle
......@@ -319,6 +319,18 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableNpu(int device_id = 0);
///
/// \brief Turn on ONNXRuntime.
///
void EnableONNXRuntime();
///
/// \brief Turn off ONNXRuntime.
///
void DisableONNXRuntime();
///
/// \brief Turn on ONNXRuntime Optimization.
///
void EnableORTOptimization();
///
/// \brief A boolean state telling whether the GPU is turned on.
///
/// \return bool Whether the GPU is turned on.
......@@ -342,6 +354,19 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool use_ipu() const { return use_ipu_; }
///
/// \brief A boolean state telling whether the ONNXRuntime is turned on.
///
/// \return bool Whether the ONNXRuntime is turned on.
///
bool use_onnxruntime() const { return use_onnxruntime_; }
///
/// \brief A boolean state telling whether the ONNXRuntime Optimization is
/// turned on.
///
/// \return bool Whether the ONNXRuntime Optimization is turned on.
///
bool ort_optimization_enabled() const { return enable_ort_optimization_; }
///
/// \brief Get the GPU device id.
///
/// \return int The GPU device id.
......@@ -841,6 +866,10 @@ struct PD_INFER_DECL AnalysisConfig {
bool use_npu_{false};
int npu_device_id_{0};
// ONNXRuntime related
bool use_onnxruntime_{false};
bool enable_ort_optimization_{false};
// Padding related
bool use_fc_padding_{true};
......
......@@ -192,6 +192,7 @@ class PD_INFER_DECL ZeroCopyTensor : public paddle_infer::Tensor {
private:
friend class AnalysisPredictor;
friend class ONNXRuntimePredictor;
explicit ZeroCopyTensor(void* scope) : paddle_infer::Tensor{scope} {}
};
......@@ -381,6 +382,7 @@ enum class PaddleEngineKind {
kNative = 0, ///< Use the native Fluid facility.
kAutoMixedTensorRT, ///< Automatically mix Fluid with TensorRT.
kAnalysis, ///< More optimization.
kONNXRuntime, ///< Use ONNXRuntime
};
template <typename ConfigT, PaddleEngineKind engine>
......@@ -395,6 +397,11 @@ template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config);
template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kONNXRuntime>(
const AnalysisConfig& config);
PD_INFER_DECL int PaddleDtypeSize(PaddleDType dtype);
PD_INFER_DECL std::string get_version();
......
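The kONNXRuntime engine kind can also be requested explicitly through the specialization declared above, mirroring onnxruntime_predictor_tester.cc (sketch only; the model paths are placeholders):

paddle::AnalysisConfig config;
config.SetModel("./MobileNetV2/inference.pdmodel",     // placeholder
                "./MobileNetV2/inference.pdiparams");  // placeholder
config.EnableONNXRuntime();
// Bypass the automatic fallback in paddle_infer::Predictor and build the
// ONNXRuntime predictor directly; returns nullptr if Init() fails.
auto predictor =
    paddle::CreatePaddlePredictor<paddle::AnalysisConfig,
                                  paddle::PaddleEngineKind::kONNXRuntime>(config);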
......@@ -126,6 +126,26 @@ PD_Bool PD_ConfigUseGpu(__pd_keep PD_Config* pd_config) {
return config->use_gpu();
}
void PD_ConfigEnableONNXRuntime(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableONNXRuntime();
}
void PD_ConfigDisableONNXRuntime(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->DisableONNXRuntime();
}
PD_Bool PD_ConfigONNXRuntimeEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_onnxruntime();
}
void PD_ConfigEnableORTOptimization(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableORTOptimization();
}
void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
int32_t l3_workspace_size, PD_Bool locked,
PD_Bool autotune, const char* autotune_file,
......
......@@ -152,6 +152,34 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigDisableGpu(
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseGpu(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on ONNXRuntime.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableONNXRuntime(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn off ONNXRuntime.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDisableONNXRuntime(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether the ONNXRuntime is turned on.
///
/// \return Whether the ONNXRuntime is turned on.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigONNXRuntimeEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on ONNXRuntime Optimization.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableORTOptimization(
__pd_keep PD_Config* pd_config);
///
/// \brief Turn on XPU.
///
/// \param[in] pd_config config
......
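The same switches are reachable from C. A minimal sketch using only the entry points declared above plus the pre-existing PD_ConfigCreate/PD_ConfigDestroy pair (the header name and the create/destroy calls are assumptions about the existing C API, not part of this patch):

#include "pd_inference_api.h"  /* assumed umbrella header of the C API */

void config_onnxruntime_demo(void) {
  PD_Config *config = PD_ConfigCreate();
  PD_ConfigEnableONNXRuntime(config);
  PD_ConfigEnableORTOptimization(config);
  if (!PD_ConfigONNXRuntimeEnabled(config)) {
    /* Paddle was built without WITH_ONNXRUNTIME; the enable call only
       logged an error and the flag stays off. */
  }
  PD_ConfigDisableONNXRuntime(config); /* back to the native engine */
  PD_ConfigDestroy(config);
}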
......@@ -160,6 +160,36 @@ func (config *Config) EnableUseGpu(memorySize uint64, deviceId int32) {
C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId))
}
///
/// \brief Turn on ONNXRuntime.
///
func (config *Config) EnableONNXRuntime() {
C.PD_ConfigEnableONNXRuntime(config.c)
}
///
/// \brief Turn off ONNXRuntime.
///
func (config *Config) DisableONNXRuntime() {
C.PD_ConfigDisableONNXRuntime(config.c)
}
///
/// \brief A boolean state telling whether the ONNXRuntime is turned on.
///
/// \return bool Whether the ONNXRuntime is turned on.
///
func (config *Config) ONNXRuntimeEnabled() bool {
return cvtPDBoolToGo(C.PD_ConfigONNXRuntimeEnabled(config.c))
}
///
/// \brief Turn on ONNXRuntime Optimization.
///
func (config *Config) EnableORTOptimization() {
C.PD_ConfigEnableORTOptimization(config.c)
}
///
/// \brief Turn on XPU.
///
......
......@@ -122,3 +122,20 @@ func TestMkldnn(t *testing.T) {
config.SetBfloat16Op([]string{"fc", "mul"})
}
func TestONNXRuntime(t *testing.T) {
config := NewConfig()
config.SetModelDir("modelDir")
t.Log(config.ModelDir())
config.EnableONNXRuntime()
t.Logf("ONNXRuntimeEnabled:%+v", config.ONNXRuntimeEnabled())
config.DisableONNXRuntime()
t.Logf("ONNXRuntimeEnabled:%+v", config.ONNXRuntimeEnabled())
config.EnableORTOptimization()
config.SetCpuMathLibraryNumThreads(4)
t.Logf("CpuMathLibraryNumThreads:%+v", config.CpuMathLibraryNumThreads())
}
\ No newline at end of file
......@@ -66,6 +66,42 @@ func TestNewPredictor(t *testing.T) {
cloned.ClearIntermediateTensor()
}
func TestONNXRuntimePredictor(t *testing.T) {
t.Logf("Version:\n%+v", Version())
config := NewConfig()
config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")
config.EnableONNXRuntime()
config.EnableORTOptimization()
predictor := NewPredictor(config)
inNames := predictor.GetInputNames()
t.Logf("InputNames:%+v", inNames)
outNames := predictor.GetOutputNames()
t.Logf("OutputNames:%+v", outNames)
inHandle := predictor.GetInputHandle(inNames[0])
inHandle.Reshape([]int32{1, 3, 224, 224})
t.Logf("inHandle name:%+v, shape:%+v", inHandle.Name(), inHandle.Shape())
data := make([]float32, numElements([]int32{1, 3, 224, 224}))
for i := 0; i < int(numElements([]int32{1, 3, 224, 224})); i++ {
data[i] = float32(i%255) * 0.1
}
inHandle.CopyFromCpu(data)
t.Logf("inHandle Type:%+v", inHandle.Type())
predictor.Run()
outHandle := predictor.GetOutputHandle(outNames[0])
t.Logf("outHandle name:%+v", outHandle.Name())
outShape := outHandle.Shape()
t.Logf("outHandle Shape:%+v", outShape)
outData := make([]float32, numElements(outShape))
outHandle.CopyToCpu(outData)
t.Log(outData)
}
func TestFromBuffer(t *testing.T) {
modelFile, err := os.Open("./mobilenetv1/inference.pdmodel")
if err != nil {
......
......@@ -22,6 +22,7 @@ fi
# 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/
# 3. go test
go clean -testcache
......
......@@ -81,6 +81,18 @@ TEST(PD_Config, interface) {
PD_ConfigSetBfloat16Op(config, 1, &ops_name);
#endif
PD_ConfigEnableONNXRuntime(config);
bool onnxruntime_enabled = PD_ConfigONNXRuntimeEnabled(config);
#ifdef PADDLE_WITH_ONNXRUNTIME
EXPECT_TRUE(onnxruntime_enabled);
#else
EXPECT_FALSE(onnxruntime_enabled);
#endif
PD_ConfigDisableONNXRuntime(config);
bool onnxruntime_disabled = PD_ConfigONNXRuntimeEnabled(config);
EXPECT_FALSE(onnxruntime_disabled);
PD_ConfigEnableORTOptimization(config);
PD_ConfigEnableMemoryOptim(config, true);
bool memory_enabled = PD_ConfigMemoryOptimEnabled(config);
EXPECT_TRUE(memory_enabled);
......
......@@ -5,6 +5,7 @@ option(WITH_GPU "Compile demo with GPU/CPU, default use CPU."
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF)
option(USE_TENSORRT "Compile demo with TensorRT." OFF)
option(WITH_GTEST "Compile demo with GTEST" OFF)
option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF)
if(NOT WITH_STATIC_LIB)
add_definitions("-DPADDLE_WITH_SHARED_LIB")
......@@ -45,6 +46,13 @@ link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib")
link_directories("${PADDLE_LIB}/paddle/lib")
if (WITH_ONNXRUNTIME)
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib")
endif()
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
......@@ -172,6 +180,16 @@ else()
endif()
endif()
if (WITH_ONNXRUNTIME)
if(WIN32)
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx)
elseif(APPLE)
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx)
else()
set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx)
endif()
endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
......@@ -248,6 +266,14 @@ if(WIN32)
COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${CMAKE_BINARY_DIR}/Release
)
endif()
if(WITH_ONNXRUNTIME)
add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll
${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll
${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
)
endif()
if(NOT WITH_STATIC_LIB)
add_custom_command(TARGET ${DEMO_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}
......
......@@ -20,7 +20,8 @@ TURN_ON_MKL=$2 # use MKL or Openblas
TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode
DATA_DIR=$4 # dataset
TENSORRT_ROOT_DIR=$5 # TensorRT ROOT dir, default to /usr/local/TensorRT
MSVC_STATIC_CRT=$6
WITH_ONNXRUNTIME=$6
MSVC_STATIC_CRT=$7
inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir
EXIT_CODE=0 # init default exit code
WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
......@@ -144,7 +145,8 @@ function compile_test() {
-DMSVC_STATIC_CRT=$MSVC_STATIC_CRT \
-DWITH_GTEST=ON \
-DCMAKE_CXX_FLAGS='/std:c++17' \
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_BUILD_TYPE=Release \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
msbuild /maxcpucount /property:Configuration=Release ALL_BUILD.vcxproj
else
cmake .. -DPADDLE_LIB=${inference_install_dir} \
......@@ -154,7 +156,8 @@ function compile_test() {
-DWITH_STATIC_LIB=OFF \
-DUSE_TENSORRT=$USE_TENSORRT \
-DTENSORRT_ROOT=$TENSORRT_ROOT_DIR \
-DWITH_GTEST=ON
-DWITH_GTEST=ON \
-DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME
make -j$(nproc)
fi;
cd -
......
......@@ -80,6 +80,14 @@ if(NOT EXISTS ${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inferenc
endif()
set(IMG_CLS_RESNET_MODEL_DIR "${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model")
if(WITH_ONNXRUNTIME)
set(MOBILENETV2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/MobileNetV2")
if(NOT EXISTS ${MOBILENETV2_INSTALL_DIR}/MobileNetV2.inference.model.tar.gz)
inference_download_and_uncompress_without_verify(${MOBILENETV2_INSTALL_DIR} ${INFERENCE_URL} "MobileNetV2.inference.model.tar.gz")
endif()
set(MOBILENETV2_MODEL_DIR "${MOBILENETV2_INSTALL_DIR}/MobileNetV2")
endif()
function (inference_base_test_build TARGET)
set(options "")
set(oneValueArgs "")
......
......@@ -80,6 +80,10 @@ set(PYBIND_SRCS
communication.cc
cuda_streams_py.cc)
if (WITH_ONNXRUNTIME)
set(PYBIND_DEPS ${PYBIND_DEPS} onnxruntime_predictor)
endif()
if(NOT ON_INFER)
set (PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
if (WITH_NCCL)
......@@ -152,6 +156,10 @@ if(WITH_PYTHON)
list(APPEND OP_FUNCTION_GENERETOR_DEPS hccl_context)
endif(WITH_ASCEND_CL)
if (WITH_ONNXRUNTIME)
list(APPEND OP_FUNCTION_GENERETOR_DEPS onnxruntime_predictor)
endif()
if(WITH_CNCL)
list(APPEND OP_FUNCTION_GENERETOR_DEPS cncl_context)
endif(WITH_CNCL)
......@@ -242,6 +250,19 @@ if(WITH_PYTHON)
list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll)
list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll)
endif()
if(WITH_ONNXRUNTIME)
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS paddle2onnx)
list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll)
list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll)
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll
COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS onnxruntime)
list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll)
list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll)
endif()
add_custom_command(OUTPUT ${impl_file}
COMMAND ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat
......@@ -260,6 +281,28 @@ if(WITH_PYTHON)
# copy these *.so to current directory and append current directory to
# LD_LIBRARY_PATH. This is different from the Windows platform, which searches
# for *.dll in the current directory automatically.
if(WITH_ONNXRUNTIME)
if (APPLE)
set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.dylib)
set(ONNXRUNTIME_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.dylib)
else()
set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.so)
set(ONNXRUNTIME_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.so)
endif()
ADD_CUSTOM_COMMAND(OUTPUT ${PADDLE2ONNX_PYBIND_OUT}
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_LIB} ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS paddle2onnx)
list(APPEND OP_IMPL_DEPS ${PADDLE2ONNX_PYBIND_OUT})
list(APPEND EAGER_OP_IMPL_DEPS ${PADDLE2ONNX_PYBIND_OUT})
ADD_CUSTOM_COMMAND(OUTPUT ${ONNXRUNTIME_PYBIND_OUT}
COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_LIB} ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS onnxruntime)
list(APPEND OP_IMPL_DEPS ${ONNXRUNTIME_PYBIND_OUT})
list(APPEND EAGER_OP_IMPL_DEPS ${ONNXRUNTIME_PYBIND_OUT})
endif()
if(WITH_MKLML)
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so
COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${CMAKE_CURRENT_BINARY_DIR}
......
......@@ -33,6 +33,10 @@
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/inference/utils/io_utils.h"
#ifdef PADDLE_WITH_ONNXRUNTIME
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#endif
namespace py = pybind11;
namespace pybind11 {
......@@ -556,6 +560,10 @@ void BindAnalysisConfig(py::module *m) {
py::arg("device_id") = 0)
.def("enable_npu", &AnalysisConfig::EnableNpu, py::arg("device_id") = 0)
.def("disable_gpu", &AnalysisConfig::DisableGpu)
.def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
.def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
.def("onnxruntime_enabled", &AnalysisConfig::use_onnxruntime)
.def("enable_ort_optimization", &AnalysisConfig::EnableORTOptimization)
.def("use_gpu", &AnalysisConfig::use_gpu)
.def("use_xpu", &AnalysisConfig::use_xpu)
.def("use_npu", &AnalysisConfig::use_npu)
......
......@@ -242,6 +242,7 @@ function cmake_base() {
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
-DWITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF}
-DCUDA_ARCH_BIN="${CUDA_ARCH_BIN}"
-DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
......@@ -293,7 +294,9 @@ EOF
-DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
-DCUDA_ARCH_BIN="${CUDA_ARCH_BIN}" \
-DWITH_RECORD_BUILDTIME=${WITH_RECORD_BUILDTIME:-OFF} \
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF};build_error=$?
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} \
-DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF};build_error=$?
if [ "$build_error" != 0 ];then
exit 7;
fi
......@@ -2504,7 +2507,8 @@ EOF
fi
startTime_s=`date +%s`
set +e
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON -DWITH_TENSORRT=ON -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} -DWITH_PYTHON=${WITH_PYTHON:-ON};build_error=$?
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON -DWITH_TENSORRT=ON -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} -DWITH_PYTHON=${WITH_PYTHON:-ON} -DWITH_ONNXRUNTIME=${WITH_ONNXRUNTIME:-OFF};build_error=$?
# reset ccache zero stats for collect PR's actual hit rate
ccache -z
......@@ -2548,7 +2552,7 @@ EOF
demo_ci_startTime_s=`date +%s`
cd ${PADDLE_ROOT}/paddle/fluid/inference/api/demo_ci
./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \
${WITH_TENSORRT:-ON} ${TENSORRT_ROOT_DIR:-/usr}
${WITH_TENSORRT:-ON} ${TENSORRT_ROOT_DIR:-/usr} ${WITH_ONNXRUNTIME:-ON}
DEMO_EXIT_CODE=$?
./clean.sh
demo_ci_endTime_s=`date +%s`
......@@ -2558,7 +2562,7 @@ EOF
infer_ut_startTime_s=`date +%s`
cd ${PADDLE_ROOT}/paddle/fluid/inference/tests/infer_ut
./run.sh ${PADDLE_ROOT} ${WITH_MKL:-ON} ${WITH_GPU:-OFF} ${INFERENCE_DEMO_INSTALL_DIR} \
${TENSORRT_ROOT_DIR:-/usr}
${TENSORRT_ROOT_DIR:-/usr} ${WITH_ONNXRUNTIME:-ON}
TEST_EXIT_CODE=$?
infer_ut_endTime_s=`date +%s`
echo "infer_ut tests Total time: $[ $infer_ut_endTime_s - $infer_ut_startTime_s ]s"
......
......@@ -505,6 +505,18 @@ if '${WITH_MKLDNN}' == 'ON':
else:
package_data['paddle.libs']+=['mkldnn.dll']
if '${WITH_ONNXRUNTIME}' == 'ON':
shutil.copy('${ONNXRUNTIME_SHARED_LIB}', libs_path)
if os.name == 'nt':
shutil.copy('${PADDLE2ONNX_SHARED_LIB}', libs_path)
package_data['paddle.libs']+=['paddle2onnx.dll', 'onnxruntime.dll']
else:
shutil.copy('${PADDLE2ONNX_LIB}', libs_path)
if sys.platform == 'darwin':
package_data['paddle.libs']+=['libpaddle2onnx.dylib', 'libonnxruntime.1.10.0.dylib']
else:
package_data['paddle.libs']+=['libpaddle2onnx.so', 'libonnxruntime.so.1.10.0']
if '${WITH_XPU}' == 'ON':
# only change rpath in Release mode,
if '${CMAKE_BUILD_TYPE}' == 'Release':
......