From 61591afe9fea817a2a9b4cf3035c14ad46ccf35a Mon Sep 17 00:00:00 2001 From: heliqi <1101791222@qq.com> Date: Tue, 21 Jun 2022 08:19:07 -0500 Subject: [PATCH] [Inference]Fix the ort Backend multiple input bug (#43621) * fix or backend many inputs bug * fix or backend many inputs bug * fix or backend many inputs bug * fix or backend many inputs bug * code format * code format --- cmake/external/onnxruntime.cmake | 5 +- cmake/external/paddle2onnx.cmake | 16 +- .../api/demo_ci/onnxruntime_mobilenet_demo.cc | 35 +- paddle/fluid/inference/api/demo_ci/run.sh | 11 +- .../inference/api/details/zero_copy_tensor.cc | 312 ++++++++++++------ .../inference/api/onnxruntime_predictor.cc | 60 +++- .../inference/api/onnxruntime_predictor.h | 1 + paddle/fluid/inference/api/paddle_tensor.h | 11 +- paddle/fluid/pybind/CMakeLists.txt | 4 +- 9 files changed, 304 insertions(+), 151 deletions(-) diff --git a/cmake/external/onnxruntime.cmake b/cmake/external/onnxruntime.cmake index 9ace4caafd..832761f69d 100644 --- a/cmake/external/onnxruntime.cmake +++ b/cmake/external/onnxruntime.cmake @@ -52,8 +52,9 @@ else() ) endif() -include_directories(${ONNXRUNTIME_INC_DIR} -)# For ONNXRUNTIME code to include internal headers. +# For ONNXRUNTIME code to include internal headers. +include_directories(${ONNXRUNTIME_INC_DIR}) + if(WIN32) set(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.dll" diff --git a/cmake/external/paddle2onnx.cmake b/cmake/external/paddle2onnx.cmake index 96f24bfc8a..0fbd1803c6 100644 --- a/cmake/external/paddle2onnx.cmake +++ b/cmake/external/paddle2onnx.cmake @@ -34,15 +34,11 @@ set(PADDLE2ONNX_INC_DIR set(PADDLE2ONNX_LIB_DIR "${PADDLE2ONNX_INSTALL_DIR}/lib" CACHE PATH "onnxruntime lib directory." FORCE) -set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" - "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}") +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${PADDLE2ONNX_LIB_DIR}") -include_directories(${PADDLE2ONNX_INC_DIR} -)# For PADDLE2ONNX code to include internal headers. +# For PADDLE2ONNX code to include internal headers. +include_directories(${PADDLE2ONNX_INC_DIR}) if(WIN32) - set(PADDLE2ONNX_SOURCE_LIB - "${PADDLE2ONNX_SOURCE_DIR}/lib/libpaddle2onnx.dylib" - CACHE FILEPATH "Paddle2ONNX source library." FORCE) set(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.dll" CACHE FILEPATH "paddle2onnx library." FORCE) @@ -50,9 +46,6 @@ if(WIN32) "${PADDLE2ONNX_INSTALL_DIR}/lib/paddle2onnx.lib" CACHE FILEPATH "paddle2onnx compile library." FORCE) elseif(APPLE) - set(PADDLE2ONNX_SOURCE_LIB - "${PADDLE2ONNX_SOURCE_DIR}/lib/libpaddle2onnx.dylib" - CACHE FILEPATH "Paddle2ONNX source library." FORCE) set(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib" CACHE FILEPATH "PADDLE2ONNX library." FORCE) @@ -60,9 +53,6 @@ elseif(APPLE) "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.dylib" CACHE FILEPATH "paddle2onnx compile library." FORCE) else() - set(PADDLE2ONNX_SOURCE_LIB - "${PADDLE2ONNX_SOURCE_DIR}/lib/libpaddle2onnx.so" - CACHE FILEPATH "Paddle2ONNX source library." FORCE) set(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/lib/libpaddle2onnx.so" CACHE FILEPATH "PADDLE2ONNX library." FORCE) diff --git a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc index f9ac07a830..fb5cee4e05 100644 --- a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc @@ -13,17 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. */ /* - * This file contains demo of mobilenet for tensorrt. + * This file contains demo of mobilenet for onnxruntime backend. */ - #include // use glog instead of CHECK to avoid importing other paddle header files. +#include +#include #include #include "gflags/gflags.h" #include "utils.h" // NOLINT DEFINE_string(modeldir, "", "Directory of the inference model."); +DEFINE_string(data, "", "path of data"); namespace paddle { namespace demo { @@ -39,8 +41,21 @@ void Main() { auto predictor = paddle_infer::CreatePredictor(config); // Inference. + LOG(INFO) << "--- prepare input data ----"; std::vector input_shape = {1, 3, 224, 224}; - std::vector input_data(1 * 3 * 224 * 224, 1.0); + std::vector input_data; + std::string line; + std::ifstream file(FLAGS_data); + std::getline(file, line); + file.close(); + std::vector data_strs; + split(line, ' ', &data_strs); + int input_num = 0; + for (auto& d : data_strs) { + input_num += 1; + input_data.push_back(std::stof(d)); + } + std::vector out_data; out_data.resize(1000); auto input_names = predictor->GetInputNames(); @@ -53,7 +68,19 @@ void Main() { predictor->Run(); output_tensor->CopyToCpu(out_data.data()); - VLOG(3) << "output.size " << out_data.size(); + std::vector out_index(out_data.size()); + std::iota(out_index.begin(), out_index.end(), 0); + std::sort( + out_index.begin(), out_index.end(), [&out_data](int index1, int index2) { + return out_data[index1] > out_data[index2]; + }); + LOG(INFO) << "output.size " << out_data.size() + << " max_index:" << out_index[0]; + CHECK_EQ(out_data.size(), 1000); + int max_index = out_index[0]; + CHECK_EQ(max_index, 13); + float max_score = out_data[max_index]; + CHECK_LE(fabs(max_score - 0.99981), 1e-4); } } // namespace demo diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index c8a78a168a..f11319d766 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -52,15 +52,17 @@ if [ $7 == ON ]; then mkdir -p MobileNetV2 cd MobileNetV2 if [[ -e "MobileNetV2.inference.model.tar.gz" ]]; then - echo "MobileNetV2.inference.model.tar.gz has been downloaded." - else + rm -rf MobileNetV2.inference.model.tar.gz + fi + # echo "MobileNetV2.inference.model.tar.gz has been downloaded." + # else if [ $WIN_DETECT != "" ]; then wget -q -Y off http://paddle-inference-dist.bj.bcebos.com/MobileNetV2.inference.model.tar.gz else wget -q --no-proxy http://paddle-inference-dist.bj.bcebos.com/MobileNetV2.inference.model.tar.gz fi tar xzf *.tar.gz - fi + # fi cd .. fi @@ -265,7 +267,8 @@ for WITH_STATIC_LIB in ON OFF; do -DWITH_ONNXRUNTIME=$WITH_ONNXRUNTIME make -j$(nproc) ./onnxruntime_mobilenet_demo \ - --modeldir=$DATA_DIR/MobileNetV2/MobileNetV2 + --modeldir=$DATA_DIR/MobileNetV2/MobileNetV2 \ + --data=$DATA_DIR/MobileNetV2/MobileNetV2/data.txt if [ $? -ne 0 ]; then echo "onnxruntime_mobilenet_demo runs failed " >> ${current_dir}/test_summary.txt EXIT_CODE=1 diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index ae0af77319..4040d09c45 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -40,36 +40,42 @@ void Tensor::Reshape(const std::vector &shape) { #endif PADDLE_ENFORCE_EQ( - name_.empty(), false, + name_.empty(), + false, paddle::platform::errors::PreconditionNotMet( "Need to SetName first, so that the corresponding tensor can " "be retrieved.")); - PADDLE_ENFORCE_EQ(input_or_output_, true, + PADDLE_ENFORCE_EQ(input_or_output_, + true, paddle::platform::errors::PermissionDenied( "Can't reshape the output tensor, it is readonly")); auto *scope = static_cast(scope_); auto *var = scope->FindVar(name_); PADDLE_ENFORCE_NOT_NULL( - var, paddle::platform::errors::PreconditionNotMet( - "No tensor called [%s] in the runtime scope", name_)); + var, + paddle::platform::errors::PreconditionNotMet( + "No tensor called [%s] in the runtime scope", name_)); auto *tensor = var->GetMutable(); tensor->Resize(phi::make_ddim(shape)); } void Tensor::ReshapeStrings(const size_t &shape) { PADDLE_ENFORCE_EQ( - name_.empty(), false, + name_.empty(), + false, paddle::platform::errors::PreconditionNotMet( "Need to SetName first, so that the corresponding tensor can " "be retrieved.")); - PADDLE_ENFORCE_EQ(input_or_output_, true, + PADDLE_ENFORCE_EQ(input_or_output_, + true, paddle::platform::errors::PermissionDenied( "Can't reshape the output tensor, it is readonly")); auto *scope = static_cast(scope_); auto *var = scope->FindVar(name_); PADDLE_ENFORCE_NOT_NULL( - var, paddle::platform::errors::PreconditionNotMet( - "No tensor called [%s] in the runtime scope", name_)); + var, + paddle::platform::errors::PreconditionNotMet( + "No tensor called [%s] in the runtime scope", name_)); paddle_infer::Strings *tensor = var->GetMutable(); tensor->resize(shape); } @@ -84,7 +90,8 @@ template T *Tensor::mutable_data(PlaceType place) { EAGER_GET_TENSOR(paddle::framework::LoDTensor); PADDLE_ENFORCE_GT( - tensor->numel(), 0, + tensor->numel(), + 0, paddle::platform::errors::PreconditionNotMet( "You should call Tensor::Reshape(const std::vector " "&shape)" @@ -97,8 +104,9 @@ T *Tensor::mutable_data(PlaceType place) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) paddle::platform::CUDAPlace gpu_place(device_); auto *dev_ctxs = reinterpret_cast>> - *>(device_contexs_); + phi::Place, + std::shared_future>> *>( + device_contexs_); auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get()); return dev_ctx->Alloc(tensor, tensor->numel() * sizeof(T)); @@ -179,7 +187,8 @@ void Tensor::CopyFromCpu(const T *data) { #endif EAGER_GET_TENSOR(paddle::framework::LoDTensor); - PADDLE_ENFORCE_GE(tensor->numel(), 0, + PADDLE_ENFORCE_GE(tensor->numel(), + 0, paddle::platform::errors::PreconditionNotMet( "You should call Tensor::Reshape(const " "std::vector &shape)" @@ -194,14 +203,18 @@ void Tensor::CopyFromCpu(const T *data) { paddle::platform::CUDAPlace gpu_place(device_); auto *dev_ctxs = reinterpret_cast>> *>( + phi::Place, + std::shared_future>> *>( device_contexs_); auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get()); auto *t_data = dev_ctx->Alloc(tensor, tensor->numel() * sizeof(T)); - paddle::memory::Copy(gpu_place, static_cast(t_data), - paddle::platform::CPUPlace(), data, ele_size, + paddle::memory::Copy(gpu_place, + static_cast(t_data), + paddle::platform::CPUPlace(), + data, + ele_size, dev_ctx->stream()); #else PADDLE_THROW(paddle::platform::errors::Unavailable( @@ -212,8 +225,11 @@ void Tensor::CopyFromCpu(const T *data) { #ifdef PADDLE_WITH_XPU paddle::platform::XPUPlace xpu_place(device_); auto *t_data = tensor->mutable_data(xpu_place); - paddle::memory::Copy(xpu_place, static_cast(t_data), - paddle::platform::CPUPlace(), data, ele_size); + paddle::memory::Copy(xpu_place, + static_cast(t_data), + paddle::platform::CPUPlace(), + data, + ele_size); #else PADDLE_THROW(paddle::platform::errors::Unavailable( "Can not create tensor with XPU place because paddle is not compiled " @@ -227,8 +243,11 @@ void Tensor::CopyFromCpu(const T *data) { auto *t_data = tensor->mutable_data(npu_place); auto *dev_ctx = static_cast( pool.Get(npu_place)); - paddle::memory::Copy(npu_place, static_cast(t_data), - paddle::platform::CPUPlace(), data, ele_size, + paddle::memory::Copy(npu_place, + static_cast(t_data), + paddle::platform::CPUPlace(), + data, + ele_size, dev_ctx->stream()); #else PADDLE_THROW(paddle::platform::errors::Unavailable( @@ -246,8 +265,11 @@ void Tensor::CopyFromCpu(const T *data) { auto *t_data = tensor->mutable_data(custom_place); auto *dev_ctx = static_cast( pool.Get(custom_place)); - paddle::memory::Copy(custom_place, static_cast(t_data), - paddle::platform::CPUPlace(), data, ele_size, + paddle::memory::Copy(custom_place, + static_cast(t_data), + paddle::platform::CPUPlace(), + data, + ele_size, dev_ctx->stream()); #else PADDLE_THROW(paddle::platform::errors::InvalidArgument( @@ -291,30 +313,33 @@ struct DataTypeInfo { paddle::experimental::DataLayout LayoutConvert(DataLayout layout) { PADDLE_ENFORCE_EQ( - layout, DataLayout::kNCHW, + layout, + DataLayout::kNCHW, paddle::platform::errors::InvalidArgument("Only NCHW is supported now.")); return paddle::experimental::DataLayout::NCHW; } template -void Tensor::ShareExternalData(const T *data, const std::vector &shape, - PlaceType place, DataLayout layout) { +void Tensor::ShareExternalData(const T *data, + const std::vector &shape, + PlaceType place, + DataLayout layout) { EAGER_GET_TENSOR(paddle::framework::LoDTensor) size_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()) * sizeof(T); - phi::DenseTensorMeta meta(DataTypeInfo().TYPE, phi::make_ddim(shape), - LayoutConvert(layout)); + phi::DenseTensorMeta meta( + DataTypeInfo().TYPE, phi::make_ddim(shape), LayoutConvert(layout)); if (place == PlaceType::kCPU) { phi::DenseTensor dtensor( - std::make_shared(const_cast(data), size, - paddle::platform::CPUPlace()), + std::make_shared( + const_cast(data), size, paddle::platform::CPUPlace()), meta); *tensor = std::move(dtensor); } else if (place == PlaceType::kGPU) { phi::DenseTensor dtensor( - std::make_shared(const_cast(data), size, - paddle::platform::CUDAPlace(device_)), + std::make_shared( + const_cast(data), size, paddle::platform::CUDAPlace(device_)), meta); *tensor = std::move(dtensor); } else { @@ -325,7 +350,8 @@ void Tensor::ShareExternalData(const T *data, const std::vector &shape, void Tensor::CopyStringsFromCpu(const paddle_infer::Strings *data) { EAGER_GET_TENSOR(paddle_infer::Strings); - PADDLE_ENFORCE_GE(tensor->size(), 0, + PADDLE_ENFORCE_GE(tensor->size(), + 0, paddle::platform::errors::PreconditionNotMet( "You should call Tensor::Reshape(const " "std::size_t &shape)function before copying" @@ -334,7 +360,9 @@ void Tensor::CopyStringsFromCpu(const paddle_infer::Strings *data) { } template -void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, +void Tensor::CopyToCpuImpl(T *data, + void *exec_stream, + CallbackFunc cb, void *cb_params) const { EAGER_GET_TENSOR(paddle::framework::LoDTensor); auto ele_num = tensor->numel(); @@ -344,7 +372,8 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, paddle::framework::Tensor out; auto mem_allocation = std::make_shared( - static_cast(data), ele_num * sizeof(T), + static_cast(data), + ele_num * sizeof(T), paddle::platform::CPUPlace()); out.ResetHolder(mem_allocation); @@ -355,7 +384,10 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, tensor->layout(), paddle::platform::MKLDNNDeviceContext::tls() .get_cur_paddle_data_layout(), - *tensor, &out, paddle::platform::CPUPlace(), true); + *tensor, + &out, + paddle::platform::CPUPlace(), + true); else std::memcpy(static_cast(data), t_data, ele_num * sizeof(T)); #else @@ -373,13 +405,17 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) auto gpu_place = t_place; auto *dev_ctxs = reinterpret_cast>> *>( + phi::Place, + std::shared_future>> *>( device_contexs_); auto *dev_ctx = static_cast(dev_ctxs->at(gpu_place).get().get()); paddle::memory::Copy(paddle::platform::CPUPlace(), - static_cast(data), gpu_place, t_data, - ele_num * sizeof(T), dev_ctx->stream()); + static_cast(data), + gpu_place, + t_data, + ele_num * sizeof(T), + dev_ctx->stream()); #ifdef PADDLE_WITH_HIP hipStreamSynchronize(dev_ctx->stream()); #else @@ -403,7 +439,9 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, #ifdef PADDLE_WITH_XPU auto xpu_place = t_place; paddle::memory::Copy(paddle::platform::CPUPlace(), - static_cast(data), xpu_place, t_data, + static_cast(data), + xpu_place, + t_data, ele_num * sizeof(T)); #else PADDLE_THROW(paddle::platform::errors::Unavailable( @@ -418,8 +456,11 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, auto *dev_ctx = static_cast( pool.Get(npu_place)); paddle::memory::Copy(paddle::platform::CPUPlace(), - static_cast(data), npu_place, t_data, - ele_num * sizeof(T), dev_ctx->stream()); + static_cast(data), + npu_place, + t_data, + ele_num * sizeof(T), + dev_ctx->stream()); paddle::platform::NPUStreamSync(dev_ctx->stream()); #else PADDLE_THROW(paddle::platform::errors::Unavailable( @@ -434,8 +475,11 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, auto *dev_ctx = static_cast( pool.Get(custom_place)); paddle::memory::Copy(paddle::platform::CPUPlace(), - static_cast(data), custom_place, t_data, - ele_num * sizeof(T), dev_ctx->stream()); + static_cast(data), + custom_place, + t_data, + ele_num * sizeof(T), + dev_ctx->stream()); // TODO(wangran16): sync_stream #else PADDLE_THROW(paddle::platform::errors::InvalidArgument( @@ -474,22 +518,34 @@ template PD_INFER_DECL void Tensor::CopyFromCpu(const int8_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const float16 *data); template PD_INFER_DECL void Tensor::ShareExternalData( - const float *data, const std::vector &shape, PlaceType place, + const float *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( - const int64_t *data, const std::vector &shape, PlaceType place, + const int64_t *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( - const int32_t *data, const std::vector &shape, PlaceType place, + const int32_t *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( - const uint8_t *data, const std::vector &shape, PlaceType place, + const uint8_t *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( - const int8_t *data, const std::vector &shape, PlaceType place, + const int8_t *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::ShareExternalData( - const float16 *data, const std::vector &shape, PlaceType place, + const float16 *data, + const std::vector &shape, + PlaceType place, DataLayout layout); template PD_INFER_DECL void Tensor::CopyToCpu(float *data) const; @@ -566,15 +622,17 @@ Tensor::Tensor(void *scope, const void *device_contexts) template void *Tensor::FindTensor() const { PADDLE_ENFORCE_EQ( - name_.empty(), false, + name_.empty(), + false, paddle::platform::errors::PreconditionNotMet( "Need to SetName first, so that the corresponding tensor can " "be retrieved.")); auto *scope = static_cast(scope_); auto *var = scope->FindVar(name_); PADDLE_ENFORCE_NOT_NULL( - var, paddle::platform::errors::PreconditionNotMet( - "No tensor called [%s] in the runtime scope", name_)); + var, + paddle::platform::errors::PreconditionNotMet( + "No tensor called [%s] in the runtime scope", name_)); auto *tensor = var->GetMutable(); return tensor; } @@ -602,8 +660,9 @@ std::vector Tensor::shape() const { #endif EAGER_GET_TENSOR(paddle::framework::LoDTensor); PADDLE_ENFORCE_NOT_NULL( - tensor_, paddle::platform::errors::PreconditionNotMet( - "Not found tensor called %s in the scope", name_)); + tensor_, + paddle::platform::errors::PreconditionNotMet( + "Not found tensor called %s in the scope", name_)); // mkldnn may does layout transform internally, so need to reorder before // return #ifdef PADDLE_WITH_MKLDNN @@ -668,40 +727,65 @@ void Tensor::SetOrtBinding(const std::shared_ptr binding) { binding_ = binding; } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, float *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, data, size, shape, - shape_len); +void Tensor::SetOrtBuffer(const std::shared_ptr> buffer) { + buffer_ = buffer; } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, int64_t *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, data, size, shape, - shape_len); +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + float *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor( + memory_info, data, size, shape, shape_len); } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, int32_t *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, data, size, shape, - shape_len); +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + int64_t *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor( + memory_info, data, size, shape, shape_len); } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, uint8_t *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, data, size, shape, - shape_len); +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + int32_t *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor( + memory_info, data, size, shape, shape_len); } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, int8_t *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, data, size, shape, - shape_len); +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + uint8_t *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor( + memory_info, data, size, shape, shape_len); } -Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, float16 *data, - size_t size, const int64_t *shape, size_t shape_len) { - return Ort::Value::CreateTensor(memory_info, static_cast(data), - size * sizeof(float16), shape, shape_len, +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + int8_t *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor( + memory_info, data, size, shape, shape_len); +} + +Ort::Value GetOrtVaule(const Ort::MemoryInfo &memory_info, + float16 *data, + size_t size, + const int64_t *shape, + size_t shape_len) { + return Ort::Value::CreateTensor(memory_info, + static_cast(data), + size * sizeof(float16), + shape, + shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16); } @@ -712,15 +796,16 @@ void Tensor::ORTCopyFromCpu(const T *data) { paddle::platform::errors::PreconditionNotMet( "input tensor [%s] no binding ptr", name_)); const char *device_name = place_ == PlaceType::kCPU ? "Cpu" : "Cuda"; - Ort::MemoryInfo memory_info(device_name, OrtDeviceAllocator, device_, - OrtMemTypeDefault); - size_t size = std::accumulate(begin(shape_), end(shape_), 1UL, - std::multiplies()); + Ort::MemoryInfo memory_info( + device_name, OrtDeviceAllocator, device_, OrtMemTypeDefault); + size_t size = std::accumulate( + begin(shape_), end(shape_), 1UL, std::multiplies()); + auto buffer = buffer_.lock(); size_t buffer_size = size * sizeof(T); - if (buffer_size > buffer_.size()) { - buffer_.resize(buffer_size); + if (buffer_size > buffer->size()) { + buffer->resize(buffer_size); } - std::memcpy(static_cast(buffer_.data()), data, buffer_size); + std::memcpy(static_cast(buffer->data()), data, buffer_size); auto onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; if (std::is_same::value) { @@ -737,18 +822,18 @@ void Tensor::ORTCopyFromCpu(const T *data) { onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; } else if (std::is_same::value) { onnx_dtype = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; - } - - if (onnx_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED) { + } else { PADDLE_THROW(paddle::platform::errors::InvalidArgument( "Found undefined data type for onnxruntime, only supports " "float16/float32/float64/int8/uint8/int32/int64.")); } - auto ort_value = - Ort::Value::CreateTensor(memory_info, buffer_.data(), buffer_size, - shape_.data(), shape_.size(), onnx_dtype); - + auto ort_value = Ort::Value::CreateTensor(memory_info, + buffer->data(), + buffer_size, + shape_.data(), + shape_.size(), + onnx_dtype); binding->BindInput(name_.c_str(), ort_value); } @@ -793,21 +878,24 @@ void InternalUtils::CopyFromCpuWithIoStream(paddle_infer::Tensor *t, cudaStream_t stream) { if (t->tensor_ == nullptr) { PADDLE_ENFORCE_EQ( - t->name_.empty(), false, + t->name_.empty(), + false, paddle::platform::errors::PreconditionNotMet( "Need to SetName first, so that the corresponding tensor can " "be retrieved.")); auto *scope = static_cast(t->scope_); auto *var = scope->FindVar(t->name_); PADDLE_ENFORCE_NOT_NULL( - var, paddle::platform::errors::PreconditionNotMet( - "No tensor called [%s] in the runtime scope", t->name_)); + var, + paddle::platform::errors::PreconditionNotMet( + "No tensor called [%s] in the runtime scope", t->name_)); auto *tensor = var->GetMutable(); t->tensor_ = tensor; } auto *tensor = static_cast(t->tensor_); - PADDLE_ENFORCE_GE(tensor->numel(), 0, + PADDLE_ENFORCE_GE(tensor->numel(), + 0, paddle::platform::errors::PreconditionNotMet( "You should call Tensor::Reshape(const " "std::vector &shape)" @@ -820,8 +908,12 @@ void InternalUtils::CopyFromCpuWithIoStream(paddle_infer::Tensor *t, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) paddle::platform::CUDAPlace gpu_place(t->device_); auto *t_data = tensor->mutable_data(gpu_place); - paddle::memory::Copy(gpu_place, static_cast(t_data), - paddle::platform::CPUPlace(), data, ele_size, stream); + paddle::memory::Copy(gpu_place, + static_cast(t_data), + paddle::platform::CPUPlace(), + data, + ele_size, + stream); #else PADDLE_THROW(paddle::platform::errors::Unavailable( "Can not create tensor with CUDA place because paddle is not compiled " @@ -834,19 +926,22 @@ void InternalUtils::CopyFromCpuWithIoStream(paddle_infer::Tensor *t, } template -void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, T *data, +void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, + T *data, cudaStream_t stream) { if (t->tensor_ == nullptr) { PADDLE_ENFORCE_EQ( - t->name_.empty(), false, + t->name_.empty(), + false, paddle::platform::errors::PreconditionNotMet( "Need to SetName first, so that the corresponding tensor can " "be retrieved.")); auto *scope = static_cast(t->scope_); auto *var = scope->FindVar(t->name_); PADDLE_ENFORCE_NOT_NULL( - var, paddle::platform::errors::PreconditionNotMet( - "No tensor called [%s] in the runtime scope", t->name_)); + var, + paddle::platform::errors::PreconditionNotMet( + "No tensor called [%s] in the runtime scope", t->name_)); auto *tensor = var->GetMutable(); t->tensor_ = tensor; } @@ -859,7 +954,8 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, T *data, paddle::framework::Tensor out; auto mem_allocation = std::make_shared( - static_cast(data), ele_num * sizeof(T), + static_cast(data), + ele_num * sizeof(T), paddle::platform::CPUPlace()); out.ResetHolder(mem_allocation); @@ -870,7 +966,10 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, T *data, tensor->layout(), paddle::platform::MKLDNNDeviceContext::tls() .get_cur_paddle_data_layout(), - *tensor, &out, paddle::platform::CPUPlace(), true); + *tensor, + &out, + paddle::platform::CPUPlace(), + true); else std::memcpy(static_cast(data), t_data, ele_num * sizeof(T)); #else @@ -879,8 +978,11 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, T *data, } else if (t->place_ == PlaceType::kGPU) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) paddle::memory::Copy(paddle::platform::CPUPlace(), - static_cast(data), t_place, t_data, - ele_num * sizeof(T), stream); + static_cast(data), + t_place, + t_data, + ele_num * sizeof(T), + stream); #else PADDLE_THROW(paddle::platform::errors::Unavailable( "Can not create tensor with CUDA place because paddle is not compiled " diff --git a/paddle/fluid/inference/api/onnxruntime_predictor.cc b/paddle/fluid/inference/api/onnxruntime_predictor.cc index 326da0e433..83919ad139 100644 --- a/paddle/fluid/inference/api/onnxruntime_predictor.cc +++ b/paddle/fluid/inference/api/onnxruntime_predictor.cc @@ -71,13 +71,16 @@ bool CheckConvertToONNX(const AnalysisConfig &config) { } else if (config.prog_file().empty() || config.params_file().empty()) { LOG(ERROR) << string::Sprintf( "not valid model path '%s' or program path '%s' or params path '%s'.", - config.model_dir(), config.prog_file(), config.params_file()); + config.model_dir(), + config.prog_file(), + config.params_file()); return false; } if (config.model_from_memory()) { - return paddle2onnx::IsExportable( - config.prog_file().data(), config.prog_file().size(), - config.params_file().data(), config.params_file().size()); + return paddle2onnx::IsExportable(config.prog_file().data(), + config.prog_file().size(), + config.params_file().data(), + config.params_file().size()); } else { return paddle2onnx::IsExportable(config.prog_file().c_str(), config.params_file().c_str()); @@ -98,12 +101,17 @@ bool ONNXRuntimePredictor::Init() { char *onnx_proto = nullptr; int out_size; if (config_.model_from_memory()) { - paddle2onnx::Export(config_.prog_file().data(), config_.prog_file().size(), + paddle2onnx::Export(config_.prog_file().data(), + config_.prog_file().size(), config_.params_file().data(), - config_.params_file().size(), &onnx_proto, &out_size); + config_.params_file().size(), + &onnx_proto, + &out_size); } else { paddle2onnx::Export(config_.prog_file().c_str(), - config_.params_file().c_str(), &onnx_proto, &out_size); + config_.params_file().c_str(), + &onnx_proto, + &out_size); } Ort::SessionOptions session_options; @@ -134,8 +142,8 @@ bool ONNXRuntimePredictor::Init() { session_ = {env_, onnx_proto, static_cast(out_size), session_options}; binding_ = std::make_shared(session_); - Ort::MemoryInfo memory_info(device_name, OrtDeviceAllocator, - place_.GetDeviceId(), OrtMemTypeDefault); + Ort::MemoryInfo memory_info( + device_name, OrtDeviceAllocator, place_.GetDeviceId(), OrtMemTypeDefault); Ort::Allocator allocator(session_, memory_info); size_t n_inputs = session_.GetInputCount(); @@ -160,8 +168,10 @@ bool ONNXRuntimePredictor::Init() { type_info.GetTensorTypeAndShapeInfo().GetElementType(); output_desc_.emplace_back(ONNXDesc{output_name, shape, data_type}); - Ort::MemoryInfo out_memory_info(device_name, OrtDeviceAllocator, - place_.GetDeviceId(), OrtMemTypeDefault); + Ort::MemoryInfo out_memory_info(device_name, + OrtDeviceAllocator, + place_.GetDeviceId(), + OrtMemTypeDefault); binding_->BindOutput(output_name, out_memory_info); allocator.Free(output_name); @@ -181,7 +191,8 @@ CreatePaddlePredictor( } PADDLE_ENFORCE_EQ( - config.is_valid(), true, + config.is_valid(), + true, platform::errors::InvalidArgument( "Note: Each config can only be used for one predictor.")); @@ -238,7 +249,8 @@ bool ONNXRuntimePredictor::FindONNXDesc(const std::string &name, std::unique_ptr ONNXRuntimePredictor::GetInputTensor( const std::string &name) { - PADDLE_ENFORCE_EQ(FindONNXDesc(name, true), true, + PADDLE_ENFORCE_EQ(FindONNXDesc(name, true), + true, platform::errors::PreconditionNotMet( "The in variable named %s is not found in the " "ONNXPredictor.", @@ -254,12 +266,21 @@ std::unique_ptr ONNXRuntimePredictor::GetInputTensor( } res->SetOrtMark(true); res->SetOrtBinding(binding_); + auto iter = input_buffers_.find(name); + if (iter == input_buffers_.end()) { + std::vector i_vector; + input_buffers_[name] = std::make_shared>(i_vector); + res->SetOrtBuffer(input_buffers_[name]); + } else { + res->SetOrtBuffer(iter->second); + } return res; } std::unique_ptr ONNXRuntimePredictor::GetOutputTensor( const std::string &name) { - PADDLE_ENFORCE_EQ(FindONNXDesc(name, false), true, + PADDLE_ENFORCE_EQ(FindONNXDesc(name, false), + true, platform::errors::PreconditionNotMet( "The out variable named %s is not found in the " "ONNXPredictor.", @@ -296,8 +317,10 @@ bool ONNXRuntimePredictor::ZeroCopyRun() { try { const char *device_name = place_ == PlaceType::kCPU ? "Cpu" : "Cuda"; for (auto output : output_desc_) { - Ort::MemoryInfo out_memory_info(device_name, OrtDeviceAllocator, - place_.GetDeviceId(), OrtMemTypeDefault); + Ort::MemoryInfo out_memory_info(device_name, + OrtDeviceAllocator, + place_.GetDeviceId(), + OrtMemTypeDefault); binding_->BindOutput(output.name.c_str(), out_memory_info); } session_.Run({}, *(binding_.get())); @@ -330,8 +353,9 @@ const void *ONNXRuntimePredictor::GetDeviceContexts() const { paddle::platform::DeviceContextPool &pool = paddle::platform::DeviceContextPool::Instance(); const auto &dev_ctxs = pool.device_contexts(); - return &const_cast>> &>( + return &const_cast< + std::map>> &>( dev_ctxs); } diff --git a/paddle/fluid/inference/api/onnxruntime_predictor.h b/paddle/fluid/inference/api/onnxruntime_predictor.h index 4c44a7dc0a..27ce4529a8 100644 --- a/paddle/fluid/inference/api/onnxruntime_predictor.h +++ b/paddle/fluid/inference/api/onnxruntime_predictor.h @@ -202,6 +202,7 @@ class ONNXRuntimePredictor : public PaddlePredictor { platform::Place place_; std::vector input_desc_; std::vector output_desc_; + std::map>> input_buffers_; int predictor_id_; // Some more detailed tests, they are made the friends of the predictor, so that diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index 39ba366f35..c0396713bb 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -110,7 +110,8 @@ class PD_INFER_DECL Tensor { /// \param place The place of data. /// \param layout The layout of data. Only NCHW is supported now. template - void ShareExternalData(const T* data, const std::vector& shape, + void ShareExternalData(const T* data, + const std::vector& shape, PlaceType place, DataLayout layout = DataLayout::kNCHW); @@ -171,7 +172,9 @@ class PD_INFER_DECL Tensor { void SetName(const std::string& name); template - void CopyToCpuImpl(T* data, void* stream = nullptr, CallbackFunc cb = nullptr, + void CopyToCpuImpl(T* data, + void* stream = nullptr, + CallbackFunc cb = nullptr, void* cb_params = nullptr) const; std::string name_; @@ -188,7 +191,7 @@ class PD_INFER_DECL Tensor { #ifdef PADDLE_WITH_ONNXRUNTIME bool is_ort_tensor_{false}; std::vector shape_; - std::vector buffer_; + std::weak_ptr> buffer_; std::weak_ptr binding_; int idx_{-1}; @@ -196,6 +199,8 @@ class PD_INFER_DECL Tensor { void SetOrtBinding(const std::shared_ptr binding); + void SetOrtBuffer(const std::shared_ptr> buffer); + template void ORTCopyFromCpu(const T* data); diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index e4d4bf1a1c..fd6a76a706 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -384,12 +384,12 @@ if(WITH_PYTHON) set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.dylib) set(ONNXRUNTIME_PYBIND_OUT - ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.dylib) + ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.1.10.0.dylib) else() set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.so) set(ONNXRUNTIME_PYBIND_OUT - ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.so) + ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.so.1.10.0) endif() add_custom_command( -- GitLab