diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 3a7f269eaa7f352b790f88a873d932e9c4d4cf14..458ab992c25f3818ae53b28fab38d9f986a36265 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -20,10 +20,6 @@ if(WITH_TESTING) add_definitions(-DPADDLE_WITH_TESTING) endif(WITH_TESTING) -if(WITH_INFERENCE_API_TEST) - add_definitions(-DPADDLE_WITH_INFERENCE_API_TEST) -endif(WITH_INFERENCE_API_TEST) - if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 3bf5cc262e7805884530384ffe111762aaa9bbef..9e49dea9e674f135cd31a07a113532012769286f 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -28,15 +28,14 @@ if(WITH_MKLDNN) endif() cc_library(analysis_config SRCS analysis_config.cc DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer) -cc_library(paddle_infer_contrib SRCS paddle_infer_contrib.cc DEPS zero_copy_tensor) cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) if(WITH_CRYPTO) cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope reset_tensor_array - analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto paddle_crypto custom_operator) + analysis_config zero_copy_tensor trainer_desc_proto paddle_crypto custom_operator) else() cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope reset_tensor_array - analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto custom_operator) + analysis_config zero_copy_tensor trainer_desc_proto custom_operator) endif() if(WIN32) diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 5cb8725b4d3d44ed48c1d85c9abe151684ca54f9..ff167aa7cf1068f2100b310d8aed0e2256de2605 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -121,8 +121,6 @@ DataType Tensor::type() const { return DataType::FLOAT32; } -PlaceType Tensor::place() const { return place_; } - template void Tensor::CopyFromCpu(const T *data) { EAGER_GET_TENSOR; @@ -187,8 +185,7 @@ void Tensor::CopyFromCpu(const T *data) { } template -void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, - void *cb_params) const { +void Tensor::CopyToCpu(T *data) { EAGER_GET_TENSOR; auto ele_num = tensor->numel(); auto *t_data = tensor->data(); @@ -225,16 +222,7 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, #ifdef PADDLE_WITH_HIP hipStreamSynchronize(dev_ctx->stream()); #else - // async, return stream - if (nullptr != exec_stream) { - *(static_cast(exec_stream)) = dev_ctx->stream(); - // async with callback - } else if (cb) { - cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params); - // sync - } else { - cudaStreamSynchronize(dev_ctx->stream()); - } + cudaStreamSynchronize(dev_ctx->stream()); #endif #else PADDLE_THROW(paddle::platform::errors::Unavailable( @@ -273,22 +261,6 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, "The analysis predictor supports CPU, GPU, NPU and XPU now.")); } } - -template -void Tensor::CopyToCpu(T *data) const { - CopyToCpuImpl(data, nullptr, nullptr, nullptr); -} - -template -void Tensor::CopyToCpuAsync(T *data, void *exec_stream) const { - CopyToCpuImpl(data, exec_stream, nullptr, nullptr); -} - -template -void Tensor::CopyToCpuAsync(T *data, CallbackFunc cb, void *cb_params) const { - CopyToCpuImpl(data, nullptr, cb, cb_params); -} - template PD_INFER_DECL void Tensor::CopyFromCpu(const float *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int64_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int32_t *data); @@ -296,38 +268,12 @@ template PD_INFER_DECL void Tensor::CopyFromCpu(const uint8_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const int8_t *data); template PD_INFER_DECL void Tensor::CopyFromCpu(const float16 *data); -template PD_INFER_DECL void Tensor::CopyToCpu(float *data) const; -template PD_INFER_DECL void Tensor::CopyToCpu(int64_t *data) const; -template PD_INFER_DECL void Tensor::CopyToCpu(int32_t *data) const; -template PD_INFER_DECL void Tensor::CopyToCpu(uint8_t *data) const; -template PD_INFER_DECL void Tensor::CopyToCpu(int8_t *data) const; -template PD_INFER_DECL void Tensor::CopyToCpu(float16 *data) const; - -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - float *data, void *exec_stream) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int64_t *data, void *exec_stream) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int32_t *data, void *exec_stream) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - uint8_t *data, void *exec_stream) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int8_t *data, void *exec_stream) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - float16 *data, void *exec_stream) const; - -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - float *data, CallbackFunc cb, void *cb_params) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int64_t *data, CallbackFunc cb, void *cb_params) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int32_t *data, CallbackFunc cb, void *cb_params) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - uint8_t *data, CallbackFunc cb, void *cb_params) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - int8_t *data, CallbackFunc cb, void *cb_params) const; -template PD_INFER_DECL void Tensor::CopyToCpuAsync( - float16 *data, CallbackFunc cb, void *cb_params) const; +template PD_INFER_DECL void Tensor::CopyToCpu(float *data); +template PD_INFER_DECL void Tensor::CopyToCpu(int64_t *data); +template PD_INFER_DECL void Tensor::CopyToCpu(int32_t *data); +template PD_INFER_DECL void Tensor::CopyToCpu(uint8_t *data); +template PD_INFER_DECL void Tensor::CopyToCpu(int8_t *data); +template PD_INFER_DECL void Tensor::CopyToCpu(float16 *data); template PD_INFER_DECL float *Tensor::data(PlaceType *place, int *size) const; @@ -339,15 +285,12 @@ template PD_INFER_DECL uint8_t *Tensor::data(PlaceType *place, int *size) const; template PD_INFER_DECL int8_t *Tensor::data(PlaceType *place, int *size) const; -template PD_INFER_DECL float16 *Tensor::data(PlaceType *place, - int *size) const; template PD_INFER_DECL float *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL int64_t *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL int32_t *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL uint8_t *Tensor::mutable_data(PlaceType place); template PD_INFER_DECL int8_t *Tensor::mutable_data(PlaceType place); -template PD_INFER_DECL float16 *Tensor::mutable_data(PlaceType place); Tensor::Tensor(void *scope) : scope_{scope} { PADDLE_ENFORCE_NOT_NULL(scope_, diff --git a/paddle/fluid/inference/api/paddle_infer_contrib.cc b/paddle/fluid/inference/api/paddle_infer_contrib.cc deleted file mode 100644 index aad1c3fa6f882caa64f984bfab0f78a220880077..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/api/paddle_infer_contrib.cc +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/api/paddle_infer_contrib.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" - -namespace paddle_infer { -namespace contrib { - -using paddle::PaddleDType; - -void* TensorUtils::CudaMallocPinnedMemory(size_t size) { -#if defined(PADDLE_WITH_CUDA) - void* ptr = nullptr; - PADDLE_ENFORCE_CUDA_SUCCESS(cudaMallocHost(&ptr, size)); - return ptr; -#else - return nullptr; -#endif -} - -void TensorUtils::CudaFreePinnedMemory(void* ptr) { -#if defined(PADDLE_WITH_CUDA) - PADDLE_ENFORCE_CUDA_SUCCESS(cudaFreeHost(ptr)); -#endif -} - -void TensorUtils::CopyTensorImpl(Tensor* p_dst, const Tensor& src, - void* exec_stream, CallbackFunc cb, - void* cb_params) { - Tensor& dst = *p_dst; - dst.Reshape(src.shape()); - PADDLE_ENFORCE( - src.place() == PlaceType::kCPU || src.place() == PlaceType::kGPU, - paddle::platform::errors::InvalidArgument( - "CopyTensor only support PlaceType kCPU/kGPU now.")); - PADDLE_ENFORCE( - dst.place() == PlaceType::kCPU || dst.place() == PlaceType::kGPU, - paddle::platform::errors::InvalidArgument( - "CopyTensor only support PlaceType kCPU/kGPU now.")); - // copy to cpu, gpu => cpu or cpu => cpu - if (dst.place() == PlaceType::kCPU) { - switch (src.type()) { - case PaddleDType::INT32: - src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), - exec_stream, cb, cb_params); - break; - case PaddleDType::INT64: - src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), - exec_stream, cb, cb_params); - break; - case PaddleDType::FLOAT32: - src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), exec_stream, - cb, cb_params); - break; - case PaddleDType::UINT8: - src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), - exec_stream, cb, cb_params); - break; - case PaddleDType::INT8: - src.CopyToCpuImpl(dst.mutable_data(PlaceType::kCPU), - exec_stream, cb, cb_params); - break; - case PaddleDType::FLOAT16: - src.CopyToCpuImpl( - dst.mutable_data(PlaceType::kCPU), - exec_stream, cb, cb_params); - break; - default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Only INT32, INT64, UINT8, INT8, FLOAT16 and " - "FLOAT32 is supported in Tensor. Others not implements")); - } - // gpu => gpu or cpu => gpu - } else { -#if defined(PADDLE_WITH_CUDA) - void* dst_data = nullptr; - void* src_data = nullptr; - size_t data_len = 0; - int data_size = 0; - PlaceType src_place; - switch (src.type()) { - case PaddleDType::INT32: - dst_data = - static_cast(dst.mutable_data(PlaceType::kGPU)); - src_data = - static_cast(src.data(&src_place, &data_size)); - data_len = data_size * sizeof(int32_t); - break; - case PaddleDType::INT64: - dst_data = - static_cast(dst.mutable_data(PlaceType::kGPU)); - src_data = - static_cast(src.data(&src_place, &data_size)); - data_len = data_size * sizeof(int64_t); - break; - case PaddleDType::FLOAT32: - dst_data = static_cast(dst.mutable_data(PlaceType::kGPU)); - src_data = static_cast(src.data(&src_place, &data_size)); - data_len = data_size * sizeof(float); - break; - case PaddleDType::UINT8: - dst_data = - static_cast(dst.mutable_data(PlaceType::kGPU)); - src_data = - static_cast(src.data(&src_place, &data_size)); - data_len = data_size * sizeof(uint8_t); - break; - case PaddleDType::INT8: - dst_data = - static_cast(dst.mutable_data(PlaceType::kGPU)); - src_data = static_cast(src.data(&src_place, &data_size)); - data_len = data_size * sizeof(int8_t); - break; - case PaddleDType::FLOAT16: - dst_data = static_cast( - dst.mutable_data(PlaceType::kGPU)); - src_data = static_cast( - src.data(&src_place, &data_size)); - data_len = data_size * 2; - break; - default: - PADDLE_THROW(paddle::platform::errors::Unimplemented( - "Only INT32, INT64, UINT8, INT8, FLOAT16 and " - "FLOAT32 is supported in Tensor. Others not implements")); - } - - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - paddle::platform::CUDAPlace gpu_place(dst.device_); - auto* dev_ctx = static_cast( - pool.Get(gpu_place)); - - if (src.place() == PlaceType::kCPU) { - paddle::memory::Copy(gpu_place, static_cast(dst_data), - paddle::platform::CPUPlace(), src_data, data_len, - dev_ctx->stream()); - } else { - paddle::memory::Copy(gpu_place, static_cast(dst_data), - paddle::platform::CUDAPlace(), src_data, data_len, - dev_ctx->stream()); - } - - if (nullptr != exec_stream) { - *(static_cast(exec_stream)) = dev_ctx->stream(); - } else if (cb) { - cudaLaunchHostFunc(dev_ctx->stream(), cb, cb_params); - } else { - cudaStreamSynchronize(dev_ctx->stream()); - } -#else - PADDLE_THROW(paddle::platform::errors::Unavailable( - "Can not copy tensor to GPU CUDA place because paddle is not compiled " - "with CUDA.")); -#endif - } - return; -} - -void TensorUtils::CopyTensor(Tensor* p_dst, const Tensor& src) { - CopyTensorImpl(p_dst, src, nullptr, nullptr, nullptr); -} - -void TensorUtils::CopyTensorAsync(Tensor* p_dst, const Tensor& src, - void* exec_stream) { - CopyTensorImpl(p_dst, src, exec_stream, nullptr, nullptr); -} - -void TensorUtils::CopyTensorAsync(Tensor* p_dst, const Tensor& src, - CallbackFunc cb, void* cb_params) { - CopyTensorImpl(p_dst, src, nullptr, cb, cb_params); -} - -} // namespace contrib -} // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_infer_contrib.h b/paddle/fluid/inference/api/paddle_infer_contrib.h deleted file mode 100644 index 7d35567e4302a3a43ecbae05069f5b9435db5414..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/api/paddle_infer_contrib.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/fluid/inference/api/paddle_inference_api.h" - -namespace paddle_infer { -namespace contrib { - -class TensorUtils { - public: - static void* CudaMallocPinnedMemory(size_t size); - static void CudaFreePinnedMemory(void* mem); - - static void CopyTensor(Tensor* p_dst, const Tensor& src); - static void CopyTensorAsync(Tensor* p_dst, const Tensor& src, - void* exec_stream); - static void CopyTensorAsync(Tensor* p_dst, const Tensor& src, CallbackFunc cb, - void* cb_params); - - private: - static void CopyTensorImpl(Tensor* p_dst, const Tensor& src, - void* exec_stream, CallbackFunc cb, - void* cb_params); -}; - -} // namespace contrib -} // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index f6dce74c30ded1c23926e6cd4cdec85073f604c9..1f813d52ef5e76f1380e529fbbce8e79eae0545d 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -18,16 +18,6 @@ namespace paddle_infer { -typedef void (*CallbackFunc)(void*); - -#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST) -class InferApiTesterUtils; -#endif - -namespace contrib { -class TensorUtils; -} - /// \brief Paddle data type. enum DataType { FLOAT32, @@ -82,21 +72,7 @@ class PD_INFER_DECL Tensor { /// It's usually used to get the output tensor data. /// \param[out] data The tensor will copy the data to the address. template - void CopyToCpu(T* data) const; - - /// \brief Copy the tensor data to the host memory asynchronously. - /// \param[out] data The tensor will copy the data to the address. - /// \param[out] exec_stream The tensor will excute copy in this stream(Only - /// GPU CUDA stream suppported now). - template - void CopyToCpuAsync(T* data, void* exec_stream) const; - - /// \brief Copy the tensor data to the host memory asynchronously. - /// \param[out] data The tensor will copy the data to the address. - /// \param[out] cb Callback function cb(cb_params) will be executed on the - /// host after all currently enqueued items in the stream have completed . - template - void CopyToCpuAsync(T* data, CallbackFunc cb, void* cb_params) const; + void CopyToCpu(T* data); /// \brief Return the shape of the Tensor. std::vector shape() const; @@ -116,20 +92,12 @@ class PD_INFER_DECL Tensor { /// \return The data type of the tensor. DataType type() const; - /// \brief Return the place type of the tensor. - /// \return The place type of the tensor. - PlaceType place() const; - protected: explicit Tensor(void* scope); void* FindTensor() const; void SetPlace(PlaceType place, int device = -1); void SetName(const std::string& name); - template - void CopyToCpuImpl(T* data, void* stream = nullptr, CallbackFunc cb = nullptr, - void* cb_params = nullptr) const; - std::string name_; // The corresponding tensor pointer inside Paddle workspace is cached for // performance. @@ -139,11 +107,6 @@ class PD_INFER_DECL Tensor { void* scope_{nullptr}; PlaceType place_; int device_; - - friend class paddle_infer::contrib::TensorUtils; -#if defined(PADDLE_WITH_TESTING) && defined(PADDLE_WITH_INFERENCE_API_TEST) - friend class paddle_infer::InferApiTesterUtils; -#endif }; } // namespace paddle_infer diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index bb47558c69b57787662fb61ffddb8e7cd0bd07bf..f0eb0d1fa675b7e88aae44acd79e425a2bc70e47 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -682,11 +682,6 @@ if(WITH_GPU) inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${RESNET50_MODEL_DIR}) - - inference_analysis_test(paddle_infer_api_copy_tensor_tester SRCS paddle_infer_api_copy_tensor_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR}) - set_tests_properties(paddle_infer_api_copy_tensor_tester PROPERTIES TIMEOUT 30) endif() if("$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc deleted file mode 100644 index 2be69781c4e60bd7058e3a1ef4a9c27c23def9d4..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc +++ /dev/null @@ -1,329 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include "gflags/gflags.h" -#include "glog/logging.h" -#include "paddle/fluid/inference/api/paddle_infer_contrib.h" -#include "paddle/fluid/inference/tests/api/trt_test_helper.h" -#include "paddle/fluid/platform/float16.h" - -namespace paddle_infer { - -class InferApiTesterUtils { - public: - static std::unique_ptr CreateInferTensorForTest( - const std::string &name, PlaceType place, void *p_scope) { - auto var = static_cast(p_scope)->Var(name); - var->GetMutable(); - std::unique_ptr res(new Tensor(p_scope)); - res->input_or_output_ = true; - res->SetName(name); - res->SetPlace(place, 0 /*device id*/); - return res; - } -}; - -TEST(Tensor, copy_to_cpu_async_stream) { - LOG(INFO) << GetVersion(); - UpdateDllFlag("conv_workspace_size_limit", "4000"); - std::string model_dir = FLAGS_infer_model + "/model"; - Config config; - config.SetModel(model_dir + "/model", model_dir + "/params"); - config.EnableUseGpu(100, 0); - - auto predictor = CreatePredictor(config); - auto pred_clone = predictor->Clone(); - - std::vector in_shape = {1, 3, 318, 318}; - int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1, - [](int &a, int &b) { return a * b; }); - - std::vector input(in_num, 1.0); - - const auto &input_names = predictor->GetInputNames(); - auto input_tensor = predictor->GetInputHandle(input_names[0]); - - input_tensor->Reshape(in_shape); - input_tensor->CopyFromCpu(input.data()); - - predictor->Run(); - - const auto &output_names = predictor->GetOutputNames(); - auto output_tensor = predictor->GetOutputHandle(output_names[0]); - std::vector output_shape = output_tensor->shape(); - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, - std::multiplies()); - - float *out_data = static_cast( - contrib::TensorUtils::CudaMallocPinnedMemory(sizeof(float) * out_num)); - memset(out_data, 0, sizeof(float) * out_num); - std::vector correct_out_data = { - 127.78, 1.07353, -229.42, 1127.28, -177.365, - -292.412, -271.614, 466.054, 540.436, -214.223, - }; - - for (int i = 0; i < 100; i++) { - predictor->Run(); - } - - cudaStream_t stream; - output_tensor->CopyToCpuAsync(out_data, static_cast(&stream)); - - // sync - cudaStreamSynchronize(stream); - - for (int i = 0; i < 10; i++) { - EXPECT_NEAR(out_data[i] / correct_out_data[i], 1.0, 1e-3); - } - contrib::TensorUtils::CudaFreePinnedMemory(static_cast(out_data)); -} - -TEST(Tensor, copy_to_cpu_async_callback) { - LOG(INFO) << GetVersion(); - UpdateDllFlag("conv_workspace_size_limit", "4000"); - std::string model_dir = FLAGS_infer_model + "/model"; - Config config; - config.SetModel(model_dir + "/model", model_dir + "/params"); - config.EnableUseGpu(100, 0); - - auto predictor = CreatePredictor(config); - auto pred_clone = predictor->Clone(); - - std::vector in_shape = {1, 3, 318, 318}; - int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1, - [](int &a, int &b) { return a * b; }); - - std::vector input(in_num, 1.0); - - const auto &input_names = predictor->GetInputNames(); - auto input_tensor = predictor->GetInputHandle(input_names[0]); - - input_tensor->Reshape(in_shape); - input_tensor->CopyFromCpu(input.data()); - - predictor->Run(); - - const auto &output_names = predictor->GetOutputNames(); - auto output_tensor = predictor->GetOutputHandle(output_names[0]); - std::vector output_shape = output_tensor->shape(); - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, - std::multiplies()); - - float *out_data = static_cast( - contrib::TensorUtils::CudaMallocPinnedMemory(sizeof(float) * out_num)); - memset(out_data, 0, sizeof(float) * out_num); - - for (int i = 0; i < 100; i++) { - predictor->Run(); - } - - output_tensor->CopyToCpuAsync( - out_data, - [](void *cb_params) { - float *data = static_cast(cb_params); - std::vector correct_out_data = { - 127.78, 1.07353, -229.42, 1127.28, -177.365, - -292.412, -271.614, 466.054, 540.436, -214.223, - }; - for (int i = 0; i < 10; i++) { - EXPECT_NEAR(data[i] / correct_out_data[i], 1.0, 1e-3); - } - }, - static_cast(out_data)); - - cudaDeviceSynchronize(); - contrib::TensorUtils::CudaFreePinnedMemory(static_cast(out_data)); -} - -template -static void test_copy_tensor(PlaceType src_place, PlaceType dst_place) { - paddle::framework::Scope scope; - auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_src", src_place, static_cast(&scope)); - auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_dst", dst_place, static_cast(&scope)); - std::vector data_src(6, 1); - tensor_src->Reshape({2, 3}); - tensor_src->CopyFromCpu(data_src.data()); - - std::vector data_dst(4, 2); - tensor_dst->Reshape({2, 2}); - tensor_dst->CopyFromCpu(data_dst.data()); - - paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src); - - EXPECT_EQ(tensor_dst->shape().size(), (size_t)2); - EXPECT_EQ(tensor_dst->shape()[0], 2); - EXPECT_EQ(tensor_dst->shape()[1], 3); - - std::vector data_check(6, 3); - tensor_dst->CopyToCpu(static_cast(data_check.data())); - - for (int i = 0; i < 6; i++) { - EXPECT_NEAR(data_check[i], 1, 1e-5); - } -} - -TEST(CopyTensor, float32) { - test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); - test_copy_tensor(PlaceType::kCPU, PlaceType::kGPU); - test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); -} - -TEST(CopyTensor, int32) { - test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); - test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); -} - -TEST(CopyTensor, int64) { - test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); - test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); -} - -TEST(CopyTensor, int8) { - test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); - test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); -} - -TEST(CopyTensor, uint8) { - test_copy_tensor(PlaceType::kCPU, PlaceType::kCPU); - test_copy_tensor(PlaceType::kGPU, PlaceType::kGPU); -} - -TEST(CopyTensor, float16) { - paddle::framework::Scope scope; - auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_src", PlaceType::kCPU, static_cast(&scope)); - auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_dst", PlaceType::kCPU, static_cast(&scope)); - - using paddle::platform::float16; - std::vector data_src(6, float16(1.0)); - tensor_src->Reshape({2, 3}); - tensor_src->CopyFromCpu(data_src.data()); - - std::vector data_dst(4, float16(2.0)); - tensor_dst->Reshape({2, 2}); - tensor_dst->CopyFromCpu(data_dst.data()); - - paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src); - - EXPECT_EQ(tensor_dst->shape().size(), (size_t)2); - EXPECT_EQ(tensor_dst->shape()[0], 2); - EXPECT_EQ(tensor_dst->shape()[1], 3); - - std::vector data_check(6, float16(1.0)); - tensor_dst->CopyToCpu(data_check.data()); - - for (int i = 0; i < 6; i++) { - EXPECT_TRUE(data_check[i] == float16(1.0)); - } -} - -TEST(CopyTensor, float16_gpu) { - paddle::framework::Scope scope; - auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_src", PlaceType::kGPU, static_cast(&scope)); - auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_dst", PlaceType::kGPU, static_cast(&scope)); - - using paddle::platform::float16; - std::vector data_src(6, float16(1.0)); - tensor_src->Reshape({2, 3}); - tensor_src->CopyFromCpu(data_src.data()); - - std::vector data_dst(4, float16(2.0)); - tensor_dst->Reshape({2, 2}); - tensor_dst->CopyFromCpu(data_dst.data()); - - paddle_infer::contrib::TensorUtils::CopyTensor(tensor_dst.get(), *tensor_src); - - EXPECT_EQ(tensor_dst->shape().size(), (size_t)2); - EXPECT_EQ(tensor_dst->shape()[0], 2); - EXPECT_EQ(tensor_dst->shape()[1], 3); - - std::vector data_check(6, float16(1.0)); - tensor_dst->CopyToCpu(data_check.data()); - - for (int i = 0; i < 6; i++) { - EXPECT_TRUE(data_check[i] == float16(1.0)); - } -} - -TEST(CopyTensor, async_stream) { - paddle::framework::Scope scope; - auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_src", PlaceType::kGPU, static_cast(&scope)); - auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_dst", PlaceType::kGPU, static_cast(&scope)); - - std::vector data_src(6, 1.0); - tensor_src->Reshape({2, 3}); - tensor_src->CopyFromCpu(data_src.data()); - - std::vector data_dst(4, 2.0); - tensor_dst->Reshape({2, 2}); - tensor_dst->CopyFromCpu(data_dst.data()); - - cudaStream_t stream; - paddle_infer::contrib::TensorUtils::CopyTensorAsync( - tensor_dst.get(), *tensor_src, static_cast(&stream)); - - EXPECT_EQ(tensor_dst->shape().size(), (size_t)2); - EXPECT_EQ(tensor_dst->shape()[0], 2); - EXPECT_EQ(tensor_dst->shape()[1], 3); - - cudaStreamSynchronize(stream); - - std::vector data_check(6, 1.0); - tensor_dst->CopyToCpu(data_check.data()); - - for (int i = 0; i < 6; i++) { - EXPECT_NEAR(data_check[i], static_cast(1.0), 1e-5); - } -} - -TEST(CopyTensor, async_callback) { - paddle::framework::Scope scope; - auto tensor_src = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_src", PlaceType::kCPU, static_cast(&scope)); - auto tensor_dst = paddle_infer::InferApiTesterUtils::CreateInferTensorForTest( - "tensor_dst", PlaceType::kGPU, static_cast(&scope)); - - std::vector data_src(6, 1.0); - tensor_src->Reshape({2, 3}); - tensor_src->CopyFromCpu(data_src.data()); - - std::vector data_dst(4, 2.0); - tensor_dst->Reshape({2, 2}); - tensor_dst->CopyFromCpu(data_dst.data()); - - paddle_infer::contrib::TensorUtils::CopyTensorAsync( - tensor_dst.get(), *tensor_src, - [](void *cb_params) { - Tensor *tensor = static_cast(cb_params); - EXPECT_EQ(tensor->shape().size(), (size_t)2); - EXPECT_EQ(tensor->shape()[0], 2); - EXPECT_EQ(tensor->shape()[1], 3); - }, - static_cast(&(*tensor_dst))); - - cudaDeviceSynchronize(); -} - -} // namespace paddle_infer diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 6b3c150a0b9c8eb04948ff191cca4bb3441b60e8..b7cf907b5db614054f17169132016548d38fdd69 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -28,7 +28,6 @@ #include #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/inference/api/paddle_infer_contrib.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/inference/utils/io_utils.h" @@ -287,12 +286,6 @@ py::bytes SerializePDTensorToBytes(PaddleTensor &tensor) { // NOLINT paddle::inference::SerializePDTensorToStream(&ss, tensor); return static_cast(ss.str()); } - -void CopyPaddleInferTensor(paddle_infer::Tensor &dst, - const paddle_infer::Tensor &src) { - return paddle_infer::contrib::TensorUtils::CopyTensor(&dst, src); -} - } // namespace void BindInferenceApi(py::module *m) { @@ -324,7 +317,6 @@ void BindInferenceApi(py::module *m) { new paddle_infer::Predictor(config)); return std::move(pred); }); - m->def("copy_tensor", &CopyPaddleInferTensor); m->def("paddle_dtype_size", &paddle::PaddleDtypeSize); m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes); m->def("get_version", &paddle_infer::GetVersion); diff --git a/python/paddle/inference/contrib/__init__.py b/python/paddle/inference/contrib/__init__.py deleted file mode 100644 index 6f0ea85344b7e0c679730356928c8749cf71cd66..0000000000000000000000000000000000000000 --- a/python/paddle/inference/contrib/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/paddle/inference/contrib/utils/__init__.py b/python/paddle/inference/contrib/utils/__init__.py deleted file mode 100644 index 5a5252504925073ca22b70f95dfd3bd99a69fb98..0000000000000000000000000000000000000000 --- a/python/paddle/inference/contrib/utils/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ....fluid.core import copy_tensor # noqa: F401