From 2eb3a7a905356a3e3bc775182b917ddc2d4ad1e0 Mon Sep 17 00:00:00 2001 From: Zhaolong Xing Date: Fri, 7 Feb 2020 12:17:12 +0800 Subject: [PATCH] [Cherry-pick] [Fix BUG]: Core when multi thread + clone + paddle-tr #22442 (#22471) test=release/1.7 --- paddle/fluid/inference/api/details/zero_copy_tensor.cc | 3 ++- paddle/fluid/inference/tensorrt/engine.cc | 2 +- paddle/fluid/inference/tensorrt/engine.h | 3 ++- .../fluid/inference/tests/api/trt_quant_int8_test.cc | 10 ++++++++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 271b0fcbb7..444ac5b031 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -138,7 +138,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) { static_cast(pool.Get(gpu_place)); memory::Copy(platform::CPUPlace(), static_cast(data), gpu_place, t_data, ele_num * sizeof(T), dev_ctx->stream()); - cudaDeviceSynchronize(); + + cudaStreamSynchronize(dev_ctx->stream()); #else PADDLE_THROW("Not compile with CUDA, should not reach here."); #endif diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 6f66e8d972..771ad70244 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -38,13 +38,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector *buffers, const std::thread::id tid = std::this_thread::get_id(); batch_size_ = batch_size; if (infer_context_.find(tid) == infer_context_.end()) { + std::unique_lock lock(mutex_); PADDLE_ENFORCE_NOT_NULL( infer_engine_, "You should build engine first and then set the context."); infer_context_[tid].reset(infer_engine_->createExecutionContext()); } infer_context_[tid]->enqueue(batch_size, buffers->data(), stream, nullptr); - cudaStreamSynchronize(stream); SetRuntimeBatch(batch_size); } diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 50857674fc..d847ce4b5d 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -82,7 +82,7 @@ class TensorRTEngine { void Build(const DescType& paddle_model); void Execute(int batch_size, std::vector* buffers, - cudaStream_t stream); + cudaStream_t stream = nullptr); // Initialize the inference network, so that TensorRT layers can add to this // network. @@ -216,6 +216,7 @@ class TensorRTEngine { infer_context_; infer_ptr ihost_memory_; std::unordered_map quant_dynamic_range_; + std::mutex mutex_; }; // class TensorRTEngine #define IS_TRT_VERSION_GE(version) \ diff --git a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc index e1ce9d5c20..ca5cdbbcb2 100644 --- a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc +++ b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include #include "paddle/fluid/inference/tests/api/trt_test_helper.h" @@ -44,6 +45,15 @@ TEST(quant_int8, resnet50) { input_t->copy_from_cpu(input); ASSERT_TRUE(predictor->ZeroCopyRun()); + + std::vector out_data; + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputTensor(output_names[0]); + std::vector output_shape = output_t->shape(); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + out_data.resize(out_num); + output_t->copy_to_cpu(out_data.data()); } } // namespace inference -- GitLab