From ceda0b9b1a180e507dc9335acbe5215bb4558d1f Mon Sep 17 00:00:00 2001
From: Zhaolong Xing
Date: Wed, 5 Feb 2020 16:49:04 +0800
Subject: [PATCH] [Fix BUG]: Core when multi thread + clone + paddle-trt
 (#22442)

* add mutex for trt engine
test=develop

* add the test for copy_to_cpu
test=develop
---
 paddle/fluid/inference/api/details/zero_copy_tensor.cc |  3 ++-
 paddle/fluid/inference/tensorrt/engine.cc              |  2 +-
 paddle/fluid/inference/tensorrt/engine.h               |  3 ++-
 .../fluid/inference/tests/api/trt_quant_int8_test.cc   | 10 ++++++++++
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
index 271b0fcbb72..444ac5b0315 100644
--- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc
+++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -138,7 +138,8 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
         static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
     memory::Copy(platform::CPUPlace(), static_cast<void *>(data), gpu_place,
                  t_data, ele_num * sizeof(T), dev_ctx->stream());
-    cudaDeviceSynchronize();
+
+    cudaStreamSynchronize(dev_ctx->stream());
 #else
     PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 6f66e8d972c..771ad702448 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -38,13 +38,13 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
   const std::thread::id tid = std::this_thread::get_id();
   batch_size_ = batch_size;
   if (infer_context_.find(tid) == infer_context_.end()) {
+    std::unique_lock<std::mutex> lock(mutex_);
     PADDLE_ENFORCE_NOT_NULL(
         infer_engine_,
         "You should build engine first and then set the context.");
     infer_context_[tid].reset(infer_engine_->createExecutionContext());
   }
   infer_context_[tid]->enqueue(batch_size, buffers->data(), stream, nullptr);
-  cudaStreamSynchronize(stream);
   SetRuntimeBatch(batch_size);
 }
 
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 50857674fc7..d847ce4b5df 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -82,7 +82,7 @@ class TensorRTEngine {
   void Build(const DescType& paddle_model);
 
   void Execute(int batch_size, std::vector<void*>* buffers,
-               cudaStream_t stream);
+               cudaStream_t stream = nullptr);
 
   // Initialize the inference network, so that TensorRT layers can add to this
   // network.
@@ -216,6 +216,7 @@ class TensorRTEngine {
       infer_context_;
   infer_ptr<nvinfer1::IHostMemory> ihost_memory_;
   std::unordered_map<nvinfer1::ITensor*, float> quant_dynamic_range_;
+  std::mutex mutex_;
 };  // class TensorRTEngine
 
 #define IS_TRT_VERSION_GE(version) \
diff --git a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
index e1ce9d5c20b..ca5cdbbcb26 100644
--- a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
+++ b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 */
 #include
 #include
 #include
+#include <numeric>
 
 #include "paddle/fluid/inference/tests/api/trt_test_helper.h"
@@ -44,6 +45,15 @@ TEST(quant_int8, resnet50) {
   input_t->copy_from_cpu(input);
 
   ASSERT_TRUE(predictor->ZeroCopyRun());
+
+  std::vector<float> out_data;
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputTensor(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+  out_data.resize(out_num);
+  output_t->copy_to_cpu(out_data.data());
 }
 
 }  // namespace inference
--
GitLab
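
Note for readers of this patch: the crash it fixes shows up when several threads run inference through predictors cloned from one AnalysisPredictor, so they share a single TensorRTEngine. infer_context_ is populated per thread, and the unguarded map insert could race; in addition, cudaDeviceSynchronize() in copy_to_cpu stalled every stream on the device instead of only the caller's. The sketch below illustrates that usage pattern, pieced together from the ZeroCopyTensor calls in the test above. It is illustrative only: the include path, model directory, input shape, and config values are assumptions, not taken from the patch.

#include <functional>
#include <numeric>
#include <thread>
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"  // assumed include path

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./resnet50");        // placeholder model directory
  config.EnableUseGpu(100, 0);          // 100 MB initial pool on GPU 0
  config.EnableTensorRtEngine();        // default TensorRT subgraph settings
  config.SwitchUseFeedFetchOps(false);  // zero-copy API requires this

  auto main_predictor = paddle::CreatePaddlePredictor(config);

  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) {
    threads.emplace_back([&] {
      // Each thread gets its own clone; the TensorRT engine underneath is
      // shared, which is the situation the new mutex_ makes safe.
      auto predictor = main_predictor->Clone();

      std::vector<float> input(1 * 3 * 224 * 224, 1.0f);  // placeholder input
      auto input_names = predictor->GetInputNames();
      auto input_t = predictor->GetInputTensor(input_names[0]);
      input_t->Reshape({1, 3, 224, 224});
      input_t->copy_from_cpu(input.data());

      predictor->ZeroCopyRun();

      auto output_names = predictor->GetOutputNames();
      auto output_t = predictor->GetOutputTensor(output_names[0]);
      std::vector<int> output_shape = output_t->shape();
      int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                    1, std::multiplies<int>());
      std::vector<float> out_data(out_num);
      // With the patch, this synchronizes only this predictor's CUDA stream.
      output_t->copy_to_cpu(out_data.data());
    });
  }
  for (auto &t : threads) t.join();
  return 0;
}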