diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h index c3339686aa9c3e7960224b2d674820f71817dc79..79843c9c523bd9e0c7fbec594bfd49ed65ae0977 100644 --- a/lite/api/cxx_api.h +++ b/lite/api/cxx_api.h @@ -256,8 +256,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor { bool status_is_cloned_; #ifdef LITE_WITH_CUDA bool multi_stream_{false}; - cudaStream_t* io_stream_{nullptr}; - cudaStream_t* exec_stream_{nullptr}; + std::shared_ptr<cudaStream_t> io_stream_; + std::shared_ptr<cudaStream_t> exec_stream_; cudaEvent_t input_event_; std::vector<cudaEvent_t> output_events_; // only for multi exec stream mode. diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc index 0370dcdee7acd43b793a00c493025dbe860b5b74..96fc03efd3befb1b054d307c4c883f15fd72de7c 100644 --- a/lite/api/cxx_api_impl.cc +++ b/lite/api/cxx_api_impl.cc @@ -95,18 +95,18 @@ void CxxPaddleApiImpl::CudaEnvInit(std::vector<std::string> *passes) { if (config_.exec_stream()) { exec_stream_ = config_.exec_stream(); } else { - exec_stream_ = new cudaStream_t(); - TargetWrapperCuda::CreateStream(exec_stream_); + exec_stream_ = std::make_shared<cudaStream_t>(); + TargetWrapperCuda::CreateStream(exec_stream_.get()); } if (config_.io_stream()) { io_stream_ = config_.io_stream(); } else { - io_stream_ = new cudaStream_t(); - TargetWrapperCuda::CreateStream(io_stream_); + io_stream_ = std::make_shared<cudaStream_t>(); + TargetWrapperCuda::CreateStream(io_stream_.get()); } - raw_predictor_->set_exec_stream(exec_stream_); - raw_predictor_->set_io_stream(io_stream_); + raw_predictor_->set_exec_stream(exec_stream_.get()); + raw_predictor_->set_io_stream(io_stream_.get()); // init sync events. 
if (config_.multi_stream()) { @@ -158,7 +158,8 @@ void CxxPaddleApiImpl::OutputSync() { std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) { auto *x = raw_predictor_->GetInput(i); #ifdef LITE_WITH_CUDA - return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_)); + return std::unique_ptr<lite_api::Tensor>( + new lite_api::Tensor(x, io_stream_.get())); #else return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x)); #endif @@ -168,7 +169,8 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetOutput( int i) const { const auto *x = raw_predictor_->GetOutput(i); #ifdef LITE_WITH_CUDA - return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_)); + return std::unique_ptr<lite_api::Tensor>( + new lite_api::Tensor(x, io_stream_.get())); #else return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x)); #endif @@ -250,10 +252,6 @@ CxxPaddleApiImpl::~CxxPaddleApiImpl() { for (size_t i = 0; i < output_events_.size(); ++i) { TargetWrapperCuda::DestroyEvent(output_events_[i]); } - if (multi_stream_) { - TargetWrapperCuda::DestroyStream(*io_stream_); - TargetWrapperCuda::DestroyStream(*exec_stream_); - } #endif } diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index b1ef7593d3f2e74867468f8b811b42ba99523c88..9cf2e580bf7927b17bc62fb1c524a977ee806307 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -167,8 +167,8 @@ class LITE_API CxxConfig : public ConfigBase { #endif #ifdef LITE_WITH_CUDA bool multi_stream_{false}; - cudaStream_t* exec_stream_{nullptr}; - cudaStream_t* io_stream_{nullptr}; + std::shared_ptr<cudaStream_t> exec_stream_; + std::shared_ptr<cudaStream_t> io_stream_; #endif #ifdef LITE_WITH_MLU lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270}; @@ -217,12 +217,14 @@ class LITE_API CxxConfig : public ConfigBase { #ifdef LITE_WITH_CUDA void set_multi_stream(bool multi_stream) { multi_stream_ = multi_stream; } bool multi_stream() const { return multi_stream_; } - void set_exec_stream(cudaStream_t* exec_stream) { + void set_exec_stream(std::shared_ptr<cudaStream_t> exec_stream) { exec_stream_ = exec_stream; } - void 
set_io_stream(cudaStream_t* io_stream) { io_stream_ = io_stream; } - cudaStream_t* exec_stream() { return exec_stream_; } - cudaStream_t* io_stream() { return io_stream_; } + void set_io_stream(std::shared_ptr<cudaStream_t> io_stream) { + io_stream_ = io_stream; + } + std::shared_ptr<cudaStream_t> exec_stream() { return exec_stream_; } + std::shared_ptr<cudaStream_t> io_stream() { return io_stream_; } #endif #ifdef LITE_WITH_MLU diff --git a/lite/api/test_resnet50_lite_cuda.cc b/lite/api/test_resnet50_lite_cuda.cc index 75605d5220a7604fd9eda98e1d3440140faf771e..a01cd0ab9f8fcc6fa0248957feec907a92d6097b 100644 --- a/lite/api/test_resnet50_lite_cuda.cc +++ b/lite/api/test_resnet50_lite_cuda.cc @@ -95,9 +95,9 @@ TEST(Resnet50, config_exec_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t exec_stream; - lite::TargetWrapperCuda::CreateStream(&exec_stream); - config.set_exec_stream(&exec_stream); + std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(exec_stream.get()); + config.set_exec_stream(exec_stream); RunModel(config); } @@ -106,9 +106,9 @@ TEST(Resnet50, config_io_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t io_stream; - lite::TargetWrapperCuda::CreateStream(&io_stream); - config.set_io_stream(&io_stream); + std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(io_stream.get()); + config.set_io_stream(io_stream); RunModel(config); } @@ -117,12 +117,12 @@ TEST(Resnet50, config_all_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t exec_stream; - lite::TargetWrapperCuda::CreateStream(&exec_stream); - config.set_exec_stream(&exec_stream); - cudaStream_t io_stream; - 
lite::TargetWrapperCuda::CreateStream(&io_stream); - config.set_io_stream(&io_stream); + std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(exec_stream.get()); + config.set_exec_stream(exec_stream); + std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(io_stream.get()); + config.set_io_stream(io_stream); RunModel(config); }