diff --git a/lite/api/cxx_api.h b/lite/api/cxx_api.h index c3339686aa9c3e7960224b2d674820f71817dc79..79843c9c523bd9e0c7fbec594bfd49ed65ae0977 100644 --- a/lite/api/cxx_api.h +++ b/lite/api/cxx_api.h @@ -256,8 +256,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor { bool status_is_cloned_; #ifdef LITE_WITH_CUDA bool multi_stream_{false}; - cudaStream_t* io_stream_{nullptr}; - cudaStream_t* exec_stream_{nullptr}; + std::shared_ptr<cudaStream_t> io_stream_; + std::shared_ptr<cudaStream_t> exec_stream_; cudaEvent_t input_event_; std::vector<cudaEvent_t> output_events_; // only for multi exec stream mode. diff --git a/lite/api/cxx_api_impl.cc b/lite/api/cxx_api_impl.cc index 0370dcdee7acd43b793a00c493025dbe860b5b74..96fc03efd3befb1b054d307c4c883f15fd72de7c 100644 --- a/lite/api/cxx_api_impl.cc +++ b/lite/api/cxx_api_impl.cc @@ -95,18 +95,18 @@ void CxxPaddleApiImpl::CudaEnvInit(std::vector<std::string> *passes) { if (config_.exec_stream()) { exec_stream_ = config_.exec_stream(); } else { - exec_stream_ = new cudaStream_t(); - TargetWrapperCuda::CreateStream(exec_stream_); + exec_stream_ = std::make_shared<cudaStream_t>(); + TargetWrapperCuda::CreateStream(exec_stream_.get()); } if (config_.io_stream()) { io_stream_ = config_.io_stream(); } else { - io_stream_ = new cudaStream_t(); - TargetWrapperCuda::CreateStream(io_stream_); + io_stream_ = std::make_shared<cudaStream_t>(); + TargetWrapperCuda::CreateStream(io_stream_.get()); } - raw_predictor_->set_exec_stream(exec_stream_); - raw_predictor_->set_io_stream(io_stream_); + raw_predictor_->set_exec_stream(exec_stream_.get()); + raw_predictor_->set_io_stream(io_stream_.get()); // init sync events. 
if (config_.multi_stream()) { @@ -158,7 +158,8 @@ void CxxPaddleApiImpl::OutputSync() { std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) { auto *x = raw_predictor_->GetInput(i); #ifdef LITE_WITH_CUDA - return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_)); + return std::unique_ptr<lite_api::Tensor>( + new lite_api::Tensor(x, io_stream_.get())); #else return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x)); #endif @@ -168,7 +169,8 @@ std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetOutput( int i) const { const auto *x = raw_predictor_->GetOutput(i); #ifdef LITE_WITH_CUDA - return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_)); + return std::unique_ptr<lite_api::Tensor>( + new lite_api::Tensor(x, io_stream_.get())); #else return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x)); #endif @@ -250,10 +252,6 @@ CxxPaddleApiImpl::~CxxPaddleApiImpl() { for (size_t i = 0; i < output_events_.size(); ++i) { TargetWrapperCuda::DestroyEvent(output_events_[i]); } - if (multi_stream_) { - TargetWrapperCuda::DestroyStream(*io_stream_); - TargetWrapperCuda::DestroyStream(*exec_stream_); - } #endif } diff --git a/lite/api/paddle_api.h b/lite/api/paddle_api.h index b1ef7593d3f2e74867468f8b811b42ba99523c88..9cf2e580bf7927b17bc62fb1c524a977ee806307 100644 --- a/lite/api/paddle_api.h +++ b/lite/api/paddle_api.h @@ -167,8 +167,8 @@ class LITE_API CxxConfig : public ConfigBase { #endif #ifdef LITE_WITH_CUDA bool multi_stream_{false}; - cudaStream_t* exec_stream_{nullptr}; - cudaStream_t* io_stream_{nullptr}; + std::shared_ptr<cudaStream_t> exec_stream_; + std::shared_ptr<cudaStream_t> io_stream_; #endif #ifdef LITE_WITH_MLU lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270}; @@ -217,12 +217,14 @@ class LITE_API CxxConfig : public ConfigBase { #ifdef LITE_WITH_CUDA void set_multi_stream(bool multi_stream) { multi_stream_ = multi_stream; } bool multi_stream() const { return multi_stream_; } - void set_exec_stream(cudaStream_t* exec_stream) { + void set_exec_stream(std::shared_ptr<cudaStream_t> exec_stream) { exec_stream_ = exec_stream; } - void 
set_io_stream(cudaStream_t* io_stream) { io_stream_ = io_stream; } - cudaStream_t* exec_stream() { return exec_stream_; } - cudaStream_t* io_stream() { return io_stream_; } + void set_io_stream(std::shared_ptr<cudaStream_t> io_stream) { + io_stream_ = io_stream; + } + std::shared_ptr<cudaStream_t> exec_stream() { return exec_stream_; } + std::shared_ptr<cudaStream_t> io_stream() { return io_stream_; } #endif #ifdef LITE_WITH_MLU diff --git a/lite/api/test_resnet50_lite_cuda.cc b/lite/api/test_resnet50_lite_cuda.cc index 75605d5220a7604fd9eda98e1d3440140faf771e..a01cd0ab9f8fcc6fa0248957feec907a92d6097b 100644 --- a/lite/api/test_resnet50_lite_cuda.cc +++ b/lite/api/test_resnet50_lite_cuda.cc @@ -95,9 +95,9 @@ TEST(Resnet50, config_exec_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t exec_stream; - lite::TargetWrapperCuda::CreateStream(&exec_stream); - config.set_exec_stream(&exec_stream); + std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(exec_stream.get()); + config.set_exec_stream(exec_stream); RunModel(config); } @@ -106,9 +106,9 @@ TEST(Resnet50, config_io_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t io_stream; - lite::TargetWrapperCuda::CreateStream(&io_stream); - config.set_io_stream(&io_stream); + std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(io_stream.get()); + config.set_io_stream(io_stream); RunModel(config); } @@ -117,12 +117,12 @@ TEST(Resnet50, config_all_stream) { lite_api::CxxConfig config; config.set_model_dir(FLAGS_model_dir); config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}}); - cudaStream_t exec_stream; - lite::TargetWrapperCuda::CreateStream(&exec_stream); - config.set_exec_stream(&exec_stream); - cudaStream_t io_stream; - 
lite::TargetWrapperCuda::CreateStream(&io_stream); - config.set_io_stream(&io_stream); + std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(exec_stream.get()); + config.set_exec_stream(exec_stream); + std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>(); + lite::TargetWrapperCuda::CreateStream(io_stream.get()); + config.set_io_stream(io_stream); RunModel(config); }