Commit 281d7c34 authored by jiweibo

update shared_ptr. test=develop

Parent f6ba9268
@@ -256,8 +256,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
   bool status_is_cloned_;
 #ifdef LITE_WITH_CUDA
   bool multi_stream_{false};
-  cudaStream_t* io_stream_{nullptr};
-  cudaStream_t* exec_stream_{nullptr};
+  std::shared_ptr<cudaStream_t> io_stream_;
+  std::shared_ptr<cudaStream_t> exec_stream_;
   cudaEvent_t input_event_;
   std::vector<cudaEvent_t> output_events_;
   // only for multi exec stream mode.
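The core of the change is in this hunk: the predictor's stream members go from raw `cudaStream_t*` (previously either adopted from the config or allocated with `new` and never freed) to `std::shared_ptr<cudaStream_t>`, so the config and the predictor can co-own one handle. A minimal sketch of that ownership model, using plain CUDA runtime calls rather than PaddleLite's wrappers (the standalone names here are illustrative, not part of the API):

```cpp
#include <cuda_runtime.h>
#include <memory>

int main() {
  // One heap-allocated stream handle, co-owned by several parties.
  auto stream = std::make_shared<cudaStream_t>();
  cudaStreamCreate(stream.get());  // initialize the underlying CUDA stream

  std::shared_ptr<cudaStream_t> config_copy = stream;     // e.g. held by the config
  std::shared_ptr<cudaStream_t> predictor_copy = stream;  // e.g. held by the predictor

  // The default deleter frees only the cudaStream_t handle once the last
  // owner goes away; the CUDA stream itself must still be destroyed explicitly.
  cudaStreamDestroy(*stream);
  return 0;
}
```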
...
@@ -95,18 +95,18 @@ void CxxPaddleApiImpl::CudaEnvInit(std::vector<std::string> *passes) {
   if (config_.exec_stream()) {
     exec_stream_ = config_.exec_stream();
   } else {
-    exec_stream_ = new cudaStream_t();
-    TargetWrapperCuda::CreateStream(exec_stream_);
+    exec_stream_ = std::make_shared<cudaStream_t>();
+    TargetWrapperCuda::CreateStream(exec_stream_.get());
   }
   if (config_.io_stream()) {
     io_stream_ = config_.io_stream();
   } else {
-    io_stream_ = new cudaStream_t();
-    TargetWrapperCuda::CreateStream(io_stream_);
+    io_stream_ = std::make_shared<cudaStream_t>();
+    TargetWrapperCuda::CreateStream(io_stream_.get());
   }
-  raw_predictor_->set_exec_stream(exec_stream_);
-  raw_predictor_->set_io_stream(io_stream_);
+  raw_predictor_->set_exec_stream(exec_stream_.get());
+  raw_predictor_->set_io_stream(io_stream_.get());
   // init sync events.
   if (config_.multi_stream()) {
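Both branches above follow the same adopt-or-create pattern: reuse the caller-supplied stream from the config when one is set, otherwise allocate a fresh handle that the predictor co-owns. A condensed sketch of that pattern (the helper name `AdoptOrCreateStream` is hypothetical, and plain `cudaStreamCreate` stands in for `TargetWrapperCuda::CreateStream`):

```cpp
#include <cuda_runtime.h>
#include <memory>

std::shared_ptr<cudaStream_t> AdoptOrCreateStream(
    const std::shared_ptr<cudaStream_t>& configured) {
  if (configured) {
    return configured;  // adopt: share ownership with the config
  }
  auto stream = std::make_shared<cudaStream_t>();  // create: fresh handle
  cudaStreamCreate(stream.get());
  return stream;
}
```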
@@ -158,7 +158,8 @@ void CxxPaddleApiImpl::OutputSync() {
 std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) {
   auto *x = raw_predictor_->GetInput(i);
 #ifdef LITE_WITH_CUDA
-  return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_));
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(x, io_stream_.get()));
 #else
   return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
 #endif
@@ -168,7 +169,8 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
     int i) const {
   const auto *x = raw_predictor_->GetOutput(i);
 #ifdef LITE_WITH_CUDA
-  return std::unique_ptr<lite_api::Tensor>(
-      new lite_api::Tensor(x, io_stream_));
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(x, io_stream_.get()));
 #else
   return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
 #endif
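In both `GetInput` and `GetOutput`, the `Tensor` wrapper now receives `io_stream_.get()`: a non-owning raw pointer that does not bump the reference count. A small sketch of why that borrow is sound, under the assumption that a tensor is never used after its predictor is destroyed:

```cpp
#include <cuda_runtime.h>
#include <memory>

void BorrowDemo() {
  std::shared_ptr<cudaStream_t> owner = std::make_shared<cudaStream_t>();
  // .get() yields a borrowed pointer: valid exactly as long as some
  // shared_ptr (here `owner`, standing in for the predictor) keeps the
  // handle alive.
  cudaStream_t* borrowed = owner.get();
  (void)borrowed;
}
```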
@@ -250,10 +252,6 @@ CxxPaddleApiImpl::~CxxPaddleApiImpl() {
   for (size_t i = 0; i < output_events_.size(); ++i) {
     TargetWrapperCuda::DestroyEvent(output_events_[i]);
   }
-  if (multi_stream_) {
-    TargetWrapperCuda::DestroyStream(*io_stream_);
-    TargetWrapperCuda::DestroyStream(*exec_stream_);
-  }
 #endif
 }
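With shared ownership the destructor can no longer destroy the streams unconditionally, since the config (or another predictor) may still hold them, so the explicit `DestroyStream` calls are dropped. Note that the default `shared_ptr` deleter frees only the handle, not the CUDA stream itself; one alternative pattern (not what this commit does) is a custom deleter that ties `cudaStreamDestroy` to the last owner:

```cpp
#include <cuda_runtime.h>
#include <memory>

// Hypothetical factory: the deleter destroys the CUDA stream exactly once,
// when the final shared_ptr copy is released.
std::shared_ptr<cudaStream_t> MakeOwnedStream() {
  std::shared_ptr<cudaStream_t> stream(new cudaStream_t, [](cudaStream_t* s) {
    cudaStreamDestroy(*s);  // tear down the stream itself
    delete s;               // then free the handle
  });
  cudaStreamCreate(stream.get());
  return stream;
}
```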
...
@@ -167,8 +167,8 @@ class LITE_API CxxConfig : public ConfigBase {
 #endif
 #ifdef LITE_WITH_CUDA
   bool multi_stream_{false};
-  cudaStream_t* exec_stream_{nullptr};
-  cudaStream_t* io_stream_{nullptr};
+  std::shared_ptr<cudaStream_t> exec_stream_;
+  std::shared_ptr<cudaStream_t> io_stream_;
 #endif
 #ifdef LITE_WITH_MLU
   lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270};
@@ -217,12 +217,14 @@ class LITE_API CxxConfig : public ConfigBase {
 #ifdef LITE_WITH_CUDA
   void set_multi_stream(bool multi_stream) { multi_stream_ = multi_stream; }
   bool multi_stream() const { return multi_stream_; }
-  void set_exec_stream(cudaStream_t* exec_stream) {
+  void set_exec_stream(std::shared_ptr<cudaStream_t> exec_stream) {
     exec_stream_ = exec_stream;
   }
-  void set_io_stream(cudaStream_t* io_stream) { io_stream_ = io_stream; }
-  cudaStream_t* exec_stream() { return exec_stream_; }
-  cudaStream_t* io_stream() { return io_stream_; }
+  void set_io_stream(std::shared_ptr<cudaStream_t> io_stream) {
+    io_stream_ = io_stream;
+  }
+  std::shared_ptr<cudaStream_t> exec_stream() { return exec_stream_; }
+  std::shared_ptr<cudaStream_t> io_stream() { return io_stream_; }
 #endif
 #ifdef LITE_WITH_MLU
...
@@ -95,9 +95,9 @@ TEST(Resnet50, config_exec_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t exec_stream;
-  lite::TargetWrapperCuda::CreateStream(&exec_stream);
-  config.set_exec_stream(&exec_stream);
+  std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(exec_stream.get());
+  config.set_exec_stream(exec_stream);
   RunModel(config);
 }
@@ -106,9 +106,9 @@ TEST(Resnet50, config_io_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t io_stream;
-  lite::TargetWrapperCuda::CreateStream(&io_stream);
-  config.set_io_stream(&io_stream);
+  std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(io_stream.get());
+  config.set_io_stream(io_stream);
   RunModel(config);
 }
@@ -117,12 +117,12 @@ TEST(Resnet50, config_all_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t exec_stream;
-  lite::TargetWrapperCuda::CreateStream(&exec_stream);
-  config.set_exec_stream(&exec_stream);
-  cudaStream_t io_stream;
-  lite::TargetWrapperCuda::CreateStream(&io_stream);
-  config.set_io_stream(&io_stream);
+  std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(exec_stream.get());
+  config.set_exec_stream(exec_stream);
+  std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(io_stream.get());
+  config.set_io_stream(io_stream);
   RunModel(config);
 }
...
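The updated tests create the stream through `std::make_shared` and pass the `shared_ptr` straight into the config. The same signature also lets several configs co-own one stream, which is presumably the motivation for the change; a usage sketch (hypothetical, extending the test pattern above, not part of the commit):

```cpp
// Two configs share one exec stream, so work submitted by either resulting
// predictor serializes on the same CUDA stream.
std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>();
lite::TargetWrapperCuda::CreateStream(exec_stream.get());

lite_api::CxxConfig config_a;
lite_api::CxxConfig config_b;
config_a.set_exec_stream(exec_stream);  // use_count == 3: local + two configs
config_b.set_exec_stream(exec_stream);
```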