Commit 281d7c34 authored by J jiweibo

update shared_ptr. test=develop

Parent f6ba9268
@@ -256,8 +256,8 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
   bool status_is_cloned_;
 #ifdef LITE_WITH_CUDA
   bool multi_stream_{false};
-  cudaStream_t* io_stream_{nullptr};
-  cudaStream_t* exec_stream_{nullptr};
+  std::shared_ptr<cudaStream_t> io_stream_;
+  std::shared_ptr<cudaStream_t> exec_stream_;
   cudaEvent_t input_event_;
   std::vector<cudaEvent_t> output_events_;
   // only for multi exec stream mode.
......
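Note on ownership: std::make_shared<cudaStream_t>() only manages the lifetime of
the cudaStream_t handle object, not the CUDA stream it names; the stream itself
is still created through TargetWrapperCuda. A minimal alternative sketch, not
what this commit does, that ties cudaStreamDestroy to the last shared_ptr owner
via a custom deleter (MakeOwnedStream is a hypothetical helper):

#include <cuda_runtime.h>
#include <memory>

// Hypothetical helper: the CUDA stream dies with its last shared_ptr owner,
// so no explicit DestroyStream call is needed anywhere else.
inline std::shared_ptr<cudaStream_t> MakeOwnedStream() {
  auto* handle = new cudaStream_t;
  cudaStreamCreate(handle);
  return std::shared_ptr<cudaStream_t>(handle, [](cudaStream_t* s) {
    cudaStreamDestroy(*s);  // tear down the stream itself
    delete s;               // then free the handle storage
  });
}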
@@ -95,18 +95,18 @@ void CxxPaddleApiImpl::CudaEnvInit(std::vector<std::string> *passes) {
   if (config_.exec_stream()) {
     exec_stream_ = config_.exec_stream();
   } else {
-    exec_stream_ = new cudaStream_t();
-    TargetWrapperCuda::CreateStream(exec_stream_);
+    exec_stream_ = std::make_shared<cudaStream_t>();
+    TargetWrapperCuda::CreateStream(exec_stream_.get());
   }
   if (config_.io_stream()) {
     io_stream_ = config_.io_stream();
   } else {
-    io_stream_ = new cudaStream_t();
-    TargetWrapperCuda::CreateStream(io_stream_);
+    io_stream_ = std::make_shared<cudaStream_t>();
+    TargetWrapperCuda::CreateStream(io_stream_.get());
   }
-  raw_predictor_->set_exec_stream(exec_stream_);
-  raw_predictor_->set_io_stream(io_stream_);
+  raw_predictor_->set_exec_stream(exec_stream_.get());
+  raw_predictor_->set_io_stream(io_stream_.get());
   // init sync events.
   if (config_.multi_stream()) {
@@ -158,7 +158,8 @@ void CxxPaddleApiImpl::OutputSync() {
 std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) {
   auto *x = raw_predictor_->GetInput(i);
 #ifdef LITE_WITH_CUDA
-  return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_));
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(x, io_stream_.get()));
 #else
   return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
 #endif
@@ -168,7 +169,8 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
     int i) const {
   const auto *x = raw_predictor_->GetOutput(i);
 #ifdef LITE_WITH_CUDA
-  return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x, io_stream_));
+  return std::unique_ptr<lite_api::Tensor>(
+      new lite_api::Tensor(x, io_stream_.get()));
 #else
   return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
 #endif
@@ -250,10 +252,6 @@ CxxPaddleApiImpl::~CxxPaddleApiImpl() {
   for (size_t i = 0; i < output_events_.size(); ++i) {
     TargetWrapperCuda::DestroyEvent(output_events_[i]);
   }
-  if (multi_stream_) {
-    TargetWrapperCuda::DestroyStream(*io_stream_);
-    TargetWrapperCuda::DestroyStream(*exec_stream_);
-  }
 #endif
 }
......
@@ -167,8 +167,8 @@ class LITE_API CxxConfig : public ConfigBase {
 #endif
 #ifdef LITE_WITH_CUDA
   bool multi_stream_{false};
-  cudaStream_t* exec_stream_{nullptr};
-  cudaStream_t* io_stream_{nullptr};
+  std::shared_ptr<cudaStream_t> exec_stream_;
+  std::shared_ptr<cudaStream_t> io_stream_;
 #endif
 #ifdef LITE_WITH_MLU
   lite_api::MLUCoreVersion mlu_core_version_{lite_api::MLUCoreVersion::MLU_270};
@@ -217,12 +217,14 @@ class LITE_API CxxConfig : public ConfigBase {
 #ifdef LITE_WITH_CUDA
   void set_multi_stream(bool multi_stream) { multi_stream_ = multi_stream; }
   bool multi_stream() const { return multi_stream_; }
-  void set_exec_stream(cudaStream_t* exec_stream) {
+  void set_exec_stream(std::shared_ptr<cudaStream_t> exec_stream) {
     exec_stream_ = exec_stream;
   }
-  void set_io_stream(cudaStream_t* io_stream) { io_stream_ = io_stream; }
-  cudaStream_t* exec_stream() { return exec_stream_; }
-  cudaStream_t* io_stream() { return io_stream_; }
+  void set_io_stream(std::shared_ptr<cudaStream_t> io_stream) {
+    io_stream_ = io_stream;
+  }
+  std::shared_ptr<cudaStream_t> exec_stream() { return exec_stream_; }
+  std::shared_ptr<cudaStream_t> io_stream() { return io_stream_; }
 #endif
 #ifdef LITE_WITH_MLU
......
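With the setters above taking std::shared_ptr<cudaStream_t> by value, a CxxConfig
co-owns whatever stream the caller passes in, so the handle cannot dangle if the
caller's copy is released first. A minimal round-trip sketch (the include paths
are assumed from the Paddle-Lite tree, not shown in this commit):

#include <cassert>
#include <memory>
#include <cuda_runtime.h>
#include "lite/api/paddle_api.h"                // CxxConfig (path assumed)
#include "lite/backends/cuda/target_wrapper.h"  // TargetWrapperCuda (path assumed)

int main() {
  auto exec_stream = std::make_shared<cudaStream_t>();
  lite::TargetWrapperCuda::CreateStream(exec_stream.get());

  lite_api::CxxConfig config;
  config.set_exec_stream(exec_stream);  // the config now co-owns the handle
  assert(config.exec_stream().get() == exec_stream.get());
  // use_count() >= 2 here: the caller and the config each hold a reference.
  return 0;
}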
@@ -95,9 +95,9 @@ TEST(Resnet50, config_exec_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t exec_stream;
-  lite::TargetWrapperCuda::CreateStream(&exec_stream);
-  config.set_exec_stream(&exec_stream);
+  std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(exec_stream.get());
+  config.set_exec_stream(exec_stream);
   RunModel(config);
 }
@@ -106,9 +106,9 @@ TEST(Resnet50, config_io_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t io_stream;
-  lite::TargetWrapperCuda::CreateStream(&io_stream);
-  config.set_io_stream(&io_stream);
+  std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(io_stream.get());
+  config.set_io_stream(io_stream);
   RunModel(config);
 }
@@ -117,12 +117,12 @@ TEST(Resnet50, config_all_stream) {
   lite_api::CxxConfig config;
   config.set_model_dir(FLAGS_model_dir);
   config.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
-  cudaStream_t exec_stream;
-  lite::TargetWrapperCuda::CreateStream(&exec_stream);
-  config.set_exec_stream(&exec_stream);
-  cudaStream_t io_stream;
-  lite::TargetWrapperCuda::CreateStream(&io_stream);
-  config.set_io_stream(&io_stream);
+  std::shared_ptr<cudaStream_t> exec_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(exec_stream.get());
+  config.set_exec_stream(exec_stream);
+  std::shared_ptr<cudaStream_t> io_stream = std::make_shared<cudaStream_t>();
+  lite::TargetWrapperCuda::CreateStream(io_stream.get());
+  config.set_io_stream(io_stream);
   RunModel(config);
 }
......
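The tests above pair one predictor with one stream; the shared_ptr API also lets
several configs reference the same stream without lifetime juggling. A hedged
sketch in the same style (FLAGS_model_dir as in the tests; predictor creation via
the usual CreatePaddlePredictor entry point; error handling elided):

  std::shared_ptr<cudaStream_t> stream = std::make_shared<cudaStream_t>();
  lite::TargetWrapperCuda::CreateStream(stream.get());
  lite_api::CxxConfig config_a;
  lite_api::CxxConfig config_b;
  config_a.set_model_dir(FLAGS_model_dir);
  config_b.set_model_dir(FLAGS_model_dir);
  config_a.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
  config_b.set_valid_places({lite_api::Place{TARGET(kCUDA), PRECISION(kFloat)}});
  config_a.set_exec_stream(stream);
  config_b.set_exec_stream(stream);  // both predictors enqueue on one stream
  auto predictor_a = lite_api::CreatePaddlePredictor<lite_api::CxxConfig>(config_a);
  auto predictor_b = lite_api::CreatePaddlePredictor<lite_api::CxxConfig>(config_b);
  // The handle stays alive until the last owner (caller, config, or predictor)
  // releases it, so destroying one predictor first leaves no dangling stream.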