提交 4b54c594 编写于 作者: jiweibo

update name format. test=develop

上级 df43322a
......@@ -355,7 +355,7 @@ void Predictor::GenRuntimeProgram() {
program_generated_ = true;
#ifdef LITE_WITH_CUDA
if (!cuda_use_multi_stream_) {
program_->UpdateContext(cuda_exec_stream_, cuda_io_stream_);
program_->UpdateCudaContext(cuda_exec_stream_, cuda_io_stream_);
}
#endif
}
......
......@@ -29,7 +29,7 @@
#ifdef LITE_WITH_CUDA
#include "lite/backends/cuda/cuda_utils.h"
#include "lite/backends/cuda/stream_wrapper.h"
#include "lite/backends/cuda/stream_guard.h"
#endif
namespace paddle {
......@@ -254,12 +254,12 @@ class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
#ifdef LITE_WITH_CUDA
bool cuda_use_multi_stream_{false};
std::unique_ptr<lite::StreamWrapper> cuda_io_stream_;
std::unique_ptr<lite::StreamWrapper> cuda_exec_stream_;
std::unique_ptr<lite::CudaStreamGuard> cuda_io_stream_;
std::unique_ptr<lite::CudaStreamGuard> cuda_exec_stream_;
cudaEvent_t cuda_input_event_;
std::vector<cudaEvent_t> cuda_output_events_;
// only used for multi exec stream mode.
std::vector<lite::StreamWrapper> cuda_exec_streams_;
std::vector<lite::CudaStreamGuard> cuda_exec_streams_;
#endif
};
......
......@@ -97,14 +97,14 @@ void CxxPaddleApiImpl::InitCudaEnv(std::vector<std::string> *passes) {
// init two streams for each predictor.
if (config_.cuda_exec_stream()) {
cuda_exec_stream_.reset(
new lite::StreamWrapper(*config_.cuda_exec_stream()));
new lite::CudaStreamGuard(*config_.cuda_exec_stream()));
} else {
cuda_exec_stream_.reset(new lite::StreamWrapper());
cuda_exec_stream_.reset(new lite::CudaStreamGuard());
}
if (config_.cuda_io_stream()) {
cuda_io_stream_.reset(new lite::StreamWrapper(*config_.cuda_io_stream()));
cuda_io_stream_.reset(new lite::CudaStreamGuard(*config_.cuda_io_stream()));
} else {
cuda_io_stream_.reset(new lite::StreamWrapper());
cuda_io_stream_.reset(new lite::CudaStreamGuard());
}
raw_predictor_->set_cuda_exec_stream(cuda_exec_stream_->stream());
......
......@@ -29,7 +29,7 @@
namespace paddle {
namespace lite {
void RunModel(lite_api::CxxConfig config) {
void RunModel(const lite_api::CxxConfig& config) {
auto predictor = lite_api::CreatePaddlePredictor(config);
const int batch_size = 4;
const int channels = 3;
......
......@@ -9,6 +9,6 @@ nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_deps})
nv_library(nvtx_wrapper SRCS nvtx_wrapper DEPS ${cuda_deps})
lite_cc_library(cuda_context SRCS context.cc DEPS device_info)
lite_cc_library(stream_wrapper SRCS stream_wrapper.cc DEPS target_wrapper_cuda ${cuda_deps})
lite_cc_library(stream_guard SRCS stream_guard.cc DEPS target_wrapper_cuda ${cuda_deps})
add_subdirectory(math)
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/cuda/stream_wrapper.h"
#include "lite/backends/cuda/stream_guard.h"
#include "lite/backends/cuda/cuda_utils.h"
namespace paddle {
......
......@@ -21,24 +21,35 @@
namespace paddle {
namespace lite {
class StreamWrapper {
// CudaStreamGuard is a encapsulation of cudaStream_t, which can accept external
// stream or internally created stream
//
// std::unique_ptr<CudaStreamGuard> sm;
//
// external stream: exec_stream
// sm.reset(new CudaStreamGuard(exec_stream));
// internal stream
// sm.reset(new CudaStreamGuard());
// get cudaStream_t
// sm->stream();
class CudaStreamGuard {
public:
explicit StreamWrapper(cudaStream_t stream)
: stream_(stream), owner_(false) {}
StreamWrapper() : owner_(true) {
explicit CudaStreamGuard(cudaStream_t stream)
: stream_(stream), owned_(false) {}
CudaStreamGuard() : owned_(true) {
lite::TargetWrapperCuda::CreateStream(&stream_);
}
~StreamWrapper() {
if (owner_) {
~CudaStreamGuard() {
if (owned_) {
lite::TargetWrapperCuda::DestroyStream(stream_);
}
}
cudaStream_t stream() { return stream_; }
bool owner() { return owner_; }
bool owned() { return owned_; }
private:
cudaStream_t stream_;
bool owner_;
bool owned_{false};
};
} // namespace lite
......
......@@ -71,7 +71,7 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
std::map<std::string, cpp::VarDesc> origin_var_maps;
auto& main_block = *desc->GetBlock<cpp::BlockDesc>(0);
auto var_size = main_block.VarsSize();
for (int i = 0; i < static_cast<int>(var_size); i++) {
for (size_t i = 0; i < var_size; i++) {
auto v = main_block.GetVar<cpp::VarDesc>(i);
auto name = v->Name();
origin_var_maps.emplace(name, *v);
......@@ -144,9 +144,9 @@ void RuntimeProgram::UpdateVarsOfProgram(cpp::ProgramDesc* desc) {
}
#ifdef LITE_WITH_CUDA
void RuntimeProgram::UpdateContext(cudaStream_t exec, cudaStream_t io) {
void RuntimeProgram::UpdateCudaContext(cudaStream_t exec, cudaStream_t io) {
for (auto& inst : instructions_) {
inst.UpdateContext(exec, io);
inst.UpdateCudaContext(exec, io);
}
}
#endif
......
......@@ -129,7 +129,7 @@ struct Instruction {
}
}
void Sync() const { kernel_->mutable_context()->As<CUDAContext>().Sync(); }
void UpdateContext(cudaStream_t exec, cudaStream_t io) {
void UpdateCudaContext(cudaStream_t exec, cudaStream_t io) {
if (kernel_->target() == TargetType::kCUDA) {
kernel_->mutable_context()->As<CUDAContext>().SetExecStream(exec);
kernel_->mutable_context()->As<CUDAContext>().SetIoStream(io);
......@@ -223,9 +223,9 @@ class LITE_API RuntimeProgram {
void UpdateVarsOfProgram(cpp::ProgramDesc* desc);
#ifdef LITE_WITH_CUDA
// UpdateContext will update the exec stream and io stream of all kernels in
// the program.
void UpdateContext(cudaStream_t exec, cudaStream_t io);
// UpdateCudaContext will update the exec stream and io stream of all kernels
// in the program.
void UpdateCudaContext(cudaStream_t exec, cudaStream_t io);
#endif
private:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册