From a4a19ba4fdc6088b7d3eac0e209ac30fe3d0a345 Mon Sep 17 00:00:00 2001 From: Wilber Date: Wed, 23 Oct 2019 22:39:51 +0800 Subject: [PATCH] modify yolobox_cuda to support multiple runs (#2245) * modify yolobox_cuda to support multiple runs test=develop --- lite/backends/cuda/target_wrapper.cc | 11 +++++++++++ lite/backends/cuda/target_wrapper.h | 7 +++++++ lite/kernels/cuda/yolo_box_compute.cu | 4 ++++ 3 files changed, 22 insertions(+) diff --git a/lite/backends/cuda/target_wrapper.cc b/lite/backends/cuda/target_wrapper.cc index 75e5e9887c..a79eb75393 100644 --- a/lite/backends/cuda/target_wrapper.cc +++ b/lite/backends/cuda/target_wrapper.cc @@ -74,5 +74,16 @@ void TargetWrapperCuda::MemcpyAsync(void* dst, } } +void TargetWrapperCuda::MemsetSync(void* devPtr, int value, size_t count) { /* Forwards (devPtr, value, count) unchanged to cudaMemset; the returned status is handed to CUDA_CALL (presumably a status check - confirm in the macro definition). */ + CUDA_CALL(cudaMemset(devPtr, value, count)); +} + +void TargetWrapperCuda::MemsetAsync(void* devPtr, + int value, + size_t count, + const stream_t& stream) { /* Same as MemsetSync but forwards to cudaMemsetAsync on the caller-supplied stream; this function itself does not synchronize the stream. */ + CUDA_CALL(cudaMemsetAsync(devPtr, value, count, stream)); +} + } // namespace lite } // namespace paddle diff --git a/lite/backends/cuda/target_wrapper.h b/lite/backends/cuda/target_wrapper.h index 50063007ce..5b57ddf004 100644 --- a/lite/backends/cuda/target_wrapper.h +++ b/lite/backends/cuda/target_wrapper.h @@ -59,6 +59,13 @@ class TargetWrapper { size_t size, IoDirection dir, const stream_t& stream); + + static void MemsetSync(void* devPtr, int value, size_t count); /* Defined in target_wrapper.cc as a wrapper over cudaMemset. */ + + static void MemsetAsync(void* devPtr, + int value, + size_t count, + const stream_t& stream); /* Defined in target_wrapper.cc as a wrapper over cudaMemsetAsync using the given stream. */ }; } // namespace lite } // namespace paddle diff --git a/lite/kernels/cuda/yolo_box_compute.cu b/lite/kernels/cuda/yolo_box_compute.cu index d04da30cc7..0a00c06cbf 100644 --- a/lite/kernels/cuda/yolo_box_compute.cu +++ b/lite/kernels/cuda/yolo_box_compute.cu @@ -171,6 +171,10 @@ void YoloBoxCompute::Run() { const int* imgsize = ImgSize->data(); float* boxes = Boxes->mutable_data(TARGET(kCUDA)); float* scores = Scores->mutable_data(TARGET(kCUDA)); + 
TargetWrapperCuda::MemsetAsync( + boxes, 0, Boxes->numel() * sizeof(float), stream); /* Zero-fills the boxes buffer (numel() * sizeof(float) bytes) on `stream`. NOTE(review): per the commit subject this reset on each Run() is what enables multiple runs - confirm against the kernel that follows. */ + TargetWrapperCuda::MemsetAsync( + scores, 0, Scores->numel() * sizeof(float), stream); /* Same zero-fill for the scores buffer, issued on the same stream. */ const int n = X->dims()[0]; const int h = X->dims()[2]; -- GitLab