diff --git a/lite/backends/cuda/target_wrapper.cc b/lite/backends/cuda/target_wrapper.cc index 75e5e9887cc9f794c6b4d3b1721402364b0bd6da..a79eb7539318b52e21683bdf97bd534f7cc75fb5 100644 --- a/lite/backends/cuda/target_wrapper.cc +++ b/lite/backends/cuda/target_wrapper.cc @@ -74,5 +74,16 @@ void TargetWrapperCuda::MemcpyAsync(void* dst, } } +/* Sets `count` bytes starting at `devPtr` to `value` with cudaMemset. The fill is byte-wise, so it suits uniform byte patterns such as 0. The returned status goes through CUDA_CALL (defined outside this patch). */ void TargetWrapperCuda::MemsetSync(void* devPtr, int value, size_t count) { + CUDA_CALL(cudaMemset(devPtr, value, count)); +} + +/* Same byte-wise fill, but issued with cudaMemsetAsync on the caller-supplied `stream`. NOTE(review): nothing here waits on the stream, so callers presumably must order later reads themselves - confirm. */ void TargetWrapperCuda::MemsetAsync(void* devPtr, + int value, + size_t count, + const stream_t& stream) { + CUDA_CALL(cudaMemsetAsync(devPtr, value, count, stream)); +} + } // namespace lite } // namespace paddle diff --git a/lite/backends/cuda/target_wrapper.h b/lite/backends/cuda/target_wrapper.h index 50063007ce30cca7642a668f6c315903daf026bc..5b57ddf0043c59219aded9836cc0b1ad982eec2d 100644 --- a/lite/backends/cuda/target_wrapper.h +++ b/lite/backends/cuda/target_wrapper.h @@ -59,6 +59,13 @@ class TargetWrapper { size_t size, IoDirection dir, const stream_t& stream); + + /* Byte-wise fill of `count` bytes at `devPtr` with `value`; takes no stream argument. */ static void MemsetSync(void* devPtr, int value, size_t count); + + /* Byte-wise fill of `count` bytes at `devPtr` with `value`, issued on `stream`. */ static void MemsetAsync(void* devPtr, + int value, + size_t count, + const stream_t& stream); }; } // namespace lite } // namespace paddle diff --git a/lite/kernels/cuda/yolo_box_compute.cu b/lite/kernels/cuda/yolo_box_compute.cu index d04da30cc7eca00da1a4b74899aaa1eb8b4c7126..0a00c06cbfb9200e45d48a59aa26f2350c2cf9ab 100644 --- a/lite/kernels/cuda/yolo_box_compute.cu +++ b/lite/kernels/cuda/yolo_box_compute.cu @@ -171,6 +171,10 @@ void YoloBoxCompute::Run() { const int* imgsize = ImgSize->data(); float* boxes = Boxes->mutable_data(TARGET(kCUDA)); float* scores = Scores->mutable_data(TARGET(kCUDA)); + /* Zero both output buffers on `stream`; each byte count is numel() * sizeof(float). NOTE(review): presumably needed because later code does not write every output element - confirm. */ TargetWrapperCuda::MemsetAsync( + boxes, 0, Boxes->numel() * sizeof(float), stream); + TargetWrapperCuda::MemsetAsync( + scores, 0, Scores->numel() * sizeof(float), stream); const int n = X->dims()[0]; const int h = X->dims()[2];