提交 e4b113eb 编写于 作者: W Wilber 提交者: GitHub

modify yolobox_cuda to support multiple runs (#2245)

* modify yolobox_cuda to support multiple runs test=develop
上级 ea4a5854
...@@ -74,5 +74,16 @@ void TargetWrapperCuda::MemcpyAsync(void* dst, ...@@ -74,5 +74,16 @@ void TargetWrapperCuda::MemcpyAsync(void* dst,
} }
} }
// Fills `count` bytes of the memory starting at `devPtr` with the byte value
// `value` by forwarding the arguments unchanged to cudaMemset. The status
// returned by cudaMemset is handed to the CUDA_CALL macro for checking.
//
// NOTE(review): the CUDA runtime documents cudaMemset as potentially
// asynchronous with respect to the host when `devPtr` is device memory, so
// the "Sync" in the name may not guarantee completion on return - confirm
// before relying on it.
void TargetWrapperCuda::MemsetSync(void* devPtr, int value, size_t count) {
CUDA_CALL(cudaMemset(devPtr, value, count));
}
// Enqueues a fill of `count` bytes starting at `devPtr` with the byte value
// `value` on `stream` by forwarding the arguments unchanged to
// cudaMemsetAsync. The status returned by cudaMemsetAsync is handed to the
// CUDA_CALL macro for checking.
//
// No synchronization is performed here; the fill is ordered only with respect
// to other work submitted to the same `stream`.
void TargetWrapperCuda::MemsetAsync(void* devPtr,
int value,
size_t count,
const stream_t& stream) {
CUDA_CALL(cudaMemsetAsync(devPtr, value, count, stream));
}
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -59,6 +59,13 @@ class TargetWrapper<TARGET(kCUDA)> { ...@@ -59,6 +59,13 @@ class TargetWrapper<TARGET(kCUDA)> {
size_t size, size_t size,
IoDirection dir, IoDirection dir,
const stream_t& stream); const stream_t& stream);
// Sets `count` bytes starting at `devPtr` to `value`.
// NOTE(review): presumably a thin wrapper over cudaMemset - confirm against
// the definition in the corresponding source file.
static void MemsetSync(void* devPtr, int value, size_t count);
// Stream-taking counterpart of MemsetSync: sets `count` bytes starting at
// `devPtr` to `value`, with the operation associated with `stream`.
// NOTE(review): presumably a thin wrapper over cudaMemsetAsync - confirm
// against the definition in the corresponding source file.
static void MemsetAsync(void* devPtr,
int value,
size_t count,
const stream_t& stream);
}; };
} // namespace lite } // namespace lite
} // namespace paddle } // namespace paddle
...@@ -171,6 +171,10 @@ void YoloBoxCompute::Run() { ...@@ -171,6 +171,10 @@ void YoloBoxCompute::Run() {
const int* imgsize = ImgSize->data<int>(); const int* imgsize = ImgSize->data<int>();
float* boxes = Boxes->mutable_data<float>(TARGET(kCUDA)); float* boxes = Boxes->mutable_data<float>(TARGET(kCUDA));
float* scores = Scores->mutable_data<float>(TARGET(kCUDA)); float* scores = Scores->mutable_data<float>(TARGET(kCUDA));
TargetWrapperCuda::MemsetAsync(
boxes, 0, Boxes->numel() * sizeof(float), stream);
TargetWrapperCuda::MemsetAsync(
scores, 0, Scores->numel() * sizeof(float), stream);
const int n = X->dims()[0]; const int n = X->dims()[0];
const int h = X->dims()[2]; const int h = X->dims()[2];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册