未验证 提交 c89d6518 编写于 作者: R Ruibiao Chen 提交者: GitHub

Reduce time variation for cuda_managed_memory_test (#42458)

上级 d90e24ac
......@@ -107,7 +107,7 @@ TEST(ManagedMemoryTest, OversubscribeGPUMemoryTest) {
uint64_t available_mem = platform::GpuAvailableMemToAlloc();
uint64_t n_data = available_mem * 2 / sizeof(int) +
1; // requires more than 2 * available_mem bytes
- uint64_t step = 1024;
+ uint64_t step = std::max(n_data / 1024, static_cast<uint64_t>(1));
AllocationPtr data_allocation =
Alloc(platform::CUDAPlace(0), n_data * sizeof(int));
AllocationPtr sum_allocation = Alloc(platform::CUDAPlace(0), sizeof(int));
......@@ -115,8 +115,8 @@ TEST(ManagedMemoryTest, OversubscribeGPUMemoryTest) {
int* sum = static_cast<int*>(sum_allocation->ptr());
(*sum) = 0;
- write_kernel<<<5120, 1024>>>(data, n_data, step);
- sum_kernel<<<5120, 1024>>>(data, n_data, step, sum);
+ write_kernel<<<1, 1024>>>(data, n_data, step);
+ sum_kernel<<<1, 1024>>>(data, n_data, step, sum);
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize());
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册