From 626c1edccd7dee19b7a08610db9d4e3e82c9a12c Mon Sep 17 00:00:00 2001
From: feng_shuai
Date: Wed, 9 Jun 2021 10:40:33 +0800
Subject: [PATCH] fix the bug of yolo_box which can't run on nano and tx2 (#33422)

---
 paddle/fluid/operators/detection/yolo_box_op.cu | 9 ++++++++-
 paddle/fluid/platform/gpu_launch_config.h       | 4 ++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/detection/yolo_box_op.cu b/paddle/fluid/operators/detection/yolo_box_op.cu
index ef0b870ebfd..83a0eb87d02 100644
--- a/paddle/fluid/operators/detection/yolo_box_op.cu
+++ b/paddle/fluid/operators/detection/yolo_box_op.cu
@@ -120,7 +120,14 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel<T> {
     platform::GpuLaunchConfig config =
         platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), n * box_num);
 
-    KeYoloBoxFw<T><<<config.block_per_grid, config.thread_per_block, 0,
+    dim3 thread_num = config.thread_per_block;
+#ifdef WITH_NV_JETSON
+    if (config.compute_capability == 53 || config.compute_capability == 62) {
+      thread_num = 512;
+    }
+#endif
+
+    KeYoloBoxFw<T><<<config.block_per_grid, thread_num, 0,
                      ctx.cuda_device_context().stream()>>>(
         input_data, imgsize_data, boxes_data, scores_data, conf_thresh,
         anchors_data, n, h, w, an_num, class_num, box_num, input_size_h,
diff --git a/paddle/fluid/platform/gpu_launch_config.h b/paddle/fluid/platform/gpu_launch_config.h
index 6c265677d63..4da91b4e764 100644
--- a/paddle/fluid/platform/gpu_launch_config.h
+++ b/paddle/fluid/platform/gpu_launch_config.h
@@ -37,6 +37,7 @@ struct GpuLaunchConfig {
   dim3 theory_thread_count = dim3(1, 1, 1);
   dim3 thread_per_block = dim3(1, 1, 1);
   dim3 block_per_grid = dim3(1, 1, 1);
+  int compute_capability = 0;
 };
 
 inline GpuLaunchConfig GetGpuLaunchConfig1D(
@@ -67,11 +68,14 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D(
       std::min(max_threads, context.GetMaxThreadsPerBlock());
   const int block_count =
       std::min(DivUp(physical_thread_count, thread_per_block), sm);
+  // Get compute_capability
+  const int capability = context.GetComputeCapability();
 
   GpuLaunchConfig config;
   config.theory_thread_count.x = theory_thread_count;
   config.thread_per_block.x = thread_per_block;
   config.block_per_grid.x = block_count;
+  config.compute_capability = capability;
   return config;
 }
 
-- 
GitLab