diff --git a/paddle/fluid/operators/detection/yolo_box_op.cu b/paddle/fluid/operators/detection/yolo_box_op.cu index ef0b870ebfdf7874ea1e80f8716bc496f3aca890..83a0eb87d02dd549521b68a112c5d9eea6055159 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cu +++ b/paddle/fluid/operators/detection/yolo_box_op.cu @@ -120,7 +120,14 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel { platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), n * box_num); - KeYoloBoxFw<<<<>>( input_data, imgsize_data, boxes_data, scores_data, conf_thresh, anchors_data, n, h, w, an_num, class_num, box_num, input_size_h, diff --git a/paddle/fluid/platform/gpu_launch_config.h b/paddle/fluid/platform/gpu_launch_config.h index 6c265677d63e99c173b7fdce8de362dc9b381352..4da91b4e764a5285b005ebc459c4dfa4e52df9cd 100644 --- a/paddle/fluid/platform/gpu_launch_config.h +++ b/paddle/fluid/platform/gpu_launch_config.h @@ -37,6 +37,7 @@ struct GpuLaunchConfig { dim3 theory_thread_count = dim3(1, 1, 1); dim3 thread_per_block = dim3(1, 1, 1); dim3 block_per_grid = dim3(1, 1, 1); + int compute_capability = 0; }; inline GpuLaunchConfig GetGpuLaunchConfig1D( @@ -67,11 +68,14 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D( std::min(max_threads, context.GetMaxThreadsPerBlock()); const int block_count = std::min(DivUp(physical_thread_count, thread_per_block), sm); + // Get compute_capability + const int capability = context.GetComputeCapability(); GpuLaunchConfig config; config.theory_thread_count.x = theory_thread_count; config.thread_per_block.x = thread_per_block; config.block_per_grid.x = block_count; + config.compute_capability = capability; return config; }