refine the gpu config for performance optimization (#28291)

0f4b6247 · wangchaochaohu · GitHub · acc11c2a · 0f4b6247
隐藏空白更改
内联并排

Showing 1 changed file with 1 addition and 3 deletions

paddle/fluid/platform/gpu_launch_config.h +1 -3

未找到文件。
--- a/paddle/fluid/platform/gpu_launch_config.h
+++ b/paddle/fluid/platform/gpu_launch_config.h
@@ -53,10 +53,8 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D(
  // Need get from device
  const int thread_per_block = std::min(1024, context.GetMaxThreadsPerBlock());
-  // Suppose block count small than factor * sm, factor is a experiments value.
-  int factor = 4;
  const int block_count =
-      std::min(DivUp(physical_thread_count, thread_per_block), factor * sm);
+      std::min(DivUp(physical_thread_count, thread_per_block), sm);
  GpuLaunchConfig config;
  config.theory_thread_count.x = theory_thread_count;