Add deterministic behavior for embedding_grad and index_add. (#46040)

0c40d889 · Li Min · GitHub · 54a43981 · 0c40d889 · 0c40d889
Showing 15 additions and 0 deletions

paddle/phi/kernels/gpu/embedding_grad_kernel.cu paddle/phi/kernels/gpu/embedding_grad_kernel.cu +7 -0

paddle/phi/kernels/gpu/index_add_kernel.cu paddle/phi/kernels/gpu/index_add_kernel.cu +8 -0

未找到文件。
--- a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
@@ -23,6 +23,8 @@
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/embedding_util.h"

+DECLARE_bool(cudnn_deterministic);
+
 namespace phi {

 template <typename InT, typename OutT>
@@ -101,6 +103,11 @@ struct EmbeddingGradCUDAFunctor {
      const int gridx = 2 * dev_ctx_.GetSMCount();
      dim3 threads(128, 8);
      dim3 grids(gridx, 1);
+
+      if (FLAGS_cudnn_deterministic) {
+        VLOG(2) << "Run grad kernel of embedding with single thread.";
+        grids.x = 1;
+      }
      EmbeddingGrad<T, IdT><<<grids, threads, 0, dev_ctx_.stream()>>>(
          d_table, d_output, ids, N, K, D);
    }

--- a/paddle/phi/kernels/gpu/index_add_kernel.cu
+++ b/paddle/phi/kernels/gpu/index_add_kernel.cu
@@ -20,6 +20,8 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/utils/data_type.h"

+DECLARE_bool(cudnn_deterministic);
+
 namespace phi {

 using paddle::platform::PADDLE_CUDA_NUM_THREADS;
@@ -79,6 +81,12 @@ void IndexAddKernel(const Context& ctx,
  // todo(@limin29): inplace do not need copy.
  phi::Copy(ctx, x, ctx.GetPlace(), false, output);

+  if (FLAGS_cudnn_deterministic) {
+    VLOG(2) << "Run grad kernel of index_add with single thread.";
+    block_dim = 1;
+    grid_dim.x = 1;
+  }
+
  if (index_type == phi::DataType::INT64) {
    const int64_t* index_data = index.data<int64_t>();
    index_add_cuda_kernel<T, int64_t>