Unverified commit 503569a0, authored by zhangkaihuo, committed by GitHub

fix conv3d backward (#42502)

Parent d73eb38c
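As far as the diff below shows, the backward x_grad scatter no longer builds sorted/unique index tensors and calls phi::funcs::sparse::ScatterKernel; instead it writes the gathered gradient rows straight back through the index segment of the rulebook (rulebook_ptr + rulebook_len) using phi::funcs::ScatterCUDAKernel, whose trailing false argument looks like an overwrite flag (accumulate rather than overwrite). A minimal sketch of that kind of scatter-add, illustrative only and not the actual Paddle kernel (the name ScatterAddRows is made up), might look like:

// Sketch (not Paddle's kernel): scatter-add rows of src into dst. Several
// entries may target the same destination row, so accumulation is atomic.
__global__ void ScatterAddRows(const float* src,    // [n, channels] gathered grads
                               const int* indices,  // [n] destination row per entry
                               float* dst,          // [rows, channels], e.g. x_grad values
                               int n,
                               int channels) {
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int total = n * channels;
  for (int i = tid; i < total; i += gridDim.x * blockDim.x) {
    int row = i / channels;
    int col = i - row * channels;
    atomicAdd(dst + indices[row] * channels + col, src[i]);
  }
}

Such a kernel would be launched over n * channels threads, which lines up with the GetGpuLaunchConfig1D(dev_ctx, rulebook_len * in_channels, 1) call that the diff keeps.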
@@ -22,7 +22,7 @@ limitations under the License. */
 #include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
-#include "paddle/phi/kernels/funcs/sparse/scatter.cu.h"
+#include "paddle/phi/kernels/funcs/scatter.cu.h"
 #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
 #include "paddle/phi/kernels/sparse/gpu/convolution.cu.h"
@@ -203,38 +203,19 @@ void Conv3dGradGPUKernel(const GPUContext& dev_ctx,
   }
   // 4. scatter
-  // x_grad->ResizeAndAllocate(x.non_zero_elements().dims());
-  DenseTensorMeta index_meta(DataType::INT32, {rulebook_len}, DataLayout::NCHW);
-  DenseTensor out_index = phi::Empty(dev_ctx, std::move(index_meta));
-  DenseTensor unique_key = phi::Empty(
-      dev_ctx,
-      DenseTensorMeta(paddle::experimental::CppTypeToDataType<IntT>::Type(),
-                      {rulebook_len},
-                      DataLayout::NCHW));
-  DenseTensor unique_value = phi::Empty(dev_ctx, std::move(index_meta));
-  SortedAndUniqueIndex<GPUContext, IntT>(dev_ctx,
-                                         rulebook_ptr + rulebook_len,
-                                         rulebook_len,
-                                         &out_index,
-                                         &unique_key,
-                                         &unique_value);
   config = phi::backends::gpu::GetGpuLaunchConfig1D(
       dev_ctx, rulebook_len * in_channels, 1);
-  phi::funcs::sparse::ScatterKernel<T><<<config.block_per_grid.x,
-                                         config.thread_per_block.x,
+  phi::funcs::ScatterCUDAKernel<<<config.block_per_grid,
+                                  config.thread_per_block,
                                   0,
                                   dev_ctx.stream()>>>(
       d_x_features_ptr,
-      unique_value.data<int>(),
-      out_index.data<int>(),
-      x.nnz(),
+      rulebook_ptr + rulebook_len,
+      x_grad_values_ptr,
       rulebook_len,
       in_channels,
-      x_grad_values_ptr,
-      subm);
+      false);
 }
 template <typename T, typename Context>
......
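One way to read the switch from subm to false as the last kernel argument (an inference from the diff, not something the commit message states): when several rulebook entries map back to the same input nonzero, an overwriting scatter keeps only the last partial gradient, whereas an accumulating scatter sums them all. A small host-side illustration with hypothetical helper names, not Paddle code:

// Illustrative reference for the two scatter modes.
#include <cstdio>
#include <vector>

void ScatterRef(const std::vector<float>& src, const std::vector<int>& idx,
                std::vector<float>* dst, int channels, bool overwrite) {
  for (size_t i = 0; i < idx.size(); ++i) {
    for (int c = 0; c < channels; ++c) {
      float v = src[i * channels + c];
      float& out = (*dst)[idx[i] * channels + c];
      out = overwrite ? v : out + v;  // overwrite vs. accumulate
    }
  }
}

int main() {
  std::vector<int> idx = {0, 1, 0};          // entries 0 and 2 hit the same row
  std::vector<float> src = {1.f, 2.f, 3.f};  // channels = 1
  std::vector<float> over(2, 0.f), accum(2, 0.f);
  ScatterRef(src, idx, &over, 1, /*overwrite=*/true);
  ScatterRef(src, idx, &accum, 1, /*overwrite=*/false);
  std::printf("overwrite:  row0 = %g\n", over[0]);   // 3 (last write wins)
  std::printf("accumulate: row0 = %g\n", accum[0]);  // 4 (1 + 3)
  return 0;
}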