fix batch norm memory issue (#41717)

* try to fix batch norm memory issue * fix batch norm memroy alloc bug * polish some code

fix batch norm memory issue (#41717)
* try to fix batch norm memory issue * fix batch norm memroy alloc bug * polish some code
42abcc08 · hong · GitHub · e7f0aa38 · 42abcc08 · 42abcc08
Showing with 8 addition and 5 deletion

paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +4 -3

paddle/phi/kernels/gpu/batch_norm_kernel.cu paddle/phi/kernels/gpu/batch_norm_kernel.cu +4 -2

未找到文件。
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -570,7 +570,8 @@ void BatchNormGradRawKernel(const Context &ctx,
                  /*sizeInBytes=*/&workspace_size));
      workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
-      workspace_ptr = ctx.template Alloc<T>(&workspace_tensor);
+      workspace_ptr =
+          static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
      PADDLE_ENFORCE_GPU_SUCCESS(
          paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
@@ -603,8 +604,8 @@ void BatchNormGradRawKernel(const Context &ctx,
              /*activationDesc=*/nullptr,
              /*workspace=*/workspace_ptr,
              /*workSpaceSizeInBytes=*/workspace_size,
-              /*reserveSpace=*/const_cast<T *>(
+              /*reserveSpace=*/const_cast<uint8_t *>(
-                  reserve_space->template data<T>()),
+                  reserve_space->template data<uint8_t>()),
              /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #endif  // CUDNN_VERSION_MIN(7, 4, 1)
      if (!called) {

--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -498,9 +498,11 @@ void BatchNormKernel(const Context &ctx,
                  /*sizeInBytes=*/&reserve_space_size));
      reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
-      reserve_space_ptr = ctx.template Alloc<T>(reserve_space);
+      reserve_space_ptr =
+          static_cast<void *>(ctx.template Alloc<uint8_t>(reserve_space));
      workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
-      workspace_ptr = ctx.template Alloc<T>(&workspace_tensor);
+      workspace_ptr =
+          static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
      PADDLE_ENFORCE_GPU_SUCCESS(
          paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
              handle,