Fix cast op failing to cast arrays whose size exceeds the int32 range (#34209)

* fix cast

Fix cast op failing to cast arrays whose size exceeds the int32 range (#34209)
* fix cast
038883fd · 李季 · GitHub · c8fb6fc4 · 038883fd · 038883fd
隐藏空白更改
内联并排

Showing 3 additions and 2 deletions

paddle/fluid/operators/cast_op.cu paddle/fluid/operators/cast_op.cu +2 -1

paddle/fluid/platform/gpu_launch_config.h paddle/fluid/platform/gpu_launch_config.h +1 -1

未找到文件。
--- a/paddle/fluid/operators/cast_op.cu
+++ b/paddle/fluid/operators/cast_op.cu
@@ -40,7 +40,8 @@ __global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) {
  int64_t idx = blockDim.x * blockIdx.x + threadIdx.x;
  using LoadT = AlignedVector<InT, VecSize>;
  using StoreT = AlignedVector<OutT, VecSize>;
-  for (int i = idx * VecSize; i < N; i += blockDim.x * gridDim.x * VecSize) {
+  for (int64_t i = idx * VecSize; i < N;
+       i += blockDim.x * gridDim.x * VecSize) {
    InT in_vec[VecSize];
    LoadT* in_value = reinterpret_cast<LoadT*>(&in_vec);
    *in_value = *reinterpret_cast<const LoadT*>(&in[i]);

--- a/paddle/fluid/platform/gpu_launch_config.h
+++ b/paddle/fluid/platform/gpu_launch_config.h
@@ -41,7 +41,7 @@ struct GpuLaunchConfig {
 };

 inline GpuLaunchConfig GetGpuLaunchConfig1D(
-    const platform::CUDADeviceContext& context, int element_count,
+    const platform::CUDADeviceContext& context, int64_t element_count,
 #ifdef PADDLE_WITH_HIP
    // HIP will throw GPU memory access fault if threads > 256
    int max_threads = 256) {