diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h index 1f1b8ddd5f412d494572a2e9cf154c9c93095b8d..16fc70b9ab7a1c8e421f0da55a65dc5b4406de2c 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h @@ -769,7 +769,7 @@ static void LaunchReduceKernel(const Tx* x_data, } } -void TensorCopy(const DenseTensor& src, DenseTensor* dst) { +static void AsyncCopy(const DenseTensor& src, DenseTensor* dst) { paddle::platform::DeviceContextPool& pool = paddle::platform::DeviceContextPool::Instance(); const paddle::platform::CUDADeviceContext* dev_ctx; @@ -817,7 +817,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x, if (config.reduce_num == 1) { auto out_dims = y->dims(); if (x.dtype() == y->dtype()) { - TensorCopy(x, y); + AsyncCopy(x, y); y->Resize(out_dims); } else { PD_VISIT_ALL_TYPES(y->dtype(), "CastKernelImpl", ([&] {