diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h index c88965e6defea5cd2c6f042ab179877ef30f45ce..f55d483de14621da6068ec33e1ede5eb0290a297 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce.h @@ -61,7 +61,7 @@ void Reduce(const CUDAContext& dev_ctx, gpuStream_t stream = dev_ctx.stream(); - if (out_dtype != pten::DataType::UNDEFINED) { + if (out_dtype != pten::DataType::UNDEFINED && out_dtype != x.dtype()) { PD_DISPATCH_FLOATING_AND_INTEGRAL_AND_COMPLEX_TYPES( out_dtype, "TensorReduceFunctorImpl", ([&] { pten::detail::TensorReduceFunctorImpl(