diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h
index 42fee144883734d38bcaa824bed5d1776cc662c3..df14b0a21f24dbbc81919bab5f9d5fce98f78a88 100644
--- a/paddle/phi/kernels/funcs/reduce_function.h
+++ b/paddle/phi/kernels/funcs/reduce_function.h
@@ -473,7 +473,11 @@ struct ReduceConfig {
     bool not_higher = x_dim[0] >= max_grid_z;
 #endif
     if (reduce_last_dim && (reduce_rank == 1)) {
+#ifdef PADDLE_WITH_XPU_KP
+      reduce_type = static_cast<int>(ReduceType::kReduceAny);
+#else
       reduce_type = static_cast<int>(ReduceType::kReduceLastDim);
+#endif
     } else if (reduce_rank == 1) {
       reduce_type = static_cast<int>(ReduceType::kReduceHigherDim);
       if (rank == 3 && not_higher) {
@@ -588,7 +592,7 @@ struct ReduceConfig {
   void SetBlockDim() {
     // init
     should_reduce_again = false;
-    dim3 block_dim;
+    dim3 block_dim(1, 1, 1);
     dim3 grid_dim(left_num, 1, 1);
     blocking_size = reduce_num;
 
diff --git a/paddle/phi/kernels/primitive/compute_primitives_xpu2.h b/paddle/phi/kernels/primitive/compute_primitives_xpu2.h
index 4d65dd6dd5d871c45a2d0d5f408debdd07392b48..0e77b11988e76646278750a3c4d42687955ae84c 100644
--- a/paddle/phi/kernels/primitive/compute_primitives_xpu2.h
+++ b/paddle/phi/kernels/primitive/compute_primitives_xpu2.h
@@ -329,14 +329,12 @@ __device__ __forceinline__ void Reduce(T* out,
                                        ReduceFunctor reducer,
                                        bool reduce_last_dim) {
   if (Mode == details::kGlobalMode) {
+    if (reduce_last_dim) {
 #pragma unroll
-    for (int i = 0; i < NY; ++i) {
-#pragma unroll
-      for (int j = 0; j < NX; ++j) {
-        out[i] = reducer(out[i], in[i * NX + j]);
+      for (int i = 0; i < NY * NX; i++) {  // reduce along blockDim.x
+        details::BlockXReduce(&out[i], reducer);
       }
     }
-    details::BlockXReduce(out, reducer);
   } else {  // else kLocalMode
 #pragma unroll
     for (int i = 0; i < NY; ++i) {