From 85a11c4781158fb17b0306f0f8085f0b58012909 Mon Sep 17 00:00:00 2001 From: Zhang Zheng <32410583+ZzSean@users.noreply.github.com> Date: Tue, 22 Feb 2022 11:04:02 +0800 Subject: [PATCH] Modify the implementation of BlockXReduce to fit more scenes (#39554) * Modify the implementation of BlockXReduce to fit more scenes * fix * fix --- paddle/phi/kernels/primitive/compute_primitives.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/phi/kernels/primitive/compute_primitives.h b/paddle/phi/kernels/primitive/compute_primitives.h index 2d9a7522515..4f3c069f3b2 100644 --- a/paddle/phi/kernels/primitive/compute_primitives.h +++ b/paddle/phi/kernels/primitive/compute_primitives.h @@ -110,7 +110,11 @@ __device__ __forceinline__ T BlockXReduce(T val, ReduceOp reducer) { T temp = paddle::platform::CudaShuffleDownSync(mask, val, stride); val = reducer(val, temp); } - return val; + if (threadIdx.x == 0) { + shared[threadIdx.y] = val; + } + __syncthreads(); + return shared[threadIdx.y]; } /** -- GitLab