diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h
index 7cfeaab35b8c52225ff6e6cc2cdb8296621b30d9..2405f33d4f0ad83611e57d07a47e787eab439285 100644
--- a/paddle/fluid/platform/cuda_device_function.h
+++ b/paddle/fluid/platform/cuda_device_function.h
@@ -35,6 +35,31 @@ __forceinline__ __device__ T __shfl_sync(unsigned, T val, int src_line,
 #define FULL_WARP_MASK 0xFFFFFFFF
 #define CREATE_SHFL_MASK(mask, predicate) \
   mask = __ballot_sync(FULL_WARP_MASK, (predicate))
+// CUDA 9.0 and newer provide the *_sync warp shuffles as toolkit built-ins;
+// the wrappers below only forward to them.
+// NOTE(review): the `::` qualification assumes these wrappers are declared
+// inside a namespace, so the global name refers to the toolkit overloads and
+// not to the wrappers themselves -- confirm against the enclosing file.
+
+// Shuffle-down of `val` by `delta` lanes across the lanes named in `mask`,
+// at the built-in's default width (warpSize).
+template <typename T>
+__forceinline__ __device__ T __shfl_down_sync(unsigned mask, T val, int delta) {
+  // Passing the width explicitly makes this a four-argument call. This
+  // three-parameter template is not viable for it, so the call can no longer
+  // resolve back to this wrapper and recurse forever.
+  return ::__shfl_down_sync(mask, val, static_cast<unsigned>(delta),
+                            warpSize);
+}
+
+// Shuffle of `val` from lane `src_line`, in groups of `width` lanes, across
+// the lanes named in `mask`.
+template <typename T>
+__forceinline__ __device__ T __shfl_sync(unsigned mask, T val, int src_line,
+                                         int width) {
+  // Globally qualified so that lookup does not stop at this wrapper.
+  return ::__shfl_sync(mask, val, src_line, width);
+}
 #endif
 
 template <typename T>