未验证 提交 8a071ffb 编写于 作者: C chengduo 提交者: GitHub

Merge pull request #10366 from chengduoZH/feature/fix_shlf_for_cuda9.0

Fix __shfl and __shfl_down for CUDA 9.0
......@@ -35,6 +35,16 @@ __forceinline__ __device__ T __shfl_sync(unsigned, T val, int src_line,
// Lane mask with all 32 bits set.
#define FULL_WARP_MASK 0xFFFFFFFF
// Assigns to `mask` the result of __ballot_sync(FULL_WARP_MASK, predicate).
// The expansion is a bare assignment expression with no trailing semicolon,
// so the use site supplies the terminating `;`. `predicate` is parenthesized
// so that an arbitrary expression can be passed.
#define CREATE_SHFL_MASK(mask, predicate) \
mask = __ballot_sync(FULL_WARP_MASK, (predicate))
// Typed three-argument wrapper over the built-in __shfl_down_sync intrinsic.
//
//   mask  - participating-lane mask, forwarded unchanged.
//   val   - value contributed by the calling lane.
//   delta - lane offset, forwarded to the intrinsic.
// Returns the value produced by the built-in shuffle for the calling lane.
//
// The built-in overloads declare `delta` as `unsigned int`. Forwarding the
// `int` parameter as-is makes this template an exact match for its own call
// (the built-in would need an int -> unsigned conversion), so the wrapper
// would call itself instead of the intrinsic. Converting `delta` to unsigned
// makes the built-in overload the exact match.
// NOTE(review): for a T that has no built-in overload the call can still
// resolve back to this template -- confirm the set of T used by callers.
template <typename T>
__forceinline__ __device__ T __shfl_down_sync(unsigned mask, T val, int delta) {
  return __shfl_down_sync(mask, val, static_cast<unsigned>(delta));
}
// Typed four-argument wrapper that forwards to a call of __shfl_sync.
//
//   mask     - participating-lane mask, forwarded unchanged.
//   val      - value contributed by the calling lane.
//   src_line - source lane argument, forwarded unchanged.
//   width    - width argument, forwarded unchanged.
// Returns the result of the forwarded call.
//
// NOTE(review): which __shfl_sync overload the inner call selects depends on
// T; presumably the built-in non-template overload is chosen for the
// arithmetic types it covers -- confirm for every T used by callers, since a
// T without a built-in overload would resolve back to this template.
template <typename T>
__forceinline__ __device__ T __shfl_sync(unsigned mask, T val, int src_line,
                                         int width) {
  T shuffled = __shfl_sync(mask, val, src_line, width);
  return shuffled;
}
#endif
template <typename T>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册