提交 b0944dc7 编写于 作者: M Megvii Engine Team

fix(mgb): fix tx1 compile

GitOrigin-RevId: 0eeb62ff7389085d9a0d7f3488b01916423133d3
上级 ce88e6c4
......@@ -5,6 +5,10 @@
namespace {
// Compatibility shim: when compiling with CUDA toolkit major version 9 or
// newer, rewrite the legacy mask-less __shfl_down(x, y) spelling to
// __shfl_down_sync with all 32 bits of the participant mask set (0xffffffff).
// With older toolkits the macro is not defined, so __shfl_down resolves to
// whatever the toolkit itself provides.
// NOTE(review): presumably needed because older toolkits (e.g. the one used
// for TX1 builds) do not provide __shfl_down_sync -- confirm.
#if __CUDACC_VER_MAJOR__ >= 9
#define __shfl_down(x, y) __shfl_down_sync(0xffffffffu, x, y)
#endif
// each thread computes one bit
const int THREADS_PER_BLOCK = 64;
......@@ -95,7 +99,7 @@ __device__ __forceinline__ uint32_t warp_reduce_min_brdcst(uint32_t val) {
static_assert(WARP_SIZE == 32, "warp size != 32");
#pragma unroll
for (uint32_t offset = WARP_SIZE / 2; offset; offset /= 2)
val = min(val, __shfl_down_sync(0xFFFFFFFF, val, offset));
val = min(val, __shfl_down(val, offset));
if (!threadIdx.x)
ans = val;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册