未验证 提交 2dde0eb0 编写于 作者: Z Zhang Zheng 提交者: GitHub

optimize performance of multiple-dimension reduce (#33761)

上级 4d259b91
...@@ -54,7 +54,7 @@ struct FastDivMod { ...@@ -54,7 +54,7 @@ struct FastDivMod {
return (t + n) >> shift_val; return (t + n) >> shift_val;
} }
__device__ __forceinline__ DivModT Divmod(uint32_t n) { __device__ __forceinline__ DivModT Divmod(uint32_t n) const {
uint32_t q = Div(n); uint32_t q = Div(n);
DivModT result = {q, n - q * divisor}; DivModT result = {q, n - q * divisor};
return result; return result;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册