未验证 提交 c4be80f4 编写于 作者: W wangchaochaohu 提交者: GitHub

polish the code of cumsum and remove some unused code (#29303)

上级 2cd0bf57
...@@ -14,7 +14,6 @@ limitations under the License. */ ...@@ -14,7 +14,6 @@ limitations under the License. */
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/reverse.h> #include <thrust/reverse.h>
#include <thrust/scan.h> #include <thrust/scan.h>
#include "cub/cub.cuh" #include "cub/cub.cuh"
...@@ -95,8 +94,6 @@ struct BlockPrefixCallbackOp { ...@@ -95,8 +94,6 @@ struct BlockPrefixCallbackOp {
}; };
// No bank-conflict transpose // No bank-conflict transpose
// Same as transposeCoalesced except the first tile dimension is padded
// to avoid shared memory bank conflicts.
template <typename T, int TILE_DIM, int BLOCK_ROWS> template <typename T, int TILE_DIM, int BLOCK_ROWS>
__global__ void MatrixTranspose(T* odata, const T* idata, size_t height, __global__ void MatrixTranspose(T* odata, const T* idata, size_t height,
size_t width) { size_t width) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册