未验证 提交 c4be80f4 编写于 作者: W wangchaochaohu 提交者: GitHub

polish the code of cumsum and remove some unused code (#29303)

上级 2cd0bf57
......@@ -14,7 +14,6 @@ limitations under the License. */
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/reverse.h>
#include <thrust/scan.h>
#include "cub/cub.cuh"
......@@ -95,8 +94,6 @@ struct BlockPrefixCallbackOp {
};
// Bank-conflict-free transpose.
// Same as transposeCoalesced, except that the first tile dimension is padded
// to avoid shared memory bank conflicts.
template <typename T, int TILE_DIM, int BLOCK_ROWS>
__global__ void MatrixTranspose(T* odata, const T* idata, size_t height,
size_t width) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册