提交 43f1358f 编写于 作者: P Pei Yang 提交者: 石晓伟

fix sequence pool cuda (#2466)

* add sequence_pool cuda kernel, test=develop

* fix sequence_pool cuda,test=develop

* fix and complete unittest, test=develop

* fix macro of sequence_pool cuda, test=develop
上级 e1b67433
......@@ -17,19 +17,6 @@
#include "lite/core/target_wrapper.h"
#include "lite/kernels/cuda/sequence_pool_compute.h"
/// Block size (threads per block) that the single-argument CUDA_GET_BLOCKS
/// divides by when computing a block count.
/// NOTE(review): presumably also passed as the block dimension at the kernel
/// launch sites -- those are not visible in this excerpt; confirm.
const int CUDA_NUM_THREADS = 512;
/// Expands to the header of a `for` loop; the statement that follows the
/// macro invocation becomes the loop body.
///
/// `i` is declared as an `int` and starts at the calling thread's linear
/// index along x (blockIdx.x * blockDim.x + threadIdx.x). Each iteration
/// advances it by blockDim.x * gridDim.x, i.e. the total number of threads
/// launched along x, and the loop ends once `i` reaches `n`. As a result the
/// threads of the grid together visit every index in [0, n) even when `n`
/// exceeds the number of launched threads.
///
/// Only the x dimension of the launch configuration is consulted. `n` is
/// parenthesised in the comparison, so an expression argument is safe there.
#define CUDA_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
/// CUDA: number of blocks needed for N threads.
///
/// Divides N by CUDA_NUM_THREADS, rounding up for positive N, so that
/// blocks of CUDA_NUM_THREADS threads are enough to cover N work items.
///
/// @param N  number of work items (threads) to cover.
/// @return   (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS.
inline int CUDA_GET_BLOCKS(const int N) {
  // Bias the numerator so the truncating integer division rounds up.
  const int biased = N + CUDA_NUM_THREADS - 1;
  return biased / CUDA_NUM_THREADS;
}
/// Number of blocks needed for N threads when each block holds `base` threads.
///
/// Divides N by `base`, rounding up for positive N and positive `base`.
///
/// @param N     number of work items (threads) to cover.
/// @param base  threads per block; must be non-zero (it is the divisor).
/// @return      (N + base - 1) / base.
inline int CUDA_GET_BLOCKS(const int N, const int base) {
  // Bias the numerator so the truncating integer division rounds up.
  const int biased = N + base - 1;
  return biased / base;
}
namespace paddle {
namespace lite {
namespace kernels {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册