提交 c90b82a6 编写于 作者: Y Yiqun Liu 提交者: ceci3

Fix error in CUDA kernel of beam_search. (#15957)

test=develop
上级 4cfc5b49
...@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam( ...@@ -119,6 +119,18 @@ __device__ __forceinline__ int SelectTopBeam(
__syncthreads(); __syncthreads();
} }
if ((num_used_threads & 0x1) != 0) {
// If num_used_threads is an odd number, merge the local top_beam of thread
// num_used_threads - 1 into the local top_beam of thread 0.
if (tid_of_seq == 0) {
int index_in_sh = (num_used_threads - 1 + tid) * beam_size;
for (int i = 0; i < beam_size; i++) {
Insert(top_beam_local, top_beam[index_in_sh], beam_size);
index_in_sh++;
}
}
}
num_used_threads = num_used_threads >> 1; num_used_threads = num_used_threads >> 1;
if (tid_of_seq < num_used_threads) { if (tid_of_seq < num_used_threads) {
int index_in_sh = (num_used_threads + tid) * beam_size; int index_in_sh = (num_used_threads + tid) * beam_size;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册