提交 2ce5c9d4 编写于 作者: Y Yancey1989

fix gpu implement

上级 07e87ff1
......@@ -71,7 +71,8 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
auto seq_dims = seq.dims();
PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
PADDLE_ENFORCE_EQ(seq_dims[0],
static_cast<int64_t>(abs_offset_lod[level].back()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length.");
......@@ -80,17 +81,17 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
"The input padding should be a 3-D Tensor of shape "
"[max_sequence_length, num_sequences, sequence_width].");
size_t max_sequence_length = MaximumSequenceLength(lod, level);
int64_t max_sequence_length = MaximumSequenceLength(lod, level);
PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
"The first dimension of Tensor padding should be the "
"maximum length of all sequences in LoDTensor seq.");
const size_t num_sequences = abs_offset_lod[level].size() - 1;
const int64_t num_sequences = abs_offset_lod[level].size() - 1;
PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
"The second dimension of Tensor padding should be the "
"number of sequences in LoDTensor seq.");
const size_t sequence_width = seq.numel() / seq_dims[0];
const int64_t sequence_width = seq.numel() / seq_dims[0];
PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq.");
......@@ -101,7 +102,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
return;
}
const size_t kBlockSize = 512;
const int64_t kBlockSize = 512;
/* At least use 32 threads to copy sequence_width elements,
* and at least 8 elements for each thread.
......@@ -143,7 +144,8 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
auto seq_dims = seq.dims();
PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(),
PADDLE_ENFORCE_EQ(seq_dims[0],
static_cast<int64_t>(abs_offset_lod[level].back()),
"The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length.");
......@@ -152,17 +154,17 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
"The input padding should be a 3-D Tensor of shape "
"[max_sequnece_length, num_sequences, sequence_width].");
size_t max_sequence_length = MaximumSequenceLength(lod, level);
int64_t max_sequence_length = MaximumSequenceLength(lod, level);
PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
"The first dimension of Tensor padding should be "
"the maximum length of all sequences in LoDTensor seq.");
const size_t num_sequences = abs_offset_lod[level].size() - 1;
const int64_t num_sequences = abs_offset_lod[level].size() - 1;
PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
"The second dimension of Tensor padding should be "
"the number of sequences in LoDTensor seq.");
const size_t sequence_width = seq.numel() / seq_dims[0];
const int64_t sequence_width = seq.numel() / seq_dims[0];
PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
"The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq.");
......@@ -173,7 +175,7 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
return;
}
const size_t kBlockSize = 512;
const int64_t kBlockSize = 512;
/* At least use 32 threads to copy sequence_width elements,
* and at least 8 elements for each thread.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册