提交 2ce5c9d4 编写于 作者: Y Yancey1989

fix gpu implement

上级 07e87ff1
...@@ -71,7 +71,8 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -71,7 +71,8 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
auto seq_dims = seq.dims(); auto seq_dims = seq.dims();
PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(), PADDLE_ENFORCE_EQ(seq_dims[0],
static_cast<int64_t>(abs_offset_lod[level].back()),
"The first dimension of LoDTensor seq should be " "The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."); "equal to the sum of all sequences's length.");
...@@ -80,17 +81,17 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -80,17 +81,17 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
"The input padding should be a 3-D Tensor of shape " "The input padding should be a 3-D Tensor of shape "
"[max_sequence_length, num_sequences, sequence_width]."); "[max_sequence_length, num_sequences, sequence_width].");
size_t max_sequence_length = MaximumSequenceLength(lod, level); int64_t max_sequence_length = MaximumSequenceLength(lod, level);
PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
"The first dimension of Tensor padding should be the " "The first dimension of Tensor padding should be the "
"maximum length of all sequences in LoDTensor seq."); "maximum length of all sequences in LoDTensor seq.");
const size_t num_sequences = abs_offset_lod[level].size() - 1; const int64_t num_sequences = abs_offset_lod[level].size() - 1;
PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
"The second dimension of Tensor padding should be the " "The second dimension of Tensor padding should be the "
"number of sequences in LoDTensor seq."); "number of sequences in LoDTensor seq.");
const size_t sequence_width = seq.numel() / seq_dims[0]; const int64_t sequence_width = seq.numel() / seq_dims[0];
PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
"The third dimension of Tensor padding should be the " "The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."); "width of sequence in LoDTensor seq.");
...@@ -101,7 +102,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -101,7 +102,7 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
return; return;
} }
const size_t kBlockSize = 512; const int64_t kBlockSize = 512;
/* At least use 32 threads to copy sequence_width elements, /* At least use 32 threads to copy sequence_width elements,
* and at least 8 elements for each thread. * and at least 8 elements for each thread.
...@@ -143,7 +144,8 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -143,7 +144,8 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
auto seq_dims = seq.dims(); auto seq_dims = seq.dims();
PADDLE_ENFORCE_EQ(seq_dims[0], abs_offset_lod[level].back(), PADDLE_ENFORCE_EQ(seq_dims[0],
static_cast<int64_t>(abs_offset_lod[level].back()),
"The first dimension of LoDTensor seq should be " "The first dimension of LoDTensor seq should be "
"equal to the sum of all sequences's length."); "equal to the sum of all sequences's length.");
...@@ -152,17 +154,17 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -152,17 +154,17 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
"The input padding should be a 3-D Tensor of shape " "The input padding should be a 3-D Tensor of shape "
"[max_sequnece_length, num_sequences, sequence_width]."); "[max_sequnece_length, num_sequences, sequence_width].");
size_t max_sequence_length = MaximumSequenceLength(lod, level); int64_t max_sequence_length = MaximumSequenceLength(lod, level);
PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length, PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
"The first dimension of Tensor padding should be " "The first dimension of Tensor padding should be "
"the maximum length of all sequences in LoDTensor seq."); "the maximum length of all sequences in LoDTensor seq.");
const size_t num_sequences = abs_offset_lod[level].size() - 1; const int64_t num_sequences = abs_offset_lod[level].size() - 1;
PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences, PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
"The second dimension of Tensor padding should be " "The second dimension of Tensor padding should be "
"the number of sequences in LoDTensor seq."); "the number of sequences in LoDTensor seq.");
const size_t sequence_width = seq.numel() / seq_dims[0]; const int64_t sequence_width = seq.numel() / seq_dims[0];
PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width, PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
"The third dimension of Tensor padding should be the " "The third dimension of Tensor padding should be the "
"width of sequence in LoDTensor seq."); "width of sequence in LoDTensor seq.");
...@@ -173,7 +175,7 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> { ...@@ -173,7 +175,7 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
return; return;
} }
const size_t kBlockSize = 512; const int64_t kBlockSize = 512;
/* At least use 32 threads to copy sequence_width elements, /* At least use 32 threads to copy sequence_width elements,
* and at least 8 elements for each thread. * and at least 8 elements for each thread.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册