diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc
index d63c6c4ed55331235188c1c750468d4e75b9b7f2..2dd2cafa23babbc11271955b8cce84a629f8f738 100644
--- a/paddle/fluid/operators/math/sequence_padding.cc
+++ b/paddle/fluid/operators/math/sequence_padding.cc
@@ -18,128 +18,111 @@
 namespace paddle {
 namespace operators {
 namespace math {
 
-template <typename T>
-class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
- public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::LoDTensor& seq, framework::Tensor* padding,
-                  bool norm_by_times) {
-    auto lod = seq.lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The LoD of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding->dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequence_length, num_sequences, sequence_width].");
-
-    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be the "
-                      "maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be the "
-                      "number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq.numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    const T* seq_data = seq.data<T>();
-    T* padding_data = padding->data<T>();
-    for (int64_t i = 0; i < max_sequence_length; ++i) {
-      for (int64_t j = 0; j < num_sequences; ++j) {
-        int64_t start_pos = abs_offset_lod[level][j];
-        int64_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
-        if (i < sequence_length) {
-          // i > 0 => sequence_length > 0
-          T scale =
-              norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-          for (int64_t k = 0; k < sequence_width; ++k) {
-            padding_data[(i * num_sequences + j) * sequence_width + k] =
-                seq_data[(start_pos + i) * sequence_width + k] * scale;
-          }
+template <typename T, PaddingLayout padding_layout>
+void CopyDataCPU(framework::LoDTensor* seq_tensor,
+                 framework::Tensor* padding_tensor,
+                 const framework::Vector<size_t>& abs_offset,
+                 const int64_t& max_seq_len, const int64_t& seq_width,
+                 bool seq_to_padding, bool norm_by_len) {
+  T* seq_data = seq_tensor->data<T>();
+  T* padding_data = padding_tensor->data<T>();
+
+  int64_t seq_num = abs_offset.size() - 1;
+
+  for (int64_t i = 0; i < seq_num; ++i) {
+    int64_t seq_start = abs_offset[i];
+    int64_t seq_len = abs_offset[i + 1] - seq_start;
+
+    T scale = norm_by_len ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
+
+    for (int64_t j = 0; j < seq_len; ++j) {
+      for (int64_t k = 0; k < seq_width; ++k) {
+        size_t padding_offset = 0;
+        if (padding_layout == BATCH_LENGTH_WIDTH) {
+          padding_offset = (i * max_seq_len * seq_width) + j * seq_width + k;
+        } else {
+          padding_offset = (j * seq_num * seq_width) + i * seq_width + k;
+        }
+        if (seq_to_padding) {
+          padding_data[padding_offset] =
+              seq_data[(seq_start + j) * seq_width + k] * scale;
         } else {
-          memset(padding_data + (i * num_sequences + j) * sequence_width, 0,
-                 sequence_width * sizeof(T));
+          seq_data[(seq_start + j) * seq_width + k] =
+              padding_data[padding_offset] * scale;
         }
       }
     }
   }
+}
+
+template <typename T, PaddingLayout padding_layout>
+class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  const framework::LoDTensor& seq_tensor,
+                  framework::Tensor* padding_tensor,
+                  T padding_value = static_cast<T>(0),
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(seq_tensor, lod_level);
+
+    auto& lod = seq_tensor.lod();
+    auto& abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto seq_dims = seq_tensor.dims();
+    auto padding_dims = padding_tensor->dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    int64_t seq_num = abs_offset.size() - 1;
+    int64_t seq_width = seq_tensor.numel() / seq_dims[0];
+    int64_t numel = max_seq_len * seq_num * seq_width;
+
+    ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len,
+                  seq_num, seq_width, padding_layout);
+
+    T* padding_data = padding_tensor->data<T>();
+
+    memset(padding_data, padding_value, numel * sizeof(T));
+
+    CopyDataCPU<T, padding_layout>(
+        const_cast<framework::LoDTensor*>(&seq_tensor), padding_tensor,
+        abs_offset, max_seq_len, seq_width, true /* seq_to_padding */,
+        norm_by_times);
+  }
 };
 
-template <typename T>
-class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
+template <typename T, PaddingLayout padding_layout>
+class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  framework::LoDTensor* seq, const framework::Tensor& padding,
-                  bool norm_by_times) {
-    auto lod = seq->lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The LoD of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq->dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding.dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequnece_length, num_sequences, sequence_width].");
-
-    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be "
-                      "the maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be "
-                      "the number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq->numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    const T* padding_data = padding.data<T>();
-    T* seq_data = seq->data<T>();
-    for (int64_t i = 0; i < num_sequences; ++i) {
-      int64_t start_pos = abs_offset_lod[level][i];
-      int64_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
-      for (int64_t j = 0; j < sequence_length; ++j) {
-        // sequence_width > j > 0
-        T scale =
-            norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
-        for (int64_t k = 0; k < sequence_width; ++k) {
-          seq_data[(start_pos + j) * sequence_width + k] =
-              padding_data[(j * num_sequences + i) * sequence_width + k] *
-              scale;
-        }
-      }
-    }
+                  framework::LoDTensor* seq_tensor,
+                  const framework::Tensor& padding_tensor,
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(*seq_tensor, lod_level);
+
+    auto& lod = seq_tensor->lod();
+    auto& abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto& seq_dims = seq_tensor->dims();
+    auto& padding_dims = padding_tensor.dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    int64_t seq_num = abs_offset.size() - 1;
+    int64_t seq_width = seq_tensor->numel() / seq_dims[0];
+
+    ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len,
+                  seq_num, seq_width, padding_layout);
+
+    T* seq_data = seq_tensor->data<T>();
+    memset(seq_data, static_cast<T>(0), seq_tensor->numel() * sizeof(T));
+
+    CopyDataCPU<T, padding_layout>(
+        seq_tensor, const_cast<framework::Tensor*>(&padding_tensor), abs_offset,
+        max_seq_len, seq_width, false /* seq_to_padding */, norm_by_times);
   }
 };
 
-template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
-template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
+template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float, LENGTH_BATCH_WIDTH>;
+template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float, LENGTH_BATCH_WIDTH>;
 
 }  // namespace math
 }  // namespace operators
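Note (illustrative only, not part of the patch): CopyDataCPU above serializes both layouts through the same pair of nested loops; only the offset arithmetic differs. A minimal standalone sketch of that arithmetic, using hypothetical names:

#include <cassert>
#include <cstddef>

// Sketch only: mirrors the offset math in CopyDataCPU above.
// kBatchLengthWidth corresponds to a [seq_num, max_seq_len, seq_width]
// padded layout, kLengthBatchWidth to [max_seq_len, seq_num, seq_width].
enum class Layout { kBatchLengthWidth, kLengthBatchWidth };

// Linear offset of element (sequence i, time step j, feature k) inside the
// padded buffer.
inline size_t PaddingOffset(Layout layout, size_t i, size_t j, size_t k,
                            size_t seq_num, size_t max_seq_len,
                            size_t seq_width) {
  if (layout == Layout::kBatchLengthWidth) {
    return (i * max_seq_len + j) * seq_width + k;
  }
  return (j * seq_num + i) * seq_width + k;
}

int main() {
  // Two sequences, max length 3, width 2: element (i=1, j=0, k=1).
  assert(PaddingOffset(Layout::kBatchLengthWidth, 1, 0, 1, 2, 3, 2) == 7);
  assert(PaddingOffset(Layout::kLengthBatchWidth, 1, 0, 1, 2, 3, 2) == 3);
  return 0;
}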
diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu
index 0956a0c17d387f4a174c7ed4e9b1b1f816dcf4ae..2377bef0247fa88b26079fbda16c0ffc56318fa3 100644
--- a/paddle/fluid/operators/math/sequence_padding.cu
+++ b/paddle/fluid/operators/math/sequence_padding.cu
@@ -19,87 +19,76 @@
 namespace paddle {
 namespace operators {
 namespace math {
 
-template <typename T, bool NormByTimes, bool Padding>
-__global__ void SequencePaddingKernel(T* padding, T* sequence,
-                                      const size_t* sequence_start_positions,
-                                      const size_t sequence_width,
-                                      const size_t max_sequence_length,
-                                      const size_t num_sequences) {
+template <typename T, bool Padding>
+__global__ void SequencePaddingKernel(
+    T* padding_data, T* seq_data, const size_t* abs_offset,
+    const size_t& seq_num, const size_t& max_seq_len, const size_t& seq_width,
+    const PaddingLayout& padding_layout, bool norm_by_times = false,
+    const T& padding_value = 0) {
   size_t padding_idx = blockIdx.y;
-  size_t start_pos = sequence_start_positions[padding_idx];
-  size_t sequence_length =
-      sequence_start_positions[padding_idx + 1] - start_pos;
+  size_t seq_start = abs_offset[padding_idx];
+  size_t seq_len = abs_offset[padding_idx + 1] - seq_start;
 
-  size_t sequence_idx = blockIdx.x * blockDim.y + threadIdx.y;
-  size_t padding_base_idx =
-      (sequence_idx * num_sequences + padding_idx) * sequence_width;
-  size_t sequence_base_idx = (start_pos + sequence_idx) * sequence_width;
+  size_t seq_idx = blockIdx.x * blockDim.y + threadIdx.y;
 
-  if (sequence_idx < sequence_length) {
-    T scale = NormByTimes ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
+  size_t seq_offset = (seq_start + seq_idx) * seq_width;
+
+  size_t padding_offset = 0;
+
+  if (padding_layout == LENGTH_BATCH_WIDTH) {
+    padding_offset = (seq_idx * seq_num + padding_idx) * seq_width;
+  } else {
+    padding_offset = (padding_idx * max_seq_len + seq_idx) * seq_width;
+  }
+
+  if (seq_idx < seq_len) {
+    T scale = norm_by_times ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
     if (Padding) {
       /* sequence -> padding */
-      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
-        padding[padding_base_idx + i] = scale * sequence[sequence_base_idx + i];
+      for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
+        padding_data[padding_offset + i] = scale * seq_data[seq_offset + i];
       }
     } else {
       /* padding -> sequence */
-      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
-        sequence[sequence_base_idx + i] = scale * padding[padding_base_idx + i];
+      for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
+        seq_data[seq_offset + i] = scale * padding_data[padding_offset + i];
       }
     }
-  } else if (sequence_idx < max_sequence_length) {
+  } else if (seq_idx < max_seq_len) {
    if (Padding) {
      /* sequence -> padding */
-      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
-        padding[padding_base_idx + i] = 0;
+      for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
+        padding_data[padding_offset + i] = padding_value;
      }
    }
  }
 }
 
-template <typename T>
-class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
+template <typename T, PaddingLayout padding_layout>
+class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::LoDTensor& seq, framework::Tensor* padding,
-                  bool norm_by_times) {
-    auto lod = seq.lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The lod of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq.dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding->dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequence_length, num_sequences, sequence_width].");
-
-    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be the "
-                      "maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be the "
-                      "number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq.numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    if (!norm_by_times && num_sequences == 1UL) {
-      TensorCopy(seq, context.GetPlace(), context, padding);
-      padding->Resize(padding_dims);
+                  const framework::LoDTensor& seq_tensor,
+                  framework::Tensor* padding_tensor,
+                  T padding_value = static_cast<T>(0),
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(seq_tensor, lod_level);
+
+    auto& lod = seq_tensor.lod();
+    auto& abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto seq_dims = seq_tensor.dims();
+    auto padding_dims = padding_tensor->dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    const int64_t seq_num = abs_offset.size() - 1;
+    const int64_t seq_width = seq_tensor.numel() / seq_dims[0];
+
+    ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len,
+                  seq_num, seq_width, padding_layout);
+
+    if (!norm_by_times && seq_num == 1UL) {
+      TensorCopy(seq_tensor, context.GetPlace(), context, padding_tensor);
+      padding_tensor->Resize(padding_dims);
       return;
     }
 
@@ -109,72 +98,46 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
      * and at least 8 elements for each thread.
      */
     size_t block_dim_x =
-        std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
+        std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
     size_t block_dim_y = kBlockSize / block_dim_x;
     dim3 threads(block_dim_x, block_dim_y);
 
-    size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y;
-    size_t grid_dim_y = num_sequences;
+    size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y;
+    size_t grid_dim_y = seq_num;
     dim3 grid(grid_dim_x, grid_dim_y);
 
-    const T* seq_data = seq.data<T>();
-    T* padding_data = padding->data<T>();
-    if (norm_by_times) {
-      SequencePaddingKernel<T, 1, 1><<<grid, threads, 0, context.stream()>>>(
-          padding_data, const_cast<T*>(seq_data),
-          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
-          max_sequence_length, num_sequences);
-    } else {
-      SequencePaddingKernel<T, 0, 1><<<grid, threads, 0, context.stream()>>>(
-          padding_data, const_cast<T*>(seq_data),
-          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
-          max_sequence_length, num_sequences);
-    }
+    const T* seq_data = seq_tensor.data<T>();
+    T* padding_data = padding_tensor->data<T>();
+
+    SequencePaddingKernel<T, 1><<<grid, threads, 0, context.stream()>>>(
+        padding_data, const_cast<T*>(seq_data),
+        abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len,
+        seq_width, padding_layout, norm_by_times, padding_value);
   }
 };
 
-template <typename T>
-class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
+template <typename T, PaddingLayout padding_layout>
+class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  framework::LoDTensor* seq, const framework::Tensor& padding,
-                  bool norm_by_times) {
-    auto lod = seq->lod();
-    PADDLE_ENFORCE_GT(lod.size(), 0UL,
-                      "The lod of LoDTensor seq should not be null.");
-
-    const size_t level = 0;
-    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-
-    auto seq_dims = seq->dims();
-    PADDLE_ENFORCE_EQ(seq_dims[0],
-                      static_cast<int64_t>(abs_offset_lod[level].back()),
-                      "The first dimension of LoDTensor seq should be "
-                      "equal to the sum of all sequences's length.");
-
-    auto padding_dims = padding.dims();
-    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
-                      "The input padding should be a 3-D Tensor of shape "
-                      "[max_sequnece_length, num_sequences, sequence_width].");
-
-    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
-    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
-                      "The first dimension of Tensor padding should be "
-                      "the maximum length of all sequences in LoDTensor seq.");
-
-    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
-    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
-                      "The second dimension of Tensor padding should be "
-                      "the number of sequences in LoDTensor seq.");
-
-    const int64_t sequence_width = seq->numel() / seq_dims[0];
-    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
-                      "The third dimension of Tensor padding should be the "
-                      "width of sequence in LoDTensor seq.");
-
-    if (!norm_by_times && num_sequences == 1UL) {
-      TensorCopy(padding, context.GetPlace(), context, seq);
-      seq->Resize(seq_dims);
+                  framework::LoDTensor* seq_tensor,
+                  const framework::Tensor& padding_tensor,
+                  bool norm_by_times = false, size_t lod_level = 0) {
+    ValidateLoD(*seq_tensor, lod_level);
+
+    auto& lod = seq_tensor->lod();
+    auto& abs_offset = framework::ToAbsOffset(lod)[lod_level];
+
+    auto seq_dims = seq_tensor->dims();
+    auto padding_dims = padding_tensor.dims();
+    int64_t max_seq_len = MaximumSequenceLength(lod, lod_level);
+    int64_t seq_num = abs_offset.size() - 1;
+    int64_t seq_width = seq_tensor->numel() / seq_dims[0];
+
+    if (!norm_by_times && seq_num == 1UL) {
+      TensorCopy(padding_tensor, context.GetPlace(), context, seq_tensor);
+      seq_tensor->Resize(seq_dims);
       return;
     }
 
@@ -184,32 +147,28 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
      * and at least 8 elements for each thread.
      */
     size_t block_dim_x =
-        std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
+        std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
     size_t block_dim_y = kBlockSize / block_dim_x;
     dim3 threads(block_dim_x, block_dim_y);
 
-    size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y;
-    size_t grid_dim_y = num_sequences;
+    size_t grid_dim_x = (max_seq_len + block_dim_y - 1) / block_dim_y;
+    size_t grid_dim_y = seq_num;
     dim3 grid(grid_dim_x, grid_dim_y);
 
-    const T* padding_data = padding.data<T>();
-    T* seq_data = seq->data<T>();
-    if (norm_by_times) {
-      SequencePaddingKernel<T, 1, 0><<<grid, threads, 0, context.stream()>>>(
-          const_cast<T*>(padding_data), seq_data,
-          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
-          max_sequence_length, num_sequences);
-    } else {
-      SequencePaddingKernel<T, 0, 0><<<grid, threads, 0, context.stream()>>>(
-          const_cast<T*>(padding_data), seq_data,
-          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
-          max_sequence_length, num_sequences);
-    }
+    const T* padding_data = padding_tensor.data<T>();
+    T* seq_data = seq_tensor->data<T>();
+
+    SequencePaddingKernel<T, 0><<<grid, threads, 0, context.stream()>>>(
+        const_cast<T*>(padding_data), seq_data,
+        abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len,
+        seq_width, padding_layout, norm_by_times);
   }
 };
 
-template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
-template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
+template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, float, LENGTH_BATCH_WIDTH>;
+template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, float, LENGTH_BATCH_WIDTH>;
 
 }  // namespace math
 }  // namespace operators
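Note (illustrative only, not part of the patch): both CUDA functors reuse the same launch-shape heuristic, roughly one thread per eight elements of a row, rounded up to a whole warp and capped by the block size. A standalone sketch of that expression, assuming kBlockSize is 512 as in this file:

#include <algorithm>
#include <cassert>
#include <cstddef>

// Same rounding trick as the kernels above: ceil(seq_width / 8), rounded up
// to a multiple of 32, capped at kBlockSize; block_dim_y fills the rest of
// the block, and the grid covers (time steps) x (sequences).
void LaunchDims(size_t seq_width, size_t max_seq_len, size_t seq_num,
                size_t* block_x, size_t* block_y, size_t* grid_x,
                size_t* grid_y) {
  const size_t kBlockSize = 512;  // assumed value, taken from the .cu file
  *block_x = std::min(((((seq_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
  *block_y = kBlockSize / *block_x;
  *grid_x = (max_seq_len + *block_y - 1) / *block_y;  // rows of time steps
  *grid_y = seq_num;                                  // one block row per sequence
}

int main() {
  size_t bx, by, gx, gy;
  LaunchDims(/*seq_width=*/100, /*max_seq_len=*/7, /*seq_num=*/3, &bx, &by,
             &gx, &gy);
  // ceil(100 / 8) = 13, rounded up to 32 threads in x, so 16 in y.
  assert(bx == 32 && by == 16 && gx == 1 && gy == 3);
  return 0;
}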
diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h
index b56e6db1ebdac1a00561c07845c03bb8fbd8d35a..91d205641acd6e674d0f60c6d9e373b08d0fec19 100644
--- a/paddle/fluid/operators/math/sequence_padding.h
+++ b/paddle/fluid/operators/math/sequence_padding.h
@@ -22,17 +22,50 @@
 namespace paddle {
 namespace operators {
 namespace math {
 
+enum PaddingLayout { BATCH_LENGTH_WIDTH, LENGTH_BATCH_WIDTH };
+
 inline static size_t MaximumSequenceLength(const framework::LoD& lod,
                                            const size_t level) {
-  const size_t num_sequences = lod[level].size() - 1;
-  size_t max_sequence_length = 0;
-  framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-  for (size_t i = 0; i < num_sequences; ++i) {
-    max_sequence_length =
-        std::max(max_sequence_length,
-                 abs_offset_lod[level][i + 1] - abs_offset_lod[level][i]);
+  const size_t seq_num = lod[level].size() - 1;
+  size_t max_seq_len = 0;
+  auto abs_offset = framework::ToAbsOffset(lod)[level];
+  for (size_t i = 0; i < seq_num; ++i) {
+    max_seq_len = std::max(max_seq_len, abs_offset[i + 1] - abs_offset[i]);
+  }
+  return max_seq_len;
+}
+
+inline static void ValidateLoD(const framework::LoDTensor& seq_tensor,
+                               const size_t& lod_level) {
+  PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(),
+                 "Invalid `lod_level` which should be at least 0 and less "
+                 "than maximum lod level of `seq_tensor`.");
+}
+
+inline static void ValidateShape(const framework::DDim& seq_tensor_dims,
+                                 const size_t& abs_offset_back_value,
+                                 const framework::DDim& padding_tensor_dims,
+                                 const int64_t& max_seq_len,
+                                 const int64_t& seq_num,
+                                 const int64_t& seq_width,
+                                 const PaddingLayout& padding_layout) {
+  PADDLE_ENFORCE_EQ(static_cast<size_t>(seq_tensor_dims[0]),
+                    abs_offset_back_value,
+                    "The 1st dimension of `seq_tensor` should be equal to "
+                    "sum of lengths of all sequences.");
+
+  PADDLE_ENFORCE_EQ(padding_tensor_dims.size(), 3UL,
+                    "`padding_tensor` should be a 3-D tensor.");
+
+  if (padding_layout == BATCH_LENGTH_WIDTH) {
+    PADDLE_ENFORCE_EQ(padding_tensor_dims,
+                      framework::make_ddim({seq_num, max_seq_len, seq_width}));
+  } else if (padding_layout == LENGTH_BATCH_WIDTH) {
+    PADDLE_ENFORCE_EQ(padding_tensor_dims,
+                      framework::make_ddim({max_seq_len, seq_num, seq_width}));
+  } else {
+    PADDLE_THROW("Unsupported padding layout.");
   }
-  return max_sequence_length;
 }
 
 /*
@@ -61,18 +94,23 @@ inline static size_t MaximumSequenceLength(const framework::LoD& lod,
  *
  * \note transposition is also done in this functor.
  */
-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, PaddingLayout padding_layout>
 class PaddingLoDTensorFunctor {
  public:
-  void operator()(const DeviceContext& context, const framework::LoDTensor& seq,
-                  framework::Tensor* padding, bool norm_by_times);
+  void operator()(const DeviceContext& context,
+                  const framework::LoDTensor& seq_tensor,
+                  framework::Tensor* padding_tensor,
+                  T padding_value = static_cast<T>(0),
+                  bool norm_by_times = false, size_t lod_level = 0);
 };
 
-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, PaddingLayout padding_layout>
 class UnpaddingLoDTensorFunctor {
  public:
-  void operator()(const DeviceContext& context, framework::LoDTensor* seq,
-                  const framework::Tensor& padding, bool norm_by_times);
+  void operator()(const DeviceContext& context,
+                  framework::LoDTensor* seq_tensor,
+                  const framework::Tensor& padding_tensor,
+                  bool norm_by_times = false, size_t lod_level = 0);
 };
 
 }  // namespace math
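Note (illustrative only, not part of the patch): with the header above, a caller picks the layout at compile time and passes the padding value, normalization flag, and LoD level at the call site. A hedged usage sketch; the template-argument order is the reconstruction used throughout this diff and should be checked against the merged header:

// Sketch only; assumes the declarations in sequence_padding.h above.
#include "paddle/fluid/operators/math/sequence_padding.h"

namespace ops = paddle::operators;

template <typename T>
void PadOnCPU(const paddle::platform::CPUDeviceContext& ctx,
              const paddle::framework::LoDTensor& seq,
              paddle::framework::Tensor* padded) {
  // [max_seq_len, seq_num, seq_width] layout, pad with zero, no length
  // normalization, LoD level 0 (all defaults except the layout choice).
  ops::math::PaddingLoDTensorFunctor<paddle::platform::CPUDeviceContext, T,
                                     ops::math::LENGTH_BATCH_WIDTH>()(
      ctx, seq, padded, static_cast<T>(0), /*norm_by_times=*/false,
      /*lod_level=*/0);
}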
diff --git a/paddle/fluid/operators/sequence_pad_op.cc b/paddle/fluid/operators/sequence_pad_op.cc
index 183d38fcc9ac09d58127d88385cb77e0eb2ae810..f3a6fff0e10a745f00c9582d7d43115d5bd4d7a6 100644
--- a/paddle/fluid/operators/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_pad_op.cc
@@ -32,7 +32,11 @@ class SequencePadOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_EQ(x_dims.size(), 2,
                       "Only support 2-D tensor, rank of Input(X) should be 2.");
 
-    auto out_dims = x_dims;
+    int lod_level = ctx->Attrs().Get<int>("lod_level");
+
+    int64_t max_len = -1;
+    int64_t seq_num = -1;
+    int x_lod_size = -1;
 
     if (ctx->IsRuntime()) {
       framework::Variable* x_var =
@@ -40,27 +44,31 @@ class SequencePadOp : public framework::OperatorWithKernel {
 
       auto& x_lod = x_var->Get<framework::LoDTensor>().lod();
 
-      PADDLE_ENFORCE_GE(x_lod.size(), 1,
-                        "Input(X) should be sequences containing lod.");
+      x_lod_size = x_lod.size();
+
+      auto x_abs_offset = framework::ToAbsOffset(x_lod)[lod_level];
+
+      PADDLE_ENFORCE_EQ(x_dims[0], static_cast<int64_t>(x_abs_offset.back()),
+                        "The first dimension of `X` should be equal to sum "
+                        "of all sequences' length.");
 
-      auto last_level_lod = x_lod[x_lod.size() - 1];
-      size_t max_len = 0;
+      seq_num = x_abs_offset.size() - 1;
 
-      for (size_t i = 1; i < last_level_lod.size(); ++i) {
-        auto seq_len = last_level_lod[i] - last_level_lod[i - 1];
+      for (size_t i = 1; i <= seq_num; ++i) {
+        int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1];
         max_len = max_len < seq_len ? seq_len : max_len;
       }
-
-      out_dims[0] = max_len * (last_level_lod.size() - 1);
     } else {
       framework::VarDesc* x_desc =
           boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("X")[0]);
-      PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1,
-                        "Input(X) should be sequences containing lod.");
-      out_dims[0] = -1;
+      x_lod_size = x_desc->GetLoDLevel();
     }
 
-    ctx->SetOutputDim("Out", out_dims);
+    PADDLE_ENFORCE(lod_level >= 0 && lod_level < x_lod_size,
+                   "Invalid `lod_level` which should be at least 0 and less "
+                   "than maximum lod level of `X`");
+
+    ctx->SetOutputDim("Out", {seq_num, max_len, x_dims[1]});
   }
 
  protected:
@@ -84,9 +92,11 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
               "(Tensor) Output variable which would be a common tensor "
               "without lod. Each sequence would be padded to the maximum "
               "length.");
+    AddAttr<int>("lod_level",
+                 "(int, default 0) Specify which level of LoD to refer to.");
     AddAttr<float>("pad_value",
-                   "(float, default 0.0) Value to be padded "
-                   "to the end of each sequence.");
+                   "(float, default 0.0) Specify which value to be padded to "
+                   "the end of each sequence.");
     AddComment(R"DOC(
 
 )DOC");
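Note (illustrative only, not part of the patch): the runtime branch of SequencePadOp::InferShape above boils down to deriving seq_num and max_len from one level of absolute LoD offsets, and setting the output to {seq_num, max_len, width}. A self-contained sketch of just that computation:

#include <cassert>
#include <cstdint>
#include <vector>

// Given the absolute offsets of one LoD level, compute the number of
// sequences and the longest sequence, as the InferShape above does.
void SeqNumAndMaxLen(const std::vector<int64_t>& abs_offset, int64_t* seq_num,
                     int64_t* max_len) {
  *seq_num = static_cast<int64_t>(abs_offset.size()) - 1;
  *max_len = -1;
  for (size_t i = 1; i < abs_offset.size(); ++i) {
    int64_t seq_len = abs_offset[i] - abs_offset[i - 1];
    *max_len = *max_len < seq_len ? seq_len : *max_len;
  }
}

int main() {
  // Offsets {0, 2, 5} describe two sequences of lengths 2 and 3, so a
  // width-4 input would pad to shape {2, 3, 4}.
  int64_t seq_num = 0, max_len = 0;
  SeqNumAndMaxLen({0, 2, 5}, &seq_num, &max_len);
  assert(seq_num == 2 && max_len == 3);
  return 0;
}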
diff --git a/paddle/fluid/operators/sequence_pad_op.h b/paddle/fluid/operators/sequence_pad_op.h
index b36465d8e717a57ff6445201632f3f9c05cc28dc..6d136b65f19cf025d4b5f99a56b5c31c1b8182b1 100644
--- a/paddle/fluid/operators/sequence_pad_op.h
+++ b/paddle/fluid/operators/sequence_pad_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/operators/math/sequence_padding.h"
 
 namespace paddle {
 namespace operators {
@@ -23,39 +24,68 @@ namespace operators {
 using LoDTensor = framework::LoDTensor;
 using LoD = framework::LoD;
 
-// @TODO clean code
+template <typename DeviceContext>
+struct CopyFunctor {
+  LoDTensor* lod_tensor_;
+  LoDTensor* pad_tensor_;
+  const LoD& ref_lod_;
+  const DeviceContext& ctx_;
+  bool is_lod_to_pad_;
+
+  CopyFunctor(LoDTensor* lod_tensor, const LoD& ref_lod, LoDTensor* pad_tensor,
+              const DeviceContext& ctx, bool is_lod_to_pad)
+      : lod_tensor_(lod_tensor),
+        pad_tensor_(pad_tensor),
+        ref_lod_(ref_lod),
+        ctx_(ctx),
+        is_lod_to_pad_(is_lod_to_pad) {}
+
+  void operator()() const {
+    /*
+    auto seq_num = ref_lod_.size() - 1;
+    auto max_len = pad_tensor_->dims()[0] / seq_num;
+
+    PADDLE_ENFORCE_EQ(max_len * seq_num, pad_tensor_->dims()[0],
+                      "First dimension of padded tensor should be equal to "
+                      "maximum sequence length multiplied by sequence number.");
+
+    for (size_t i = 1; i < ref_lod_.size(); ++i) {
+      auto seq_start = ref_lod_[i - 1];
+      auto seq_end = ref_lod_[i];
+      auto pad_start = (i - 1) * max_len;
+      auto pad_end = pad_start + (seq_end - seq_start);
+      auto sub_lod_tensor = lod_tensor_->Slice(seq_start, seq_end);
+      auto sub_pad_tensor = pad_tensor_->Slice(pad_start, pad_end);
+      if (is_lod_to_pad_) {
+        framework::TensorCopy(sub_lod_tensor, ctx.GetPlace(), &sub_pad_tensor);
+      } else {
+        framework::TensorCopy(sub_pad_tensor, ctx.GetPlace(), &sub_lod_tensor);
+      }
+    }
+    */
+  }
+};
+
 template <typename DeviceContext, typename T>
 class SequencePadOpKernel : public framework::OpKernel<T> {
  public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x_ptr = ctx.Input<LoDTensor>("X");
+    /*
+    auto* x = ctx.Input<LoDTensor>("X");
     auto* out_ptr = ctx.Output<LoDTensor>("Out");
 
     out_ptr->mutable_data<T>(ctx.GetPlace());
 
+    // Resize();
+
     T pad_value = static_cast<T>(ctx.Attr<float>("pad_value"));
+    math::PaddingLoDTensorFunctor<DeviceContext, T, math::LENGTH_BATCH_WIDTH>()(
+        ctx.template device_context<DeviceContext>(), *x, *, false);
+
     math::SetConstant<DeviceContext, T> set_func;
     set_func(ctx.template device_context<DeviceContext>(), out_ptr, pad_value);
-
-    auto& x_lod = x_ptr->lod();
-    auto& x_last_level_lod = x_lod[x_lod.size() - 1];
-    auto seq_num = x_last_level_lod.size() - 1;
-    auto max_len = out_ptr->dims()[0] / seq_num;
-
-    PADDLE_ENFORCE_EQ(max_len * seq_num, out_ptr->dims()[0],
-                      "First dimension of `Out` should be equal to "
-                      "maximum length mulplied by sequence number.");
-
-    for (size_t i = 1; i < x_last_level_lod.size(); ++i) {
-      auto x_start = x_last_level_lod[i - 1];
-      auto x_end = x_last_level_lod[i];
-      auto out_start = (i - 1) * max_len;
-      auto out_end = out_start + (x_end - x_start);
-      auto x_sub_tensor = x_ptr->Slice(x_start, x_end);
-      auto out_sub_tensor = out_ptr->Slice(out_start, out_end);
-      framework::TensorCopy(x_sub_tensor, ctx.GetPlace(), &out_sub_tensor);
-    }
+    */
   }
 };
 
@@ -63,33 +93,26 @@ template <typename DeviceContext, typename T>
 class SequencePadGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
+    /*
     auto* x_ptr = ctx.Input<LoDTensor>("X");
     auto* g_out_ptr = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
     auto* g_x_ptr = ctx.Output<LoDTensor>(framework::GradVarName("X"));
 
     math::SetConstant<DeviceContext, T> set_func;
-    set_func(ctx.template device_context<DeviceContext>(), g_x_ptr,
+    set_func(ctx.template device_context<DeviceContext>(),
+             g_x_ptr,
             static_cast<T>(0));
 
     auto& x_lod = x_ptr->lod();
     auto& x_last_level_lod = x_lod[x_lod.size() - 1];
-    auto seq_num = x_last_level_lod.size() - 1;
-    int64_t max_len = g_out_ptr->dims()[0] / seq_num;
-
-    PADDLE_ENFORCE_EQ(max_len * seq_num, g_out_ptr->dims()[0],
-                      "First dimension of `Out` should be equal to "
-                      "maximum length mulplied by sequence number.");
-
-    for (size_t i = 1; i < x_last_level_lod.size(); ++i) {
-      auto x_start = x_last_level_lod[i - 1];
-      auto x_end = x_last_level_lod[i];
-      auto out_start = (i - 1) * max_len;
-      auto out_end = out_start + (x_end - x_start);
-
-      auto g_out_sub = g_out_ptr->Slice(out_start, out_end);
-      auto g_x_sub = g_x_ptr->Slice(x_start, x_end);
-      framework::TensorCopy(g_x_sub, ctx.GetPlace(), &g_out_sub);
-    }
+
+    CopyFunctor<DeviceContext> copy_func(g_out_ptr,
+                                         x_last_level_lod,
+                                         g_x_ptr,
+                                         ctx,
+                                         false);
+    copy_func();
+    */
   }
 };
 
diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h
index ab70c1f0592d122ba248a101db487e64c0bdae6f..cb9db1682032c3c181e0d93e38268893609b9fae 100644
--- a/paddle/fluid/operators/warpctc_op.h
+++ b/paddle/fluid/operators/warpctc_op.h
@@ -161,7 +161,7 @@ class WarpCTCKernel : public framework::OpKernel<T> {
                               static_cast<int64_t>(num_sequences),
                               static_cast<int64_t>(sequence_width)});
     warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace());
-    math::PaddingLoDTensorFunctor<DeviceContext, T>()(
+    math::PaddingLoDTensorFunctor<DeviceContext, T, math::LENGTH_BATCH_WIDTH>()(
         ctx.template device_context<DeviceContext>(), *logits, &warpctc_logits,
         false);
     const T* warpctc_logits_data = warpctc_logits.data<T>();
@@ -215,7 +215,8 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
       logits_grad->mutable_data<T>(ctx.GetPlace());
       bool norm_by_times = ctx.Attr<bool>("norm_by_times");
-      math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
+      math::UnpaddingLoDTensorFunctor<DeviceContext, T,
+                                      math::LENGTH_BATCH_WIDTH>()(
           ctx.template device_context<DeviceContext>(), logits_grad,
           *warpctc_grad, norm_by_times);
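Note (illustrative only, not part of the patch): the WarpCTC call sites keep the LENGTH_BATCH_WIDTH layout they already used; only the extra template argument is new. As a plain-C++ sanity check of the copy semantics (pad and then unpad over that layout, without length normalization), independent of the framework:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Two sequences of lengths 2 and 3, width 2, flattened row-major.
  const std::vector<size_t> abs_offset = {0, 2, 5};
  const size_t seq_num = 2, max_seq_len = 3, seq_width = 2;
  std::vector<float> seq = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};

  // Pad into a [max_seq_len, seq_num, seq_width] buffer (LENGTH_BATCH_WIDTH).
  std::vector<float> padded(max_seq_len * seq_num * seq_width, 0.f);
  for (size_t i = 0; i < seq_num; ++i) {
    for (size_t j = abs_offset[i]; j < abs_offset[i + 1]; ++j) {
      for (size_t k = 0; k < seq_width; ++k) {
        size_t step = j - abs_offset[i];
        padded[(step * seq_num + i) * seq_width + k] = seq[j * seq_width + k];
      }
    }
  }

  // Unpad back and compare against the original flattened sequences.
  std::vector<float> restored(seq.size(), 0.f);
  for (size_t i = 0; i < seq_num; ++i) {
    for (size_t j = abs_offset[i]; j < abs_offset[i + 1]; ++j) {
      for (size_t k = 0; k < seq_width; ++k) {
        size_t step = j - abs_offset[i];
        restored[j * seq_width + k] =
            padded[(step * seq_num + i) * seq_width + k];
      }
    }
  }
  assert(restored == seq);
  return 0;
}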