提交 10ec329b 编写于 作者: Y yangyaming

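Refine code: replace the compile-time `PaddingLayout` template parameter with a runtime `OutputLayout` argument (`kBatchLengthWidth` / `kLengthBatchWidth`); rename `padding_*` / `abs_offset` to `pad_*` / `seq_offset` and `ValidateLoD` / `ValidateShape` to `CheckLoD` / `CheckDims`; make `MaximumSequenceLength` take the offset vector directly; use `SequencePaddingKernel<T, 0>` in the CUDA unpadding functor; and instantiate the functors for int, int64_t, float and double.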

上级 07972467
...@@ -18,111 +18,114 @@ namespace paddle { ...@@ -18,111 +18,114 @@ namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
template <typename T, PaddingLayout padding_layout> template <typename T>
void CopyDataCPU(framework::LoDTensor* seq_tensor, void CopyDataCPU(framework::LoDTensor* seq_tensor,
framework::Tensor* padding_tensor, framework::Tensor* pad_tensor,
const framework::Vector<size_t>& abs_offset, const framework::Vector<size_t>& seq_offset,
const int64_t& max_seq_len, const int64_t& seq_width, const int64_t& max_seq_len, const int64_t& seq_width,
bool seq_to_padding, bool norm_by_len) { bool seq_to_pad, bool norm_by_len,
OutputLayout output_layout) {
T* seq_data = seq_tensor->data<T>(); T* seq_data = seq_tensor->data<T>();
T* padding_data = padding_tensor->data<T>(); T* pad_data = pad_tensor->data<T>();
int64_t seq_num = abs_offset.size() - 1; int64_t seq_num = seq_offset.size() - 1;
for (int64_t i = 0; i < seq_num; ++i) { for (int64_t i = 0; i < seq_num; ++i) {
int64_t seq_start = abs_offset[i]; int64_t seq_start = seq_offset[i];
int64_t seq_len = abs_offset[i + 1] - seq_start; int64_t seq_len = seq_offset[i + 1] - seq_start;
T scale = norm_by_len ? (1.0f / static_cast<T>(seq_len)) : 1.0f; T scale = norm_by_len ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
for (int64_t j = 0; j < seq_len; ++j) { for (int64_t j = 0; j < seq_len; ++j) {
for (int64_t k = 0; k < seq_width; ++k) { for (int64_t k = 0; k < seq_width; ++k) {
size_t padding_offset = 0; size_t pad_data_idx = 0;
if (padding_layout == BATCH_LENGTH_WIDTH) { size_t seq_data_idx = (seq_start + j) * seq_width + k;
padding_offset = (i * max_seq_len * seq_width) + j * seq_width + k; if (output_layout == kBatchLengthWidth) {
pad_data_idx = (i * max_seq_len + j) * seq_width + k;
} else { } else {
padding_offset = (j * seq_num * seq_width) + i * seq_width + k; pad_data_idx = (j * seq_num + i) * seq_width + k;
} }
if (seq_to_padding) { if (seq_to_pad) {
padding_data[padding_offset] = pad_data[pad_data_idx] = seq_data[seq_data_idx] * scale;
seq_data[(seq_start + j) * seq_width + k] * scale;
} else { } else {
seq_data[(seq_start + j) * seq_width + k] = seq_data[seq_data_idx] = pad_data[pad_data_idx] * scale;
padding_data[padding_offset] * scale;
} }
} }
} }
} }
} }
template <typename T, PaddingLayout padding_layout> template <typename T>
class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> { class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
public: public:
void operator()(const platform::CPUDeviceContext& context, void operator()(const platform::CPUDeviceContext& context,
const framework::LoDTensor& seq_tensor, const framework::LoDTensor& seq_tensor,
framework::Tensor* padding_tensor, framework::Tensor* pad_tensor,
T padding_value = static_cast<T>(0), T pad_value = static_cast<T>(0), bool norm_by_times = false,
bool norm_by_times = false, size_t lod_level = 0) { size_t lod_level = 0,
ValidateLoD(seq_tensor, lod_level); OutputLayout output_layout = kBatchLengthWidth) {
CheckLoD(seq_tensor, lod_level);
auto& lod = seq_tensor.lod(); auto& lod = seq_tensor.lod();
auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
auto seq_dims = seq_tensor.dims(); auto seq_tensor_dims = seq_tensor.dims();
auto padding_dims = padding_tensor->dims(); auto pad_tensor_dims = pad_tensor->dims();
int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); int64_t max_seq_len = MaximumSequenceLength(seq_offset);
int64_t seq_num = abs_offset.size() - 1; int64_t seq_num = seq_offset.size() - 1;
int64_t seq_width = seq_tensor.numel() / seq_dims[0]; int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0];
int64_t numel = max_seq_len * seq_num * seq_width;
ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
seq_num, seq_width, padding_layout); seq_num, seq_width, output_layout);
T* padding_data = padding_tensor->data<T>(); T* pad_data = pad_tensor->data<T>();
memset(padding_data, padding_value, numel * sizeof(T)); memset(pad_data, pad_value, max_seq_len * seq_num * seq_width * sizeof(T));
CopyDataCPU<T, padding_layout>( CopyDataCPU<T>(const_cast<framework::LoDTensor*>(&seq_tensor), pad_tensor,
const_cast<framework::LoDTensor*>(&seq_tensor), padding_tensor, seq_offset, max_seq_len, seq_width, true /* seq_to_pad */,
abs_offset, max_seq_len, seq_width, true /* seq_to_padding */, norm_by_times, output_layout);
norm_by_times);
} }
}; };
template <typename T, PaddingLayout padding_layout> template <typename T>
class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T, padding_layout> { class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
public: public:
void operator()(const platform::CPUDeviceContext& context, void operator()(const platform::CPUDeviceContext& context,
framework::LoDTensor* seq_tensor, framework::LoDTensor* seq_tensor,
const framework::Tensor& padding_tensor, const framework::Tensor& pad_tensor,
bool norm_by_times = false, size_t lod_level = 0) { bool norm_by_times = false, size_t lod_level = 0,
ValidateLoD(*seq_tensor, lod_level); OutputLayout output_layout = kBatchLengthWidth) {
CheckLoD(*seq_tensor, lod_level);
auto& lod = seq_tensor->lod(); auto& lod = seq_tensor->lod();
auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
auto& seq_dims = seq_tensor->dims(); auto& seq_tensor_dims = seq_tensor->dims();
auto& padding_dims = padding_tensor.dims(); auto& pad_tensor_dims = pad_tensor.dims();
int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); int64_t max_seq_len = MaximumSequenceLength(seq_offset);
int64_t seq_num = abs_offset.size() - 1; int64_t seq_num = seq_offset.size() - 1;
int64_t seq_width = seq_tensor->numel() / seq_dims[0]; int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0];
ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
seq_num, seq_width, padding_layout); seq_num, seq_width, output_layout);
T* seq_data = seq_tensor->data<T>(); T* seq_data = seq_tensor->data<T>();
memset(seq_data, static_cast<T>(0), seq_tensor->numel() * sizeof(T)); memset(seq_data, static_cast<T>(0), seq_tensor->numel() * sizeof(T));
CopyDataCPU<T, padding_layout>( CopyDataCPU<T>(seq_tensor, const_cast<framework::Tensor*>(&pad_tensor),
seq_tensor, const_cast<framework::Tensor*>(&padding_tensor), abs_offset, seq_offset, max_seq_len, seq_width, false /* seq_to_pad */,
max_seq_len, seq_width, false /* seq_to_padding */, norm_by_times); norm_by_times, output_layout);
} }
}; };
template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float, template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, int>;
LENGTH_BATCH_WIDTH>; template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float, template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
LENGTH_BATCH_WIDTH>; template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, double>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, int>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, double>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
......
...@@ -21,74 +21,74 @@ namespace math { ...@@ -21,74 +21,74 @@ namespace math {
template <typename T, bool Padding> template <typename T, bool Padding>
__global__ void SequencePaddingKernel( __global__ void SequencePaddingKernel(
T* padding_data, T* seq_data, const size_t* abs_offset, T* pad_data, T* seq_data, const size_t* seq_offset, const size_t& seq_num,
const size_t& seq_num, const size_t& max_seq_len, const size_t& seq_width, const size_t& max_seq_len, const size_t& seq_width, bool norm_by_times,
const PaddingLayout& padding_layout, bool norm_by_times = false, const T& pad_value, const OutputLayout& output_layout) {
const T& padding_value = 0) { size_t seq_idx = blockIdx.y;
size_t padding_idx = blockIdx.y; size_t seq_start = seq_offset[seq_idx];
size_t seq_start = abs_offset[padding_idx]; size_t seq_len = seq_offset[seq_idx + 1] - seq_start;
size_t seq_len = abs_offset[padding_idx + 1] - seq_start;
size_t seq_idx = blockIdx.x * blockDim.y + threadIdx.y; size_t seq_step_idx = blockIdx.x * blockDim.y + threadIdx.y;
size_t seq_offset = (seq_start + seq_idx) * seq_width; size_t seq_data_offset = (seq_start + seq_step_idx) * seq_width;
size_t padding_offset = 0; size_t pad_data_offset = 0;
if (padding_layout == LENGTH_BATCH_WIDTH) { if (output_layout == kLengthBatchWidth) {
padding_offset = (seq_idx * seq_num + padding_idx) * seq_width; pad_data_offset = (seq_step_idx * seq_num + seq_idx) * seq_width;
} else { } else {
padding_offset = (padding_idx * max_seq_len + seq_idx) * seq_width; pad_data_offset = (seq_idx * max_seq_len + seq_step_idx) * seq_width;
} }
if (seq_idx < seq_len) { if (seq_step_idx < seq_len) {
T scale = norm_by_times ? (1.0f / static_cast<T>(seq_len)) : 1.0f; T scale = norm_by_times ? (1.0f / static_cast<T>(seq_len)) : 1.0f;
if (Padding) { if (Padding) {
/* sequence -> padding */ /* seq -> pad */
for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
padding_data[padding_offset + i] = scale * seq_data[seq_offset + i]; pad_data[pad_data_offset + i] = scale * seq_data[seq_data_offset + i];
} }
} else { } else {
/* padding -> sequence */ /* pad -> seq */
for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
seq_data[seq_offset + i] = scale * padding_data[padding_offset + i]; seq_data[seq_data_offset + i] = scale * pad_data[pad_data_offset + i];
} }
} }
} else if (seq_idx < max_seq_len) { } else if (seq_step_idx < max_seq_len) {
if (Padding) { if (Padding) {
/* sequence -> padding */ /* seq -> pad */
for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) { for (size_t i = threadIdx.x; i < seq_width; i += blockDim.x) {
padding_data[padding_offset + i] = padding_value; pad_data[pad_data_offset + i] = pad_value;
} }
} }
} }
} }
template <typename T, PaddingLayout padding_layout> template <typename T>
class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> { class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
public: public:
void operator()(const platform::CUDADeviceContext& context, void operator()(const platform::CUDADeviceContext& context,
const framework::LoDTensor& seq_tensor, const framework::LoDTensor& seq_tensor,
framework::Tensor* padding_tensor, framework::Tensor* pad_tensor,
T padding_value = static_cast<T>(0), T pad_value = static_cast<T>(0), bool norm_by_times = false,
bool norm_by_times = false, size_t lod_level = 0) { size_t lod_level = 0,
ValidateLoD(seq_tensor, lod_level); OutputLayout output_layout = kBatchLengthWidth) {
CheckLoD(seq_tensor, lod_level);
auto& lod = seq_tensor.lod(); auto& lod = seq_tensor.lod();
auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
auto seq_dims = seq_tensor.dims(); auto seq_tensor_dims = seq_tensor.dims();
auto padding_dims = padding_tensor->dims(); auto pad_tensor_dims = pad_tensor->dims();
int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); int64_t max_seq_len = MaximumSequenceLength(seq_offset);
const int64_t seq_num = abs_offset.size() - 1; int64_t seq_num = seq_offset.size() - 1;
const int64_t seq_width = seq_tensor.numel() / seq_dims[0]; int64_t seq_width = seq_tensor.numel() / seq_tensor_dims[0];
ValidateShape(seq_dims, abs_offset.back(), padding_dims, max_seq_len, CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
seq_num, seq_width, padding_layout); seq_num, seq_width, output_layout);
if (!norm_by_times && seq_num == 1UL) { if (!norm_by_times && seq_num == 1UL) {
TensorCopy(seq_tensor, context.GetPlace(), context, padding_tensor); TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor);
padding_tensor->Resize(padding_dims); pad_tensor->Resize(pad_tensor_dims);
return; return;
} }
...@@ -107,37 +107,40 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> { ...@@ -107,37 +107,40 @@ class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T, padding_layout> {
dim3 grid(grid_dim_x, grid_dim_y); dim3 grid(grid_dim_x, grid_dim_y);
const T* seq_data = seq_tensor.data<T>(); const T* seq_data = seq_tensor.data<T>();
T* padding_data = padding_tensor->data<T>(); T* pad_data = pad_tensor->data<T>();
SequencePaddingKernel<T, 1><<<grid, threads, 0, context.stream()>>>( SequencePaddingKernel<T, 1><<<grid, threads, 0, context.stream()>>>(
padding_data, const_cast<T*>(seq_data), pad_data, const_cast<T*>(seq_data),
abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len,
seq_width, padding_layout, norm_by_times, padding_value); seq_width, norm_by_times, pad_value, output_layout);
} }
}; };
template <typename T, PaddingLayout padding_layout> template <typename T>
class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T, class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
padding_layout> {
public: public:
void operator()(const platform::CUDADeviceContext& context, void operator()(const platform::CUDADeviceContext& context,
framework::LoDTensor* seq_tensor, framework::LoDTensor* seq_tensor,
const framework::Tensor& padding_tensor, const framework::Tensor& pad_tensor,
bool norm_by_times = false, size_t lod_level = 0) { bool norm_by_times = false, size_t lod_level = 0,
ValidateLoD(*seq_tensor, lod_level); OutputLayout output_layout = kBatchLengthWidth) {
CheckLoD(*seq_tensor, lod_level);
auto& lod = seq_tensor->lod(); auto& lod = seq_tensor->lod();
auto& abs_offset = framework::ToAbsOffset(lod)[lod_level]; auto& seq_offset = framework::ToAbsOffset(lod)[lod_level];
auto seq_dims = seq_tensor->dims(); auto seq_tensor_dims = seq_tensor->dims();
auto padding_dims = padding_tensor.dims(); auto pad_tensor_dims = pad_tensor.dims();
int64_t max_seq_len = MaximumSequenceLength(lod, lod_level); int64_t max_seq_len = MaximumSequenceLength(seq_offset);
int64_t seq_num = abs_offset.size() - 1; int64_t seq_num = seq_offset.size() - 1;
int64_t seq_width = seq_tensor->numel() / seq_dims[0]; int64_t seq_width = seq_tensor->numel() / seq_tensor_dims[0];
CheckDims(seq_tensor_dims, seq_offset.back(), pad_tensor_dims, max_seq_len,
seq_num, seq_width, output_layout);
if (!norm_by_times && seq_num == 1UL) { if (!norm_by_times && seq_num == 1UL) {
TensorCopy(padding_tensor, context.GetPlace(), context, seq_tensor); TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor);
seq_tensor->Resize(seq_dims); seq_tensor->Resize(seq_tensor_dims);
return; return;
} }
...@@ -155,20 +158,25 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T, ...@@ -155,20 +158,25 @@ class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T,
size_t grid_dim_y = seq_num; size_t grid_dim_y = seq_num;
dim3 grid(grid_dim_x, grid_dim_y); dim3 grid(grid_dim_x, grid_dim_y);
const T* padding_data = padding_tensor.data<T>(); const T* pad_data = pad_tensor.data<T>();
T* seq_data = seq_tensor->data<T>(); T* seq_data = seq_tensor->data<T>();
SequencePaddingKernel<T, 1><<<grid, threads, 0, context.stream()>>>( SequencePaddingKernel<T, 0><<<grid, threads, 0, context.stream()>>>(
const_cast<T*>(padding_data), seq_data, const_cast<T*>(pad_data), seq_data,
abs_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len, seq_offset.CUDAData(context.GetPlace()), seq_num, max_seq_len,
seq_width, padding_layout, norm_by_times); seq_width, norm_by_times, static_cast<T>(0), output_layout);
} }
}; };
template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, float, template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, int>;
LENGTH_BATCH_WIDTH>; template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, float, template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
LENGTH_BATCH_WIDTH>; template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, double>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, int>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, double>;
} // namespace math } // namespace math
} // namespace operators } // namespace operators
......
...@@ -22,49 +22,46 @@ namespace paddle { ...@@ -22,49 +22,46 @@ namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
enum PaddingLayout { BATCH_LENGTH_WIDTH, LENGTH_BATCH_WIDTH }; enum OutputLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
inline static size_t MaximumSequenceLength(const framework::LoD& lod, inline static size_t MaximumSequenceLength(
const size_t level) { const framework::Vector<size_t>& seq_offset) {
const size_t seq_num = lod[level].size() - 1; size_t seq_num = seq_offset.size() - 1;
size_t max_seq_len = 0; size_t max_seq_len = 0;
auto abs_offset = framework::ToAbsOffset(lod)[level];
for (size_t i = 0; i < seq_num; ++i) { for (size_t i = 0; i < seq_num; ++i) {
max_seq_len = std::max(max_seq_len, abs_offset[i + 1] - abs_offset[i]); max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
} }
return max_seq_len; return max_seq_len;
} }
inline static void ValidateLoD(const framework::LoDTensor& seq_tensor, inline static void CheckLoD(const framework::LoDTensor& seq_tensor,
const size_t& lod_level) { const size_t& lod_level) {
PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(), PADDLE_ENFORCE(lod_level < seq_tensor.lod().size(),
"Invalid `lod_level` which should be at least 0 and less " "Invalid lod level which should be at least 0 and less "
"than maximum lod level of `seq_tensor`."); "than maximum lod level of sequence tensor.");
} }
inline static void ValidateShape(const framework::DDim& seq_tensor_dims, inline static void CheckDims(const framework::DDim& seq_tensor_dims,
const size_t& abs_offset_back_value, const size_t& last_offset,
const framework::DDim& padding_tensor_dims, const framework::DDim& pad_tensor_dims,
const int64_t& max_seq_len, const int64_t& max_seq_len, const int64_t& seq_num,
const int64_t& seq_num, const int64_t& seq_width,
const int64_t& seq_width, const OutputLayout& output_layout) {
const PaddingLayout& padding_layout) { PADDLE_ENFORCE_EQ(static_cast<size_t>(seq_tensor_dims[0]), last_offset,
PADDLE_ENFORCE_EQ(static_cast<size_t>(seq_tensor_dims[0]), "Value of 1st dimension of the sequence tensor should be "
abs_offset_back_value, "equal to sum of lengths of all sequences.");
"The 1st dimension of `seq_tensor` should be equal to "
"sum of lengths of all sequences.");
PADDLE_ENFORCE_EQ(padding_tensor_dims.size(), 3UL, PADDLE_ENFORCE_EQ(pad_tensor_dims.size(), 3UL,
"`padding_tensor` should be a 3-D tensor."); "Padded tensor should be a 3-D tensor.");
if (padding_layout == BATCH_LENGTH_WIDTH) { if (output_layout == kBatchLengthWidth) {
PADDLE_ENFORCE_EQ(padding_tensor_dims, PADDLE_ENFORCE_EQ(pad_tensor_dims,
framework::make_ddim({seq_num, max_seq_len, seq_width})); framework::make_ddim({seq_num, max_seq_len, seq_width}));
} else if (padding_layout == LENGTH_BATCH_WIDTH) { } else if (output_layout == kLengthBatchWidth) {
PADDLE_ENFORCE_EQ(padding_tensor_dims, PADDLE_ENFORCE_EQ(pad_tensor_dims,
framework::make_ddim({max_seq_len, seq_num, seq_width})); framework::make_ddim({max_seq_len, seq_num, seq_width}));
} else { } else {
PADDLE_THROW("Unsupported padding layout."); PADDLE_THROW("Unsupported output layout.");
} }
} }
...@@ -94,23 +91,25 @@ inline static void ValidateShape(const framework::DDim& seq_tensor_dims, ...@@ -94,23 +91,25 @@ inline static void ValidateShape(const framework::DDim& seq_tensor_dims,
* *
* \note transposition is also done in this functor. * \note transposition is also done in this functor.
*/ */
template <typename DeviceContext, typename T, PaddingLayout padding_layout> template <typename DeviceContext, typename T>
class PaddingLoDTensorFunctor { class PaddingLoDTensorFunctor {
public: public:
void operator()(const DeviceContext& context, void operator()(const DeviceContext& context,
const framework::LoDTensor& seq_tensor, const framework::LoDTensor& seq_tensor,
framework::Tensor* padding_tensor, framework::Tensor* pad_tensor,
T padding_value = static_cast<T>(0), T pad_value = static_cast<T>(0), bool norm_by_times = false,
bool norm_by_times = false, size_t lod_level = 0); size_t lod_level = 0,
OutputLayout output_layout = kBatchLengthWidth);
}; };
template <typename DeviceContext, typename T, PaddingLayout padding_layout> template <typename DeviceContext, typename T>
class UnpaddingLoDTensorFunctor { class UnpaddingLoDTensorFunctor {
public: public:
void operator()(const DeviceContext& context, void operator()(const DeviceContext& context,
framework::LoDTensor* seq_tensor, framework::LoDTensor* seq_tensor,
const framework::Tensor& padding_tensor, const framework::Tensor& pad_tensor,
bool norm_by_times = false, size_t lod_level = 0); bool norm_by_times = false, size_t lod_level = 0,
OutputLayout output_layout = kBatchLengthWidth);
}; };
} // namespace math } // namespace math
......
...@@ -46,20 +46,24 @@ void TestSequencePadding(const paddle::framework::LoD& lod, ...@@ -46,20 +46,24 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
} }
const size_t max_sequence_length = const size_t max_sequence_length =
paddle::operators::math::MaximumSequenceLength(lod, level); paddle::operators::math::MaximumSequenceLength(lod[level]);
const size_t num_sequences = lod[level].size() - 1; const size_t num_sequences = lod[level].size() - 1;
auto padding_dims = auto padding_dims =
paddle::framework::make_ddim({static_cast<int64_t>(max_sequence_length), paddle::framework::make_ddim({static_cast<int64_t>(max_sequence_length),
static_cast<int64_t>(num_sequences), static_cast<int64_t>(num_sequences),
static_cast<int64_t>(sequence_width)}); static_cast<int64_t>(sequence_width)});
padding.mutable_data<T>(padding_dims, *place); padding.mutable_data<T>(padding_dims, *place);
paddle::operators::math::PaddingLoDTensorFunctor<DeviceContext, T>()( paddle::operators::math::PaddingLoDTensorFunctor<DeviceContext, T>()(
*context, seq, &padding, false); *context, seq, &padding, 0, false, 0,
paddle::operators::math::kLengthBatchWidth);
seq_back.set_lod(lod); seq_back.set_lod(lod);
seq_back.mutable_data<T>(seq_dims, *place); seq_back.mutable_data<T>(seq_dims, *place);
paddle::operators::math::UnpaddingLoDTensorFunctor<DeviceContext, T>()( paddle::operators::math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
*context, &seq_back, padding, false); *context, &seq_back, padding, false, 0,
paddle::operators::math::kLengthBatchWidth);
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
cpu_seq_back = seq_back; cpu_seq_back = seq_back;
......
...@@ -54,7 +54,7 @@ class SequencePadOp : public framework::OperatorWithKernel { ...@@ -54,7 +54,7 @@ class SequencePadOp : public framework::OperatorWithKernel {
seq_num = x_abs_offset.size() - 1; seq_num = x_abs_offset.size() - 1;
for (size_t i = 1; i <= seq_num; ++i) { for (int64_t i = 1; i <= seq_num; ++i) {
int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1]; int64_t seq_len = x_abs_offset[i] - x_abs_offset[i - 1];
max_len = max_len < seq_len ? seq_len : max_len; max_len = max_len < seq_len ? seq_len : max_len;
} }
......
...@@ -155,15 +155,16 @@ class WarpCTCKernel : public framework::OpKernel<T> { ...@@ -155,15 +155,16 @@ class WarpCTCKernel : public framework::OpKernel<T> {
// warpctc needs sequences data stored in transposed padding format // warpctc needs sequences data stored in transposed padding format
Tensor warpctc_logits; Tensor warpctc_logits;
const size_t max_sequence_length = const size_t max_sequence_length =
math::MaximumSequenceLength(logits_lod, level); math::MaximumSequenceLength(logits_lod[level]);
auto warpctc_logits_dims = auto warpctc_logits_dims =
framework::make_ddim({static_cast<int64_t>(max_sequence_length), framework::make_ddim({static_cast<int64_t>(max_sequence_length),
static_cast<int64_t>(num_sequences), static_cast<int64_t>(num_sequences),
static_cast<int64_t>(sequence_width)}); static_cast<int64_t>(sequence_width)});
warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace()); warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace());
math::PaddingLoDTensorFunctor<DeviceContext, T, math::LENGTH_BATCH_WIDTH>()( math::PaddingLoDTensorFunctor<DeviceContext, T>()(
ctx.template device_context<DeviceContext>(), *logits, &warpctc_logits, ctx.template device_context<DeviceContext>(), *logits, &warpctc_logits,
false); static_cast<T>(0), false /* norm_by_times */, 0,
math::kLengthBatchWidth);
const T* warpctc_logits_data = warpctc_logits.data<T>(); const T* warpctc_logits_data = warpctc_logits.data<T>();
std::vector<int> warpctc_label_lengths(num_sequences); std::vector<int> warpctc_label_lengths(num_sequences);
...@@ -216,10 +217,9 @@ class WarpCTCGradKernel : public framework::OpKernel<T> { ...@@ -216,10 +217,9 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
logits_grad->mutable_data<T>(ctx.GetPlace()); logits_grad->mutable_data<T>(ctx.GetPlace());
bool norm_by_times = ctx.Attr<bool>("norm_by_times"); bool norm_by_times = ctx.Attr<bool>("norm_by_times");
math::UnpaddingLoDTensorFunctor<DeviceContext, T, math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
math::LENGTH_BATCH_WIDTH>()(
ctx.template device_context<DeviceContext>(), logits_grad, ctx.template device_context<DeviceContext>(), logits_grad,
*warpctc_grad, norm_by_times); *warpctc_grad, norm_by_times, 0, math::kLengthBatchWidth);
const T* loss_grad_data = loss_grad->data<T>(); const T* loss_grad_data = loss_grad->data<T>();
math::ScaleLoDTensorFunctor<DeviceContext, T>()( math::ScaleLoDTensorFunctor<DeviceContext, T>()(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册