diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h
index e752aa58979dddba4d010071d2c4b5dc3e0c6756..5580ee5374658c3b7b8e31962cd50f1d72113ba0 100644
--- a/paddle/fluid/operators/math/sequence_padding.h
+++ b/paddle/fluid/operators/math/sequence_padding.h
@@ -37,6 +37,16 @@ inline static size_t MaximumSequenceLength(
   return max_seq_len;
 }
 
+// Sums the lengths of all sequences described by a LoD offset vector
+// (offsets are cumulative, so each length is offset[i+1] - offset[i]).
+inline static size_t TotalSequenceLength(
+    const framework::Vector<size_t>& seq_offset) {
+  size_t seq_num = seq_offset.size() - 1;
+  size_t total_seq_len = 0;
+  for (size_t i = 0; i < seq_num; ++i) {
+    total_seq_len += seq_offset[i + 1] - seq_offset[i];
+  }
+  return total_seq_len;
+}
+
 inline static void CheckDims(const framework::DDim& seq_tensor_dims,
                              const framework::DDim& pad_tensor_dims,
                              const framework::Vector<size_t>& seq_offset,
diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h
index c6d494ff12bcfb70579ee1664327b66383374d9f..8f5e08f708a9b82b8af12d339bd697969ecf9d69 100644
--- a/paddle/fluid/operators/warpctc_op.h
+++ b/paddle/fluid/operators/warpctc_op.h
@@ -230,8 +230,35 @@ class WarpCTCKernel : public framework::OpKernel<T> {
                    static_cast<T>(0));
 
     // warpctc accesses labels in CPU memory
-    Tensor warpctc_label;
-    TensorCopySync(*label, platform::CPUPlace(), &warpctc_label);
+    LoDTensor warpctc_label;
+    if (ctx.HasInput("LogitsLength")) {
+      warpctc_label.mutable_data<int>(
+          {static_cast<int64_t>(math::TotalSequenceLength(label_lod)), 1},
+          platform::CPUPlace());
+      std::vector<framework::Vector<size_t>> lod;
+      lod.push_back(label_lod);
+      warpctc_label.set_lod(lod);
+
+      if (platform::is_cpu_place(ctx.GetPlace())) {
+        math::UnpaddingLoDTensorFunctor<DeviceContext, int>()(
+            ctx.template device_context<DeviceContext>(), *label,
+            &warpctc_label, label->dims()[1] /*pad_seq_len*/, 0 /*lod_level*/,
+            false /*norm_by_times*/, math::kBatchLengthWidth);
+      } else {
+        LoDTensor gpu_label;
+        gpu_label.mutable_data<int>(
+            {static_cast<int64_t>(math::TotalSequenceLength(label_lod)), 1},
+            ctx.GetPlace());
+        gpu_label.set_lod(lod);
+        math::UnpaddingLoDTensorFunctor<DeviceContext, int>()(
+            ctx.template device_context<DeviceContext>(), *label, &gpu_label,
+            label->dims()[1] /*pad_seq_len*/, 0 /*lod_level*/,
+            false /*norm_by_times*/, math::kBatchLengthWidth);
+        TensorCopySync(gpu_label, platform::CPUPlace(), &warpctc_label);
+      }
+    } else {
+      TensorCopySync(*label, platform::CPUPlace(), &warpctc_label);
+    }
     const int* warpctc_label_data = warpctc_label.data<int>();
 
     // warpctc stores loss in CPU memory
diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
index 74bb5ea2b02824e2605fb474229802c07e8e8d2c..3bd074f4d01fa37959f240b2add1491fe377a1f8 100644
--- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
@@ -303,7 +303,7 @@ class TestWarpCTCOpWithPadding(OpTest):
 
         self.inputs = {
             "Logits": new_logits,
-            "Label": labels,
+            "Label": new_labels,
             "LogitsLength": self.logits_length,
             "LabelLength": self.labels_length
         }