From 4c7d196d832981b47a6fd428c3c7aa75029271fb Mon Sep 17 00:00:00 2001 From: whs Date: Wed, 23 Oct 2019 16:12:59 +0800 Subject: [PATCH] Add norm_by_time for warpctc op in padding mode. (#17580) --- paddle/fluid/operators/warpctc_op.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index 98c31e30dc5..c6d494ff12b 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -267,6 +267,11 @@ class WarpCTCGradKernel : public framework::OpKernel { size_t num_sequences = warpctc_grad->dims()[1]; size_t seq_width = warpctc_grad->dims()[2]; + auto* logits_length = ctx.Input("LogitsLength"); + framework::Tensor logits_length_cpu; + framework::TensorCopy(*logits_length, platform::CPUPlace(), + &logits_length_cpu); + LoDTensor logits_grad_with_lod; auto logits_grad_dims = framework::make_ddim({static_cast(max_seq_length), @@ -289,10 +294,14 @@ class WarpCTCGradKernel : public framework::OpKernel { const T* loss_grad_data = loss_grad_cpu.data(); for (size_t i = 0; i < max_seq_length; ++i) { for (size_t j = 0; j < num_sequences; ++j) { + T scale = 1.0; + if (norm_by_times) { + scale = 1.0 / static_cast(logits_length_cpu.data()[j]); + } for (size_t k = 0; k < seq_width; ++k) { size_t idx = i * (num_sequences * seq_width) + j * seq_width + k; scaled_logits_data[idx] = - logits_grad_cpu_data[idx] * loss_grad_data[j]; + logits_grad_cpu_data[idx] * loss_grad_data[j] * scale; } } } -- GitLab