diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 00a68bb0c44f6ae75dcdbd20dd186bb2f50f764e..81c2a4548d8812edd1b3ff4458f923a8a49d1157 100644
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -2403,6 +2403,18 @@
     func : viterbi_decode
     data_type : input
 
+- api : warpctc
+  args : (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times)
+  output : Tensor(loss), Tensor(warpctcgrad)
+  infer_meta :
+    func : WarpctcInferMeta
+  kernel :
+    func : warpctc
+    data_type: logits
+    optional: logits_length, labels_length
+  intermediate: warpctcgrad
+  backward : warpctc_grad
+
 - api : where
   args : (Tensor condition, Tensor x, Tensor y)
   output : Tensor
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index abbb23cc253faaba8f227edfa430bb416a3f78d4..47183fed746d040981f6af69bf8e961428199a51 100644
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -2376,6 +2376,18 @@
   inplace : (out_grad -> x_grad)
   backward : unsqueeze_double_grad
 
+- backward_api : warpctc_grad
+  forward : warpctc (Tensor logits, Tensor label, Tensor logits_length, Tensor labels_length, int blank, bool norm_by_times) -> Tensor(loss), Tensor(warpctcgrad)
+  args : (Tensor logits, Tensor logits_length, Tensor warpctcgrad, Tensor loss_grad, int blank, bool norm_by_times)
+  output : Tensor(logits_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [logits]
+  kernel :
+    func : warpctc_grad
+    optional : logits_length
+  no_need_buffer : logits
+
 - backward_api : where_grad
   forward : where (Tensor condition, Tensor x, Tensor y) -> Tensor(out)
   args : (Tensor condition, Tensor x, Tensor y, Tensor out_grad)
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index 3369b0c392ec33f24894427ccc320b4ba6473bfb..1a05ad495c9813054dad4c0787215cc05e51d231 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -2049,7 +2049,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
                       const MetaTensor& labels_length,
                       int blank,
                       bool norm_by_times,
-                      MetaTensor* warpctc_grad,
+                      MetaTensor* warpctcgrad,
                       MetaTensor* loss) {
   auto logits_dims = logits.dims();
   int sequence_width = 0;
diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
index 0ec71e86893c3ce072db592f9de4ea4526cd6560..06d2530cffa2c19f99baaa0a1292b0672c9a018d 100644
--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -358,7 +358,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
                       const MetaTensor& labels_length,
                       int blank,
                       bool norm_by_times,
-                      MetaTensor* warpctc_grad,
+                      MetaTensor* warpctcgrad,
                       MetaTensor* loss);
 
 void WhereInferMeta(const MetaTensor& condition,
diff --git a/paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h
index b07628c9814760ef3ef76b20f6eba9da81ecd961..0c20fd1c2fe7e7dcbed00863430edc880721b278 100644
--- a/paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h
@@ -29,33 +29,33 @@ namespace phi {
 
 template <typename T, typename Context>
 void WarpctcGradKernel(const Context& dev_ctx,
-                       const DenseTensor& warpctc_grad,
                        const DenseTensor& logits,
-                       const DenseTensor& loss_grad,
                        const paddle::optional<DenseTensor>& logits_length,
+                       const DenseTensor& warpctcgrad,
+                       const DenseTensor& loss_grad,
                        int blank,
                        bool norm_by_times,
                        DenseTensor* logits_grad) {
   dev_ctx.template Alloc<T>(logits_grad);
 
   if (logits_length.is_initialized()) {
-    int max_seq_length = warpctc_grad.dims()[0];  // Tmax
-    int num_sequences = warpctc_grad.dims()[1];   // B
-    int seq_width = warpctc_grad.dims()[2];       // D
+    int max_seq_length = warpctcgrad.dims()[0];  // Tmax
+    int num_sequences = warpctcgrad.dims()[1];   // B
+    int seq_width = warpctcgrad.dims()[2];       // D
 
     // B
     auto logits_len_e = EigenTensor<int64_t, 1>::From(*logits_length);
     // (B, 1)
     auto loss_grad_e = EigenTensor<T, 2>::From(loss_grad);
     // (T, B, D)
-    auto warpctc_grad_e = EigenTensor<T, 3>::From(warpctc_grad);
+    auto warpctcgrad_e = EigenTensor<T, 3>::From(warpctcgrad);
 
     auto logits_grad_e = EigenTensor<T, 3>::From(*logits_grad);
 
     Eigen::DSizes<int, 3> grad_shape(1, num_sequences, 1);
     Eigen::DSizes<int, 3> bcast(max_seq_length, 1, seq_width);
-    auto logits_g = warpctc_grad_e *
-                    loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
+    auto logits_g =
+        warpctcgrad_e * loss_grad_e.reshape(grad_shape).broadcast(bcast).eval();
 
     auto* place = dev_ctx.eigen_device();
     if (norm_by_times) {
@@ -71,7 +71,7 @@ void WarpctcGradKernel(const Context& dev_ctx,
   } else {
     paddle::operators::math::UnpaddingLoDTensorFunctor<Context, T>()(
         dev_ctx,
-        warpctc_grad,
+        warpctcgrad,
         logits_grad,
         -1,
         0,
diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
index c8f8d28ce11ed3bbda5cd96b6cd8aff6fdf2f0af..efa9bd1ae064d8975e2e0aecf66ad64510d2eb28 100644
--- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h
+++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h
@@ -233,8 +233,8 @@ void WarpctcKernel(const Context& dev_ctx,
                    const paddle::optional<DenseTensor>& labels_length,
                    int blank,
                    bool norm_by_times,
-                   DenseTensor* warpctc_grad,
-                   DenseTensor* loss) {
+                   DenseTensor* loss,
+                   DenseTensor* warpctcgrad) {
   size_t num_sequences, sequence_width, max_sequence_length;
   paddle::framework::Vector<size_t> logits_lod;
   paddle::framework::Vector<size_t> label_lod;
@@ -383,11 +383,11 @@ void WarpctcKernel(const Context& dev_ctx,
 
   // warpctc computes loss and gradient in one call, gradient data also stored
   // in batch format
-  warpctc_grad->Resize(warpctc_logits.dims());
-  T* warpctc_grad_data = dev_ctx.template Alloc<T>(warpctc_grad);
+  warpctcgrad->Resize(warpctc_logits.dims());
+  T* warpctcgrad_data = dev_ctx.template Alloc<T>(warpctcgrad);
 
   phi::funcs::SetConstant<Context, T>()(
-      dev_ctx, warpctc_grad, static_cast<T>(0));
+      dev_ctx, warpctcgrad, static_cast<T>(0));
 
   // warpctc accesses labels in CPU memory
   DenseTensor warpctc_label;
@@ -439,7 +439,7 @@ void WarpctcKernel(const Context& dev_ctx,
   T* warpctc_loss_data = dev_ctx.template HostAlloc<T>(&warpctc_loss);
   WarpCTCFunctor<Context, T>()(dev_ctx,
                                warpctc_logits_data,
-                               warpctc_grad_data,
+                               warpctcgrad_data,
                                warpctc_label_data,
                                warpctc_label_lengths.data(),
                                warpctc_logits_lengths.data(),
diff --git a/paddle/phi/kernels/warpctc_grad_kernel.h b/paddle/phi/kernels/warpctc_grad_kernel.h
index 8a8251aabe46804bef2be35391a8cfc265186706..cc87130c7f622410a1162ac71ded9b0e6cce8e98 100644
--- a/paddle/phi/kernels/warpctc_grad_kernel.h
+++ b/paddle/phi/kernels/warpctc_grad_kernel.h
@@ -21,10 +21,10 @@ namespace phi {
 
 template <typename T, typename Context>
 void WarpctcGradKernel(const Context& dev_ctx,
-                       const DenseTensor& warpctc_grad,
                        const DenseTensor& logits,
-                       const DenseTensor& loss_grad,
                        const paddle::optional<DenseTensor>& logits_length,
+                       const DenseTensor& warpctcgrad,
+                       const DenseTensor& loss_grad,
                        int blank,
                        bool norm_by_times,
                        DenseTensor* logits_grad);
diff --git a/paddle/phi/kernels/warpctc_kernel.h b/paddle/phi/kernels/warpctc_kernel.h
index 0b9e9eb87f675952627e06561fac3c98a7b6de51..a4b1defd050c775e82271e2668fafe96a90237ae 100644
--- a/paddle/phi/kernels/warpctc_kernel.h
+++ b/paddle/phi/kernels/warpctc_kernel.h
@@ -27,7 +27,7 @@ void WarpctcKernel(const Context& dev_ctx,
                    const paddle::optional<DenseTensor>& labels_length,
                    int blank,
                    bool norm_by_times,
-                   DenseTensor* warpctc_grad,
-                   DenseTensor* loss);
+                   DenseTensor* loss,
+                   DenseTensor* warpctcgrad);
 
 }  // namespace phi
diff --git a/paddle/phi/ops/compat/warpctc_sig.cc b/paddle/phi/ops/compat/warpctc_sig.cc
index ac3dc366ad8c624f7707f98249c0897a6b70e0c7..44878aabff2833a410c8a8fcba598ca6e2b4da31 100644
--- a/paddle/phi/ops/compat/warpctc_sig.cc
+++ b/paddle/phi/ops/compat/warpctc_sig.cc
@@ -20,13 +20,13 @@ KernelSignature WarpctcOpArgumentMapping(const ArgumentMappingContext& ctx) {
   return KernelSignature("warpctc",
                          {"Logits", "Label", "LogitsLength", "LabelLength"},
                          {"blank", "norm_by_times"},
-                         {"WarpCTCGrad", "Loss"});
+                         {"Loss", "WarpCTCGrad"});
 }
 
 KernelSignature WarpctcGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature("warpctc_grad",
-                         {"WarpCTCGrad", "Logits", "Loss@GRAD", "LogitsLength"},
+                         {"Logits", "LogitsLength", "WarpCTCGrad", "Loss@GRAD"},
                          {"blank", "norm_by_times"},
                          {"Logits@GRAD"});
 }
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 00c2aa56fa3e06d064953923b3e0c80b2792cd8b..0ee2338ac1ae154d7fc04b37cd2d44c5c1c7c12a 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -546,6 +546,15 @@ def warpctc(input,
                           fetch_list=[cost.name])
             print(output)
     """
+    if in_dygraph_mode():
+        if input_length is None or label_length is None:
+            raise ValueError(
+                "input_length and label_length must not be None in dygraph mode!"
+            )
+        loss_out = _C_ops.final_state_warpctc(input, label, input_length,
+                                              label_length, blank,
+                                              norm_by_times)
+        return loss_out
     if _non_static_mode():
         if input_length is None or label_length is None:
             raise ValueError(
diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
index 1d9d9a180d014f25f525b724842b00d0a64f3eb5..ad31bbd58a9c07f59e3534cfd2cbf692574d1ee9 100644
--- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py
@@ -191,6 +191,16 @@ class CTCForward(object):
         return self.loss
 
 
+def python_api(logits,
+               label,
+               logits_length=None,
+               labels_length=None,
+               blank=0,
+               norm_by_times=False):
+    return paddle.fluid.layers.warpctc(logits, label, blank, norm_by_times,
+                                       logits_length, labels_length)
+
+
 class TestWarpCTCOp(OpTest):
 
     def config(self):
@@ -280,6 +290,8 @@ class TestWarpCTCOpWithPadding(OpTest):
 
     def setUp(self):
         self.op_type = "warpctc"
+        self.python_api = python_api
+        self.python_out_sig = ["Loss"]
         self.config()
 
         logits = np.random.uniform(
@@ -344,7 +356,7 @@ class TestWarpCTCOpWithPadding(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
@@ -387,6 +399,8 @@ class TestWarpCTCOpFp64(OpTest):
 
     def setUp(self):
         self.op_type = "warpctc"
+        self.python_api = python_api
+        self.python_out_sig = ["Loss"]
         self.config()
 
         logits = np.random.uniform(
@@ -451,11 +465,11 @@ class TestWarpCTCOpFp64(OpTest):
         }
 
     def test_check_output(self):
-        self.check_output()
+        self.check_output(check_eager=True)
 
     def test_check_grad(self):
         self.outputs['WarpCTCGrad'] = self.gradient
-        self.check_grad(["Logits"], "Loss")
+        self.check_grad(["Logits"], "Loss", check_eager=True)
 
 
 class TestWarpCTCOpError(unittest.TestCase):
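For reference, a minimal dygraph sketch (not part of the patch) of the path this change enables: under in_dygraph_mode(), paddle.fluid.layers.warpctc now dispatches to _C_ops.final_state_warpctc, and backward() exercises the reordered warpctc_grad signature. The shapes follow the padded layout used in test_warpctc_op.py; the concrete sizes and variable names are illustrative assumptions only.

import numpy as np
import paddle

# Dygraph is the default execution mode in Paddle 2.x, so no explicit switch is needed.
Tmax, B, Lmax, num_classes, blank = 5, 2, 3, 4, 0

# Padded logits: (max_logit_length, batch_size, num_classes + 1).
logits = paddle.to_tensor(
    np.random.uniform(0.1, 1.0, [Tmax, B, num_classes + 1]).astype('float32'),
    stop_gradient=False)
# Padded labels: (batch_size, max_label_length); class ids exclude the blank (0).
label = paddle.to_tensor(
    np.random.randint(1, num_classes + 1, [B, Lmax]).astype('int32'))
logits_length = paddle.to_tensor(np.array([Tmax] * B).astype('int64'))
labels_length = paddle.to_tensor(np.array([Lmax] * B).astype('int64'))

# In dygraph mode this now routes to _C_ops.final_state_warpctc.
loss = paddle.fluid.layers.warpctc(logits, label, blank, False,
                                   logits_length, labels_length)
loss.sum().backward()  # runs warpctc_grad and fills logits.grad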