From c128ab701974ceba98491414c66c6871adbd7f16 Mon Sep 17 00:00:00 2001 From: Li Fuchen Date: Wed, 8 Apr 2020 09:57:48 +0800 Subject: [PATCH] fix the en doc and example code of warpctc (#22325) (#23532) * Fixed warpctc, test=develop * Set lod level of sequence_unpad's output to 1 in compile time test=develop * fix the en doc and example code of warpctc, test=develop, test=document_fix --- python/paddle/fluid/layers/loss.py | 107 ++++++++++++++++------------- 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py index 1a541cfa969..bfff64e5034 100644 --- a/python/paddle/fluid/layers/loss.py +++ b/python/paddle/fluid/layers/loss.py @@ -503,21 +503,23 @@ def warpctc(input, Args: input (Variable): The unscaled probabilities of variable-length sequences, which is a 2-D Tensor with LoD information, or a 3-D Tensor without Lod - information. When it is a 2-D LodTensor, it's shape is - [Lp, num_classes + 1], where Lp is the sum of all input - sequences' length and num_classes is the true number of classes. - (not including the blank label). When it is a 3-D Tensor, it's shape - is [max_logit_length, batch_size, num_classes + 1], - where max_logit_length is the length of the longest + information. When it is a 2-D LodTensor, its shape is + `[Lp, num_classes + 1]`, where `Lp` is the sum of all input + sequences' length and `num_classes` is the true number of classes. + (not including the blank label). When it is a 3-D Tensor, its shape + is `[max_logit_length, batch_size, num_classes + 1]`, + where `max_logit_length` is the longest length of input logit sequence. The data type must be float32. label (Variable): The ground truth of variable-length sequence, - which is a 2-D Tensor with LoD information or a 2-D Tensor without - LoD information. When it is a 2-D LoDTensor or 2-D Tensor, - it is of the shape [Lg, 1], where Lg is th sum of all labels' length. - The data type must be int32. 
+ which must be a 2-D Tensor with LoD information or a 3-D Tensor without + LoD information, needs to be consistent with the corresponding input. + When it is a 2-D LoDTensor, its shape is `[Lg, 1]`, where `Lg` is the sum + of all labels' length. When it is a 3-D Tensor, its shape is + `[batch_size, max_label_length]`, where `max_label_length` is the longest + length of label sequence. Data type must be int32. blank (int, default 0): The blank label index of Connectionist Temporal Classification (CTC) loss, which is in the - half-opened interval [0, num_classes + 1). The data type must be int32. + half-opened interval `[0, num_classes + 1)`. The data type must be int32. norm_by_times(bool, default false): Whether to normalize the gradients by the number of time-step, which is also the sequence's length. There is no need to normalize the gradients if warpctc layer was @@ -529,7 +531,7 @@ def warpctc(input, Returns: Variable: The Connectionist Temporal Classification (CTC) loss, - which is a 2-D Tensor with the shape [batch_size, 1]. + which is a 2-D Tensor with the shape `[batch_size, 1]`. The date type is the same as input. 
Examples: @@ -539,60 +541,67 @@ def warpctc(input, # using LoDTensor import paddle.fluid as fluid import numpy as np - - predict = fluid.data(name='predict', - shape=[None, 5], - dtype='float32',lod_level=1) + + # lengths of logit sequences + seq_lens = [2,6] + # lengths of label sequences + label_lens = [2,3] + # class num + class_num = 5 + + logits = fluid.data(name='logits',shape=[None, class_num+1], + dtype='float32',lod_level=1) label = fluid.data(name='label', shape=[None, 1], - dtype='int32', lod_level=1) - cost = fluid.layers.warpctc(input=predict, label=label) + dtype='int32', lod_level=1) + cost = fluid.layers.warpctc(input=logits, label=label) place = fluid.CPUPlace() - x=fluid.LoDTensor() - data = np.random.rand(8, 5).astype("float32") - x.set(data, place) - x.set_lod([[0,4,8]]) - y=fluid.LoDTensor() - data = np.random.randint(0, 5, [4, 1]).astype("int32") - y.set(data, place) - y.set_lod([[0,2,4]]) + x = fluid.create_lod_tensor( + np.random.rand(np.sum(seq_lens), class_num+1).astype("float32"), + [seq_lens], place) + y = fluid.create_lod_tensor( + np.random.randint(0, class_num, [np.sum(label_lens), 1]).astype("int32"), + [label_lens], place) exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - output= exe.run(feed={"predict": x,"label": y}, - fetch_list=[cost.name]) - print output + output= exe.run(fluid.default_main_program(), + feed={"logits": x,"label": y}, + fetch_list=[cost.name]) + print(output) .. 
code-block:: python # using Tensor import paddle.fluid as fluid import numpy as np - + # length of the longest logit sequence max_seq_length = 5 + #length of the longest label sequence + max_label_length = 3 # number of logit sequences - batch_size = None - logits = fluid.data(name='logits', - shape=[max_seq_length, batch_size, 5], - dtype='float32') + batch_size = 16 + # class num + class_num = 5 + logits = fluid.data(name='logits', + shape=[max_seq_length, batch_size, class_num+1], + dtype='float32') logits_length = fluid.data(name='logits_length', shape=[None], - dtype='int64') - label = fluid.layers.data(name='label', shape=[None, 1], - dtype='int32') - label_length = fluid.layers.data(name='labels_length', shape=[None], - dtype='int64') + dtype='int64') + label = fluid.data(name='label', shape=[batch_size, max_label_length], + dtype='int32') + label_length = fluid.data(name='labels_length', shape=[None], + dtype='int64') cost = fluid.layers.warpctc(input=logits, label=label, - input_length=logits_length, - label_length=label_length) + input_length=logits_length, + label_length=label_length) place = fluid.CPUPlace() - batch_size = 2 - x = np.random.rand(max_seq_length, batch_size, 5).astype("float32") - y = np.random.randint(0, 5, [max_seq_length * batch_size, 1]).astype("int32") + x = np.random.rand(max_seq_length, batch_size, class_num+1).astype("float32") + y = np.random.randint(0, class_num, [batch_size, max_label_length]).astype("int32") exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - output= exe.run(feed={"logits": x, + output= exe.run(fluid.default_main_program(), + feed={"logits": x, "label": y, - "logits_length": np.array([5, 4]).astype("int64"), - "labels_length": np.array([3, 2]).astype("int64")}, + "logits_length": np.array([max_seq_length]*batch_size).astype("int64"), + "labels_length": np.array([max_label_length]*batch_size).astype("int64")}, fetch_list=[cost.name]) print(output) """ -- GitLab