Unverified commit 51e147a1 authored by Li Fuchen, committed by GitHub

fix the en doc and example code of warpctc (#22325)

* Fixed warpctc, test=develop

* Set lod level of sequence_unpad's output to 1 in compile time

test=develop

* fix the en doc and example code of warpctc, test=develop, test=document_fix
Parent 805328e1
@@ -503,21 +503,23 @@ def warpctc(input,
Args:
input (Variable): The unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information, or a 3-D Tensor without LoD
information. When it is a 2-D LodTensor, it's shape is
[Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes.
(not including the blank label). When it is a 3-D Tensor, it's shape
is [max_logit_length, batch_size, num_classes + 1],
where max_logit_length is the length of the longest
information. When it is a 2-D LoDTensor, its shape is
`[Lp, num_classes + 1]`, where `Lp` is the sum of all input
sequences' length and `num_classes` is the true number of classes.
(not including the blank label). When it is a 3-D Tensor, its shape
is `[max_logit_length, batch_size, num_classes + 1]`,
where `max_logit_length` is the length of the longest
input logit sequence. The data type must be float32.
label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information or a 2-D Tensor without
LoD information. When it is a 2-D LoDTensor or 2-D Tensor,
it is of the shape [Lg, 1], where Lg is th sum of all labels' length.
The data type must be int32.
which must be a 2-D Tensor with LoD information or a 2-D Tensor without
LoD information, and must be consistent with the corresponding input.
When it is a 2-D LoDTensor, its shape is `[Lg, 1]`, where `Lg` is the sum
of all labels' length. When it is a 2-D Tensor without LoD information,
its shape is `[batch_size, max_label_length]`, where `max_label_length` is
the longest length of the label sequences. The data type must be int32.
blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1). The data type must be int32.
half-opened interval `[0, num_classes + 1)`. The data type must be int32.
norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-steps, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was
@@ -529,7 +531,7 @@ def warpctc(input,
Returns:
Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor with the shape [batch_size, 1].
which is a 2-D Tensor with the shape `[batch_size, 1]`.
The data type is the same as input.
Examples:
@@ -540,26 +542,30 @@ def warpctc(input,
import paddle.fluid as fluid
import numpy as np
predict = fluid.data(name='predict',
shape=[None, 5],
# lengths of logit sequences
seq_lens = [2,6]
# lengths of label sequences
label_lens = [2,3]
# class num
class_num = 5
logits = fluid.data(name='logits',shape=[None, class_num+1],
dtype='float32',lod_level=1)
label = fluid.data(name='label', shape=[None, 1],
dtype='int32', lod_level=1)
cost = fluid.layers.warpctc(input=predict, label=label)
cost = fluid.layers.warpctc(input=logits, label=label)
place = fluid.CPUPlace()
x=fluid.LoDTensor()
data = np.random.rand(8, 5).astype("float32")
x.set(data, place)
x.set_lod([[0,4,8]])
y=fluid.LoDTensor()
data = np.random.randint(0, 5, [4, 1]).astype("int32")
y.set(data, place)
y.set_lod([[0,2,4]])
x = fluid.create_lod_tensor(
np.random.rand(np.sum(seq_lens), class_num+1).astype("float32"),
[seq_lens], place)
y = fluid.create_lod_tensor(
np.random.randint(0, class_num, [np.sum(label_lens), 1]).astype("int32"),
[label_lens], place)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output= exe.run(feed={"predict": x,"label": y},
output= exe.run(fluid.default_main_program(),
feed={"logits": x,"label": y},
fetch_list=[cost.name])
print output
print(output)
.. code-block:: python
@@ -569,30 +575,33 @@ def warpctc(input,
# length of the longest logit sequence
max_seq_length = 5
# length of the longest label sequence
max_label_length = 3
# number of logit sequences
batch_size = None
batch_size = 16
# class num
class_num = 5
logits = fluid.data(name='logits',
shape=[max_seq_length, batch_size, 5],
shape=[max_seq_length, batch_size, class_num+1],
dtype='float32')
logits_length = fluid.data(name='logits_length', shape=[None],
dtype='int64')
label = fluid.layers.data(name='label', shape=[None, 1],
label = fluid.data(name='label', shape=[batch_size, max_label_length],
dtype='int32')
label_length = fluid.layers.data(name='labels_length', shape=[None],
label_length = fluid.data(name='labels_length', shape=[None],
dtype='int64')
cost = fluid.layers.warpctc(input=logits, label=label,
input_length=logits_length,
label_length=label_length)
place = fluid.CPUPlace()
batch_size = 2
x = np.random.rand(max_seq_length, batch_size, 5).astype("float32")
y = np.random.randint(0, 5, [max_seq_length * batch_size, 1]).astype("int32")
x = np.random.rand(max_seq_length, batch_size, class_num+1).astype("float32")
y = np.random.randint(0, class_num, [batch_size, max_label_length]).astype("int32")
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output= exe.run(feed={"logits": x,
output= exe.run(fluid.default_main_program(),
feed={"logits": x,
"label": y,
"logits_length": np.array([5, 4]).astype("int64"),
"labels_length": np.array([3, 2]).astype("int64")},
"logits_length": np.array([max_seq_length]*batch_size).astype("int64"),
"labels_length": np.array([max_label_length]*batch_size).astype("int64")},
fetch_list=[cost.name])
print(output)
"""
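For quick reference, the padded (LoD-free) usage from the updated docstring can be read as one contiguous, runnable sketch. It is assembled from the post-change second example above and assumes the same PaddlePaddle 1.x `fluid` API, run on CPU:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    max_seq_length = 5    # length of the longest logit sequence
    max_label_length = 3  # length of the longest label sequence
    batch_size = 16       # number of sequences in a batch
    class_num = 5         # number of classes, excluding the blank label

    # Padded logits: [max_logit_length, batch_size, num_classes + 1]
    logits = fluid.data(name='logits',
                        shape=[max_seq_length, batch_size, class_num + 1],
                        dtype='float32')
    logits_length = fluid.data(name='logits_length', shape=[None], dtype='int64')
    # Padded labels: [batch_size, max_label_length]
    label = fluid.data(name='label', shape=[batch_size, max_label_length],
                       dtype='int32')
    label_length = fluid.data(name='labels_length', shape=[None], dtype='int64')

    cost = fluid.layers.warpctc(input=logits, label=label,
                                input_length=logits_length,
                                label_length=label_length)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x = np.random.rand(max_seq_length, batch_size, class_num + 1).astype("float32")
    y = np.random.randint(0, class_num, [batch_size, max_label_length]).astype("int32")
    output = exe.run(fluid.default_main_program(),
                     feed={"logits": x,
                           "label": y,
                           "logits_length": np.array([max_seq_length] * batch_size).astype("int64"),
                           "labels_length": np.array([max_label_length] * batch_size).astype("int64")},
                     fetch_list=[cost.name])
    print(output)  # one CTC loss value per sequence, shape [batch_size, 1]

The variable-length path from the first example differs only in how the inputs are built: `logits` and `label` are declared with `lod_level=1` and fed as LoDTensors created by `fluid.create_lod_tensor`.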