Merge pull request #7438 from wanghaoshuang/ctc_py

Add python API for Warp-CTC op

Merge pull request #7438 from wanghaoshuang/ctc_py
Add python API for Warp-CTC op
2360dd20 · whs · GitHub · c73f00fe · 4de6cbd3 · 2360dd20
显示空白变更内容
内联并排

Showing with 73 addition and 43 deletion

python/paddle/v2/fluid/layers/nn.py python/paddle/v2/fluid/layers/nn.py +73 -43

未找到文件。
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -22,36 +22,13 @@ from ..param_attr import ParamAttr
 from tensor import concat

 __all__ = [
-    'fc',
-    'embedding',
-    'dynamic_lstm',
-    'gru_unit',
-    'linear_chain_crf',
-    'crf_decoding',
-    'cos_sim',
-    'cross_entropy',
-    'square_error_cost',
-    'accuracy',
-    'chunk_eval',
-    'sequence_conv',
-    'conv2d',
-    'sequence_pool',
-    'pool2d',
-    'batch_norm',
-    'beam_search_decode',
-    'conv2d_transpose',
-    'sequence_expand',
-    'lstm_unit',
-    'reduce_sum',
-    'reduce_mean',
-    'reduce_max',
-    'reduce_min',
-    'sequence_first_step',
-    'sequence_last_step',
-    'dropout',
-    'split',
-    'l2_normalize',
-    'matmul',
+    'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
+    'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
+    'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
+    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
+    'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
+    'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
+    'l2_normalize', 'matmul', 'warpctc'
 ]


@@ -1788,3 +1765,56 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
        attrs={'transpose_X': transpose_x,
               'transpose_Y': transpose_y})
    return out
+
+
+def warpctc(input, label, blank=0, norm_by_times=False, **kwargs):
+    """
+    An operator integrating the open source Warp-CTC library
+    (https://github.com/baidu-research/warp-ctc)
+    to compute Connectionist Temporal Classification (CTC) loss.
+    It can be aliased as softmax with CTC, since a native softmax activation is
+    interated to the Warp-CTC library, to to normlize values for each row of the
+    input tensor.
+
+    Args:
+       input(Variable): (LodTensor, default: LoDTensor<float>),
+         the unscaled probabilities of variable-length sequences,
+         which is a 2-D Tensor with LoD information.
+         It's shape is [Lp, num_classes + 1], where Lp is the sum of all input
+         sequences' length and num_classes is the true number of classes.
+         (not including the blank label).
+       label(Variable): (LodTensor, default: LoDTensor<int>), the ground truth
+         of variable-length sequence, which is a 2-D Tensor with LoD
+         information. It is of the shape [Lg, 1], where Lg is th sum of
+         all labels' length.
+       blank: (int, default: 0), the blank label index of Connectionist
+         Temporal Classification (CTC) loss, which is in the
+         half-opened interval [0, num_classes + 1).
+       norm_by_times: (bool, default: false), whether to normalize
+       the gradients by the number of time-step,which is also the
+       sequence's length. There is no need to normalize the gradients
+       if warpctc layer was follewed by a mean_op.
+
+    Returns:
+        Variable: The Connectionist Temporal Classification (CTC) loss,
+        which is a 2-D Tensor of the shape [batch_size, 1].
+
+    Examples:
+        .. code-block:: python
+            y = layers.data(name='y', shape=[11, 8], dtype='float32', lod_level=1)
+            y_predict = layers.data(name='y_predict', shape=[11, 1], dtype='float32')
+            cost = layers.warpctc(input=y_predict, label=y)
+
+    """
+    helper = LayerHelper('warpctc', **kwargs)
+    loss_out = helper.create_tmp_variable(dtype=input.dtype)
+    grad_out = helper.create_tmp_variable(dtype=input.dtype)
+    helper.append_op(
+        type='warpctc',
+        inputs={'Logits': [input],
+                'Label': [label]},
+        outputs={'WarpCTCGrad': [grad_out],
+                 'Loss': [loss_out]},
+        attrs={'blank': blank,
+               'norm_by_times': norm_by_times})
+    return loss_out