Support dy2st for conformer

294b7b00 · 0x45f · e8184927 · 294b7b00 · 294b7b00
隐藏空白更改
内联 | 并排

Showing 2 changed files with 16 additions and 9 deletions

paddlespeech/s2t/__init__.py paddlespeech/s2t/__init__.py +14 -7

paddlespeech/s2t/modules/encoder.py paddlespeech/s2t/modules/encoder.py +2 -2

未找到文件。
--- a/paddlespeech/s2t/__init__.py
+++ b/paddlespeech/s2t/__init__.py
@@ -159,9 +159,7 @@ if not hasattr(paddle.Tensor, 'new_full'):
 def eq(xs: paddle.Tensor, ys: Union[paddle.Tensor, float]) -> paddle.Tensor:
    if convert_dtype_to_string(xs.dtype) == paddle.bool:
        xs = xs.astype(paddle.int)
-    return xs.equal(
-        paddle.to_tensor(
-            ys, dtype=convert_dtype_to_string(xs.dtype), place=xs.place))
+    return xs.equal(ys)


 if not hasattr(paddle.Tensor, 'eq'):
@@ -219,13 +217,22 @@ def is_broadcastable(shp1, shp2):
    return True


+def broadcast_shape(shp1, shp2):
+    result = []
+    for a, b in zip(shp1[::-1], shp2[::-1]):
+        result.append(max(a, b))
+    return result[::-1]
+
+
 def masked_fill(xs: paddle.Tensor,
                mask: paddle.Tensor,
                value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) is True, (xs.shape,
-                                                            mask.shape)
-    bshape = paddle.broadcast_shape(xs.shape, mask.shape)
-    mask = mask.broadcast_to(bshape)
+    bshape = broadcast_shape(xs.shape, mask.shape)
+    mask.stop_gradient = True
+    tmp = paddle.ones(shape=[len(bshape)], dtype='int32')
+    for index in range(len(bshape)):
+        tmp[index] = bshape[index]
+    mask = mask.broadcast_to(tmp)
    trues = paddle.ones_like(xs) * value
    xs = paddle.where(mask, trues, xs)
    return xs

--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@@ -253,8 +253,8 @@ class BaseEncoder(nn.Layer):
            # cnn_cache[i] = (B=1, hidden-dim, cache_t2)
            xs, _, new_att_cache, new_cnn_cache = layer(
                xs, att_mask, pos_emb,
-                att_cache=att_cache[i:i+1] if elayers > 0 else att_cache,
-                cnn_cache=cnn_cache[i] if paddle.shape(cnn_cache)[0] > 0 else cnn_cache,
+                att_cache=att_cache if elayers == 0 else att_cache[i:i+1],
+                cnn_cache=cnn_cache if paddle.shape(cnn_cache)[0] == 0 else cnn_cache[i],
            )
            # new_att_cache = (1, head, attention_key_size, d_k*2)
            # new_cnn_cache = (B=1, hidden-dim, cache_t2)