PaddlePaddle / DeepSpeech
Commit 3adb20b4
Authored Sep 26, 2022 by Hui Zhang
Parent: 46088c0a

eliminate shape and slice

Showing 5 changed files with 11 additions and 10 deletions (+11 -10)
paddlespeech/s2t/modules/conformer_convolution.py   +3 -3
paddlespeech/s2t/modules/decoder.py                 +1 -1
paddlespeech/s2t/modules/encoder.py                 +1 -1
paddlespeech/s2t/modules/loss.py                    +2 -2
paddlespeech/s2t/modules/subsampling.py             +4 -3
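Every hunk in this commit follows the same pattern: a read like paddle.shape(x)[i] is replaced by x.shape[i]. The short sketch below is not part of the commit; it only illustrates, assuming a Paddle 2.x dynamic-graph session, why the commit title speaks of eliminating shape and slice: indexing the tensor returned by paddle.shape goes through a slice and shows up as extra operators in an exported graph, whereas x.shape[i] is a plain Python value in dygraph code.

import paddle

x = paddle.zeros([4, 80, 39])

# paddle.shape(x) returns a Tensor holding the runtime shape; indexing it
# goes through a slice, which appears as extra shape/slice ops on export.
t_old = paddle.shape(x)[2]   # Tensor

# x.shape is a plain Python list of ints in dynamic-graph mode, so the
# access below is ordinary Python and emits no extra ops.
t_new = x.shape[2]           # int: 39

print(type(t_old), type(t_new))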
paddlespeech/s2t/modules/conformer_convolution.py

@@ -127,11 +127,11 @@ class ConvolutionModule(nn.Layer):
         x = x.transpose([0, 2, 1])  # [B, C, T]

         # mask batch padding
-        if paddle.shape(mask_pad)[2] > 0:  # time > 0
+        if mask_pad.shape[2] > 0:  # time > 0
             x = x.masked_fill(mask_pad, 0.0)

         if self.lorder > 0:
-            if paddle.shape(cache)[2] == 0:  # cache_t == 0
+            if cache.shape[2] == 0:  # cache_t == 0
                 x = nn.functional.pad(
                     x, [self.lorder, 0], 'constant', 0.0, data_format='NCL')
             else:
@@ -161,7 +161,7 @@ class ConvolutionModule(nn.Layer):
         x = self.pointwise_conv2(x)
         # mask batch padding
-        if paddle.shape(mask_pad)[2] > 0:  # time > 0
+        if mask_pad.shape[2] > 0:  # time > 0
             x = x.masked_fill(mask_pad, 0.0)
         x = x.transpose([0, 2, 1])  # [B, T, C]
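For orientation, the checks kept as context above rely on a zero-size placeholder convention: an "empty" padding mask or convolution cache is passed as a tensor whose time dimension is 0, and the .shape[2] test selects the corresponding branch. A minimal sketch of that convention (illustrative only; the shapes and the 256-channel size are assumptions, not taken from the repo):

import paddle

mask_pad = paddle.zeros([1, 1, 0], dtype='bool')  # placeholder: no padding mask supplied
cache = paddle.zeros([1, 256, 0])                 # placeholder: no conv cache yet

if mask_pad.shape[2] > 0:   # time > 0 -> a real mask was supplied
    print("apply masked_fill")
else:
    print("skip masking")   # branch taken for the zero-size placeholder

if cache.shape[2] == 0:     # cache_t == 0 -> first chunk, nothing cached yet
    print("left-pad x with self.lorder zeros instead of concatenating the cache")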
paddlespeech/s2t/modules/decoder.py

@@ -243,7 +243,7 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
         ]
         # batch decoding
-        ys_mask = subsequent_mask(paddle.shape(ys)[-1]).unsqueeze(0)  # (B,L,L)
+        ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0)  # (B,L,L)
         xs_mask = make_xs_mask(xs).unsqueeze(1)  # (B,1,T)
         logp, states = self.forward_one_step(
             xs, xs_mask, ys, ys_mask, cache=batch_state)
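subsequent_mask here builds the causal mask used for batch decoding. The stand-in below is only a sketch of that behaviour (a lower-triangular boolean mask built with paddle.tril), not the repository's implementation, shown together with the ys.shape[-1] call pattern from the new line:

import paddle

def subsequent_mask_sketch(size: int) -> paddle.Tensor:
    # Position i may attend to positions <= i (lower-triangular mask).
    return paddle.tril(paddle.ones([size, size])).astype('bool')

ys = paddle.randint(0, 100, [2, 5])                          # (B, L) partial hypotheses
ys_mask = subsequent_mask_sketch(ys.shape[-1]).unsqueeze(0)  # (1, L, L), broadcast over B
print(ys_mask.shape)                                         # [1, 5, 5]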
paddlespeech/s2t/modules/encoder.py

@@ -246,7 +246,7 @@ class BaseEncoder(nn.Layer):
             # tensor zeros([0,0,0,0]) support [i:i+1] slice, will return zeros([0,0,0,0]) tensor
             # raw code as below:
             # att_cache=att_cache[i:i+1] if elayers > 0 else att_cache,
-            # cnn_cache=cnn_cache[i:i+1] if paddle.shape(cnn_cache)[0] > 0 else cnn_cache,
+            # cnn_cache=cnn_cache[i:i+1] if cnn_cache.shape[0] > 0 else cnn_cache,
             xs, _, new_att_cache, new_cnn_cache = layer(
                 xs,
                 att_mask,
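The surrounding comment states that a zeros([0, 0, 0, 0]) tensor supports an [i:i+1] slice and simply returns another zero-size tensor, which is why the per-layer cache indexing is safe before any cache has been accumulated. A quick illustrative check of that behaviour (not repository code):

import paddle

cnn_cache = paddle.zeros([0, 0, 0, 0])  # no cache accumulated yet
i = 3
sliced = cnn_cache[i:i + 1]             # no error: still a zero-size tensor
print(sliced.shape)                     # [0, 0, 0, 0]
print(cnn_cache.shape[0] > 0)           # False -> pass the (empty) cache through unchanged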
paddlespeech/s2t/modules/loss.py

@@ -85,7 +85,7 @@ class CTCLoss(nn.Layer):
         Returns:
             [paddle.Tensor]: scalar. If reduction is 'none', then (N), where N = \text{batch size}.
         """
-        B = paddle.shape(logits)[0]
+        B = logits.shape[0]
         # warp-ctc need logits, and do softmax on logits by itself
         # warp-ctc need activation with shape [T, B, V + 1]
         # logits: (B, L, D) -> (L, B, D)
@@ -158,7 +158,7 @@ class LabelSmoothingLoss(nn.Layer):
         Returns:
             loss (paddle.Tensor) : The KL loss, scalar float value
         """
-        B, T, D = paddle.shape(x)
+        B, T, D = x.shape
         assert D == self.size
         x = x.reshape((-1, self.size))
         target = target.reshape([-1])
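The comments in the first hunk describe the layout warp-ctc expects: activations of shape [T, B, V + 1], so the (B, L, D) logits are transposed to (L, B, D) after reading B from the batch dimension. A hedged sketch of just that step (illustrative shapes, not the repository's CTCLoss.forward):

import paddle

logits = paddle.randn([8, 120, 5000])  # (B, L, D) acoustic model output
B = logits.shape[0]                    # plain int after this commit
logits = logits.transpose([1, 0, 2])   # (L, B, D): the layout warp-ctc expects
print(B, logits.shape)                 # 8 [120, 8, 5000]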
paddlespeech/s2t/modules/subsampling.py

@@ -192,8 +192,8 @@ class Conv2dSubsampling6(Conv2dSubsampling):
         """
         x = x.unsqueeze(1)  # (b, c, t, f)
         x = self.conv(x)
-        b, c, t, f = paddle.shape(x)
-        x = self.linear(x.transpose([0, 2, 1, 3]).reshape([b, t, c * f]))
+        b, c, t, f = x.shape
+        x = self.linear(x.transpose([0, 2, 1, 3]).reshape([b, -1, c * f]))
         x, pos_emb = self.pos_enc(x, offset)
         return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-4:3]
@@ -245,6 +245,7 @@ class Conv2dSubsampling8(Conv2dSubsampling):
         """
         x = x.unsqueeze(1)  # (b, c, t, f)
         x = self.conv(x)
-        x = self.linear(x.transpose([0, 2, 1, 3]).reshape([b, t, c * f]))
+        b, c, t, f = x.shape
+        x = self.linear(x.transpose([0, 2, 1, 3]).reshape([b, -1, c * f]))
         x, pos_emb = self.pos_enc(x, offset)
         return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2][:, :, :-2:2]
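In both subsampling hunks the reshape now passes -1 for the time dimension, so the flattening no longer consumes the t value read from the shape; reshape infers the subsampled length itself. The sketch below (example shapes are assumptions, not taken from the repo) walks through the Conv2dSubsampling6-style flattening and the stride-2-then-stride-3 mask slicing that together give the 1/6 frame rate:

import paddle

b, c, t, f = 2, 256, 33, 13  # example conv output shape (assumed)
x = paddle.randn([b, c, t, f])

# Flatten channels and features; -1 lets reshape infer the subsampled length
# instead of reusing the t read from the shape.
x = x.transpose([0, 2, 1, 3]).reshape([b, -1, c * f])
print(x.shape)  # [2, 33, 3328]

# The mask is subsampled like the frames: stride 2, then stride 3, i.e. rate 6 overall.
x_mask = paddle.ones([b, 1, 100])  # stand-in for a (B, 1, T) padding mask
print(x_mask[:, :, :-2:2][:, :, :-4:3].shape)  # [2, 1, 15]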