From ed80b0e2c3a01382effb8f0f85a4a135679ca980 Mon Sep 17 00:00:00 2001 From: tianhao zhang <15600919271@163.com> Date: Tue, 30 Aug 2022 12:41:59 +0000 Subject: [PATCH] fix multigpu training test=asr --- paddlespeech/s2t/models/u2/u2.py | 4 +-- paddlespeech/s2t/modules/attention.py | 35 +++++++++++-------- .../s2t/modules/conformer_convolution.py | 10 +++--- paddlespeech/s2t/modules/encoder.py | 6 ++-- paddlespeech/s2t/modules/encoder_layer.py | 14 ++++---- 5 files changed, 39 insertions(+), 30 deletions(-) diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index a812abcb..813e1e52 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -605,8 +605,8 @@ class U2BaseModel(ASRInterface, nn.Layer): xs: paddle.Tensor, offset: int, required_cache_size: int, - att_cache: paddle.Tensor, - cnn_cache: paddle.Tensor, + att_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) + cnn_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: """ Export interface for c++ call, give input chunk xs, and return output from time 0 to current chunk. diff --git a/paddlespeech/s2t/modules/attention.py b/paddlespeech/s2t/modules/attention.py index cbcaccc2..92990048 100644 --- a/paddlespeech/s2t/modules/attention.py +++ b/paddlespeech/s2t/modules/attention.py @@ -86,7 +86,8 @@ class MultiHeadedAttention(nn.Layer): self, value: paddle.Tensor, scores: paddle.Tensor, - mask: paddle.Tensor, ) -> paddle.Tensor: + mask: paddle.Tensor, # paddle.ones([0, 0, 0], dtype=paddle.bool) + ) -> paddle.Tensor: """Compute attention context vector. Args: value (paddle.Tensor): Transformed value, size @@ -126,13 +127,15 @@ class MultiHeadedAttention(nn.Layer): return self.linear_out(x) # (batch, time1, d_model) - def forward(self, - query: paddle.Tensor, - key: paddle.Tensor, - value: paddle.Tensor, - mask: paddle.Tensor, - pos_emb: paddle.Tensor, - cache: paddle.Tensor) -> Tuple[paddle.Tensor, paddle.Tensor]: + def forward( + self, + query: paddle.Tensor, + key: paddle.Tensor, + value: paddle.Tensor, + mask: paddle.Tensor, # paddle.ones([0,0,0], dtype=paddle.bool) + pos_emb: paddle.Tensor, # paddle.empty([0]) + cache: paddle.Tensor # paddle.zeros([0,0,0,0]) + ) -> Tuple[paddle.Tensor, paddle.Tensor]: """Compute scaled dot product attention. Args: query (paddle.Tensor): Query tensor (#batch, time1, size). @@ -241,13 +244,15 @@ class RelPositionMultiHeadedAttention(MultiHeadedAttention): return x - def forward(self, - query: paddle.Tensor, - key: paddle.Tensor, - value: paddle.Tensor, - mask: paddle.Tensor, - pos_emb: paddle.Tensor, - cache: paddle.Tensor) -> Tuple[paddle.Tensor, paddle.Tensor]: + def forward( + self, + query: paddle.Tensor, + key: paddle.Tensor, + value: paddle.Tensor, + mask: paddle.Tensor, # paddle.ones([0,0,0], dtype=paddle.bool) + pos_emb: paddle.Tensor, # paddle.empty([0]) + cache: paddle.Tensor # paddle.zeros([0,0,0,0]) + ) -> Tuple[paddle.Tensor, paddle.Tensor]: """Compute 'Scaled Dot Product Attention' with rel. positional encoding. Args: query (paddle.Tensor): Query tensor (#batch, time1, size). diff --git a/paddlespeech/s2t/modules/conformer_convolution.py b/paddlespeech/s2t/modules/conformer_convolution.py index 23aecd7f..b35fea5b 100644 --- a/paddlespeech/s2t/modules/conformer_convolution.py +++ b/paddlespeech/s2t/modules/conformer_convolution.py @@ -105,10 +105,12 @@ class ConvolutionModule(nn.Layer): ) self.activation = activation - def forward(self, - x: paddle.Tensor, - mask_pad: paddle.Tensor, - cache: paddle.Tensor) -> Tuple[paddle.Tensor, paddle.Tensor]: + def forward( + self, + x: paddle.Tensor, + mask_pad: paddle.Tensor, # paddle.ones([0,0,0], dtype=paddle.bool) + cache: paddle.Tensor # paddle.zeros([0,0,0,0]) + ) -> Tuple[paddle.Tensor, paddle.Tensor]: """Compute convolution module. Args: x (paddle.Tensor): Input tensor (#batch, time, channels). diff --git a/paddlespeech/s2t/modules/encoder.py b/paddlespeech/s2t/modules/encoder.py index 6001afd4..abdaf5ea 100644 --- a/paddlespeech/s2t/modules/encoder.py +++ b/paddlespeech/s2t/modules/encoder.py @@ -190,9 +190,9 @@ class BaseEncoder(nn.Layer): xs: paddle.Tensor, offset: int, required_cache_size: int, - att_cache: paddle.Tensor, - cnn_cache: paddle.Tensor, - att_mask: paddle.Tensor, + att_cache: paddle.Tensor, # paddle.zeros([0,0,0,0]) + cnn_cache: paddle.Tensor, # paddle.zeros([0,0,0,0]), + att_mask: paddle.Tensor, # paddle.ones([0,0,0], dtype=paddle.bool) ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: """ Forward just one chunk Args: diff --git a/paddlespeech/s2t/modules/encoder_layer.py b/paddlespeech/s2t/modules/encoder_layer.py index 8fd991ec..3972ff90 100644 --- a/paddlespeech/s2t/modules/encoder_layer.py +++ b/paddlespeech/s2t/modules/encoder_layer.py @@ -76,9 +76,10 @@ class TransformerEncoderLayer(nn.Layer): x: paddle.Tensor, mask: paddle.Tensor, pos_emb: paddle.Tensor, - mask_pad: paddle.Tensor, - att_cache: paddle.Tensor, - cnn_cache: paddle.Tensor, + mask_pad: paddle. + Tensor, # paddle.ones([0, 0, 0], dtype=paddle.bool) + att_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) + cnn_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]: """Compute encoded features. Args: @@ -194,9 +195,10 @@ class ConformerEncoderLayer(nn.Layer): x: paddle.Tensor, mask: paddle.Tensor, pos_emb: paddle.Tensor, - mask_pad: paddle.Tensor, - att_cache: paddle.Tensor, - cnn_cache: paddle.Tensor, + mask_pad: paddle. + Tensor, # paddle.ones([0, 0, 0], dtype=paddle.bool) + att_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) + cnn_cache: paddle.Tensor, # paddle.zeros([0, 0, 0, 0]) ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]: """Compute encoded features. Args: -- GitLab