“300aacbbf5d48d24989860358c6c1f1698ce99d0”上不存在“tools/layers_validate.py”
提交 1a8c5278 编写于 作者: H Hui Zhang

export ctc grad norm config

上级 932889d9
...@@ -128,8 +128,8 @@ class DeepSpeech2Model(nn.Layer): ...@@ -128,8 +128,8 @@ class DeepSpeech2Model(nn.Layer):
num_rnn_layers=3, #Number of stacking RNN layers. num_rnn_layers=3, #Number of stacking RNN layers.
rnn_layer_size=1024, #RNN layer size (number of RNN cells). rnn_layer_size=1024, #RNN layer size (number of RNN cells).
use_gru=True, #Use gru if set True. Use simple rnn if set False. use_gru=True, #Use gru if set True. Use simple rnn if set False.
share_rnn_weights=True #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported. share_rnn_weights=True, #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
)) ctc_grad_norm_type='instance', ))
if config is not None: if config is not None:
config.merge_from_other_cfg(default) config.merge_from_other_cfg(default)
return default return default
...@@ -142,7 +142,8 @@ class DeepSpeech2Model(nn.Layer): ...@@ -142,7 +142,8 @@ class DeepSpeech2Model(nn.Layer):
rnn_size=1024, rnn_size=1024,
use_gru=False, use_gru=False,
share_rnn_weights=True, share_rnn_weights=True,
blank_id=0): blank_id=0,
ctc_grad_norm_type='instance'):
super().__init__() super().__init__()
self.encoder = CRNNEncoder( self.encoder = CRNNEncoder(
feat_size=feat_size, feat_size=feat_size,
...@@ -160,7 +161,8 @@ class DeepSpeech2Model(nn.Layer): ...@@ -160,7 +161,8 @@ class DeepSpeech2Model(nn.Layer):
blank_id=blank_id, blank_id=blank_id,
dropout_rate=0.0, dropout_rate=0.0,
reduction=True, # sum reduction=True, # sum
batch_average=True) # sum / batch_size batch_average=True, # sum / batch_size
grad_norm_type=ctc_grad_norm_type)
def forward(self, audio, audio_len, text, text_len): def forward(self, audio, audio_len, text, text_len):
"""Compute Model loss """Compute Model loss
......
...@@ -289,7 +289,8 @@ class DeepSpeech2ModelOnline(nn.Layer): ...@@ -289,7 +289,8 @@ class DeepSpeech2ModelOnline(nn.Layer):
blank_id=blank_id, blank_id=blank_id,
dropout_rate=0.0, dropout_rate=0.0,
reduction=True, # sum reduction=True, # sum
batch_average=True) # sum / batch_size batch_average=True, # sum / batch_size
grad_norm_type='instance')
def forward(self, audio, audio_len, text, text_len): def forward(self, audio, audio_len, text, text_len):
"""Compute Model loss """Compute Model loss
......
...@@ -864,7 +864,8 @@ class U2Model(U2BaseModel): ...@@ -864,7 +864,8 @@ class U2Model(U2BaseModel):
blank_id=0, blank_id=0,
dropout_rate=0.0, dropout_rate=0.0,
reduction=True, # sum reduction=True, # sum
batch_average=True) # sum / batch_size batch_average=True, # sum / batch_size
grad_norm_type='instance')
return vocab_size, encoder, decoder, ctc return vocab_size, encoder, decoder, ctc
......
...@@ -649,7 +649,8 @@ class U2STModel(U2STBaseModel): ...@@ -649,7 +649,8 @@ class U2STModel(U2STBaseModel):
blank_id=0, blank_id=0,
dropout_rate=0.0, dropout_rate=0.0,
reduction=True, # sum reduction=True, # sum
batch_average=True) # sum / batch_size batch_average=True, # sum / batch_size
grad_norm_type='instance')
return vocab_size, encoder, (st_decoder, decoder, ctc) return vocab_size, encoder, (st_decoder, decoder, ctc)
else: else:
......
...@@ -39,7 +39,8 @@ class CTCDecoder(nn.Layer): ...@@ -39,7 +39,8 @@ class CTCDecoder(nn.Layer):
blank_id=0, blank_id=0,
dropout_rate: float=0.0, dropout_rate: float=0.0,
reduction: bool=True, reduction: bool=True,
batch_average: bool=True): batch_average: bool=True,
grad_norm_type: str="instance"):
"""CTC decoder """CTC decoder
Args: Args:
...@@ -48,6 +49,7 @@ class CTCDecoder(nn.Layer): ...@@ -48,6 +49,7 @@ class CTCDecoder(nn.Layer):
dropout_rate (float): dropout rate (0.0 ~ 1.0) dropout_rate (float): dropout rate (0.0 ~ 1.0)
reduction (bool): reduce the CTC loss into a scalar, True for 'sum' or 'none' reduction (bool): reduce the CTC loss into a scalar, True for 'sum' or 'none'
batch_average (bool): do batch dim wise average. batch_average (bool): do batch dim wise average.
grad_norm_type (str): one of 'instance', 'batchsize', 'frame', None.
""" """
assert check_argument_types() assert check_argument_types()
super().__init__() super().__init__()
...@@ -60,7 +62,8 @@ class CTCDecoder(nn.Layer): ...@@ -60,7 +62,8 @@ class CTCDecoder(nn.Layer):
self.criterion = CTCLoss( self.criterion = CTCLoss(
blank=self.blank_id, blank=self.blank_id,
reduction=reduction_type, reduction=reduction_type,
batch_average=batch_average) batch_average=batch_average,
grad_norm_type=grad_norm_type)
# CTCDecoder LM Score handle # CTCDecoder LM Score handle
self._ext_scorer = None self._ext_scorer = None
......
...@@ -23,11 +23,32 @@ __all__ = ['CTCLoss', "LabelSmoothingLoss"] ...@@ -23,11 +23,32 @@ __all__ = ['CTCLoss', "LabelSmoothingLoss"]
class CTCLoss(nn.Layer): class CTCLoss(nn.Layer):
def __init__(self,
             blank=0,
             reduction='sum',
             batch_average=False,
             grad_norm_type=None):
    """Build the CTC loss layer and select how its gradient is normalized.

    Args:
        blank: label id of the CTC blank symbol, forwarded to `nn.CTCLoss`.
        reduction: reduction mode forwarded to `nn.CTCLoss`.
        batch_average: stored on the instance as `self.batch_average`.
        grad_norm_type: one of 'instance', 'batchsize', 'frame' or None.
            Selects which single normalization flag is enabled:
              'instance'  -> norm_by_times
              'batchsize' -> norm_by_batchsize
              'frame'     -> norm_by_total_logits_len
              None        -> no gradient normalization flag is set

    Raises:
        AssertionError: if `grad_norm_type` is not one of the accepted values.
    """
    super().__init__()
    # `blank` is the label id reserved for the CTC blank symbol.
    self.loss = nn.CTCLoss(blank=blank, reduction=reduction)
    self.batch_average = batch_average

    logger.info(
        f"CTCLoss Loss reduction: {reduction}, div-bs: {batch_average}")

    assert grad_norm_type in ('instance', 'batchsize', 'frame', None)
    logger.info(f"CTCLoss Grad Norm Type: {grad_norm_type}")

    # Exactly one flag (or none) is enabled, according to grad_norm_type.
    # Previously 'batchsize' enabled norm_by_times by mistake, which made it
    # indistinguishable from 'instance' and left norm_by_batchsize always
    # False.
    self.norm_by_times = grad_norm_type == 'instance'
    self.norm_by_batchsize = grad_norm_type == 'batchsize'
    self.norm_by_total_logits_len = grad_norm_type == 'frame'
def forward(self, logits, ys_pad, hlens, ys_lens): def forward(self, logits, ys_pad, hlens, ys_lens):
"""Compute CTC loss. """Compute CTC loss.
...@@ -46,10 +67,15 @@ class CTCLoss(nn.Layer): ...@@ -46,10 +67,15 @@ class CTCLoss(nn.Layer):
# warp-ctc need activation with shape [T, B, V + 1] # warp-ctc need activation with shape [T, B, V + 1]
# logits: (B, L, D) -> (L, B, D) # logits: (B, L, D) -> (L, B, D)
logits = logits.transpose([1, 0, 2]) logits = logits.transpose([1, 0, 2])
# (TODO:Hui Zhang) ctc loss does not support int64 labels
ys_pad = ys_pad.astype(paddle.int32) ys_pad = ys_pad.astype(paddle.int32)
loss = self.loss( loss = self.loss(
logits, ys_pad, hlens, ys_lens, norm_by_times=self.batch_average) logits,
ys_pad,
hlens,
ys_lens,
norm_by_times=self.norm_by_times,
norm_by_batchsize=self.norm_by_batchsize,
norm_by_total_logits_len=self.norm_by_total_logits_len)
if self.batch_average: if self.batch_average:
# Batch-size average # Batch-size average
loss = loss / B loss = loss / B
......
...@@ -41,6 +41,7 @@ model: ...@@ -41,6 +41,7 @@ model:
use_gru: True use_gru: True
share_rnn_weights: False share_rnn_weights: False
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 80 n_epoch: 80
......
...@@ -43,6 +43,7 @@ model: ...@@ -43,6 +43,7 @@ model:
fc_layers_size_list: -1, fc_layers_size_list: -1,
use_gru: False use_gru: False
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 50 n_epoch: 50
......
...@@ -41,6 +41,7 @@ model: ...@@ -41,6 +41,7 @@ model:
use_gru: False use_gru: False
share_rnn_weights: True share_rnn_weights: True
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 50 n_epoch: 50
......
...@@ -43,6 +43,7 @@ model: ...@@ -43,6 +43,7 @@ model:
fc_layers_size_list: 512, 256 fc_layers_size_list: 512, 256
use_gru: False use_gru: False
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 50 n_epoch: 50
......
...@@ -42,6 +42,7 @@ model: ...@@ -42,6 +42,7 @@ model:
use_gru: False use_gru: False
share_rnn_weights: True share_rnn_weights: True
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 10 n_epoch: 10
......
...@@ -44,6 +44,7 @@ model: ...@@ -44,6 +44,7 @@ model:
fc_layers_size_list: 512, 256 fc_layers_size_list: 512, 256
use_gru: True use_gru: True
blank_id: 0 blank_id: 0
ctc_grad_norm_type: instance
training: training:
n_epoch: 10 n_epoch: 10
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册