diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index e84de61574be3040db674d21215fbee89e5f0bc9..3dc8286d2fb57a93c09c8b454b7dd56e119673a1 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -87,7 +87,8 @@ class DeepSpeech2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 9cb3fa3cfa3a09a283885ced01ee6122dcd0f55d..65ec5174f91215f2d22e4f4d15b88bd4a307473a 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -106,7 +106,8 @@ class U2Trainer(Trainer):
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
             # When using cpu w/o DDP, model does not have `no_sync`
-            context = self.model.no_sync if self.parallel else nullcontext
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py
index d38afe25cf315b9bf5a0bae603397fd83ac61aa6..5a72e44d810c9c65eab033cc12d24f2ccb45cde7 100644
--- a/deepspeech/exps/u2_kaldi/model.py
+++ b/deepspeech/exps/u2_kaldi/model.py
@@ -105,7 +105,8 @@ class U2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py
index c480499c7b397ff12eb4f7d5047101168a33d149..08060d975989f09ce281cf28d00e841e629a41ef 100644
--- a/deepspeech/exps/u2_st/model.py
+++ b/deepspeech/exps/u2_st/model.py
@@ -110,7 +110,8 @@ class U2STTrainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
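
Note: all four trainers converge on the same guarded selection. `no_sync` exists only on a DDP-wrapped model (e.g. `paddle.DataParallel`), so the old unconditional `context = self.model.no_sync` raises `AttributeError` on CPU or single-GPU runs, as the u2 comment ("When using cpu w/o DDP, model does not have `no_sync`") already noted; the `hasattr` check plus the `self.parallel` flag falls back to `contextlib.nullcontext` there. A minimal self-contained sketch of the pattern (the `sync_context`, `model`, and `parallel` names here are illustrative, not the trainers' actual API):

    from contextlib import nullcontext

    def sync_context(model, parallel: bool):
        """Pick the per-step gradient-sync context for gradient accumulation.

        `no_sync` exists only on DDP-wrapped models, so guard with hasattr
        to keep CPU / single-GPU runs on a no-op context.
        """
        if hasattr(model, "no_sync") and parallel:
            # Skip gradient all-reduce on intermediate accumulation steps;
            # gradients pile up locally and are synchronized on the last step.
            return model.no_sync
        # No-op context: either this is the synchronizing step's usage
        # pattern or there is no DDP wrapper at all.
        return nullcontext

    # Illustrative usage on an intermediate accumulation step:
    #     with sync_context(self.model, self.parallel)():
    #         loss.backward()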