Commit 466672e1 authored by Hui Zhang

no_sync if paddle support else nullcontext

Parent b4e16eb8
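The same context-selection block recurs in each trainer below. As a minimal sketch of the pattern this commit introduces, assuming a Paddle-style training loop with a `parallel` flag and an `accum_grad` setting (the function signature and parameter names here are illustrative, not the repository's actual code):

    # A minimal sketch of the guarded no_sync pattern; the signature,
    # `parallel` flag, and `accum_grad` setting are illustrative
    # assumptions, not the repository's exact trainer code.
    from contextlib import nullcontext

    def train_batch(batch_index, batch, model, optimizer, parallel, accum_grad):
        # paddle.DataParallel exposes `no_sync()` to skip the gradient
        # all-reduce while accumulating; a plain model (CPU, single
        # process, or a Paddle version without `no_sync`) does not, so
        # fall back to `nullcontext` when the attribute is missing.
        if accum_grad > 1:
            context = model.no_sync if (hasattr(model, "no_sync") and
                                        parallel) else nullcontext
        else:
            # Single-GPU training, or the batch on which DDP should
            # synchronize the accumulated gradients.
            context = nullcontext

        with context():
            audio, audio_len, text, text_len = batch
            loss = model(audio, audio_len, text, text_len)
            (loss / accum_grad).backward()

        # Update parameters only once every `accum_grad` batches.
        if (batch_index + 1) % accum_grad == 0:
            optimizer.step()
            optimizer.clear_grad()

With this guard, the same code path works both under paddle.DataParallel and on CPU or single-process runs, where the model is not wrapped and therefore has no `no_sync` method.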
@@ -87,7 +87,8 @@ class DeepSpeech2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.

@@ -106,7 +106,8 @@ class U2Trainer(Trainer):
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
             # When using cpu w/o DDP, model does not have `no_sync`
-            context = self.model.no_sync if self.parallel else nullcontext
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.

@@ -105,7 +105,8 @@ class U2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.

@@ -110,7 +110,8 @@ class U2STTrainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
+                                             self.parallel) else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.