PaddlePaddle / DeepSpeech

Commit 77e5641a
Authored Apr 20, 2021 by Hui Zhang

fix tb logger

Parent: 16b8b982
Showing 3 changed files with 20 additions and 16 deletions:

  deepspeech/exps/u2/model.py      +6  -4
  deepspeech/utils/checkpoint.py   +3  -3
  deepspeech/utils/layer_tools.py  +11 -9
deepspeech/exps/u2/model.py

```diff
@@ -88,10 +88,6 @@ class U2Trainer(Trainer):
             losses_np['ctc_loss'] = float(ctc_loss)
 
         if (batch_index + 1) % train_conf.accum_grad == 0:
-            if dist.get_rank() == 0 and self.visualizer:
-                losses_np_v = losses_np.copy()
-                losses_np_v.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalars("step", losses_np_v, self.iteration)
             self.optimizer.step()
             self.optimizer.clear_grad()
             self.lr_scheduler.step()
@@ -107,6 +103,12 @@ class U2Trainer(Trainer):
                              for k, v in losses_np.items())
             logger.info(msg)
 
+            if dist.get_rank() == 0 and self.visualizer:
+                losses_np_v = losses_np.copy()
+                losses_np_v.update({"lr": self.lr_scheduler()})
+                self.visualizer.add_scalars("step", losses_np_v,
+                                            self.iteration - 1)
+
     def train(self):
         """The training process control by step."""
         # !!!IMPORTANT!!!
```
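The net effect: the visualizer write moves out of the gradient-accumulation branch and into the periodic logging block, where `self.iteration` has already been advanced for the step, hence the `self.iteration - 1`. A minimal sketch of the resulting pattern, assuming a tensorboardX-style `SummaryWriter` as the visualizer; the standalone function and its arguments are illustrative stand-ins for the trainer's attributes:

```python
# Sketch of the per-step scalar logging this hunk introduces. Assumes a
# tensorboardX-style SummaryWriter; the function and argument names are
# illustrative stand-ins, not the project's actual trainer code.
from tensorboardX import SummaryWriter


def log_step(visualizer: SummaryWriter, losses_np: dict, lr: float,
             iteration: int, rank: int) -> None:
    """Write loss scalars plus the current LR, on rank 0 only."""
    if rank != 0 or visualizer is None:
        return
    scalars = losses_np.copy()   # copy so the caller's loss dict is untouched
    scalars["lr"] = lr           # fold the LR in alongside the losses
    # the trainer has already incremented its counter, so log at step - 1
    visualizer.add_scalars("step", scalars, iteration - 1)
```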
deepspeech/utils/checkpoint.py

```diff
@@ -46,8 +46,8 @@ def _load_latest_checkpoint(checkpoint_dir: str) -> int:
     return iteration
 
 
-def _save_checkpoint(checkpoint_dir: str, iteration: int):
-    """Save the iteration number of the latest model to be checkpointed.
+def _save_record(checkpoint_dir: str, iteration: int):
+    """Save the iteration number of the latest model to be checkpoint record.
 
     Args:
         checkpoint_dir (str): the directory where checkpoint is saved.
         iteration (int): the latest iteration number.
@@ -149,4 +149,4 @@ def save_parameters(checkpoint_dir: str,
             fout.write(data)
 
     if isinstance(tag_or_iteration, int):
-        _save_checkpoint(checkpoint_dir, tag_or_iteration)
+        _save_record(checkpoint_dir, tag_or_iteration)
```
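The rename signals that this helper writes a small record of the latest checkpointed iteration rather than a checkpoint itself. A hedged sketch of that record-file pattern; the file name and format below are assumptions for illustration, not necessarily what `checkpoint.py` actually writes:

```python
# Illustrative record-file pattern matching the _save_record /
# _load_latest_checkpoint pairing. The record file's name and format
# here are assumed, not taken from the DeepSpeech source.
import os

RECORD_FILE = "checkpoint_latest"  # hypothetical file name


def save_record(checkpoint_dir: str, iteration: int) -> None:
    """Persist the iteration number of the most recent checkpoint."""
    with open(os.path.join(checkpoint_dir, RECORD_FILE), "w") as fout:
        fout.write(str(iteration))


def load_latest_checkpoint(checkpoint_dir: str) -> int:
    """Return the recorded iteration, or 0 when nothing was saved yet."""
    path = os.path.join(checkpoint_dir, RECORD_FILE)
    if not os.path.isfile(path):
        return 0
    with open(path) as fin:
        return int(fin.read().strip())
```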
deepspeech/utils/layer_tools.py

```diff
@@ -21,6 +21,8 @@ __all__ = [
 
 
 def summary(layer: nn.Layer, print_func=print):
+    if print_func is None:
+        return
     num_params = num_elements = 0
     for name, param in layer.state_dict().items():
         if print_func:
@@ -32,15 +34,6 @@ def summary(layer: nn.Layer, print_func=print):
     print_func(f"Total parameters: {num_params}, {num_elements} elements.")
 
 
-def gradient_norm(layer: nn.Layer):
-    grad_norm_dict = {}
-    for name, param in layer.state_dict().items():
-        if param.trainable:
-            grad = param.gradient()  # return numpy.ndarray
-            grad_norm_dict[name] = np.linalg.norm(grad) / grad.size
-    return grad_norm_dict
-
-
 def print_grads(model, print_func=print):
     if print_func is None:
         return
@@ -64,6 +57,15 @@ def print_params(model, print_func=print):
     print_func(f"Total parameters: {num_params}, {total} elements.")
 
 
+def gradient_norm(layer: nn.Layer):
+    grad_norm_dict = {}
+    for name, param in layer.state_dict().items():
+        if param.trainable:
+            grad = param.gradient()  # return numpy.ndarray
+            grad_norm_dict[name] = np.linalg.norm(grad) / grad.size
+    return grad_norm_dict
+
+
 def recursively_remove_weight_norm(layer: nn.Layer):
     for layer in layer.sublayers():
         try:
```
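Besides guarding `summary` against `print_func=None`, these hunks only move `gradient_norm` below `print_params`; its body is unchanged. For context, a usage sketch of the two helpers on a toy model, assuming PaddlePaddle 2.x dygraph semantics:

```python
# Usage sketch for the helpers touched above; the Linear layer is a toy
# stand-in for a real network, and paddle 2.x dygraph behavior is assumed.
import paddle
import paddle.nn as nn

from deepspeech.utils.layer_tools import gradient_norm, summary

model = nn.Linear(4, 2)           # toy stand-in for a real model
summary(model)                    # prints per-parameter shapes and totals
summary(model, print_func=None)   # a silent no-op after this commit

loss = model(paddle.randn([8, 4])).mean()
loss.backward()
# L2 norm of each trainable parameter's gradient, divided by element count
for name, norm in gradient_norm(model).items():
    print(name, norm)
```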
登录