fix cmvn and print params

28658cc1 · Hui Zhang · 48f4bda3 · 28658cc1 · 28658cc1 · 28658cc1
5 changed files
--- a/.notebook/u2_model.ipynb
+++ b/.notebook/u2_model.ipynb
--- a/deepspeech/frontend/normalizer.py
+++ b/deepspeech/frontend/normalizer.py
@@ -77,15 +77,19 @@ class FeatureNormalizer(object):
        :param filepath: File to write mean and stddev.
        :type filepath: str
        """
-        np.savez(filepath, mean=self._mean, std=self._std)
+        np.savez(filepath, mean=self._mean, istd=self._istd)
    def _read_mean_std_from_file(self, filepath, eps=1e-20):
        """Load mean and std from file."""
-        mean, std = load_cmvn(filepath, filetype='npz')
+        mean, istd = load_cmvn(filepath, filetype='npz')
        self._mean = mean.T
-        self._istd = 1.0 / std.T
+        self._istd = istd.T
-    def _compute_mean_std(self, manifest_path, featurize_func, num_samples):
+    def _compute_mean_std(self,
+                          manifest_path,
+                          featurize_func,
+                          num_samples,
+                          eps=1e-20):
        """Compute mean and std from randomly sampled instances."""
        manifest = read_manifest(manifest_path)
        if num_samples == -1:
@@ -98,4 +102,6 @@ class FeatureNormalizer(object):
                featurize_func(AudioSegment.from_file(instance["feat"])))
        features = np.hstack(features)  #(D, T)
        self._mean = np.mean(features, axis=1).reshape([1, -1])  #(1, D)
-        self._std = np.std(features, axis=1).reshape([1, -1])  #(1, D)
+        std = np.std(features, axis=1).reshape([1, -1])  #(1, D)
+        std = np.clip(std, eps, None)
+        self._istd = 1.0 / std
--- a/deepspeech/frontend/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -238,10 +238,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file):
 def _load_npz_cmvn(npz_cmvn_file, eps=1e-20):
    npzfile = np.load(npz_cmvn_file)
    means = npzfile["mean"]  #(1, D)
-    std = npzfile["std"]  #(1, D)
+    istd = npzfile["istd"]  #(1, D)
-    std = np.clip(std, eps, None)
+    cmvn = np.array([means, istd])
-    variance = 1.0 / std
-    cmvn = np.array([means, variance])
    return cmvn

--- a/deepspeech/modules/mask.py
+++ b/deepspeech/modules/mask.py
@@ -25,7 +25,7 @@ __all__ = [
 def sequence_mask(x_len, max_len=None, dtype='float32'):
-    """[summary]
+    """batch sequence mask.
    Args:
        x_len ([paddle.Tensor]): xs lenght, [B]

--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -22,8 +22,6 @@ __all__ = [
 def summary(layer: nn.Layer, print_func=print):
    num_params = num_elements = 0
-    if print_func:
-        print_func(f"{layer.__class__.__name__} summary:")
    for name, param in layer.state_dict().items():
        if print_func:
            print_func(
@@ -31,9 +29,7 @@ def summary(layer: nn.Layer, print_func=print):
        num_elements += np.prod(param.shape)
        num_params += 1
    if print_func:
-        print_func(
+        print_func(f"Total parameters: {num_params}, {num_elements} elements.")
-            f"{layer.__class__.__name__} has {num_params} parameters, {num_elements} elements."
-        )
 def gradient_norm(layer: nn.Layer):
@@ -45,25 +41,6 @@ def gradient_norm(layer: nn.Layer):
    return grad_norm_dict
-def recursively_remove_weight_norm(layer: nn.Layer):
-    for layer in layer.sublayers():
-        try:
-            nn.utils.remove_weight_norm(layer)
-        except ValueError as e:
-            # ther is not weight norm hoom in this layer
-            pass
-def freeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = False
-def unfreeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = True
 def print_grads(model, print_func=print):
    if print_func is None:
        return
@@ -75,12 +52,32 @@ def print_grads(model, print_func=print):
 def print_params(model, print_func=print):
    if print_func is None:
        return
    total = 0.0
+    num_params = 0.0
    for n, p in model.named_parameters():
-        msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"
+        msg = f"{n} | {p.shape} | {np.prod(p.shape)} | {not p.stop_gradient}"
        total += np.prod(p.shape)
+        num_params += 1
        if print_func:
            print_func(msg)
    if print_func:
-        print_func(f"Total parameters: {total}!")
+        print_func(f"Total parameters: {num_params}, {total} elements.")
+def recursively_remove_weight_norm(layer: nn.Layer):
+    for layer in layer.sublayers():
+        try:
+            nn.utils.remove_weight_norm(layer)
+        except ValueError as e:
+            # there is no weight norm hook in this layer
+            pass
+def freeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = False
+def unfreeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = True