From dee672a7538ac4eb5b97c16b1c16de1e33f9541a Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 12 Apr 2021 09:03:48 +0000
Subject: [PATCH] Add flake8 config and fix flake8 lint errors

---
 .flake8                                       | 49 +++++++++++++++++++
 deepspeech/__init__.py                        |  6 +--
 deepspeech/decoders/decoders_deprecated.py    | 10 ++--
 deepspeech/decoders/scorer_deprecated.py      |  2 +-
 deepspeech/decoders/swig/setup.py             | 13 ++---
 .../exps/deepspeech2/bin/deploy/client.py     |  4 +-
 .../exps/deepspeech2/bin/deploy/runtime.py    | 10 +---
 .../exps/deepspeech2/bin/deploy/send.py       |  2 -
 .../exps/deepspeech2/bin/deploy/server.py     |  6 +--
 deepspeech/exps/deepspeech2/bin/export.py     |  9 ----
 deepspeech/exps/deepspeech2/bin/infer.py      |  9 ----
 deepspeech/exps/deepspeech2/bin/test.py       |  9 ----
 deepspeech/exps/deepspeech2/bin/train.py      |  6 ---
 deepspeech/exps/deepspeech2/bin/tune.py       |  6 +--
 deepspeech/exps/deepspeech2/model.py          |  4 --
 deepspeech/exps/u2/bin/export.py              |  8 ---
 deepspeech/exps/u2/bin/test.py                |  8 ---
 deepspeech/exps/u2/bin/train.py               |  5 --
 deepspeech/exps/u2/model.py                   |  6 ---
 deepspeech/frontend/audio.py                  |  1 -
 deepspeech/frontend/augmentor/augmentation.py |  2 +-
 deepspeech/frontend/augmentor/base.py         |  2 +-
 .../frontend/featurizer/audio_featurizer.py   |  4 +-
 .../frontend/featurizer/text_featurizer.py    |  1 -
 deepspeech/frontend/utility.py                |  8 ---
 deepspeech/io/__init__.py                     |  5 +-
 deepspeech/io/collator.py                     |  1 -
 deepspeech/io/dataset.py                      |  3 --
 deepspeech/io/sampler.py                      | 13 ++---
 deepspeech/io/utility.py                      |  1 -
 deepspeech/models/deepspeech2.py              |  8 ---
 deepspeech/models/u2.py                       |  8 +--
 deepspeech/modules/__init__.py                |  2 +-
 deepspeech/modules/activation.py              |  5 --
 deepspeech/modules/attention.py               |  1 -
 deepspeech/modules/cmvn.py                    |  2 -
 deepspeech/modules/conformer_convolution.py   |  2 -
 deepspeech/modules/conv.py                    |  2 -
 deepspeech/modules/ctc.py                     |  5 +-
 deepspeech/modules/decoder.py                 |  8 ++-
 deepspeech/modules/decoder_layer.py           |  2 -
 deepspeech/modules/embedding.py               |  3 --
 deepspeech/modules/encoder.py                 |  2 -
 deepspeech/modules/encoder_layer.py           |  2 -
 deepspeech/modules/loss.py                    |  1 -
 deepspeech/modules/mask.py                    |  3 --
 .../modules/positionwise_feed_forward.py      |  2 -
 deepspeech/modules/subsampling.py             |  2 -
 deepspeech/training/__init__.py               |  2 -
 deepspeech/training/cli.py                    |  9 ++--
 deepspeech/training/scheduler.py              |  1 -
 deepspeech/training/trainer.py                |  3 --
 deepspeech/utils/checkpoint.py                |  3 --
 deepspeech/utils/error_rate.py                |  6 +--
 deepspeech/utils/layer_tools.py               |  2 +-
 deepspeech/utils/mp_tools.py                  |  1 -
 deepspeech/utils/tensor_utils.py              |  1 -
 deepspeech/utils/utility.py                   |  1 -
 examples/dataset/aishell/aishell.py           |  5 +-
 .../chime3_background/chime3_background.py    | 10 ++--
 examples/dataset/librispeech/librispeech.py   |  1 -
 .../mini_librispeech/mini_librispeech.py      |  2 -
 examples/dataset/rir_noise/rir_noise.py       |  2 +-
 examples/dataset/voxforge/voxforge.py         |  2 +-
 tests/deepspeech2_model_test.py               |  4 +-
 utils/build_vocab.py                          | 27 +++++-----
 utils/format_data.py                          |  9 +---
 utils/utility.py                              | 10 ++--
 68 files changed, 127 insertions(+), 247 deletions(-)
 create mode 100644 .flake8

diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..b49cbf1a
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,49 @@
+[flake8]
+
+########## OPTIONS ##########
+# Set the maximum length that any line (with some exceptions) may be.
+max-line-length = 120
+
+
+################### FILE PATTERNS ##########################
+# Provide a comma-separated list of glob patterns to exclude from checks.
+exclude =
+    # git folder
+    .git,
+    # python cache
+    __pycache__,
+# Provide a comma-separated list of glob patterns to include for checks.
+filename =
+    *.py
+
+
+########## RULES ##########
+
+# ERROR CODES
+#
+# E/W  - PEP8 errors/warnings (pycodestyle)
+# F    - linting errors (pyflakes)
+# C    - McCabe complexity error (mccabe)
+#
+# W503 - line break before binary operator
+
+# Specify a list of codes to ignore.
+ignore =
+    W503
+    E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
+    W291,W293,W605
+    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
+    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
+    # to line this up with executable bit
+    EXE001,
+    # these ignores are from flake8-bugbear; please fix!
+    B007,B008,
+    # these ignores are from flake8-comprehensions; please fix!
+    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
+
+# Specify the list of error codes you wish Flake8 to report.
+select =
+    E,
+    W,
+    F,
+    C
\ No newline at end of file
diff --git a/deepspeech/__init__.py b/deepspeech/__init__.py
index 7f7e946c..c6c2e607 100644
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 import logging
 from typing import Union
-from typing import Optional
 from typing import List
 from typing import Tuple
 from typing import Any
@@ -21,7 +20,6 @@ from typing import Any
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I
 #TODO(Hui Zhang): remove  fluid import
 from paddle.fluid import core
 logger = logging.getLogger(__name__)
@@ -242,7 +240,7 @@ def is_broadcastable(shp1, shp2):
 def masked_fill(xs: paddle.Tensor,
                 mask: paddle.Tensor,
                 value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) == True
+    assert is_broadcastable(xs.shape, mask.shape) is True
     bshape = paddle.broadcast_shape(xs.shape, mask.shape)
     mask = mask.broadcast_to(bshape)
     trues = paddle.ones_like(xs) * value
@@ -259,7 +257,7 @@ if not hasattr(paddle.Tensor, 'masked_fill'):
 def masked_fill_(xs: paddle.Tensor,
                  mask: paddle.Tensor,
                  value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) == True
+    assert is_broadcastable(xs.shape, mask.shape) is True
     bshape = paddle.broadcast_shape(xs.shape, mask.shape)
     mask = mask.broadcast_to(bshape)
     trues = paddle.ones_like(xs) * value
diff --git a/deepspeech/decoders/decoders_deprecated.py b/deepspeech/decoders/decoders_deprecated.py
index 99e14e49..af05e61b 100644
--- a/deepspeech/decoders/decoders_deprecated.py
+++ b/deepspeech/decoders/decoders_deprecated.py
@@ -104,14 +104,14 @@ def ctc_beam_search_decoder(probs_seq,
         global ext_nproc_scorer
         ext_scoring_func = ext_nproc_scorer
 
-    ## initialize
+    # initialize
     # prefix_set_prev: the set containing selected prefixes
     # probs_b_prev: prefixes' probability ending with blank in previous step
     # probs_nb_prev: prefixes' probability ending with non-blank in previous step
     prefix_set_prev = {'\t': 1.0}
     probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0}
 
-    ## extend prefix in loop
+    # extend prefix in loop
     for time_step in range(len(probs_seq)):
         # prefix_set_next: the set containing candidate prefixes
         # probs_b_cur: prefixes' probability ending with blank in current step
@@ -120,7 +120,7 @@ def ctc_beam_search_decoder(probs_seq,
 
         prob_idx = list(enumerate(probs_seq[time_step]))
         cutoff_len = len(prob_idx)
-        #If pruning is enabled
+        # If pruning is enabled
         if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len:
             prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True)
             cutoff_len, cum_prob = 0, 0.0
@@ -172,7 +172,7 @@ def ctc_beam_search_decoder(probs_seq,
         # update probs
         probs_b_prev, probs_nb_prev = probs_b_cur, probs_nb_cur
 
-        ## store top beam_size prefixes
+        # store top beam_size prefixes
         prefix_set_prev = sorted(
             prefix_set_next.items(), key=lambda asd: asd[1], reverse=True)
         if beam_size < len(prefix_set_prev):
@@ -191,7 +191,7 @@ def ctc_beam_search_decoder(probs_seq,
         else:
             beam_result.append((float('-inf'), ''))
 
-    ## output top beam_size decoding results
+    # output top beam_size decoding results
     beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True)
     return beam_result
 
diff --git a/deepspeech/decoders/scorer_deprecated.py b/deepspeech/decoders/scorer_deprecated.py
index 919b0294..ad4d745f 100644
--- a/deepspeech/decoders/scorer_deprecated.py
+++ b/deepspeech/decoders/scorer_deprecated.py
@@ -71,7 +71,7 @@ class Scorer(object):
         """
         lm = self._language_model_score(sentence)
         word_cnt = self._word_count(sentence)
-        if log == False:
+        if log is False:
             score = np.power(lm, self._alpha) * np.power(word_cnt, self._beta)
         else:
             score = self._alpha * np.log(lm) + self._beta * np.log(word_cnt)
diff --git a/deepspeech/decoders/swig/setup.py b/deepspeech/decoders/swig/setup.py
index f6dc048d..b02dc767 100644
--- a/deepspeech/decoders/swig/setup.py
+++ b/deepspeech/decoders/swig/setup.py
@@ -16,7 +16,8 @@
 from setuptools import setup, Extension, distutils
 import glob
 import platform
-import os, sys
+import os
+import sys
 import multiprocessing.pool
 import argparse
 
@@ -65,9 +66,9 @@ def parallelCCompile(self,
 def compile_test(header, library):
     dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
     command = "bash -c \"g++ -include " + header \
-                + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
-                + dummy_path + " >/dev/null 2>/dev/null && rm " \
-                + dummy_path + " 2>/dev/null\""
+        + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
+        + dummy_path + " >/dev/null 2>/dev/null && rm " \
+        + dummy_path + " 2>/dev/null\""
     return os.system(command) == 0
 
 
@@ -75,8 +76,8 @@ def compile_test(header, library):
 distutils.ccompiler.CCompiler.compile = parallelCCompile
 
 FILES = glob.glob('kenlm/util/*.cc') \
-        + glob.glob('kenlm/lm/*.cc') \
-        + glob.glob('kenlm/util/double-conversion/*.cc')
+    + glob.glob('kenlm/lm/*.cc') \
+    + glob.glob('kenlm/util/double-conversion/*.cc')
 
 FILES += glob.glob('openfst-1.6.3/src/lib/*.cc')
 
diff --git a/deepspeech/exps/deepspeech2/bin/deploy/client.py b/deepspeech/exps/deepspeech2/bin/deploy/client.py
index 766fdc5a..e27e561b 100644
--- a/deepspeech/exps/deepspeech2/bin/deploy/client.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/client.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 """Client-end for the ASR demo."""
 import keyboard
-import struct
-import socket
 import sys
 import argparse
 import pyaudio
@@ -49,7 +47,7 @@ def on_press_release(x):
             sys.stdout.flush()
             is_recording = True
     if x.event_type == 'up' and x.name == release.name:
-        if is_recording == True:
+        if is_recording:
             is_recording = False
 
 
diff --git a/deepspeech/exps/deepspeech2/bin/deploy/runtime.py b/deepspeech/exps/deepspeech2/bin/deploy/runtime.py
index eff1fa4e..ed033887 100644
--- a/deepspeech/exps/deepspeech2/bin/deploy/runtime.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/runtime.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Server-end for the ASR demo."""
-import os
-import time
-import argparse
 import functools
 import paddle
 import numpy as np
@@ -26,7 +23,6 @@ from deepspeech.utils.socket_server import AsrRequestHandler
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 
-from deepspeech.frontend.utility import read_manifest
 from deepspeech.utils.utility import add_arguments, print_arguments
 
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
@@ -159,15 +155,13 @@ if __name__ == "__main__":
         "--params_file",
         type=str,
         default="",
-        help=
-        "Parameter filename, Specify this when your model is a combined model."
+        help="Parameter filename, Specify this when your model is a combined model."
     )
     add_arg(
         "--model_dir",
         type=str,
         default=None,
-        help=
-        "Model dir, If you load a non-combined model, specify the directory of the model."
+        help="Model dir, If you load a non-combined model, specify the directory of the model."
     )
     add_arg("--use_gpu",
                         type=bool,
diff --git a/deepspeech/exps/deepspeech2/bin/deploy/send.py b/deepspeech/exps/deepspeech2/bin/deploy/send.py
index 84411f91..ce6c7c84 100644
--- a/deepspeech/exps/deepspeech2/bin/deploy/send.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/send.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Socket client to send wav to ASR server."""
-import struct
-import socket
 import argparse
 import wave
 
diff --git a/deepspeech/exps/deepspeech2/bin/deploy/server.py b/deepspeech/exps/deepspeech2/bin/deploy/server.py
index 48eaa9e3..f5b0a7d5 100644
--- a/deepspeech/exps/deepspeech2/bin/deploy/server.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/server.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Server-end for the ASR demo."""
-import os
-import time
-import argparse
 import functools
 import paddle
 import numpy as np
@@ -26,7 +23,6 @@ from deepspeech.utils.socket_server import AsrRequestHandler
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 
-from deepspeech.frontend.utility import read_manifest
 from deepspeech.utils.utility import add_arguments, print_arguments
 
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
@@ -100,7 +96,7 @@ if __name__ == "__main__":
     add_arg('speech_save_dir',  str,
             'demo_cache',
             "Directory to save demo audios.")
-    add_arg('warmup_manifest',  str, None, "Filepath of manifest to warm up.")
+    add_arg('warmup_manifest', str, None, "Filepath of manifest to warm up.")
     args = parser.parse_args()
     print_arguments(args)
 
diff --git a/deepspeech/exps/deepspeech2/bin/export.py b/deepspeech/exps/deepspeech2/bin/export.py
index f19060ef..7eb54330 100644
--- a/deepspeech/exps/deepspeech2/bin/export.py
+++ b/deepspeech/exps/deepspeech2/bin/export.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Export for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors
 
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester
diff --git a/deepspeech/exps/deepspeech2/bin/infer.py b/deepspeech/exps/deepspeech2/bin/infer.py
index 6f52c812..815ca833 100644
--- a/deepspeech/exps/deepspeech2/bin/infer.py
+++ b/deepspeech/exps/deepspeech2/bin/infer.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inferer for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors
 
 # TODO(hui zhang): dynamic load 
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
diff --git a/deepspeech/exps/deepspeech2/bin/test.py b/deepspeech/exps/deepspeech2/bin/test.py
index 72b38f48..090ee0de 100644
--- a/deepspeech/exps/deepspeech2/bin/test.py
+++ b/deepspeech/exps/deepspeech2/bin/test.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Evaluation for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors
 
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester
diff --git a/deepspeech/exps/deepspeech2/bin/train.py b/deepspeech/exps/deepspeech2/bin/train.py
index 0c1d0891..f2df5fa9 100644
--- a/deepspeech/exps/deepspeech2/bin/train.py
+++ b/deepspeech/exps/deepspeech2/bin/train.py
@@ -12,12 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Trainer for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
 from paddle import distributed as dist
 
 from deepspeech.utils.utility import print_arguments
diff --git a/deepspeech/exps/deepspeech2/bin/tune.py b/deepspeech/exps/deepspeech2/bin/tune.py
index 40ff04d4..3a2907a5 100644
--- a/deepspeech/exps/deepspeech2/bin/tune.py
+++ b/deepspeech/exps/deepspeech2/bin/tune.py
@@ -14,12 +14,8 @@
 """Beam search parameters tuning for DeepSpeech2 model."""
 
 import sys
-import os
 import numpy as np
-import argparse
 import functools
-import gzip
-import logging
 
 from paddle.io import DataLoader
 
@@ -122,7 +118,7 @@ def tune(config, args):
             if index % 2 == 0:
                 sys.stdout.write('.')
                 sys.stdout.flush()
-            print(f"tuneing: one grid done!")
+            print("tuneing: one grid done!")
 
         # output on-line tuning result at the end of current batch
         err_ave_min = min(err_ave)
diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 00ae6790..170d47d9 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -14,13 +14,10 @@
 """Contains DeepSpeech2 model."""
 
 import io
-import sys
-import os
 import time
 import logging
 import numpy as np
 from collections import defaultdict
-from functools import partial
 from pathlib import Path
 
 import paddle
@@ -39,7 +36,6 @@ from deepspeech.io.sampler import SortagradDistributedBatchSampler
 from deepspeech.io.sampler import SortagradBatchSampler
 from deepspeech.io.dataset import ManifestDataset
 
-from deepspeech.modules.loss import CTCLoss
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
 from deepspeech.models.deepspeech2 import DeepSpeech2InferModel
 
diff --git a/deepspeech/exps/u2/bin/export.py b/deepspeech/exps/u2/bin/export.py
index f9e9eb21..a9737814 100644
--- a/deepspeech/exps/u2/bin/export.py
+++ b/deepspeech/exps/u2/bin/export.py
@@ -13,16 +13,8 @@
 # limitations under the License.
 """Export for U2 model."""
 
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors
 
 from deepspeech.exps.u2.config import get_cfg_defaults
 from deepspeech.exps.u2.model import U2Tester as Tester
diff --git a/deepspeech/exps/u2/bin/test.py b/deepspeech/exps/u2/bin/test.py
index 06882296..dfde68e1 100644
--- a/deepspeech/exps/u2/bin/test.py
+++ b/deepspeech/exps/u2/bin/test.py
@@ -13,16 +13,8 @@
 # limitations under the License.
 """Evaluation for U2 model."""
 
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors
 
 # TODO(hui zhang): dynamic load 
 from deepspeech.exps.u2.config import get_cfg_defaults
diff --git a/deepspeech/exps/u2/bin/train.py b/deepspeech/exps/u2/bin/train.py
index 2742d94d..0e1f4025 100644
--- a/deepspeech/exps/u2/bin/train.py
+++ b/deepspeech/exps/u2/bin/train.py
@@ -13,11 +13,6 @@
 # limitations under the License.
 """Trainer for U2 model."""
 
-import io
-import logging
-import argparse
-import functools
-
 from paddle import distributed as dist
 
 from deepspeech.utils.utility import print_arguments
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 9d9f9961..29f7f03c 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -13,14 +13,10 @@
 # limitations under the License.
 """Contains U2 model."""
 
-import io
-import sys
-import os
 import time
 import logging
 import numpy as np
 from collections import defaultdict
-from functools import partial
 from pathlib import Path
 
 import paddle
@@ -40,8 +36,6 @@ from deepspeech.io.sampler import SortagradDistributedBatchSampler
 from deepspeech.io.sampler import SortagradBatchSampler
 from deepspeech.io.dataset import ManifestDataset
 
-from deepspeech.modules.loss import CTCLoss
-
 from deepspeech.models.u2 import U2Model
 
 logger = logging.getLogger(__name__)
diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py
index 2ba7019a..3ed50e76 100644
--- a/deepspeech/frontend/audio.py
+++ b/deepspeech/frontend/audio.py
@@ -22,7 +22,6 @@ import resampy
 from scipy import signal
 import random
 import copy
-import io
 
 
 class AudioSegment(object):
diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index 6c5d76ba..aa1c4841 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -22,7 +22,7 @@ from deepspeech.frontend.augmentor.noise_perturb import NoisePerturbAugmentor
 from deepspeech.frontend.augmentor.impulse_response import ImpulseResponseAugmentor
 from deepspeech.frontend.augmentor.resample import ResampleAugmentor
 from deepspeech.frontend.augmentor.online_bayesian_normalization import \
-     OnlineBayesianNormalizationAugmentor
+    OnlineBayesianNormalizationAugmentor
 
 
 class AugmentationPipeline():
diff --git a/deepspeech/frontend/augmentor/base.py b/deepspeech/frontend/augmentor/base.py
index 3bc37e68..4d48d6de 100644
--- a/deepspeech/frontend/augmentor/base.py
+++ b/deepspeech/frontend/augmentor/base.py
@@ -54,4 +54,4 @@ class AugmentorBase():
         :param spec_segment: Spectrogram segment to add effects to.
         :type spec_segment: Spectrogram
         """
-        pass
\ No newline at end of file
+        pass
diff --git a/deepspeech/frontend/featurizer/audio_featurizer.py b/deepspeech/frontend/featurizer/audio_featurizer.py
index 51af3830..72e70c0d 100644
--- a/deepspeech/frontend/featurizer/audio_featurizer.py
+++ b/deepspeech/frontend/featurizer/audio_featurizer.py
@@ -14,8 +14,6 @@
 """Contains the audio featurizer class."""
 
 import numpy as np
-from deepspeech.frontend.utility import read_manifest
-from deepspeech.frontend.audio import AudioSegment
 from python_speech_features import mfcc
 from python_speech_features import logfbank
 from python_speech_features import delta
@@ -320,7 +318,7 @@ class AudioFeaturizer(object):
         if stride_ms > window_ms:
             raise ValueError("Stride size must not be greater than "
                              "window size.")
-        #(T, D)
+        # (T, D)
         fbank_feat = logfbank(
             signal=samples,
             samplerate=sample_rate,
diff --git a/deepspeech/frontend/featurizer/text_featurizer.py b/deepspeech/frontend/featurizer/text_featurizer.py
index 62baf28d..e1d34e5a 100644
--- a/deepspeech/frontend/featurizer/text_featurizer.py
+++ b/deepspeech/frontend/featurizer/text_featurizer.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 """Contains the text featurizer class."""
 
-import os
 import sentencepiece as spm
 
 from deepspeech.frontend.utility import UNK
diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py
index f2a53833..a8529c30 100644
--- a/deepspeech/frontend/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -16,15 +16,7 @@ import numpy as np
 import math
 import json
 import codecs
-import os
-import tarfile
-import time
 import logging
-from typing import List
-from threading import Thread
-from multiprocessing import Process, Manager, Value
-
-from paddle.dataset.common import md5file
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/io/__init__.py b/deepspeech/io/__init__.py
index 290ffae0..9fe0e0eb 100644
--- a/deepspeech/io/__init__.py
+++ b/deepspeech/io/__init__.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import functools
 import numpy as np
 from paddle.io import DataLoader
 
@@ -131,7 +130,7 @@ def create_dataloader(manifest_path,
             if keep_transcription_text:
                 padded_text[:len(text)] = [ord(t) for t in text]  # string
             else:
-                padded_text[:len(text)] = text  #ids
+                padded_text[:len(text)] = text  # ids
             texts.append(padded_text)
             text_lens.append(len(text))
 
@@ -141,7 +140,7 @@ def create_dataloader(manifest_path,
         text_lens = np.array(text_lens).astype('int64')
         return padded_audios, audio_lens, texts, text_lens
 
-    #collate_fn=functools.partial(padding_batch, keep_transcription_text=keep_transcription_text),
+    # collate_fn=functools.partial(padding_batch, keep_transcription_text=keep_transcription_text),
     collate_fn = SpeechCollator(keep_transcription_text=keep_transcription_text)
     loader = DataLoader(
         dataset,
diff --git a/deepspeech/io/collator.py b/deepspeech/io/collator.py
index e577d7a1..4edb4ca9 100644
--- a/deepspeech/io/collator.py
+++ b/deepspeech/io/collator.py
@@ -14,7 +14,6 @@
 
 import logging
 import numpy as np
-from collections import namedtuple
 
 from deepspeech.io.utility import pad_sequence
 from deepspeech.frontend.utility import IGNORE_ID
diff --git a/deepspeech/io/dataset.py b/deepspeech/io/dataset.py
index 3db407dc..f783d827 100644
--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -13,13 +13,10 @@
 # limitations under the License.
 
 import io
-import math
 import random
 import tarfile
 import logging
-import numpy as np
 from collections import namedtuple
-from functools import partial
 from yacs.config import CfgNode
 
 from paddle.io import Dataset
diff --git a/deepspeech/io/sampler.py b/deepspeech/io/sampler.py
index 5bc49dad..2dbaf90c 100644
--- a/deepspeech/io/sampler.py
+++ b/deepspeech/io/sampler.py
@@ -13,14 +13,9 @@
 # limitations under the License.
 
 import math
-import random
-import tarfile
 import logging
 import numpy as np
-from collections import namedtuple
-from functools import partial
 
-import paddle
 from paddle.io import BatchSampler
 from paddle.io import DistributedBatchSampler
 from paddle import distributed as dist
@@ -59,7 +54,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
     batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
     rng.shuffle(batch_indices)
     batch_indices = [item for batch in batch_indices for item in batch]
-    assert (clipped == False)
+    assert clipped is False
     if not clipped:
         res_len = len(indices) - shift_len - len(batch_indices)
         # when res_len is 0, will return whole list, len(List[-0:]) = len(List[:])
@@ -195,13 +190,13 @@ class SortagradBatchSampler(BatchSampler):
         self.dataset = dataset
 
         assert isinstance(batch_size, int) and batch_size > 0, \
-                "batch_size should be a positive integer"
+            "batch_size should be a positive integer"
         self.batch_size = batch_size
         assert isinstance(shuffle, bool), \
-                "shuffle should be a boolean value"
+            "shuffle should be a boolean value"
         self.shuffle = shuffle
         assert isinstance(drop_last, bool), \
-                "drop_last should be a boolean number"
+            "drop_last should be a boolean number"
 
         self.drop_last = drop_last
         self.epoch = 0
diff --git a/deepspeech/io/utility.py b/deepspeech/io/utility.py
index 46c9fbd2..4c2ff76b 100644
--- a/deepspeech/io/utility.py
+++ b/deepspeech/io/utility.py
@@ -14,7 +14,6 @@
 
 import logging
 import numpy as np
-from collections import namedtuple
 from typing import List
 
 logger = logging.getLogger(__name__)
diff --git a/deepspeech/models/deepspeech2.py b/deepspeech/models/deepspeech2.py
index 88043937..727f64c0 100644
--- a/deepspeech/models/deepspeech2.py
+++ b/deepspeech/models/deepspeech2.py
@@ -12,20 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Deepspeech2 ASR Model"""
-import math
-import collections
-import numpy as np
 import logging
 from typing import Optional
 from yacs.config import CfgNode
 
-import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
-from deepspeech.modules.mask import sequence_mask
-from deepspeech.modules.activation import brelu
 from deepspeech.modules.conv import ConvStack
 from deepspeech.modules.rnn import RNNStack
 from deepspeech.modules.ctc import CTCDecoder
diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index f563024d..38a781e1 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -15,10 +15,8 @@
 Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition 
 (https://arxiv.org/pdf/2012.05481.pdf)
 """
-import math
-import collections
+
 from collections import defaultdict
-import numpy as np
 import logging
 from yacs.config import CfgNode
 from typing import List, Optional, Tuple
@@ -26,8 +24,6 @@ from typing import List, Optional, Tuple
 import paddle
 from paddle import jit
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.mask import make_pad_mask
 from deepspeech.modules.mask import mask_finished_preds
@@ -54,7 +50,7 @@ from deepspeech.utils.ctc_utils import remove_duplicates_and_blank
 
 logger = logging.getLogger(__name__)
 
-__all__ = ['U2TransformerModel', "U2ConformerModel"]
+__all__ = ["U2Model", "U2InferModel"]
 
 
 class U2BaseModel(nn.Module):
diff --git a/deepspeech/modules/__init__.py b/deepspeech/modules/__init__.py
index 61d5aa21..185a92b8 100644
--- a/deepspeech/modules/__init__.py
+++ b/deepspeech/modules/__init__.py
@@ -10,4 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
diff --git a/deepspeech/modules/activation.py b/deepspeech/modules/activation.py
index 60be811e..6fd2fa51 100644
--- a/deepspeech/modules/activation.py
+++ b/deepspeech/modules/activation.py
@@ -12,16 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Union
 import logging
-import numpy as np
-import math
 from collections import OrderedDict
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/attention.py b/deepspeech/modules/attention.py
index e9336c03..f2eb77e4 100644
--- a/deepspeech/modules/attention.py
+++ b/deepspeech/modules/attention.py
@@ -18,7 +18,6 @@ from typing import Optional, Tuple
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
 from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
diff --git a/deepspeech/modules/cmvn.py b/deepspeech/modules/cmvn.py
index 961755ab..562efd41 100644
--- a/deepspeech/modules/cmvn.py
+++ b/deepspeech/modules/cmvn.py
@@ -16,8 +16,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/conformer_convolution.py b/deepspeech/modules/conformer_convolution.py
index 3ea44fee..1a9c59f4 100644
--- a/deepspeech/modules/conformer_convolution.py
+++ b/deepspeech/modules/conformer_convolution.py
@@ -19,8 +19,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/conv.py b/deepspeech/modules/conv.py
index d17f3052..4c337428 100644
--- a/deepspeech/modules/conv.py
+++ b/deepspeech/modules/conv.py
@@ -14,10 +14,8 @@
 
 import logging
 
-import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.mask import sequence_mask
 from deepspeech.modules.activation import brelu
diff --git a/deepspeech/modules/ctc.py b/deepspeech/modules/ctc.py
index 58942d50..be283165 100644
--- a/deepspeech/modules/ctc.py
+++ b/deepspeech/modules/ctc.py
@@ -18,7 +18,6 @@ from typeguard import check_argument_types
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.loss import CTCLoss
 from deepspeech.utils import ctc_utils
@@ -151,7 +150,7 @@ class CTCDecoder(nn.Layer):
         :type vocab_list: list
         """
         # init once
-        if self._ext_scorer != None:
+        if self._ext_scorer is not None:
             return
 
         if language_model_path != '':
@@ -199,7 +198,7 @@ class CTCDecoder(nn.Layer):
         :return: List of transcription texts.
         :rtype: List of str
         """
-        if self._ext_scorer != None:
+        if self._ext_scorer is not None:
             self._ext_scorer.reset_params(beam_alpha, beam_beta)
 
         # beam search decode
diff --git a/deepspeech/modules/decoder.py b/deepspeech/modules/decoder.py
index 796c09ab..3e52bc7a 100644
--- a/deepspeech/modules/decoder.py
+++ b/deepspeech/modules/decoder.py
@@ -18,8 +18,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.attention import MultiHeadedAttention
 from deepspeech.modules.decoder_layer import DecoderLayer
@@ -125,7 +123,7 @@ class TransformerDecoder(nn.Module):
         m = subsequent_mask(tgt_mask.size(-1)).unsqueeze(0)
         # tgt_mask: (B, L, L)
         # TODO(Hui Zhang): not support & for tensor
-        #tgt_mask = tgt_mask & m
+        # tgt_mask = tgt_mask & m
         tgt_mask = tgt_mask.logical_and(m)
 
         x, _ = self.embed(tgt)
@@ -137,8 +135,8 @@ class TransformerDecoder(nn.Module):
         if self.use_output_layer:
             x = self.output_layer(x)
 
-        #TODO(Hui Zhang): reduce_sum not support bool type
-        #olens = tgt_mask.sum(1)
+        # TODO(Hui Zhang): reduce_sum does not support bool type
+        # olens = tgt_mask.sum(1)
         olens = tgt_mask.astype(paddle.int).sum(1)
         return x, olens
 
diff --git a/deepspeech/modules/decoder_layer.py b/deepspeech/modules/decoder_layer.py
index 8e5ae1ac..64e16b75 100644
--- a/deepspeech/modules/decoder_layer.py
+++ b/deepspeech/modules/decoder_layer.py
@@ -17,8 +17,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/embedding.py b/deepspeech/modules/embedding.py
index 4746e1d0..df8e346a 100644
--- a/deepspeech/modules/embedding.py
+++ b/deepspeech/modules/embedding.py
@@ -15,13 +15,10 @@
 
 import math
 import logging
-import numpy as np
 from typing import Tuple
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py
index 2efdc582..e2ea57d3 100644
--- a/deepspeech/modules/encoder.py
+++ b/deepspeech/modules/encoder.py
@@ -18,8 +18,6 @@ from typeguard import check_argument_types
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.attention import MultiHeadedAttention
 from deepspeech.modules.attention import RelPositionMultiHeadedAttention
diff --git a/deepspeech/modules/encoder_layer.py b/deepspeech/modules/encoder_layer.py
index d00e9f0a..6ae69852 100644
--- a/deepspeech/modules/encoder_layer.py
+++ b/deepspeech/modules/encoder_layer.py
@@ -17,8 +17,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/loss.py b/deepspeech/modules/loss.py
index cc5a982f..cb65ba14 100644
--- a/deepspeech/modules/loss.py
+++ b/deepspeech/modules/loss.py
@@ -17,7 +17,6 @@ import logging
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/mask.py b/deepspeech/modules/mask.py
index d11dd555..d9430a26 100644
--- a/deepspeech/modules/mask.py
+++ b/deepspeech/modules/mask.py
@@ -15,9 +15,6 @@
 import logging
 
 import paddle
-from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/positionwise_feed_forward.py b/deepspeech/modules/positionwise_feed_forward.py
index 89cf6033..4c4c3a66 100644
--- a/deepspeech/modules/positionwise_feed_forward.py
+++ b/deepspeech/modules/positionwise_feed_forward.py
@@ -16,8 +16,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 logger = logging.getLogger(__name__)
 
diff --git a/deepspeech/modules/subsampling.py b/deepspeech/modules/subsampling.py
index 4b0547d4..b3370896 100644
--- a/deepspeech/modules/subsampling.py
+++ b/deepspeech/modules/subsampling.py
@@ -18,8 +18,6 @@ import logging
 
 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I
 
 from deepspeech.modules.embedding import PositionalEncoding
 
diff --git a/deepspeech/training/__init__.py b/deepspeech/training/__init__.py
index 1071a3dd..185a92b8 100644
--- a/deepspeech/training/__init__.py
+++ b/deepspeech/training/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from deepspeech.training.trainer import *
diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py
index 0994f71f..e36c9264 100644
--- a/deepspeech/training/cli.py
+++ b/deepspeech/training/cli.py
@@ -58,12 +58,15 @@ def default_argument_parser():
     parser.add_argument("--export_path", type=str, help="path of the jit model to save")
 
     # running
-    parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], help="device type to use, cpu and gpu are supported.")
+    parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"],
+                        help="device type to use, cpu and gpu are supported.")
     parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.")
 
     # overwrite extra config and default config
-    #parser.add_argument("--opts", nargs=argparse.REMAINDER, help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
-    parser.add_argument("--opts", type=str, default=[], nargs='+', help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    # parser.add_argument("--opts", nargs=argparse.REMAINDER,
+    # help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    parser.add_argument("--opts", type=str, default=[], nargs='+',
+                        help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
     # yapd: enable
 
     return parser
diff --git a/deepspeech/training/scheduler.py b/deepspeech/training/scheduler.py
index 8eb8096f..a4527950 100644
--- a/deepspeech/training/scheduler.py
+++ b/deepspeech/training/scheduler.py
@@ -14,7 +14,6 @@
 
 import logging
 
-import paddle
 from paddle.optimizer.lr import LRScheduler
 
 logger = logging.getLogger(__name__)
diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py
index 6846fdc0..b03508cb 100644
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -16,12 +16,9 @@ import time
 import logging
 import logging.handlers
 from pathlib import Path
-import numpy as np
-from collections import defaultdict
 
 import paddle
 from paddle import distributed as dist
-from paddle.distributed.utils import get_gpus
 from tensorboardX import SummaryWriter
 
 from deepspeech.utils import checkpoint
diff --git a/deepspeech/utils/checkpoint.py b/deepspeech/utils/checkpoint.py
index c265e592..622811d0 100644
--- a/deepspeech/utils/checkpoint.py
+++ b/deepspeech/utils/checkpoint.py
@@ -13,15 +13,12 @@
 # limitations under the License.
 
 import os
-import time
 import logging
-import numpy as np
 import re
 import json
 
 import paddle
 from paddle import distributed as dist
-from paddle.nn import Layer
 from paddle.optimizer import Optimizer
 
 from deepspeech.utils import mp_tools
diff --git a/deepspeech/utils/error_rate.py b/deepspeech/utils/error_rate.py
index 3fb6b769..b6399bab 100644
--- a/deepspeech/utils/error_rate.py
+++ b/deepspeech/utils/error_rate.py
@@ -81,7 +81,7 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
     :return: Levenshtein distance and word number of reference sentence.
     :rtype: list
     """
-    if ignore_case == True:
+    if ignore_case:
         reference = reference.lower()
         hypothesis = hypothesis.lower()
 
@@ -107,12 +107,12 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
     :return: Levenshtein distance and length of reference sentence.
     :rtype: list
     """
-    if ignore_case == True:
+    if ignore_case:
         reference = reference.lower()
         hypothesis = hypothesis.lower()
 
     join_char = ' '
-    if remove_space == True:
+    if remove_space:
         join_char = ''
 
     reference = join_char.join(list(filter(None, reference.split(' '))))
diff --git a/deepspeech/utils/layer_tools.py b/deepspeech/utils/layer_tools.py
index e3350dce..9232c7fa 100644
--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -51,7 +51,7 @@ def recursively_remove_weight_norm(layer: nn.Layer):
     for layer in layer.sublayers():
         try:
             nn.utils.remove_weight_norm(layer)
-        except:
+        except ValueError as e:
             # ther is not weight norm hoom in this layer
             pass
 
diff --git a/deepspeech/utils/mp_tools.py b/deepspeech/utils/mp_tools.py
index 9c3c3d54..871735a4 100644
--- a/deepspeech/utils/mp_tools.py
+++ b/deepspeech/utils/mp_tools.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import paddle
 from paddle import distributed as dist
 from functools import wraps
 
diff --git a/deepspeech/utils/tensor_utils.py b/deepspeech/utils/tensor_utils.py
index 68204d8d..66d40e67 100644
--- a/deepspeech/utils/tensor_utils.py
+++ b/deepspeech/utils/tensor_utils.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Unility functions for Transformer."""
-import math
 import logging
 from typing import Tuple, List
 
diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py
index 5f376c24..7db01dd1 100644
--- a/deepspeech/utils/utility.py
+++ b/deepspeech/utils/utility.py
@@ -14,7 +14,6 @@
 """Contains common utility functions."""
 
 import math
-import numpy as np
 import distutils.util
 from typing import List
 
diff --git a/examples/dataset/aishell/aishell.py b/examples/dataset/aishell/aishell.py
index 764cc50c..2037546d 100644
--- a/examples/dataset/aishell/aishell.py
+++ b/examples/dataset/aishell/aishell.py
@@ -55,7 +55,8 @@ def create_manifest(data_dir, manifest_path_prefix):
     transcript_dict = {}
     for line in codecs.open(transcript_path, 'r', 'utf-8'):
         line = line.strip()
-        if line == '': continue
+        if line == '':
+            continue
         audio_id, text = line.split(' ', 1)
         # remove withespace
         text = ''.join(text.split())
@@ -82,7 +83,7 @@ def create_manifest(data_dir, manifest_path_prefix):
                             os.path.splitext(os.path.basename(audio_path))[0],
                             'feat':
                             audio_path,
-                            'feat_shape': (duration, ),  #second
+                            'feat_shape': (duration, ),  # second
                             'text':
                             text
                         },
diff --git a/examples/dataset/chime3_background/chime3_background.py b/examples/dataset/chime3_background/chime3_background.py
index 3f4fd1dc..04c18c83 100644
--- a/examples/dataset/chime3_background/chime3_background.py
+++ b/examples/dataset/chime3_background/chime3_background.py
@@ -19,7 +19,6 @@ meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """
 
-import distutils.util
 import os
 import wget
 import zipfile
@@ -29,7 +28,7 @@ import json
 import io
 from paddle.v2.dataset.common import md5file
 
-#DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+# DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 DATA_HOME = os.path.expanduser('.')
 
 URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
@@ -51,9 +50,10 @@ args = parser.parse_args()
 
 def download(url, md5sum, target_dir, filename=None):
     """Download file from url to target_dir, and check md5sum."""
-    if filename == None:
+    if filename is None:
         filename = url.split("/")[-1]
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
     filepath = os.path.join(target_dir, filename)
     if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
         print("Downloading %s ..." % url)
@@ -100,7 +100,7 @@ def create_manifest(data_dir, manifest_path):
                             'utt': os.path.splitext(os.path.basename(filepath))[
                                 0],
                             'feat': filepath,
-                            'feat_shape': (duration, ),  #second
+                            'feat_shape': (duration, ),  # second
                             'type': 'background'
                         }))
     with io.open(manifest_path, mode='w', encoding='utf8') as out_file:
diff --git a/examples/dataset/librispeech/librispeech.py b/examples/dataset/librispeech/librispeech.py
index 52c940fa..849b6c73 100644
--- a/examples/dataset/librispeech/librispeech.py
+++ b/examples/dataset/librispeech/librispeech.py
@@ -21,7 +21,6 @@ of each audio file in the data set.
 
 import distutils.util
 import os
-import sys
 import argparse
 import soundfile
 import json
diff --git a/examples/dataset/mini_librispeech/mini_librispeech.py b/examples/dataset/mini_librispeech/mini_librispeech.py
index 34a1c0dc..9b441c14 100644
--- a/examples/dataset/mini_librispeech/mini_librispeech.py
+++ b/examples/dataset/mini_librispeech/mini_librispeech.py
@@ -19,9 +19,7 @@ meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """
 
-import distutils.util
 import os
-import sys
 import argparse
 import soundfile
 import json
diff --git a/examples/dataset/rir_noise/rir_noise.py b/examples/dataset/rir_noise/rir_noise.py
index 900fc269..ca91f57d 100644
--- a/examples/dataset/rir_noise/rir_noise.py
+++ b/examples/dataset/rir_noise/rir_noise.py
@@ -27,7 +27,7 @@ import codecs
 import soundfile
 import json
 import argparse
-from utils.utility import download, unpack, unzip
+from utils.utility import download, unzip
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 
diff --git a/examples/dataset/voxforge/voxforge.py b/examples/dataset/voxforge/voxforge.py
index c32b783d..5c84d1ee 100644
--- a/examples/dataset/voxforge/voxforge.py
+++ b/examples/dataset/voxforge/voxforge.py
@@ -91,7 +91,7 @@ def select_dialects(target_dir, dialect_list):
     for dialect in dialect_list:
         # filter files by dialect
         command = 'find %s -iwholename "*etc/readme*" -exec egrep -iHl \
-                   "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect)
+            "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect)
         p = subprocess.Popen(
             command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
         output, err = p.communicate()
diff --git a/tests/deepspeech2_model_test.py b/tests/deepspeech2_model_test.py
index 5400e6db..98c82cd2 100644
--- a/tests/deepspeech2_model_test.py
+++ b/tests/deepspeech2_model_test.py
@@ -26,11 +26,11 @@ class TestDeepSpeech2Model(unittest.TestCase):
         self.feat_dim = 161
         max_len = 64
 
-        #(B, T, D)
+        # (B, T, D)
         audio = np.random.randn(self.batch_size, max_len, self.feat_dim)
         audio_len = np.random.randint(max_len, size=self.batch_size)
         audio_len[-1] = max_len
-        #(B, U)
+        # (B, U)
         text = np.array([[1, 2], [1, 2]])
         text_len = np.array([2] * self.batch_size)
 
diff --git a/utils/build_vocab.py b/utils/build_vocab.py
index 0587fd71..6f496ef7 100644
--- a/utils/build_vocab.py
+++ b/utils/build_vocab.py
@@ -17,10 +17,8 @@ Each item in vocabulary file is a character.
 
 import argparse
 import functools
-import json
 from collections import Counter
 import os
-import copy
 import tempfile
 
 from deepspeech.frontend.utility import read_manifest
@@ -35,23 +33,21 @@ parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('unit_type', str, "char", "Unit type, e.g. char, word, spm")
-add_arg('count_threshold',  int,    0,
-     "Truncation threshold for char/word counts.Default 0, no truncate.")
-add_arg('vocab_path',       str,
+add_arg('count_threshold', int, 0,
+        "Truncation threshold for char/word counts.Default 0, no truncate.")
+add_arg('vocab_path', str,
         'examples/librispeech/data/vocab.txt',
         "Filepath to write the vocabulary.")
-add_arg('manifest_paths',   str,
+add_arg('manifest_paths', str,
         None,
         "Filepaths of manifests for building vocabulary. "
         "You can provide multiple manifest files.",
         nargs='+',
         required=True)
 # bpe
-add_arg('vocab_size',  int,    0,  "Vocab size for spm.")
-add_arg('spm_mode', str, 'unigram',
-    "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm")
-add_arg('spm_model_prefix', str, "spm_model_%(spm_mode)_%(count_threshold)",
-    "spm model prefix, only need when `unit_type` is spm")
+add_arg('vocab_size', int, 0, "Vocab size for spm.")
+add_arg('spm_mode', str, 'unigram', "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm")
+add_arg('spm_model_prefix', str, "spm_model_%(spm_mode)_%(count_threshold)", "spm model prefix, only need when `unit_type` is spm")
 # yapf: disable
 args = parser.parse_args()
 
@@ -71,8 +67,8 @@ def main():
     print_arguments(args)
 
     fout = open(args.vocab_path, 'w', encoding='utf-8')
-    fout.write(BLANK + "\n") # 0 will be used for "blank" in CTC
-    fout.write(UNK + '\n')   # <unk> must be 1
+    fout.write(BLANK + "\n")  # 0 will be used for "blank" in CTC
+    fout.write(UNK + '\n')  # <unk> must be 1
 
     if args.unit_type == 'spm':
         # tools/spm_train --input=$wave_data/lang_char/input.txt 
@@ -104,14 +100,15 @@ def main():
     count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True)
     tokens = []
     for token, count in count_sorted:
-        if count < args.count_threshold: break
+        if count < args.count_threshold:
+            break
         tokens.append(token)
 
     tokens = sorted(tokens)
     for token in tokens:
         fout.write(token + '\n')
 
-    fout.write(SOS + "\n") # <sos/eos>
+    fout.write(SOS + "\n")  # <sos/eos>
     fout.close()
 
 
diff --git a/utils/format_data.py b/utils/format_data.py
index cfe8a22d..6ba2e2b0 100644
--- a/utils/format_data.py
+++ b/utils/format_data.py
@@ -15,15 +15,8 @@
 import argparse
 import functools
 import json
-from collections import Counter
-import os
-import copy
-import tempfile
 
 from deepspeech.frontend.utility import read_manifest
-from deepspeech.frontend.utility import UNK
-from deepspeech.frontend.utility import BLANK
-from deepspeech.frontend.utility import SOS
 from deepspeech.frontend.utility import load_cmvn
 from deepspeech.utils.utility import add_arguments
 from deepspeech.utils.utility import print_arguments
@@ -82,7 +75,7 @@ def main():
             if args.feat_type == 'raw':
                 feat_shape.append(feat_dim)
             else: # kaldi
-                raise NotImplemented('no support kaldi feat now!')
+                raise NotImplementedError('no support kaldi feat now!')
             fout.write(json.dumps(line_json) + '\n')
             count += 1
 
diff --git a/utils/utility.py b/utils/utility.py
index b13bc112..aaedfa95 100644
--- a/utils/utility.py
+++ b/utils/utility.py
@@ -30,7 +30,8 @@ def getfile_insensitive(path):
 
 def download_multi(url, target_dir, extra_args):
     """Download multiple files from url to target_dir."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
     print("Downloading %s ..." % url)
     ret_code = os.system("wget -c " + url + ' ' + extra_args + " -P " +
                          target_dir)
@@ -39,7 +40,8 @@ def download_multi(url, target_dir, extra_args):
 
 def download(url, md5sum, target_dir):
     """Download file from url to target_dir, and check md5sum."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
     filepath = os.path.join(target_dir, url.split("/")[-1])
     if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
         print("Downloading %s ..." % url)
@@ -58,7 +60,7 @@ def unpack(filepath, target_dir, rm_tar=False):
     tar = tarfile.open(filepath)
     tar.extractall(target_dir)
     tar.close()
-    if rm_tar == True:
+    if rm_tar:
         os.remove(filepath)
 
 
@@ -68,5 +70,5 @@ def unzip(filepath, target_dir, rm_tar=False):
     tar = zipfile.ZipFile(filepath, 'r')
     tar.extractall(target_dir)
     tar.close()
-    if rm_tar == True:
+    if rm_tar:
         os.remove(filepath)
-- 
GitLab