flake8

dee672a7 · Hui Zhang · 64f177cc · dee672a7 · dee672a7 · dee672a7
68 changed file
--- a/.flake8
+++ b/.flake8
+[flake8]
+
+########## OPTIONS ##########
+# Set the maximum length that any line (with some exceptions) may be.
+max-line-length = 120
+
+
+################### FILE PATTERNS ##########################
+# Provide a comma-separated list of glob patterns to exclude from checks.
+exclude =
+    # git folder
+    .git,
+    # python cache
+    __pycache__,
+# Provide a comma-separate list of glob patterns to include for checks.
+filename =
+    *.py
+
+
+########## RULES ##########
+
+# ERROR CODES
+#
+# E/W  - PEP8 errors/warnings (pycodestyle)
+# F    - linting errors (pyflakes)
+# C    - McCabe complexity error (mccabe)
+#
+# W503 - line break before binary operator
+
+# Specify a list of codes to ignore.
+ignore =
+    W503
+    E252,E262,E127,E265,E126,E266,E241,E261,E128,E125
+    W291,W293,W605
+    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
+    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
+    # to line this up with executable bit
+    EXE001,
+    # these ignores are from flake8-bugbear; please fix!
+    B007,B008,
+    # these ignores are from flake8-comprehensions; please fix!
+    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415
+
+# Specify the list of error codes you wish Flake8 to report.
+select =
+    E,
+    W,
+    F,
+    C
\ No newline at end of file
--- a/deepspeech/__init__.py
+++ b/deepspeech/__init__.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 import logging
 from typing import Union
-from typing import Optional
 from typing import List
 from typing import Tuple
 from typing import Any
@@ -21,7 +20,6 @@ from typing import Any
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I
 #TODO(Hui Zhang): remove  fluid import
 from paddle.fluid import core
 logger = logging.getLogger(__name__)
@@ -242,7 +240,7 @@ def is_broadcastable(shp1, shp2):
 def masked_fill(xs: paddle.Tensor,
                mask: paddle.Tensor,
                value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) == True
+    assert is_broadcastable(xs.shape, mask.shape) is True
    bshape = paddle.broadcast_shape(xs.shape, mask.shape)
    mask = mask.broadcast_to(bshape)
    trues = paddle.ones_like(xs) * value
@@ -259,7 +257,7 @@ if not hasattr(paddle.Tensor, 'masked_fill'):
 def masked_fill_(xs: paddle.Tensor,
                 mask: paddle.Tensor,
                 value: Union[float, int]):
-    assert is_broadcastable(xs.shape, mask.shape) == True
+    assert is_broadcastable(xs.shape, mask.shape) is True
    bshape = paddle.broadcast_shape(xs.shape, mask.shape)
    mask = mask.broadcast_to(bshape)
    trues = paddle.ones_like(xs) * value

--- a/deepspeech/decoders/decoders_deprecated.py
+++ b/deepspeech/decoders/decoders_deprecated.py
@@ -104,14 +104,14 @@ def ctc_beam_search_decoder(probs_seq,
        global ext_nproc_scorer
        ext_scoring_func = ext_nproc_scorer

-    ## initialize
+    # initialize
    # prefix_set_prev: the set containing selected prefixes
    # probs_b_prev: prefixes' probability ending with blank in previous step
    # probs_nb_prev: prefixes' probability ending with non-blank in previous step
    prefix_set_prev = {'\t': 1.0}
    probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0}

-    ## extend prefix in loop
+    # extend prefix in loop
    for time_step in range(len(probs_seq)):
        # prefix_set_next: the set containing candidate prefixes
        # probs_b_cur: prefixes' probability ending with blank in current step
@@ -120,7 +120,7 @@ def ctc_beam_search_decoder(probs_seq,

        prob_idx = list(enumerate(probs_seq[time_step]))
        cutoff_len = len(prob_idx)
-        #If pruning is enabled
+        # If pruning is enabled
        if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len:
            prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True)
            cutoff_len, cum_prob = 0, 0.0
@@ -172,7 +172,7 @@ def ctc_beam_search_decoder(probs_seq,
        # update probs
        probs_b_prev, probs_nb_prev = probs_b_cur, probs_nb_cur

-        ## store top beam_size prefixes
+        # store top beam_size prefixes
        prefix_set_prev = sorted(
            prefix_set_next.items(), key=lambda asd: asd[1], reverse=True)
        if beam_size < len(prefix_set_prev):
@@ -191,7 +191,7 @@ def ctc_beam_search_decoder(probs_seq,
        else:
            beam_result.append((float('-inf'), ''))

-    ## output top beam_size decoding results
+    # output top beam_size decoding results
    beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True)
    return beam_result


--- a/deepspeech/decoders/scorer_deprecated.py
+++ b/deepspeech/decoders/scorer_deprecated.py
@@ -71,7 +71,7 @@ class Scorer(object):
        """
        lm = self._language_model_score(sentence)
        word_cnt = self._word_count(sentence)
-        if log == False:
+        if log is False:
            score = np.power(lm, self._alpha) * np.power(word_cnt, self._beta)
        else:
            score = self._alpha * np.log(lm) + self._beta * np.log(word_cnt)

--- a/deepspeech/decoders/swig/setup.py
+++ b/deepspeech/decoders/swig/setup.py
@@ -16,7 +16,8 @@
 from setuptools import setup, Extension, distutils
 import glob
 import platform
-import os, sys
+import os
+import sys
 import multiprocessing.pool
 import argparse

@@ -65,9 +66,9 @@ def parallelCCompile(self,
 def compile_test(header, library):
    dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
    command = "bash -c \"g++ -include " + header \
-                + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
-                + dummy_path + " >/dev/null 2>/dev/null && rm " \
-                + dummy_path + " 2>/dev/null\""
+        + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
+        + dummy_path + " >/dev/null 2>/dev/null && rm " \
+        + dummy_path + " 2>/dev/null\""
    return os.system(command) == 0


@@ -75,8 +76,8 @@ def compile_test(header, library):
 distutils.ccompiler.CCompiler.compile = parallelCCompile

 FILES = glob.glob('kenlm/util/*.cc') \
-        + glob.glob('kenlm/lm/*.cc') \
-        + glob.glob('kenlm/util/double-conversion/*.cc')
+    + glob.glob('kenlm/lm/*.cc') \
+    + glob.glob('kenlm/util/double-conversion/*.cc')

 FILES += glob.glob('openfst-1.6.3/src/lib/*.cc')


--- a/deepspeech/exps/deepspeech2/bin/deploy/client.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/client.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 """Client-end for the ASR demo."""
 import keyboard
-import struct
-import socket
 import sys
 import argparse
 import pyaudio
@@ -49,7 +47,7 @@ def on_press_release(x):
            sys.stdout.flush()
            is_recording = True
    if x.event_type == 'up' and x.name == release.name:
-        if is_recording == True:
+        if is_recording:
            is_recording = False



--- a/deepspeech/exps/deepspeech2/bin/deploy/runtime.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/runtime.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Server-end for the ASR demo."""
-import os
-import time
-import argparse
 import functools
 import paddle
 import numpy as np
@@ -26,7 +23,6 @@ from deepspeech.utils.socket_server import AsrRequestHandler
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults

-from deepspeech.frontend.utility import read_manifest
 from deepspeech.utils.utility import add_arguments, print_arguments

 from deepspeech.models.deepspeech2 import DeepSpeech2Model
@@ -159,15 +155,13 @@ if __name__ == "__main__":
        "--params_file",
        type=str,
        default="",
-        help=
-        "Parameter filename, Specify this when your model is a combined model."
+        help="Parameter filename, Specify this when your model is a combined model."
    )
    add_arg(
        "--model_dir",
        type=str,
        default=None,
-        help=
-        "Model dir, If you load a non-combined model, specify the directory of the model."
+        help="Model dir, If you load a non-combined model, specify the directory of the model."
    )
    add_arg("--use_gpu",
                        type=bool,

--- a/deepspeech/exps/deepspeech2/bin/deploy/send.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/send.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Socket client to send wav to ASR server."""
-import struct
-import socket
 import argparse
 import wave


--- a/deepspeech/exps/deepspeech2/bin/deploy/server.py
+++ b/deepspeech/exps/deepspeech2/bin/deploy/server.py
@@ -12,9 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Server-end for the ASR demo."""
-import os
-import time
-import argparse
 import functools
 import paddle
 import numpy as np
@@ -26,7 +23,6 @@ from deepspeech.utils.socket_server import AsrRequestHandler
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults

-from deepspeech.frontend.utility import read_manifest
 from deepspeech.utils.utility import add_arguments, print_arguments

 from deepspeech.models.deepspeech2 import DeepSpeech2Model
@@ -100,7 +96,7 @@ if __name__ == "__main__":
    add_arg('speech_save_dir',  str,
            'demo_cache',
            "Directory to save demo audios.")
-    add_arg('warmup_manifest',  str, None, "Filepath of manifest to warm up.")
+    add_arg('warmup_manifest', str, None, "Filepath of manifest to warm up.")
    args = parser.parse_args()
    print_arguments(args)


--- a/deepspeech/exps/deepspeech2/bin/export.py
+++ b/deepspeech/exps/deepspeech2/bin/export.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Export for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors

 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester

--- a/deepspeech/exps/deepspeech2/bin/infer.py
+++ b/deepspeech/exps/deepspeech2/bin/infer.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inferer for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors

 # TODO(hui zhang): dynamic load 
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults

--- a/deepspeech/exps/deepspeech2/bin/test.py
+++ b/deepspeech/exps/deepspeech2/bin/test.py
@@ -12,17 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Evaluation for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors

 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester

--- a/deepspeech/exps/deepspeech2/bin/train.py
+++ b/deepspeech/exps/deepspeech2/bin/train.py
@@ -12,12 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Trainer for DeepSpeech2 model."""
-
-import io
-import logging
-import argparse
-import functools
-
 from paddle import distributed as dist

 from deepspeech.utils.utility import print_arguments

--- a/deepspeech/exps/deepspeech2/bin/tune.py
+++ b/deepspeech/exps/deepspeech2/bin/tune.py
@@ -14,12 +14,8 @@
 """Beam search parameters tuning for DeepSpeech2 model."""

 import sys
-import os
 import numpy as np
-import argparse
 import functools
-import gzip
-import logging

 from paddle.io import DataLoader

@@ -122,7 +118,7 @@ def tune(config, args):
            if index % 2 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
-            print(f"tuneing: one grid done!")
+            print("tuneing: one grid done!")

        # output on-line tuning result at the end of current batch
        err_ave_min = min(err_ave)

--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -14,13 +14,10 @@
 """Contains DeepSpeech2 model."""

 import io
-import sys
-import os
 import time
 import logging
 import numpy as np
 from collections import defaultdict
-from functools import partial
 from pathlib import Path

 import paddle
@@ -39,7 +36,6 @@ from deepspeech.io.sampler import SortagradDistributedBatchSampler
 from deepspeech.io.sampler import SortagradBatchSampler
 from deepspeech.io.dataset import ManifestDataset

-from deepspeech.modules.loss import CTCLoss
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
 from deepspeech.models.deepspeech2 import DeepSpeech2InferModel


--- a/deepspeech/exps/u2/bin/export.py
+++ b/deepspeech/exps/u2/bin/export.py
@@ -13,16 +13,8 @@
 # limitations under the License.
 """Export for U2 model."""

-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors

 from deepspeech.exps.u2.config import get_cfg_defaults
 from deepspeech.exps.u2.model import U2Tester as Tester

--- a/deepspeech/exps/u2/bin/test.py
+++ b/deepspeech/exps/u2/bin/test.py
@@ -13,16 +13,8 @@
 # limitations under the License.
 """Evaluation for U2 model."""

-import io
-import logging
-import argparse
-import functools
-
-from paddle import distributed as dist
-
 from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.utility import print_arguments
-from deepspeech.utils.error_rate import char_errors, word_errors

 # TODO(hui zhang): dynamic load 
 from deepspeech.exps.u2.config import get_cfg_defaults

--- a/deepspeech/exps/u2/bin/train.py
+++ b/deepspeech/exps/u2/bin/train.py
@@ -13,11 +13,6 @@
 # limitations under the License.
 """Trainer for U2 model."""

-import io
-import logging
-import argparse
-import functools
-
 from paddle import distributed as dist

 from deepspeech.utils.utility import print_arguments

--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -13,14 +13,10 @@
 # limitations under the License.
 """Contains U2 model."""

-import io
-import sys
-import os
 import time
 import logging
 import numpy as np
 from collections import defaultdict
-from functools import partial
 from pathlib import Path

 import paddle
@@ -40,8 +36,6 @@ from deepspeech.io.sampler import SortagradDistributedBatchSampler
 from deepspeech.io.sampler import SortagradBatchSampler
 from deepspeech.io.dataset import ManifestDataset

-from deepspeech.modules.loss import CTCLoss
-
 from deepspeech.models.u2 import U2Model

 logger = logging.getLogger(__name__)

--- a/deepspeech/frontend/audio.py
+++ b/deepspeech/frontend/audio.py
@@ -22,7 +22,6 @@ import resampy
 from scipy import signal
 import random
 import copy
-import io


 class AudioSegment(object):

--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -22,7 +22,7 @@ from deepspeech.frontend.augmentor.noise_perturb import NoisePerturbAugmentor
 from deepspeech.frontend.augmentor.impulse_response import ImpulseResponseAugmentor
 from deepspeech.frontend.augmentor.resample import ResampleAugmentor
 from deepspeech.frontend.augmentor.online_bayesian_normalization import \
-     OnlineBayesianNormalizationAugmentor
+    OnlineBayesianNormalizationAugmentor


 class AugmentationPipeline():

--- a/deepspeech/frontend/augmentor/base.py
+++ b/deepspeech/frontend/augmentor/base.py
@@ -54,4 +54,4 @@ class AugmentorBase():
        :param spec_segment: Spectrogram segment to add effects to.
        :type spec_segment: Spectrogram
        """
-        pass
\ No newline at end of file
+        pass
--- a/deepspeech/frontend/featurizer/audio_featurizer.py
+++ b/deepspeech/frontend/featurizer/audio_featurizer.py
@@ -14,8 +14,6 @@
 """Contains the audio featurizer class."""

 import numpy as np
-from deepspeech.frontend.utility import read_manifest
-from deepspeech.frontend.audio import AudioSegment
 from python_speech_features import mfcc
 from python_speech_features import logfbank
 from python_speech_features import delta
@@ -320,7 +318,7 @@ class AudioFeaturizer(object):
        if stride_ms > window_ms:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
-        #(T, D)
+        # (T, D)
        fbank_feat = logfbank(
            signal=samples,
            samplerate=sample_rate,

--- a/deepspeech/frontend/featurizer/text_featurizer.py
+++ b/deepspeech/frontend/featurizer/text_featurizer.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 """Contains the text featurizer class."""

-import os
 import sentencepiece as spm

 from deepspeech.frontend.utility import UNK

--- a/deepspeech/frontend/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -16,15 +16,7 @@ import numpy as np
 import math
 import json
 import codecs
-import os
-import tarfile
-import time
 import logging
-from typing import List
-from threading import Thread
-from multiprocessing import Process, Manager, Value
-
-from paddle.dataset.common import md5file

 logger = logging.getLogger(__name__)


--- a/deepspeech/io/__init__.py
+++ b/deepspeech/io/__init__.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import functools
 import numpy as np
 from paddle.io import DataLoader

@@ -131,7 +130,7 @@ def create_dataloader(manifest_path,
            if keep_transcription_text:
                padded_text[:len(text)] = [ord(t) for t in text]  # string
            else:
-                padded_text[:len(text)] = text  #ids
+                padded_text[:len(text)] = text  # ids
            texts.append(padded_text)
            text_lens.append(len(text))

@@ -141,7 +140,7 @@ def create_dataloader(manifest_path,
        text_lens = np.array(text_lens).astype('int64')
        return padded_audios, audio_lens, texts, text_lens

-    #collate_fn=functools.partial(padding_batch, keep_transcription_text=keep_transcription_text),
+    # collate_fn=functools.partial(padding_batch, keep_transcription_text=keep_transcription_text),
    collate_fn = SpeechCollator(keep_transcription_text=keep_transcription_text)
    loader = DataLoader(
        dataset,

--- a/deepspeech/io/collator.py
+++ b/deepspeech/io/collator.py
@@ -14,7 +14,6 @@

 import logging
 import numpy as np
-from collections import namedtuple

 from deepspeech.io.utility import pad_sequence
 from deepspeech.frontend.utility import IGNORE_ID

--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -13,13 +13,10 @@
 # limitations under the License.

 import io
-import math
 import random
 import tarfile
 import logging
-import numpy as np
 from collections import namedtuple
-from functools import partial
 from yacs.config import CfgNode

 from paddle.io import Dataset

--- a/deepspeech/io/sampler.py
+++ b/deepspeech/io/sampler.py
@@ -13,14 +13,9 @@
 # limitations under the License.

 import math
-import random
-import tarfile
 import logging
 import numpy as np
-from collections import namedtuple
-from functools import partial

-import paddle
 from paddle.io import BatchSampler
 from paddle.io import DistributedBatchSampler
 from paddle import distributed as dist
@@ -59,7 +54,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
-    assert (clipped == False)
+    assert clipped is False
    if not clipped:
        res_len = len(indices) - shift_len - len(batch_indices)
        # when res_len is 0, will return whole list, len(List[-0:]) = len(List[:])
@@ -195,13 +190,13 @@ class SortagradBatchSampler(BatchSampler):
        self.dataset = dataset

        assert isinstance(batch_size, int) and batch_size > 0, \
-                "batch_size should be a positive integer"
+            "batch_size should be a positive integer"
        self.batch_size = batch_size
        assert isinstance(shuffle, bool), \
-                "shuffle should be a boolean value"
+            "shuffle should be a boolean value"
        self.shuffle = shuffle
        assert isinstance(drop_last, bool), \
-                "drop_last should be a boolean number"
+            "drop_last should be a boolean number"

        self.drop_last = drop_last
        self.epoch = 0

--- a/deepspeech/io/utility.py
+++ b/deepspeech/io/utility.py
@@ -14,7 +14,6 @@

 import logging
 import numpy as np
-from collections import namedtuple
 from typing import List

 logger = logging.getLogger(__name__)

--- a/deepspeech/models/deepspeech2.py
+++ b/deepspeech/models/deepspeech2.py
@@ -12,20 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Deepspeech2 ASR Model"""
-import math
-import collections
-import numpy as np
 import logging
 from typing import Optional
 from yacs.config import CfgNode

-import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

-from deepspeech.modules.mask import sequence_mask
-from deepspeech.modules.activation import brelu
 from deepspeech.modules.conv import ConvStack
 from deepspeech.modules.rnn import RNNStack
 from deepspeech.modules.ctc import CTCDecoder

--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -15,10 +15,8 @@
 Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition 
 (https://arxiv.org/pdf/2012.05481.pdf)
 """
-import math
-import collections
+
 from collections import defaultdict
-import numpy as np
 import logging
 from yacs.config import CfgNode
 from typing import List, Optional, Tuple
@@ -26,8 +24,6 @@ from typing import List, Optional, Tuple
 import paddle
 from paddle import jit
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.mask import make_pad_mask
 from deepspeech.modules.mask import mask_finished_preds
@@ -54,7 +50,7 @@ from deepspeech.utils.ctc_utils import remove_duplicates_and_blank

 logger = logging.getLogger(__name__)

-__all__ = ['U2TransformerModel', "U2ConformerModel"]
+__all__ = ["U2Model", "U2InferModel"]


 class U2BaseModel(nn.Module):

--- a/deepspeech/modules/__init__.py
+++ b/deepspeech/modules/__init__.py
@@ -10,4 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
--- a/deepspeech/modules/activation.py
+++ b/deepspeech/modules/activation.py
@@ -12,16 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Union
 import logging
-import numpy as np
-import math
 from collections import OrderedDict

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/attention.py
+++ b/deepspeech/modules/attention.py
@@ -18,7 +18,6 @@ from typing import Optional, Tuple

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
 from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)

--- a/deepspeech/modules/cmvn.py
+++ b/deepspeech/modules/cmvn.py
@@ -16,8 +16,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/conformer_convolution.py
+++ b/deepspeech/modules/conformer_convolution.py
@@ -19,8 +19,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/conv.py
+++ b/deepspeech/modules/conv.py
@@ -14,10 +14,8 @@

 import logging

-import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.mask import sequence_mask
 from deepspeech.modules.activation import brelu

--- a/deepspeech/modules/ctc.py
+++ b/deepspeech/modules/ctc.py
@@ -18,7 +18,6 @@ from typeguard import check_argument_types
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.loss import CTCLoss
 from deepspeech.utils import ctc_utils
@@ -151,7 +150,7 @@ class CTCDecoder(nn.Layer):
        :type vocab_list: list
        """
        # init once
-        if self._ext_scorer != None:
+        if self._ext_scorer is not None:
            return

        if language_model_path != '':
@@ -199,7 +198,7 @@ class CTCDecoder(nn.Layer):
        :return: List of transcription texts.
        :rtype: List of str
        """
-        if self._ext_scorer != None:
+        if self._ext_scorer is not None:
            self._ext_scorer.reset_params(beam_alpha, beam_beta)

        # beam search decode

--- a/deepspeech/modules/decoder.py
+++ b/deepspeech/modules/decoder.py
@@ -18,8 +18,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.attention import MultiHeadedAttention
 from deepspeech.modules.decoder_layer import DecoderLayer
@@ -125,7 +123,7 @@ class TransformerDecoder(nn.Module):
        m = subsequent_mask(tgt_mask.size(-1)).unsqueeze(0)
        # tgt_mask: (B, L, L)
        # TODO(Hui Zhang): not support & for tensor
-        #tgt_mask = tgt_mask & m
+        # tgt_mask = tgt_mask & m
        tgt_mask = tgt_mask.logical_and(m)

        x, _ = self.embed(tgt)
@@ -137,8 +135,8 @@ class TransformerDecoder(nn.Module):
        if self.use_output_layer:
            x = self.output_layer(x)

-        #TODO(Hui Zhang): reduce_sum not support bool type
-        #olens = tgt_mask.sum(1)
+        # TODO(Hui Zhang): reduce_sum not support bool type
+        # olens = tgt_mask.sum(1)
        olens = tgt_mask.astype(paddle.int).sum(1)
        return x, olens


--- a/deepspeech/modules/decoder_layer.py
+++ b/deepspeech/modules/decoder_layer.py
@@ -17,8 +17,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/embedding.py
+++ b/deepspeech/modules/embedding.py
@@ -15,13 +15,10 @@

 import math
 import logging
-import numpy as np
 from typing import Tuple

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/encoder.py
+++ b/deepspeech/modules/encoder.py
@@ -18,8 +18,6 @@ from typeguard import check_argument_types

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.attention import MultiHeadedAttention
 from deepspeech.modules.attention import RelPositionMultiHeadedAttention

--- a/deepspeech/modules/encoder_layer.py
+++ b/deepspeech/modules/encoder_layer.py
@@ -17,8 +17,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/loss.py
+++ b/deepspeech/modules/loss.py
@@ -17,7 +17,6 @@ import logging
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/mask.py
+++ b/deepspeech/modules/mask.py
@@ -15,9 +15,6 @@
 import logging

 import paddle
-from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/positionwise_feed_forward.py
+++ b/deepspeech/modules/positionwise_feed_forward.py
@@ -16,8 +16,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 logger = logging.getLogger(__name__)


--- a/deepspeech/modules/subsampling.py
+++ b/deepspeech/modules/subsampling.py
@@ -18,8 +18,6 @@ import logging

 import paddle
 from paddle import nn
-from paddle.nn import functional as F
-from paddle.nn import initializer as I

 from deepspeech.modules.embedding import PositionalEncoding


--- a/deepspeech/training/__init__.py
+++ b/deepspeech/training/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from deepspeech.training.trainer import *
--- a/deepspeech/training/cli.py
+++ b/deepspeech/training/cli.py
@@ -58,12 +58,15 @@ def default_argument_parser():
    parser.add_argument("--export_path", type=str, help="path of the jit model to save")

    # running
-    parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], help="device type to use, cpu and gpu are supported.")
+    parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"],
+                        help="device type to use, cpu and gpu are supported.")
    parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.")

    # overwrite extra config and default config
-    #parser.add_argument("--opts", nargs=argparse.REMAINDER, help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
-    parser.add_argument("--opts", type=str, default=[], nargs='+', help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    # parser.add_argument("--opts", nargs=argparse.REMAINDER, 
+    # help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    parser.add_argument("--opts", type=str, default=[], nargs='+',
+                        help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
    # yapd: enable

    return parser
--- a/deepspeech/training/scheduler.py
+++ b/deepspeech/training/scheduler.py
@@ -14,7 +14,6 @@

 import logging

-import paddle
 from paddle.optimizer.lr import LRScheduler

 logger = logging.getLogger(__name__)

--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -16,12 +16,9 @@ import time
 import logging
 import logging.handlers
 from pathlib import Path
-import numpy as np
-from collections import defaultdict

 import paddle
 from paddle import distributed as dist
-from paddle.distributed.utils import get_gpus
 from tensorboardX import SummaryWriter

 from deepspeech.utils import checkpoint

--- a/deepspeech/utils/checkpoint.py
+++ b/deepspeech/utils/checkpoint.py
@@ -13,15 +13,12 @@
 # limitations under the License.

 import os
-import time
 import logging
-import numpy as np
 import re
 import json

 import paddle
 from paddle import distributed as dist
-from paddle.nn import Layer
 from paddle.optimizer import Optimizer

 from deepspeech.utils import mp_tools

--- a/deepspeech/utils/error_rate.py
+++ b/deepspeech/utils/error_rate.py
@@ -81,7 +81,7 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
    :return: Levenshtein distance and word number of reference sentence.
    :rtype: list
    """
-    if ignore_case == True:
+    if ignore_case:
        reference = reference.lower()
        hypothesis = hypothesis.lower()

@@ -107,12 +107,12 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
    :return: Levenshtein distance and length of reference sentence.
    :rtype: list
    """
-    if ignore_case == True:
+    if ignore_case:
        reference = reference.lower()
        hypothesis = hypothesis.lower()

    join_char = ' '
-    if remove_space == True:
+    if remove_space:
        join_char = ''

    reference = join_char.join(list(filter(None, reference.split(' '))))

--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -51,7 +51,7 @@ def recursively_remove_weight_norm(layer: nn.Layer):
    for layer in layer.sublayers():
        try:
            nn.utils.remove_weight_norm(layer)
-        except:
+        except ValueError as e:
            # ther is not weight norm hoom in this layer
            pass


--- a/deepspeech/utils/mp_tools.py
+++ b/deepspeech/utils/mp_tools.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import paddle
 from paddle import distributed as dist
 from functools import wraps


--- a/deepspeech/utils/tensor_utils.py
+++ b/deepspeech/utils/tensor_utils.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Unility functions for Transformer."""
-import math
 import logging
 from typing import Tuple, List


--- a/deepspeech/utils/utility.py
+++ b/deepspeech/utils/utility.py
@@ -14,7 +14,6 @@
 """Contains common utility functions."""

 import math
-import numpy as np
 import distutils.util
 from typing import List


--- a/examples/dataset/aishell/aishell.py
+++ b/examples/dataset/aishell/aishell.py
@@ -55,7 +55,8 @@ def create_manifest(data_dir, manifest_path_prefix):
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
-        if line == '': continue
+        if line == '':
+            continue
        audio_id, text = line.split(' ', 1)
        # remove withespace
        text = ''.join(text.split())
@@ -82,7 +83,7 @@ def create_manifest(data_dir, manifest_path_prefix):
                            os.path.splitext(os.path.basename(audio_path))[0],
                            'feat':
                            audio_path,
-                            'feat_shape': (duration, ),  #second
+                            'feat_shape': (duration, ),  # second
                            'text':
                            text
                        },

--- a/examples/dataset/chime3_background/chime3_background.py
+++ b/examples/dataset/chime3_background/chime3_background.py
@@ -19,7 +19,6 @@ meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """

-import distutils.util
 import os
 import wget
 import zipfile
@@ -29,7 +28,7 @@ import json
 import io
 from paddle.v2.dataset.common import md5file

-#DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+# DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 DATA_HOME = os.path.expanduser('.')

 URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
@@ -51,9 +50,10 @@ args = parser.parse_args()

 def download(url, md5sum, target_dir, filename=None):
    """Download file from url to target_dir, and check md5sum."""
-    if filename == None:
+    if filename is None:
        filename = url.split("/")[-1]
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, filename)
    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
        print("Downloading %s ..." % url)
@@ -100,7 +100,7 @@ def create_manifest(data_dir, manifest_path):
                            'utt': os.path.splitext(os.path.basename(filepath))[
                                0],
                            'feat': filepath,
-                            'feat_shape': (duration, ),  #second
+                            'feat_shape': (duration, ),  # second
                            'type': 'background'
                        }))
    with io.open(manifest_path, mode='w', encoding='utf8') as out_file:

--- a/examples/dataset/librispeech/librispeech.py
+++ b/examples/dataset/librispeech/librispeech.py
@@ -21,7 +21,6 @@ of each audio file in the data set.

 import distutils.util
 import os
-import sys
 import argparse
 import soundfile
 import json

--- a/examples/dataset/mini_librispeech/mini_librispeech.py
+++ b/examples/dataset/mini_librispeech/mini_librispeech.py
@@ -19,9 +19,7 @@ meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """

-import distutils.util
 import os
-import sys
 import argparse
 import soundfile
 import json

--- a/examples/dataset/rir_noise/rir_noise.py
+++ b/examples/dataset/rir_noise/rir_noise.py
@@ -27,7 +27,7 @@ import codecs
 import soundfile
 import json
 import argparse
-from utils.utility import download, unpack, unzip
+from utils.utility import download, unzip

 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')


--- a/examples/dataset/voxforge/voxforge.py
+++ b/examples/dataset/voxforge/voxforge.py
@@ -91,7 +91,7 @@ def select_dialects(target_dir, dialect_list):
    for dialect in dialect_list:
        # filter files by dialect
        command = 'find %s -iwholename "*etc/readme*" -exec egrep -iHl \
-                   "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect)
+            "pronunciation dialect.*%s" {} \;' % (audio_dir, dialect)
        p = subprocess.Popen(
            command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
        output, err = p.communicate()

--- a/tests/deepspeech2_model_test.py
+++ b/tests/deepspeech2_model_test.py
@@ -26,11 +26,11 @@ class TestDeepSpeech2Model(unittest.TestCase):
        self.feat_dim = 161
        max_len = 64

-        #(B, T, D)
+        # (B, T, D)
        audio = np.random.randn(self.batch_size, max_len, self.feat_dim)
        audio_len = np.random.randint(max_len, size=self.batch_size)
        audio_len[-1] = max_len
-        #(B, U)
+        # (B, U)
        text = np.array([[1, 2], [1, 2]])
        text_len = np.array([2] * self.batch_size)


--- a/utils/build_vocab.py
+++ b/utils/build_vocab.py
@@ -17,10 +17,8 @@ Each item in vocabulary file is a character.

 import argparse
 import functools
-import json
 from collections import Counter
 import os
-import copy
 import tempfile

 from deepspeech.frontend.utility import read_manifest
@@ -35,23 +33,21 @@ parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('unit_type', str, "char", "Unit type, e.g. char, word, spm")
-add_arg('count_threshold',  int,    0,
-     "Truncation threshold for char/word counts.Default 0, no truncate.")
-add_arg('vocab_path',       str,
+add_arg('count_threshold', int, 0,
+        "Truncation threshold for char/word counts.Default 0, no truncate.")
+add_arg('vocab_path', str,
        'examples/librispeech/data/vocab.txt',
        "Filepath to write the vocabulary.")
-add_arg('manifest_paths',   str,
+add_arg('manifest_paths', str,
        None,
        "Filepaths of manifests for building vocabulary. "
        "You can provide multiple manifest files.",
        nargs='+',
        required=True)
 # bpe
-add_arg('vocab_size',  int,    0,  "Vocab size for spm.")
-add_arg('spm_mode', str, 'unigram',
-    "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm")
-add_arg('spm_model_prefix', str, "spm_model_%(spm_mode)_%(count_threshold)",
-    "spm model prefix, only need when `unit_type` is spm")
+add_arg('vocab_size', int, 0, "Vocab size for spm.")
+add_arg('spm_mode', str, 'unigram', "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm")
+add_arg('spm_model_prefix', str, "spm_model_%(spm_mode)_%(count_threshold)", "spm model prefix, only need when `unit_type` is spm")
 # yapf: disable
 args = parser.parse_args()

@@ -71,8 +67,8 @@ def main():
    print_arguments(args)

    fout = open(args.vocab_path, 'w', encoding='utf-8')
-    fout.write(BLANK + "\n") # 0 will be used for "blank" in CTC
-    fout.write(UNK + '\n')   # <unk> must be 1
+    fout.write(BLANK + "\n")  # 0 will be used for "blank" in CTC
+    fout.write(UNK + '\n')  # <unk> must be 1

    if args.unit_type == 'spm':
        # tools/spm_train --input=$wave_data/lang_char/input.txt 
@@ -104,14 +100,15 @@ def main():
    count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    tokens = []
    for token, count in count_sorted:
-        if count < args.count_threshold: break
+        if count < args.count_threshold:
+            break
        tokens.append(token)

    tokens = sorted(tokens)
    for token in tokens:
        fout.write(token + '\n')

-    fout.write(SOS + "\n") # <sos/eos>
+    fout.write(SOS + "\n")  # <sos/eos>
    fout.close()



--- a/utils/format_data.py
+++ b/utils/format_data.py
@@ -15,15 +15,8 @@
 import argparse
 import functools
 import json
-from collections import Counter
-import os
-import copy
-import tempfile

 from deepspeech.frontend.utility import read_manifest
-from deepspeech.frontend.utility import UNK
-from deepspeech.frontend.utility import BLANK
-from deepspeech.frontend.utility import SOS
 from deepspeech.frontend.utility import load_cmvn
 from deepspeech.utils.utility import add_arguments
 from deepspeech.utils.utility import print_arguments
@@ -82,7 +75,7 @@ def main():
            if args.feat_type == 'raw':
                feat_shape.append(feat_dim)
            else: # kaldi
-                raise NotImplemented('no support kaldi feat now!')
+                raise NotImplementedError('no support kaldi feat now!')
            fout.write(json.dumps(line_json) + '\n')
            count += 1


--- a/utils/utility.py
+++ b/utils/utility.py
@@ -30,7 +30,8 @@ def getfile_insensitive(path):

 def download_multi(url, target_dir, extra_args):
    """Download multiple files from url to target_dir."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
    print("Downloading %s ..." % url)
    ret_code = os.system("wget -c " + url + ' ' + extra_args + " -P " +
                         target_dir)
@@ -39,7 +40,8 @@ def download_multi(url, target_dir, extra_args):

 def download(url, md5sum, target_dir):
    """Download file from url to target_dir, and check md5sum."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])
    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
        print("Downloading %s ..." % url)
@@ -58,7 +60,7 @@ def unpack(filepath, target_dir, rm_tar=False):
    tar = tarfile.open(filepath)
    tar.extractall(target_dir)
    tar.close()
-    if rm_tar == True:
+    if rm_tar:
        os.remove(filepath)


@@ -68,5 +70,5 @@ def unzip(filepath, target_dir, rm_tar=False):
    tar = zipfile.ZipFile(filepath, 'r')
    tar.extractall(target_dir)
    tar.close()
-    if rm_tar == True:
+    if rm_tar:
        os.remove(filepath)