fix compliance test bug, and format

4051e7b7 · Hui Zhang · fc1bee35 · 4051e7b7 · 4051e7b7 · 4051e7b7
13 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,5 @@ tools/miniconda.sh
 tools/CRF++-0.58/

 speechx/fc_patch/
+
+third_party/ctc_decoders/paddlespeech_ctcdecoders.py
--- a/demos/audio_searching/requirements.txt
+++ b/demos/audio_searching/requirements.txt
-soundfile==0.10.3.post1
+diskcache==5.2.1
+fastapi
 librosa==0.8.0
 numpy
+pydantic
+pymilvus==2.0.1
 pymysql
-fastapi
-uvicorn
-diskcache==5.2.1
-pymilvus==2.0.1
 python-multipart
-typing
+soundfile==0.10.3.post1
 starlette
-pydantic
\ No newline at end of file
+typing
+uvicorn
--- a/demos/audio_searching/src/config.py
+++ b/demos/audio_searching/src/config.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import os

 ############### Milvus Configuration ###############

--- a/demos/audio_searching/src/logs.py
+++ b/demos/audio_searching/src/logs.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import codecs
 import datetime
 import logging
 import os
@@ -124,7 +123,7 @@ class MultiprocessHandler(logging.FileHandler):
            logging.FileHandler.emit(self, record)
        except (KeyboardInterrupt, SystemExit):
            raise
-        except:
+        except Exception as e:
            self.handleError(record)



--- a/demos/audio_searching/src/operations/load.py
+++ b/demos/audio_searching/src/operations/load.py
@@ -26,8 +26,7 @@ def get_audios(path):
    """
    supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
    return [
-        item
-        for sublist in [[os.path.join(dir, file) for file in files]
+        item for sublist in [[os.path.join(dir, file) for file in files]
                             for dir, _, files in list(os.walk(path))]
        for item in sublist if os.path.splitext(item)[1] in supported_formats
    ]

--- a/examples/ami/sd0/local/ami_prepare.py
+++ b/examples/ami/sd0/local/ami_prepare.py
@@ -18,7 +18,6 @@ Download: http://groups.inf.ed.ac.uk/ami/download/

 Prepares metadata files (JSON) from manual annotations "segments/" using RTTM format (Oracle VAD).
 """
-
 import argparse
 import glob
 import json

--- a/paddleaudio/.gitignore
+++ b/paddleaudio/.gitignore
+.eggs
+*.wav
--- a/paddleaudio/docs/README.md
+++ b/paddleaudio/docs/README.md
--- a/paddleaudio/docs/source/conf.py
+++ b/paddleaudio/docs/source/conf.py
@@ -5,18 +5,14 @@
 # This file does only contain a selection of the most common options. For a
 # full list see the documentation:
 # http://www.sphinx-doc.org/en/master/config
-
 # -- Path setup --------------------------------------------------------------
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../..'))

-
 # -- Project information -----------------------------------------------------

 project = 'PaddleAudio'
@@ -28,7 +24,6 @@ version = ''
 # The full version, including alpha/beta/rc tags
 release = '0.2.0'

-
 # -- General configuration ---------------------------------------------------

 # If your documentation needs a minimal Sphinx version, state it here.
@@ -75,7 +70,6 @@ exclude_patterns = []
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None

-
 # -- Options for HTML output -------------------------------------------------

 # The theme to use for HTML and HTML Help pages.  See the documentation for
@@ -112,13 +106,11 @@ html_css_files = [
 #
 # html_sidebars = {}

-
 # -- Options for HTMLHelp output ---------------------------------------------

 # Output file base name for HTML help builder.
 htmlhelp_basename = 'PaddleAudiodoc'

-
 # -- Options for LaTeX output ------------------------------------------------

 latex_elements = {
@@ -143,20 +135,16 @@ latex_elements = {
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation',
-     'PaddlePaddle', 'manual'),
+    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation', 'PaddlePaddle',
+     'manual'),
 ]

-
 # -- Options for manual page output ------------------------------------------

 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'paddleaudio', 'PaddleAudio Documentation',
-     [author], 1)
-]
-
+man_pages = [(master_doc, 'paddleaudio', 'PaddleAudio Documentation', [author],
+              1)]

 # -- Options for Texinfo output ----------------------------------------------

@@ -164,12 +152,10 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation',
-     author, 'PaddleAudio', 'One line description of project.',
-     'Miscellaneous'),
+    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation', author,
+     'PaddleAudio', 'One line description of project.', 'Miscellaneous'),
 ]

-
 # -- Options for Epub output -------------------------------------------------

 # Bibliographic Dublin Core info.
@@ -187,7 +173,6 @@ epub_title = project
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']

-
 # -- Extension configuration -------------------------------------------------

 # -- Options for intersphinx extension ---------------------------------------

--- a/paddleaudio/paddleaudio/compliance/__init__.py
+++ b/paddleaudio/paddleaudio/compliance/__init__.py
@@ -11,3 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from . import kaldi
+from . import librosa
--- a/paddleaudio/paddleaudio/metric/mcd.py
+++ b/paddleaudio/paddleaudio/metric/mcd.py
@@ -22,7 +22,9 @@ __all__ = [
 ]


-def mcd_distance(xs: np.ndarray, ys: np.ndarray, cost_fn: Callable=mt.logSpecDbDist) -> float:
+def mcd_distance(xs: np.ndarray,
+                 ys: np.ndarray,
+                 cost_fn: Callable=mt.logSpecDbDist) -> float:
    """Mel cepstral distortion (MCD), dtw distance.

    Dynamic Time Warping.

--- a/paddlespeech/t2s/frontend/zh_normalization/chronology.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
@@ -64,7 +64,7 @@ def replace_time(match) -> str:
    result = f"{num2str(hour)}点"
    if minute.lstrip('0'):
        if int(minute) == 30:
-            result += f"半"
+            result += "半"
        else:
            result += f"{_time_num2str(minute)}分"
    if second and second.lstrip('0'):
@@ -75,7 +75,7 @@ def replace_time(match) -> str:
        result += f"{num2str(hour_2)}点"
        if minute_2.lstrip('0'):
            if int(minute) == 30:
-                result += f"半"
+                result += "半"
            else:
                result += f"{_time_num2str(minute_2)}分"
        if second_2 and second_2.lstrip('0'):

--- a/paddlespeech/vector/cluster/diarization.py
+++ b/paddlespeech/vector/cluster/diarization.py
@@ -16,22 +16,20 @@ This script contains basic functions used for speaker diarization.
 This script has an optional dependency on open source sklearn library.
 A few sklearn functions are modified in this script as per requirement.
 """
-
 import argparse
 import warnings
-import scipy
-import numpy as np
 from distutils.util import strtobool

+import numpy as np
+import scipy
+import sklearn
 from scipy import sparse
-from scipy.sparse.linalg import eigsh
 from scipy.sparse.csgraph import connected_components
 from scipy.sparse.csgraph import laplacian as csgraph_laplacian
-
-import sklearn
-from sklearn.neighbors import kneighbors_graph
+from scipy.sparse.linalg import eigsh
 from sklearn.cluster import SpectralClustering
 from sklearn.cluster._kmeans import k_means
+from sklearn.neighbors import kneighbors_graph


 def _graph_connected_component(graph, node_id):