Unverified commit a8588ab3, authored by Félix Voituret, committed by GitHub

Merge pull request #529 from deezer/typer

Spleeter 2.1.0
name: conda
on:
push:
branches:
- master
workflow_dispatch:
env:
ANACONDA_USERNAME: ${{ secrets.ANACONDA_USERNAME }}
ANACONDA_PASSWORD: ${{ secrets.ANACONDA_PASSWORD }}
@@ -11,7 +9,7 @@ jobs:
strategy:
matrix:
python: [3.7, 3.8]
package: [spleeter]
package: [spleeter, spleeter-gpu]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -4,40 +4,22 @@ on:
branches:
- master
env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
jobs:
package-and-deploy:
strategy:
matrix:
platform: [cpu, gpu]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.7
- uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
- uses: actions/cache@v2
with:
path: ${{ env.GITHUB_WORKSPACE }}/dist
key: sdist-${{ matrix.platform }}-${{ hashFiles('**/setup.py') }}
restore-keys: |
sdist-${{ matrix.platform }}-${{ hashFiles('**/setup.py') }}
sdist-${{ matrix.platform }}
sdist-
- name: Install dependencies
run: pip install --upgrade pip setuptools twine
- if: ${{ matrix.platform == 'cpu' }}
name: Package CPU distribution
run: make build
- if: ${{ matrix.platform == 'gpu' }}
name: Package GPU distribution
run: make build-gpu
- name: Install Poetry
run: |
pip install poetry
poetry config virtualenvs.in-project false
poetry config virtualenvs.path ~/.virtualenvs
poetry config pypi-token.pypi $PYPI_TOKEN
- name: Deploy to pypi
run: make deploy
\ No newline at end of file
run: |
poetry build
poetry publish
\ No newline at end of file
name: pytest
name: test
on:
pull_request:
branches:
@@ -8,20 +8,13 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- uses: actions/cache@v2
id: spleeter-pip-cache
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
- uses: actions/cache@v2
env:
model-release: 1
@@ -31,11 +24,28 @@ jobs:
key: models-${{ env.model-release }}
restore-keys: |
models-${{ env.model-release }}
- name: Install dependencies
- name: Install ffmpeg
run: |
sudo apt-get update && sudo apt-get install -y ffmpeg
pip install --upgrade pip setuptools
pip install pytest==5.4.3 pytest-xdist==1.32.0 pytest-forked==1.1.3 musdb museval
python setup.py install
- name: Install Poetry
run: |
pip install poetry
poetry config virtualenvs.in-project false
poetry config virtualenvs.path ~/.virtualenvs
- name: Cache Poetry virtualenv
uses: actions/cache@v1
id: cache
with:
path: ~/.virtualenvs
key: poetry-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
poetry-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
- name: Install Dependencies
run: poetry install
if: steps.cache.outputs.cache-hit != 'true'
- name: Code quality checks
run: |
poetry run black spleeter --check
poetry run isort spleeter --check
- name: Test with pytest
run: make test
\ No newline at end of file
run: poetry run pytest tests/
\ No newline at end of file
# Changelog History
## 2.1.0
This version introduces design-related changes, especially the transition to Typer for CLI management and
Poetry as the library build backend.
* `-i` option is now deprecated and replaced by a traditional CLI input argument list
* Project is now built using Poetry
* Project now requires code formatting using Black and isort
* The dedicated GPU package `spleeter-gpu` is no longer supported; the `spleeter` package now supports both CPU and GPU hardware
### API changes:
* function `get_default_audio_adapter` is now available as the `default()` class method within the `AudioAdapter` class
* function `get_default_model_provider` is now available as the `default()` class method within the `ModelProvider` class (see the migration sketch below)
* `STFTBackend` and `Codec` are now string enums
* `GithubModelProvider` now uses `httpx` with HTTP/2 support
* Commands are now located in the `__main__` module and wrapped as simple functions using Typer; the options module provides the specification for each available option and argument
* The `types` module provides custom type specifications and should be enhanced in a future release to provide more robust typing support with MyPy
* The `utils.logging` module has been cleaned up: the logger instance is now a module singleton, and a single function is used to configure it from the verbose parameter
* Added a custom logger handler (see tiangolo/typer#203 discussion)
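A minimal migration sketch for these adapter and provider changes (import paths taken from this diff; `ModelProvider.default()` assumed per the note above):

```python
# Spleeter 2.0:
#   from spleeter.audio.adapter import get_default_audio_adapter
#   adapter = get_default_audio_adapter()
# Spleeter 2.1.0:
from spleeter.audio import Codec
from spleeter.audio.adapter import AudioAdapter
from spleeter.model.provider import ModelProvider

adapter = AudioAdapter.default()    # replaces get_default_audio_adapter()
provider = ModelProvider.default()  # replaces get_default_model_provider()
assert Codec.MP3 == "mp3"           # Codec / STFTBackend are string enums
```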
## 2.0
First release, October 9th 2020
include spleeter/resources/*.json
include README.md
include LICENSE
\ No newline at end of file
# =======================================================
# Library lifecycle management.
#
# @author Deezer Research <spleeter@deezer.com>
# @licence MIT Licence
# =======================================================
FEEDSTOCK = spleeter-feedstock
FEEDSTOCK_REPOSITORY = https://github.com/deezer/$(FEEDSTOCK)
FEEDSTOCK_RECIPE = $(FEEDSTOCK)/recipe/spleeter/meta.yaml
PYTEST_CMD = pytest -W ignore::FutureWarning -W ignore::DeprecationWarning -vv --forked
all: clean build test deploy
clean:
rm -Rf *.egg-info
rm -Rf dist
build: clean
sed -i "s/project_name = '[^']*'/project_name = 'spleeter'/g" setup.py
sed -i "s/tensorflow_dependency = '[^']*'/tensorflow_dependency = 'tensorflow'/g" setup.py
python3 setup.py sdist
build-gpu: clean
sed -i "s/project_name = '[^']*'/project_name = 'spleeter-gpu'/g" setup.py
sed -i "s/tensorflow_dependency = '[^']*'/tensorflow_dependency = 'tensorflow-gpu'/g" setup.py
python3 setup.py sdist
test:
$(PYTEST_CMD) tests/
deploy:
pip install twine
twine upload --skip-existing dist/*
@@ -2,6 +2,9 @@
[![Github actions](https://github.com/deezer/spleeter/workflows/pytest/badge.svg)](https://github.com/deezer/spleeter/actions) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/spleeter) [![PyPI version](https://badge.fury.io/py/spleeter.svg)](https://badge.fury.io/py/spleeter) [![Conda](https://img.shields.io/conda/vn/conda-forge/spleeter)](https://anaconda.org/conda-forge/spleeter) [![Docker Pulls](https://img.shields.io/docker/pulls/researchdeezer/spleeter)](https://hub.docker.com/r/researchdeezer/spleeter) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deezer/spleeter/blob/master/spleeter.ipynb) [![Gitter chat](https://badges.gitter.im/gitterHQ/gitter.png)](https://gitter.im/spleeter/community) [![status](https://joss.theoj.org/papers/259e5efe669945a343bad6eccb89018b/status.svg)](https://joss.theoj.org/papers/259e5efe669945a343bad6eccb89018b)
> :warning: The [Spleeter 2.1.0](https://pypi.org/project/spleeter/) release introduces some breaking changes, including new CLI option naming for input and the drop
> of the dedicated GPU package. Please read the [CHANGELOG](CHANGELOG.md) for more details.
## About
**Spleeter** is the [Deezer](https://www.deezer.com/) source separation library with pretrained models
@@ -46,7 +49,7 @@ conda install -c conda-forge spleeter
# download an example audio file (if you don't have wget, use another tool for downloading)
wget https://github.com/deezer/spleeter/raw/master/audio_example.mp3
# separate the example audio into two components
spleeter separate -i audio_example.mp3 -p spleeter:2stems -o output
spleeter separate -p spleeter:2stems -o output audio_example.mp3
```
You should get two separated audio files (`vocals.wav` and `accompaniment.wav`) in the `output/audio_example` folder.
@@ -55,13 +58,18 @@ For a detailed documentation, please check the [repository wiki](https://github.
## Development and Testing
The following set of commands will clone this repository, create a virtual environment provisioned with the dependencies and run the tests (will take a few minutes):
This project is managed using [Poetry](https://python-poetry.org/docs/basic-usage/); to run the test suite you
can execute the following set of commands:
```bash
# Clone spleeter repository
git clone https://github.com/Deezer/spleeter && cd spleeter
python -m venv spleeterenv && source spleeterenv/bin/activate
pip install . && pip install pytest pytest-xdist
make test
# Install poetry
pip install poetry
# Install spleeter dependencies
poetry install
# Run unit test suite
poetry run pytest tests/
```
## Reference
{% set name = "spleeter-gpu" %}
{% set version = "2.0.2" %}
package:
name: {{ name|lower }}
version: {{ version }}
source:
- url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz
sha256: ecd3518a98f9978b9088d1cb2ef98f766401fd9007c2bf72a34e5b5bc5a6fdc3
build:
number: 0
script: {{ PYTHON }} -m pip install . -vv
skip: True # [osx]
entry_points:
- spleeter = spleeter.__main__:entrypoint
requirements:
host:
- python {{ python }}
- pip
run:
- python {{ python }}
- tensorflow-gpu ==2.2.0 # [linux]
- tensorflow-gpu ==2.3.0 # [win]
- pandas
- ffmpeg-python
- norbert
- librosa
test:
imports:
- spleeter
- spleeter.commands
- spleeter.model
- spleeter.utils
- spleeter.separator
about:
home: https://github.com/deezer/spleeter
license: MIT
license_family: MIT
license_file: LICENSE
summary: The Deezer source separation library with pretrained models based on tensorflow.
doc_url: https://github.com/deezer/spleeter/wiki
dev_url: https://github.com/deezer/spleeter
extra:
recipe-maintainers:
- Faylixe
- romi1502
\ No newline at end of file
python:
- 3.7
- 3.8
\ No newline at end of file
This diff has been collapsed.
[tool.poetry]
name = "spleeter"
version = "2.1.0"
description = "The Deezer source separation library with pretrained models based on tensorflow."
authors = ["Deezer Research <spleeter@deezer.com>"]
license = "MIT License"
readme = "README.md"
repository = "https://github.com/deezer/spleeter"
homepage = "https://github.com/deezer/spleeter"
classifiers = [
"Environment :: Console",
"Environment :: MacOS X",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: MacOS",
"Operating System :: Microsoft :: Windows",
"Operating System :: POSIX :: Linux",
"Operating System :: Unix",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Artistic Software",
"Topic :: Multimedia",
"Topic :: Multimedia :: Sound/Audio",
"Topic :: Multimedia :: Sound/Audio :: Analysis",
"Topic :: Multimedia :: Sound/Audio :: Conversion",
"Topic :: Multimedia :: Sound/Audio :: Sound Synthesis",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Utilities"
]
packages = [ { include = "spleeter" } ]
include = ["LICENSE", "spleeter/resources/*.json"]
[tool.poetry.dependencies]
python = "^3.7"
ffmpeg-python = "0.2.0"
norbert = "0.2.1"
httpx = {extras = ["http2"], version = "^0.16.1"}
typer = "^0.3.2"
librosa = "0.8.0"
musdb = {version = "0.3.1", optional = true}
museval = {version = "0.3.0", optional = true}
tensorflow = "2.3.0"
pandas = "1.1.2"
numpy = "<1.19.0,>=1.16.0"
[tool.poetry.dev-dependencies]
pytest = "^6.2.1"
isort = "^5.7.0"
black = "^20.8b1"
mypy = "^0.790"
pytest-forked = "^1.3.0"
musdb = "0.3.1"
museval = "0.3.0"
[tool.poetry.scripts]
spleeter = 'spleeter.__main__:entrypoint'
[tool.poetry.extras]
evaluation = ["musdb", "museval"]
[tool.isort]
profile = "black"
multi_line_output = 3
[tool.pytest.ini_options]
addopts = "-W ignore::FutureWarning -W ignore::DeprecationWarning -vv --forked"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
#!/usr/bin/env python
# coding: utf8
""" Distribution script. """
import sys
from os import path
from setuptools import setup
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# Default project values.
project_name = 'spleeter'
project_version = '2.0.2'
tensorflow_dependency = 'tensorflow'
tensorflow_version = '2.3.0'
here = path.abspath(path.dirname(__file__))
readme_path = path.join(here, 'README.md')
with open(readme_path, 'r') as stream:
readme = stream.read()
# Package setup entrypoint.
setup(
name=project_name,
version=project_version,
description='''
The Deezer source separation library with
pretrained models based on tensorflow.
''',
long_description=readme,
long_description_content_type='text/markdown',
author='Deezer Research',
author_email='spleeter@deezer.com',
url='https://github.com/deezer/spleeter',
license='MIT License',
packages=[
'spleeter',
'spleeter.audio',
'spleeter.commands',
'spleeter.model',
'spleeter.model.functions',
'spleeter.model.provider',
'spleeter.resources',
'spleeter.utils',
],
package_data={'spleeter.resources': ['*.json']},
python_requires='>=3.6, <3.9',
include_package_data=True,
install_requires=[
'ffmpeg-python==0.2.0',
'importlib_resources ; python_version<"3.7"',
'norbert==0.2.1',
'numpy<1.19.0,>=1.16.0',
'pandas==1.1.2',
'requests',
'scipy==1.4.1',
'setuptools>=41.0.0',
'librosa==0.8.0',
'{}=={}'.format(tensorflow_dependency, tensorflow_version),
],
extras_require={
'evaluation': ['musdb==0.3.1', 'museval==0.3.0']
},
entry_points={
'console_scripts': ['spleeter=spleeter.__main__:entrypoint']
},
classifiers=[
'Environment :: Console',
'Environment :: MacOS X',
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Operating System :: MacOS',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX :: Linux',
'Operating System :: Unix',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: Implementation :: CPython',
'Topic :: Artistic Software',
'Topic :: Multimedia',
'Topic :: Multimedia :: Sound/Audio',
'Topic :: Multimedia :: Sound/Audio :: Analysis',
'Topic :: Multimedia :: Sound/Audio :: Conversion',
'Topic :: Multimedia :: Sound/Audio :: Sound Synthesis',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Utilities']
)
@@ -13,9 +13,9 @@
by providing train, evaluation and source separation actions.
"""
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
class SpleeterError(Exception):
@@ -5,54 +5,252 @@
Python oneliner script usage.
USAGE: python -m spleeter {train,evaluate,separate} ...
Notes:
All critical imports involving TF, numpy or pandas are deferred to
command function scope to avoid heavy imports on CLI evaluation,
which would lead to a long bootstrapping time.
"""
import sys
import warnings
import json
from functools import partial
from glob import glob
from itertools import product
from os.path import join
from pathlib import Path
from typing import Container, Dict, List, Optional
# pyright: reportMissingImports=false
# pylint: disable=import-error
from typer import Exit, Typer
from . import SpleeterError
from .commands import create_argument_parser
from .utils.configuration import load_configuration
from .utils.logging import (
enable_logging,
enable_tensorflow_logging,
get_logger)
from .options import *
from .utils.logging import configure_logger, logger
# pylint: enable=import-error
spleeter: Typer = Typer(add_completion=False)
""" CLI application. """
@spleeter.command()
def train(
adapter: str = AudioAdapterOption,
data: Path = TrainingDataDirectoryOption,
params_filename: str = ModelParametersOption,
verbose: bool = VerboseOption,
) -> None:
"""
Train a source separation model
"""
import tensorflow as tf
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
from .audio.adapter import AudioAdapter
from .dataset import get_training_dataset, get_validation_dataset
from .model import model_fn
from .model.provider import ModelProvider
from .utils.configuration import load_configuration
configure_logger(verbose)
audio_adapter = AudioAdapter.get(adapter)
audio_path = str(data)
params = load_configuration(params_filename)
session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
estimator = tf.estimator.Estimator(
model_fn=model_fn,
model_dir=params["model_dir"],
params=params,
config=tf.estimator.RunConfig(
save_checkpoints_steps=params["save_checkpoints_steps"],
tf_random_seed=params["random_seed"],
save_summary_steps=params["save_summary_steps"],
session_config=session_config,
log_step_count_steps=10,
keep_checkpoint_max=2,
),
)
input_fn = partial(get_training_dataset, params, audio_adapter, audio_path)
train_spec = tf.estimator.TrainSpec(
input_fn=input_fn, max_steps=params["train_max_steps"]
)
input_fn = partial(get_validation_dataset, params, audio_adapter, audio_path)
evaluation_spec = tf.estimator.EvalSpec(
input_fn=input_fn, steps=None, throttle_secs=params["throttle_secs"]
)
logger.info("Start model training")
tf.estimator.train_and_evaluate(estimator, train_spec, evaluation_spec)
ModelProvider.writeProbe(params["model_dir"])
logger.info("Model training done")
def main(argv):
""" Spleeter runner. Parse provided command line arguments
and run entrypoint for required command (either train,
evaluate or separate).
:param argv: Provided command line arguments.
@spleeter.command()
def separate(
deprecated_files: Optional[str] = AudioInputOption,
files: List[Path] = AudioInputArgument,
adapter: str = AudioAdapterOption,
bitrate: str = AudioBitrateOption,
codec: Codec = AudioCodecOption,
duration: float = AudioDurationOption,
offset: float = AudioOffsetOption,
output_path: Path = AudioOutputOption,
stft_backend: STFTBackend = AudioSTFTBackendOption,
filename_format: str = FilenameFormatOption,
params_filename: str = ModelParametersOption,
mwf: bool = MWFOption,
verbose: bool = VerboseOption,
) -> None:
"""
Separate audio file(s)
"""
from .audio.adapter import AudioAdapter
from .separator import Separator
configure_logger(verbose)
if deprecated_files is not None:
logger.error(
"⚠️ -i option is not supported anymore, audio files must be supplied "
"using input argument instead (see spleeter separate --help)"
)
raise Exit(20)
audio_adapter: AudioAdapter = AudioAdapter.get(adapter)
separator: Separator = Separator(
params_filename, MWF=mwf, stft_backend=stft_backend
)
for filename in files:
separator.separate_to_file(
str(filename),
str(output_path),
audio_adapter=audio_adapter,
offset=offset,
duration=duration,
codec=codec,
bitrate=bitrate,
filename_format=filename_format,
synchronous=False,
)
separator.join()
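For reference, a hedged sketch of the equivalent programmatic call (default argument values are assumed; `Separator` and `separate_to_file` are used exactly as in the command above):

```python
# Illustrative only: mirrors what the `separate` command does above.
from spleeter.separator import Separator

separator = Separator("spleeter:2stems")
separator.separate_to_file("audio_example.mp3", "output")
separator.join()  # wait for any pending asynchronous separation jobs
```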
EVALUATION_SPLIT: str = "test"
EVALUATION_METRICS_DIRECTORY: str = "metrics"
EVALUATION_INSTRUMENTS: Container[str] = ("vocals", "drums", "bass", "other")
EVALUATION_METRICS: Container[str] = ("SDR", "SAR", "SIR", "ISR")
EVALUATION_MIXTURE: str = "mixture.wav"
EVALUATION_AUDIO_DIRECTORY: str = "audio"
def _compile_metrics(metrics_output_directory) -> Dict:
"""
Compiles metrics from given directory and returns results as dict.
Parameters:
metrics_output_directory (str):
Directory to get metrics from.
Returns:
Dict:
Compiled metrics as dict.
"""
import numpy as np
import pandas as pd
songs = glob(join(metrics_output_directory, "test/*.json"))
index = pd.MultiIndex.from_tuples(
product(EVALUATION_INSTRUMENTS, EVALUATION_METRICS),
names=["instrument", "metric"],
)
pd.DataFrame([], index=["config1", "config2"], columns=index)
metrics = {
instrument: {k: [] for k in EVALUATION_METRICS}
for instrument in EVALUATION_INSTRUMENTS
}
for song in songs:
with open(song, "r") as stream:
data = json.load(stream)
for target in data["targets"]:
instrument = target["name"]
for metric in EVALUATION_METRICS:
sdr_med = np.median(
[
frame["metrics"][metric]
for frame in target["frames"]
if not np.isnan(frame["metrics"][metric])
]
)
metrics[instrument][metric].append(sdr_med)
return metrics
@spleeter.command()
def evaluate(
adapter: str = AudioAdapterOption,
output_path: Path = AudioOutputOption,
stft_backend: STFTBackend = AudioSTFTBackendOption,
params_filename: str = ModelParametersOption,
mus_dir: Path = MUSDBDirectoryOption,
mwf: bool = MWFOption,
verbose: bool = VerboseOption,
) -> Dict:
"""
Evaluate a model on the musDB test dataset
"""
import numpy as np
configure_logger(verbose)
try:
parser = create_argument_parser()
arguments = parser.parse_args(argv[1:])
enable_logging()
if arguments.verbose:
enable_tensorflow_logging()
if arguments.command == 'separate':
from .commands.separate import entrypoint
elif arguments.command == 'train':
from .commands.train import entrypoint
elif arguments.command == 'evaluate':
from .commands.evaluate import entrypoint
params = load_configuration(arguments.configuration)
entrypoint(arguments, params)
except SpleeterError as e:
get_logger().error(e)
import musdb
import museval
except ImportError:
logger.error("Extra dependencies musdb and museval not found")
logger.error("Please install musdb and museval first, abort")
raise Exit(10)
# Separate musdb sources.
songs = glob(join(mus_dir, EVALUATION_SPLIT, "*/"))
mixtures = [join(song, EVALUATION_MIXTURE) for song in songs]
audio_output_directory = join(output_path, EVALUATION_AUDIO_DIRECTORY)
separate(
deprecated_files=None,
files=mixtures,
adapter=adapter,
bitrate="128k",
codec=Codec.WAV,
duration=600.0,
offset=0,
output_path=join(audio_output_directory, EVALUATION_SPLIT),
stft_backend=stft_backend,
filename_format="{foldername}/{instrument}.{codec}",
params_filename=params_filename,
mwf=mwf,
verbose=verbose,
)
# Compute metrics with musdb.
metrics_output_directory = join(output_path, EVALUATION_METRICS_DIRECTORY)
logger.info("Starting musdb evaluation (this could be long) ...")
dataset = musdb.DB(root=mus_dir, is_wav=True, subsets=[EVALUATION_SPLIT])
museval.eval_mus_dir(
dataset=dataset,
estimates_dir=audio_output_directory,
output_dir=metrics_output_directory,
)
logger.info("musdb evaluation done")
# Compute and pretty print median metrics.
metrics = _compile_metrics(metrics_output_directory)
for instrument, instrument_metrics in metrics.items():
logger.info(f"{instrument}:")
for metric, value in instrument_metrics.items():
logger.info(f"{metric}: {np.median(value):.3f}")
return metrics
def entrypoint():
""" Command line entrypoint. """
warnings.filterwarnings('ignore')
main(sys.argv)
""" Application entrypoint. """
try:
spleeter()
except SpleeterError as e:
logger.error(e)
if __name__ == '__main__':
if __name__ == "__main__":
entrypoint()
@@ -10,6 +10,43 @@
- Waveform conversion and transformation functions.
"""
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
from enum import Enum
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
class Codec(str, Enum):
""" Enumeration of supported audio codec. """
WAV: str = "wav"
MP3: str = "mp3"
OGG: str = "ogg"
M4A: str = "m4a"
WMA: str = "wma"
FLAC: str = "flac"
class STFTBackend(str, Enum):
""" Enumeration of supported STFT backend. """
AUTO: str = "auto"
TENSORFLOW: str = "tensorflow"
LIBROSA: str = "librosa"
@classmethod
def resolve(cls: type, backend: str) -> str:
# NOTE: import is resolved here to avoid performance issues on command
# evaluation.
# pyright: reportMissingImports=false
# pylint: disable=import-error
import tensorflow as tf
if backend not in cls.__members__.values():
raise ValueError(f"Unsupported backend {backend}")
if backend == cls.AUTO:
if len(tf.config.list_physical_devices("GPU")):
return cls.TENSORFLOW
return cls.LIBROSA
return backend
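A short usage sketch for `resolve`, following the implementation above:

```python
# Resolves the AUTO placeholder to a concrete backend: TENSORFLOW when a
# GPU is visible to TensorFlow, LIBROSA otherwise.
backend = STFTBackend.resolve(STFTBackend.AUTO)
assert backend in (STFTBackend.TENSORFLOW, STFTBackend.LIBROSA)
```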
@@ -3,70 +3,101 @@
""" AudioAdapter class defintion. """
import subprocess
from abc import ABC, abstractmethod
from importlib import import_module
from os.path import exists
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
# pyright: reportMissingImports=false
# pylint: disable=import-error
import numpy as np
import tensorflow as tf
from tensorflow.signal import stft, hann_window
# pylint: enable=import-error
from spleeter.audio import Codec
from .. import SpleeterError
from ..utils.logging import get_logger
from ..types import AudioDescriptor, Signal
from ..utils.logging import logger
# pylint: enable=import-error
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
class AudioAdapter(ABC):
""" An abstract class for manipulating audio signal. """
# Default audio adapter singleton instance.
DEFAULT = None
_DEFAULT: "AudioAdapter" = None
""" Default audio adapter singleton instance. """
@abstractmethod
def load(
self, audio_descriptor, offset, duration,
sample_rate, dtype=np.float32):
""" Loads the audio file denoted by the given audio descriptor
and returns its data as a waveform. Aims to be implemented
by client.
:param audio_descriptor: Describe song to load, in case of file
based audio adapter, such descriptor would
be a file path.
:param offset: Start offset to load from in seconds.
:param duration: Duration to load in seconds.
:param sample_rate: Sample rate to load audio with.
:param dtype: Numpy data type to use, default to float32.
:returns: Loaded data as (wf, sample_rate) tuple.
self,
audio_descriptor: AudioDescriptor,
offset: Optional[float] = None,
duration: Optional[float] = None,
sample_rate: Optional[float] = None,
dtype: np.dtype = np.float32,
) -> Signal:
"""
Loads the audio file denoted by the given audio descriptor and
returns its data as a waveform. Aims to be implemented by client.
Parameters:
audio_descriptor (AudioDescriptor):
Describe song to load, in case of file based audio adapter,
such descriptor would be a file path.
offset (Optional[float]):
Start offset to load from in seconds.
duration (Optional[float]):
Duration to load in seconds.
sample_rate (Optional[float]):
Sample rate to load audio with.
dtype (numpy.dtype):
(Optional) Numpy data type to use, default to `float32`.
Returns:
Signal:
Loaded data as (wf, sample_rate) tuple.
"""
pass
def load_tf_waveform(
self, audio_descriptor,
offset=0.0, duration=1800., sample_rate=44100,
dtype=b'float32', waveform_name='waveform'):
""" Load the audio and convert it to a tensorflow waveform.
:param audio_descriptor: Describe song to load, in case of file
based audio adapter, such descriptor would
be a file path.
:param offset: Start offset to load from in seconds.
:param duration: Duration to load in seconds.
:param sample_rate: Sample rate to load audio with.
:param dtype: Numpy data type to use, default to float32.
:param waveform_name: (Optional) Name of the key in output dict.
:returns: TF output dict with waveform as
(T x chan numpy array) and a boolean that
tells whether there was an error while
trying to load the waveform.
self,
audio_descriptor,
offset: float = 0.0,
duration: float = 1800.0,
sample_rate: int = 44100,
dtype: bytes = b"float32",
waveform_name: str = "waveform",
) -> Dict[str, Any]:
"""
Load the audio and convert it to a tensorflow waveform.
Parameters:
audio_descriptor (AudioDescriptor):
Describe song to load, in case of file based audio adapter,
such descriptor would be a file path.
offset (float):
Start offset to load from in seconds.
duration (float):
Duration to load in seconds.
sample_rate (float):
Sample rate to load audio with.
dtype (bytes):
(Optional) Data type to use, default to `b'float32'`.
waveform_name (str):
(Optional) Name of the key in output dict, default to
`'waveform'`.
Returns:
Dict[str, Any]:
TF output dict with waveform as `(T x chan numpy array)`
and a boolean that tells whether there was an error while
trying to load the waveform.
"""
# Cast parameters to TF format.
offset = tf.cast(offset, tf.float64)
@@ -74,76 +105,96 @@ class AudioAdapter(ABC):
# Defined safe loading function.
def safe_load(path, offset, duration, sample_rate, dtype):
logger = get_logger()
logger.info(
f'Loading audio {path} from {offset} to {offset + duration}')
logger.info(f"Loading audio {path} from {offset} to {offset + duration}")
try:
(data, _) = self.load(
path.numpy(),
offset.numpy(),
duration.numpy(),
sample_rate.numpy(),
dtype=dtype.numpy())
logger.info('Audio data loaded successfully')
dtype=dtype.numpy(),
)
logger.info("Audio data loaded successfully")
return (data, False)
except Exception as e:
logger.exception(
'An error occurs while loading audio',
exc_info=e)
logger.exception("An error occurs while loading audio", exc_info=e)
return (np.float32(-1.0), True)
# Execute function and format results.
results = tf.py_function(
safe_load,
[audio_descriptor, offset, duration, sample_rate, dtype],
(tf.float32, tf.bool)),
results = (
tf.py_function(
safe_load,
[audio_descriptor, offset, duration, sample_rate, dtype],
(tf.float32, tf.bool),
),
)
waveform, error = results[0]
return {
waveform_name: waveform,
f'{waveform_name}_error': error
}
return {waveform_name: waveform, f"{waveform_name}_error": error}
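A hypothetical sketch of how this method can feed a `tf.data` pipeline (the filenames and mapping pattern are illustrative, not taken from this diff):

```python
import tensorflow as tf

# Each element becomes a dict: {"waveform": ..., "waveform_error": ...}.
adapter = AudioAdapter.default()
dataset = tf.data.Dataset.from_tensor_slices(["a.wav", "b.wav"])
dataset = dataset.map(adapter.load_tf_waveform)
```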
@abstractmethod
def save(
self, path, data, sample_rate,
codec=None, bitrate=None):
""" Save the given audio data to the file denoted by
the given path.
:param path: Path of the audio file to save data in.
:param data: Waveform data to write.
:param sample_rate: Sample rate to write file in.
:param codec: (Optional) Writing codec to use.
:param bitrate: (Optional) Bitrate of the written audio file.
self,
path: Union[Path, str],
data: np.ndarray,
sample_rate: float,
codec: Codec = None,
bitrate: str = None,
) -> None:
"""
Save the given audio data to the file denoted by the given path.
Parameters:
path (Union[Path, str]):
Path-like object of the audio file to save data in.
data (numpy.ndarray):
Waveform data to write.
sample_rate (float):
Sample rate to write file in.
codec (Codec):
(Optional) Writing codec to use, default to `None`.
bitrate (str):
(Optional) Bitrate of the written audio file, default to
`None`.
"""
pass
@classmethod
def default(cls: type) -> "AudioAdapter":
"""
Builds and returns a default audio adapter instance.
Returns:
AudioAdapter:
Default adapter instance to use.
"""
if cls._DEFAULT is None:
from .ffmpeg import FFMPEGProcessAudioAdapter
cls._DEFAULT = FFMPEGProcessAudioAdapter()
return cls._DEFAULT
def get_default_audio_adapter():
""" Builds and returns a default audio adapter instance.
:returns: An audio adapter instance.
"""
if AudioAdapter.DEFAULT is None:
from .ffmpeg import FFMPEGProcessAudioAdapter
AudioAdapter.DEFAULT = FFMPEGProcessAudioAdapter()
return AudioAdapter.DEFAULT
def get_audio_adapter(descriptor):
""" Load dynamically an AudioAdapter from given class descriptor.
:param descriptor: Adapter class descriptor (module.Class)
:returns: Created adapter instance.
"""
if descriptor is None:
return get_default_audio_adapter()
module_path = descriptor.split('.')
adapter_class_name = module_path[-1]
module_path = '.'.join(module_path[:-1])
adapter_module = import_module(module_path)
adapter_class = getattr(adapter_module, adapter_class_name)
if not isinstance(adapter_class, AudioAdapter):
raise SpleeterError(
f'{adapter_class_name} is not a valid AudioAdapter class')
return adapter_class()
@classmethod
def get(cls: type, descriptor: str) -> "AudioAdapter":
"""
Load dynamically an AudioAdapter from given class descriptor.
Parameters:
descriptor (str):
Adapter class descriptor (module.Class)
Returns:
AudioAdapter:
Created adapter instance.
"""
if not descriptor:
return cls.default()
module_path: List[str] = descriptor.split(".")
adapter_class_name: str = module_path[-1]
module_path: str = ".".join(module_path[:-1])
adapter_module = import_module(module_path)
adapter_class = getattr(adapter_module, adapter_class_name)
if not issubclass(adapter_class, AudioAdapter):
raise SpleeterError(
f"{adapter_class_name} is not a valid AudioAdapter class"
)
return adapter_class()
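A usage sketch for the descriptor form (the FFMPEG adapter path matches the import used by `default()` above):

```python
# Dynamic loading from a "module.Class" descriptor; an empty descriptor
# falls back to the default FFMPEG-based adapter.
adapter = AudioAdapter.get("spleeter.audio.ffmpeg.FFMPEGProcessAudioAdapter")
default_adapter = AudioAdapter.get("")
assert type(default_adapter).__name__ == "FFMPEGProcessAudioAdapter"
```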
@@ -3,39 +3,54 @@
""" This module provides audio data convertion functions. """
# pyright: reportMissingImports=false
# pylint: disable=import-error
import numpy as np
import tensorflow as tf
# pylint: enable=import-error
from ..utils.tensor import from_float32_to_uint8, from_uint8_to_float32
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# pylint: enable=import-error
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
def to_n_channels(waveform, n_channels):
""" Convert a waveform to n_channels by removing or
duplicating channels if needed (in tensorflow).
:param waveform: Waveform to transform.
:param n_channels: Number of channel to reshape waveform in.
:returns: Reshaped waveform.
def to_n_channels(waveform: tf.Tensor, n_channels: int) -> tf.Tensor:
"""
Convert a waveform to n_channels by removing or duplicating channels if
needed (in tensorflow).
Parameters:
waveform (tensorflow.Tensor):
Waveform to transform.
n_channels (int):
Number of channel to reshape waveform in.
Returns:
tensorflow.Tensor:
Reshaped waveform.
"""
return tf.cond(
tf.shape(waveform)[1] >= n_channels,
true_fn=lambda: waveform[:, :n_channels],
false_fn=lambda: tf.tile(waveform, [1, n_channels])[:, :n_channels]
false_fn=lambda: tf.tile(waveform, [1, n_channels])[:, :n_channels],
)
def to_stereo(waveform):
""" Convert a waveform to stereo by duplicating if mono,
or truncating if too many channels.
def to_stereo(waveform: np.ndarray) -> np.ndarray:
"""
Convert a waveform to stereo by duplicating if mono, or truncating
if too many channels.
Parameters:
waveform (numpy.ndarray):
a `(N, d)` numpy array.
:param waveform: a (N, d) numpy array.
:returns: A stereo waveform as a (N, 2) numpy array.
Returns:
numpy.ndarray:
A stereo waveform as a `(N, 2)` numpy array.
"""
if waveform.shape[1] == 1:
return np.repeat(waveform, 2, axis=-1)
@@ -44,45 +59,81 @@ def to_stereo(waveform):
return waveform
def gain_to_db(tensor, epsilon=10e-10):
""" Convert from gain to decibel in tensorflow.
def gain_to_db(tensor: tf.Tensor, epsilon: float = 10e-10) -> tf.Tensor:
"""
Convert from gain to decibel in tensorflow.
Parameters:
tensor (tensorflow.Tensor):
Tensor to convert
epsilon (float):
Operation constant.
:param tensor: Tensor to convert.
:param epsilon: Operation constant.
:returns: Converted tensor.
Returns:
tensorflow.Tensor:
Converted tensor.
"""
return 20. / np.log(10) * tf.math.log(tf.maximum(tensor, epsilon))
return 20.0 / np.log(10) * tf.math.log(tf.maximum(tensor, epsilon))
def db_to_gain(tensor):
""" Convert from decibel to gain in tensorflow.
def db_to_gain(tensor: tf.Tensor) -> tf.Tensor:
"""
Convert from decibel to gain in tensorflow.
:param tensor_db: Tensor to convert.
:returns: Converted tensor.
Parameters:
tensor (tensorflow.Tensor):
Tensor to convert
Returns:
tensorflow.Tensor:
Converted tensor.
"""
return tf.pow(10., (tensor / 20.))
return tf.pow(10.0, (tensor / 20.0))
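A quick numeric check of this gain/decibel pair (using the two functions defined just above):

```python
import tensorflow as tf

# 20 / ln(10) * ln(gain) == 20 * log10(gain): a gain of 10 is 20 dB,
# and decoding with db_to_gain recovers the original value.
db = gain_to_db(tf.constant([10.0]))  # -> [20.0]
gain = db_to_gain(db)                 # -> [10.0]
```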
def spectrogram_to_db_uint(
spectrogram: tf.Tensor, db_range: float = 100.0, **kwargs
) -> tf.Tensor:
"""
Encodes given spectrogram into uint8 using decibel scale.
def spectrogram_to_db_uint(spectrogram, db_range=100., **kwargs):
""" Encodes given spectrogram into uint8 using decibel scale.
Parameters:
spectrogram (tensorflow.Tensor):
Spectrogram to be encoded as TF float tensor.
db_range (float):
Range in decibel for encoding.
:param spectrogram: Spectrogram to be encoded as TF float tensor.
:param db_range: Range in decibel for encoding.
:returns: Encoded decibel spectrogram as uint8 tensor.
Returns:
tensorflow.Tensor:
Encoded decibel spectrogram as `uint8` tensor.
"""
db_spectrogram = gain_to_db(spectrogram)
max_db_spectrogram = tf.reduce_max(db_spectrogram)
db_spectrogram = tf.maximum(db_spectrogram, max_db_spectrogram - db_range)
db_spectrogram: tf.Tensor = gain_to_db(spectrogram)
max_db_spectrogram: tf.Tensor = tf.reduce_max(db_spectrogram)
db_spectrogram: tf.Tensor = tf.maximum(
db_spectrogram, max_db_spectrogram - db_range
)
return from_float32_to_uint8(db_spectrogram, **kwargs)
def db_uint_spectrogram_to_gain(db_uint_spectrogram, min_db, max_db):
""" Decode spectrogram from uint8 decibel scale.
:param db_uint_spectrogram: Decibel pectrogram to decode.
:param min_db: Lower bound limit for decoding.
:param max_db: Upper bound limit for decoding.
:returns: Decoded spectrogram as float32 tensor.
def db_uint_spectrogram_to_gain(
db_uint_spectrogram: tf.Tensor, min_db: tf.Tensor, max_db: tf.Tensor
) -> tf.Tensor:
"""
Decode spectrogram from uint8 decibel scale.
Parameters:
db_uint_spectrogram (tensorflow.Tensor):
Decibel spectrogram to decode.
min_db (tensorflow.Tensor):
Lower bound limit for decoding.
max_db (tensorflow.Tensor):
Upper bound limit for decoding.
Returns:
tensorflow.Tensor:
Decoded spectrogram as `float32` tensor.
"""
db_spectrogram = from_uint8_to_float32(db_uint_spectrogram, min_db, max_db)
db_spectrogram: tf.Tensor = from_uint8_to_float32(
db_uint_spectrogram, min_db, max_db
)
return db_to_gain(db_spectrogram)
@@ -8,143 +8,178 @@
used within this library.
"""
import datetime as dt
import os
import shutil
from pathlib import Path
from typing import Dict, Optional, Union
# pyright: reportMissingImports=false
# pylint: disable=import-error
import ffmpeg
import numpy as np
# pylint: enable=import-error
from .adapter import AudioAdapter
from .. import SpleeterError
from ..utils.logging import get_logger
from ..types import Signal
from ..utils.logging import logger
from . import Codec
from .adapter import AudioAdapter
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# pylint: enable=import-error
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
def _check_ffmpeg_install():
""" Ensure FFMPEG binaries are available.
:raise SpleeterError: If ffmpeg or ffprobe is not found.
class FFMPEGProcessAudioAdapter(AudioAdapter):
"""
for binary in ('ffmpeg', 'ffprobe'):
if shutil.which(binary) is None:
raise SpleeterError('{} binary not found'.format(binary))
An AudioAdapter implementation that uses the FFMPEG binary through
subprocess in order to perform I/O operations for audio processing.
def _to_ffmpeg_time(n):
""" Format number of seconds to time expected by FFMPEG.
:param n: Time in seconds to format.
:returns: Formatted time in FFMPEG format.
When created, the FFMPEG binary path will be checked and expanded,
raising an exception if not found. Such path could be inferred using
the `FFMPEG_PATH` environment variable.
"""
m, s = divmod(n, 60)
h, m = divmod(m, 60)
return '%d:%02d:%09.6f' % (h, m, s)
def _to_ffmpeg_codec(codec):
ffmpeg_codecs = {
'm4a': 'aac',
'ogg': 'libvorbis',
'wma': 'wmav2',
SUPPORTED_CODECS: Dict[Codec, str] = {
Codec.M4A: "aac",
Codec.OGG: "libvorbis",
Codec.WMA: "wmav2",
}
return ffmpeg_codecs.get(codec) or codec
""" FFMPEG codec name mapping. """
def __init__(_) -> None:
"""
Default constructor, ensure FFMPEG binaries are available.
class FFMPEGProcessAudioAdapter(AudioAdapter):
""" An AudioAdapter implementation that use FFMPEG binary through
subprocess in order to perform I/O operation for audio processing.
When created, FFMPEG binary path will be checked and expended,
raising exception if not found. Such path could be infered using
FFMPEG_PATH environment variable.
"""
Raises:
SpleeterError:
If ffmpeg or ffprobe is not found.
"""
for binary in ("ffmpeg", "ffprobe"):
if shutil.which(binary) is None:
raise SpleeterError("{} binary not found".format(binary))
def load(
self, path, offset=None, duration=None,
sample_rate=None, dtype=np.float32):
""" Loads the audio file denoted by the given path
_,
path: Union[Path, str],
offset: Optional[float] = None,
duration: Optional[float] = None,
sample_rate: Optional[float] = None,
dtype: np.dtype = np.float32,
) -> Signal:
"""
Loads the audio file denoted by the given path
and returns its data as a waveform.
:param path: Path of the audio file to load data from.
:param offset: (Optional) Start offset to load from in seconds.
:param duration: (Optional) Duration to load in seconds.
:param sample_rate: (Optional) Sample rate to load audio with.
:param dtype: (Optional) Numpy data type to use, default to float32.
:returns: Loaded data as a (waveform, sample_rate) tuple.
:raise SpleeterError: If any error occurs while loading audio.
Parameters:
path (Union[Path, str]):
Path of the audio file to load data from.
offset (Optional[float]):
Start offset to load from in seconds.
duration (Optional[float]):
Duration to load in seconds.
sample_rate (Optional[float]):
Sample rate to load audio with.
dtype (numpy.dtype):
(Optional) Numpy data type to use, default to `float32`.
Returns:
Signal:
Loaded data as a (waveform, sample_rate) tuple.
Raises:
SpleeterError:
If any error occurs while loading audio.
"""
_check_ffmpeg_install()
if isinstance(path, Path):
path = str(path)
if not isinstance(path, str):
path = path.decode()
try:
probe = ffmpeg.probe(path)
except ffmpeg._run.Error as e:
raise SpleeterError(
'An error occurs with ffprobe (see ffprobe output below)\n\n{}'
.format(e.stderr.decode()))
if 'streams' not in probe or len(probe['streams']) == 0:
raise SpleeterError('No stream was found with ffprobe')
"An error occurs with ffprobe (see ffprobe output below)\n\n{}".format(
e.stderr.decode()
)
)
if "streams" not in probe or len(probe["streams"]) == 0:
raise SpleeterError("No stream was found with ffprobe")
metadata = next(
stream
for stream in probe['streams']
if stream['codec_type'] == 'audio')
n_channels = metadata['channels']
stream for stream in probe["streams"] if stream["codec_type"] == "audio"
)
n_channels = metadata["channels"]
if sample_rate is None:
sample_rate = metadata['sample_rate']
output_kwargs = {'format': 'f32le', 'ar': sample_rate}
sample_rate = metadata["sample_rate"]
output_kwargs = {"format": "f32le", "ar": sample_rate}
if duration is not None:
output_kwargs['t'] = _to_ffmpeg_time(duration)
output_kwargs["t"] = str(dt.timedelta(seconds=duration))
if offset is not None:
output_kwargs['ss'] = _to_ffmpeg_time(offset)
output_kwargs["ss"] = str(dt.timedelta(seconds=offset))
process = (
ffmpeg
.input(path)
.output('pipe:', **output_kwargs)
.run_async(pipe_stdout=True, pipe_stderr=True))
ffmpeg.input(path)
.output("pipe:", **output_kwargs)
.run_async(pipe_stdout=True, pipe_stderr=True)
)
buffer, _ = process.communicate()
waveform = np.frombuffer(buffer, dtype='<f4').reshape(-1, n_channels)
waveform = np.frombuffer(buffer, dtype="<f4").reshape(-1, n_channels)
if not waveform.dtype == np.dtype(dtype):
waveform = waveform.astype(dtype)
return (waveform, sample_rate)
def save(
self, path, data, sample_rate,
codec=None, bitrate=None):
""" Write waveform data to the file denoted by the given path
using FFMPEG process.
:param path: Path of the audio file to save data in.
:param data: Waveform data to write.
:param sample_rate: Sample rate to write file in.
:param codec: (Optional) Writing codec to use.
:param bitrate: (Optional) Bitrate of the written audio file.
:raise IOError: If any error occurs while using FFMPEG to write data.
self,
path: Union[Path, str],
data: np.ndarray,
sample_rate: float,
codec: Codec = None,
bitrate: str = None,
) -> None:
"""
Write waveform data to the file denoted by the given path using
FFMPEG process.
Parameters:
path (Union[Path, str]):
Path-like object of the audio file to save data in.
data (numpy.ndarray):
Waveform data to write.
sample_rate (float):
Sample rate to write file in.
codec (Codec):
(Optional) Writing codec to use, default to `None`.
bitrate (str):
(Optional) Bitrate of the written audio file, default to
`None`.
Raises:
IOError:
If any error occurs while using FFMPEG to write data.
"""
_check_ffmpeg_install()
if isinstance(path, Path):
path = str(path)
directory = os.path.dirname(path)
if not os.path.exists(directory):
raise SpleeterError(f'output directory does not exists: {directory}')
get_logger().debug('Writing file %s', path)
input_kwargs = {'ar': sample_rate, 'ac': data.shape[1]}
output_kwargs = {'ar': sample_rate, 'strict': '-2'}
raise SpleeterError(f"output directory does not exists: {directory}")
logger.debug(f"Writing file {path}")
input_kwargs = {"ar": sample_rate, "ac": data.shape[1]}
output_kwargs = {"ar": sample_rate, "strict": "-2"}
if bitrate:
output_kwargs['audio_bitrate'] = bitrate
if codec is not None and codec != 'wav':
output_kwargs['codec'] = _to_ffmpeg_codec(codec)
output_kwargs["audio_bitrate"] = bitrate
if codec is not None and codec != "wav":
output_kwargs["codec"] = self.SUPPORTED_CODECS.get(codec, codec)
process = (
ffmpeg
.input('pipe:', format='f32le', **input_kwargs)
ffmpeg.input("pipe:", format="f32le", **input_kwargs)
.output(path, **output_kwargs)
.overwrite_output()
.run_async(pipe_stdin=True, pipe_stderr=True, quiet=True))
.run_async(pipe_stdin=True, pipe_stderr=True, quiet=True)
)
try:
process.stdin.write(data.astype('<f4').tobytes())
process.stdin.write(data.astype("<f4").tobytes())
process.stdin.close()
process.wait()
except IOError:
raise SpleeterError(f'FFMPEG error: {process.stderr.read()}')
get_logger().info('File %s written successfully', path)
raise SpleeterError(f"FFMPEG error: {process.stderr.read()}")
logger.info(f"File {path} written successfully")
#!/usr/bin/env python
# coding: utf8
""" Spectrogram specific data augmentation """
""" Spectrogram specific data augmentation. """
# pyright: reportMissingImports=false
# pylint: disable=import-error
import numpy as np
import tensorflow as tf
from tensorflow.signal import hann_window, stft
from tensorflow.signal import stft, hann_window
# pylint: enable=import-error
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
def compute_spectrogram_tf(
waveform,
frame_length=2048, frame_step=512,
spec_exponent=1., window_exponent=1.):
""" Compute magnitude / power spectrogram from waveform as
a n_samples x n_channels tensor.
:param waveform: Input waveform as (times x number of channels)
tensor.
:param frame_length: Length of a STFT frame to use.
:param frame_step: HOP between successive frames.
:param spec_exponent: Exponent of the spectrogram (usually 1 for
magnitude spectrogram, or 2 for power spectrogram).
:param window_exponent: Exponent applied to the Hann windowing function
(may be useful for making perfect STFT/iSTFT
reconstruction).
:returns: Computed magnitude / power spectrogram as a
(T x F x n_channels) tensor.
waveform: tf.Tensor,
frame_length: int = 2048,
frame_step: int = 512,
spec_exponent: float = 1.0,
window_exponent: float = 1.0,
) -> tf.Tensor:
"""
stft_tensor = tf.transpose(
Compute magnitude / power spectrogram from waveform as a
`n_samples x n_channels` tensor.
Parameters:
waveform (tensorflow.Tensor):
Input waveform as `(times x number of channels)` tensor.
frame_length (int):
Length of a STFT frame to use.
frame_step (int):
HOP between successive frames.
spec_exponent (float):
Exponent of the spectrogram (usually 1 for magnitude
spectrogram, or 2 for power spectrogram).
window_exponent (float):
Exponent applied to the Hann windowing function (may be
useful for making perfect STFT/iSTFT reconstruction).
Returns:
tensorflow.Tensor:
Computed magnitude / power spectrogram as a
`(T x F x n_channels)` tensor.
"""
stft_tensor: tf.Tensor = tf.transpose(
stft(
tf.transpose(waveform),
frame_length,
frame_step,
window_fn=lambda f, dtype: hann_window(
f,
periodic=True,
dtype=waveform.dtype) ** window_exponent),
perm=[1, 2, 0])
f, periodic=True, dtype=waveform.dtype
)
** window_exponent,
),
perm=[1, 2, 0],
)
return tf.abs(stft_tensor) ** spec_exponent
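A minimal usage sketch for this function (shapes follow the docstring above):

```python
import tensorflow as tf

# One second of stereo noise at 44.1 kHz as a (times x channels) tensor.
waveform = tf.random.normal([44100, 2])
spec = compute_spectrogram_tf(waveform, frame_length=2048, frame_step=512)
# spec has shape (T, F, n_channels) with F == frame_length // 2 + 1 == 1025.
```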
def time_stretch(
spectrogram,
factor=1.0,
method=tf.image.ResizeMethod.BILINEAR):
""" Time stretch a spectrogram preserving shape in tensorflow. Note that
spectrogram: tf.Tensor,
factor: float = 1.0,
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR,
) -> tf.Tensor:
"""
Time stretch a spectrogram preserving shape in tensorflow. Note that
this is an approximation in the frequency domain.
:param spectrogram: Input spectrogram to be time stretched as tensor.
:param factor: (Optional) Time stretch factor, must be >0, default to 1.
:param method: (Optional) Interpolation method, default to BILINEAR.
:returns: Time stretched spectrogram as tensor with same shape.
Parameters:
spectrogram (tensorflow.Tensor):
Input spectrogram to be time stretched as tensor.
factor (float):
(Optional) Time stretch factor, must be > 0, default to `1`.
method (tensorflow.image.ResizeMethod):
(Optional) Interpolation method, default to `BILINEAR`.
Returns:
tensorflow.Tensor:
Time stretched spectrogram as tensor with same shape.
"""
T = tf.shape(spectrogram)[0]
T_ts = tf.cast(tf.cast(T, tf.float32) * factor, tf.int32)[0]
F = tf.shape(spectrogram)[1]
ts_spec = tf.image.resize_images(
spectrogram,
[T_ts, F],
method=method,
align_corners=True)
spectrogram, [T_ts, F], method=method, align_corners=True
)
return tf.image.resize_image_with_crop_or_pad(ts_spec, T, F)
def random_time_stretch(spectrogram, factor_min=0.9, factor_max=1.1, **kwargs):
""" Time stretch a spectrogram preserving shape with random ratio in
tensorflow. Applies time_stretch to spectrogram with a random ratio drawn
uniformly in [factor_min, factor_max].
:param spectrogram: Input spectrogram to be time stretched as tensor.
:param factor_min: (Optional) Min time stretch factor, default to 0.9.
:param factor_max: (Optional) Max time stretch factor, default to 1.1.
:returns: Randomly time stretched spectrogram as tensor with same shape.
def random_time_stretch(
spectrogram: tf.Tensor, factor_min: float = 0.9, factor_max: float = 1.1, **kwargs
) -> tf.Tensor:
"""
Time stretch a spectrogram preserving shape with random ratio in
tensorflow. Applies time_stretch to spectrogram with a random ratio
drawn uniformly in `[factor_min, factor_max]`.
Parameters:
spectrogram (tensorflow.Tensor):
Input spectrogram to be time stretched as tensor.
factor_min (float):
(Optional) Min time stretch factor, default to `0.9`.
factor_max (float):
(Optional) Max time stretch factor, default to `1.1`.
Returns:
tensorflow.Tensor:
Randomly time stretched spectrogram as tensor with same shape.
"""
factor = tf.random_uniform(
shape=(1,),
seed=0) * (factor_max - factor_min) + factor_min
factor = (
tf.random_uniform(shape=(1,), seed=0) * (factor_max - factor_min) + factor_min
)
return time_stretch(spectrogram, factor=factor, **kwargs)
def pitch_shift(
spectrogram,
semitone_shift=0.0,
method=tf.image.ResizeMethod.BILINEAR):
""" Pitch shift a spectrogram preserving shape in tensorflow. Note that
spectrogram: tf.Tensor,
semitone_shift: float = 0.0,
method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR,
) -> tf.Tensor:
"""
Pitch shift a spectrogram preserving shape in tensorflow. Note that
this is an approximation in the frequency domain.
:param spectrogram: Input spectrogram to be pitch shifted as tensor.
:param semitone_shift: (Optional) Pitch shift in semitone, default to 0.0.
:param method: (Optional) Interpolation method, default to BILINEAR.
:returns: Pitch shifted spectrogram (same shape as spectrogram).
Parameters:
spectrogram (tensorflow.Tensor):
Input spectrogram to be pitch shifted as tensor.
semitone_shift (float):
(Optional) Pitch shift in semitone, default to `0.0`.
method (tensorflow.image.ResizeMethod):
(Optional) Interpolation method, default to `BILINEAR`.
Returns:
tensorflow.Tensor:
Pitch shifted spectrogram (same shape as spectrogram).
"""
factor = 2 ** (semitone_shift / 12.)
factor = 2 ** (semitone_shift / 12.0)
T = tf.shape(spectrogram)[0]
F = tf.shape(spectrogram)[1]
F_ps = tf.cast(tf.cast(F, tf.float32) * factor, tf.int32)[0]
ps_spec = tf.image.resize_images(
spectrogram,
[T, F_ps],
method=method,
align_corners=True)
spectrogram, [T, F_ps], method=method, align_corners=True
)
paddings = [[0, 0], [0, tf.maximum(0, F - F_ps)], [0, 0]]
return tf.pad(ps_spec[:, :F, :], paddings, 'CONSTANT')
return tf.pad(ps_spec[:, :F, :], paddings, "CONSTANT")
def random_pitch_shift(spectrogram, shift_min=-1., shift_max=1., **kwargs):
""" Pitch shift a spectrogram preserving shape with random ratio in
def random_pitch_shift(
spectrogram: tf.Tensor, shift_min: float = -1.0, shift_max: float = 1.0, **kwargs
) -> tf.Tensor:
"""
Pitch shift a spectrogram preserving shape with random ratio in
tensorflow. Applies pitch_shift to spectrogram with a random shift
amount (expressed in semitones) drawn uniformly in [shift_min, shift_max].
:param spectrogram: Input spectrogram to be pitch shifted as tensor.
:param shift_min: (Optional) Min pitch shift in semitone, default to -1.
:param shift_max: (Optional) Max pitch shift in semitone, default to 1.
:returns: Randomly pitch shifted spectrogram (same shape as spectrogram).
amount (expressed in semitones) drawn uniformly in
`[shift_min, shift_max]`.
Parameters:
spectrogram (tensorflow.Tensor):
Input spectrogram to be pitch shifted as tensor.
shift_min (float):
(Optional) Min pitch shift in semitone, default to -1.
shift_max (float):
(Optional) Max pitch shift in semitone, default to 1.
Returns:
tensorflow.Tensor:
Randomly pitch shifted spectrogram (same shape as spectrogram).
"""
semitone_shift = tf.random_uniform(
shape=(1,),
seed=0) * (shift_max - shift_min) + shift_min
semitone_shift = (
tf.random_uniform(shape=(1,), seed=0) * (shift_max - shift_min) + shift_min
)
return pitch_shift(spectrogram, semitone_shift=semitone_shift, **kwargs)
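Taken together, a hedged sketch of chaining these augmentations on a training spectrogram (`waveform` is an assumed `(times x channels)` float tensor; note the module relies on TF1-style ops such as `tf.random_uniform`, so it targets the TensorFlow version used by this project):

```python
# Randomized training-time augmentation: stretch in time, then shift pitch.
spec = compute_spectrogram_tf(waveform)
spec = random_time_stretch(spec, factor_min=0.9, factor_max=1.1)
spec = random_pitch_shift(spec, shift_min=-1.0, shift_max=1.0)
```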
#!/usr/bin/env python
# coding: utf8
""" This modules provides spleeter command as well as CLI parsing methods. """
import json
import logging
from argparse import ArgumentParser
from tempfile import gettempdir
from os.path import exists, join
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
# -i opt specification (separate).
OPT_INPUT = {
'dest': 'inputs',
'nargs': '+',
'help': 'List of input audio filenames',
'required': True
}
# -o opt specification (evaluate and separate).
OPT_OUTPUT = {
'dest': 'output_path',
'default': join(gettempdir(), 'separated_audio'),
'help': 'Path of the output directory to write audio files in'
}
# -f opt specification (separate).
OPT_FORMAT = {
'dest': 'filename_format',
'default': '{filename}/{instrument}.{codec}',
'help': (
'Template string that will be formatted to generate '
'output filename. Such template should be a Python formattable '
'string, and could use {filename}, {instrument}, and {codec} '
'variables.'
)
}
# -p opt specification (train, evaluate and separate).
OPT_PARAMS = {
'dest': 'configuration',
'default': 'spleeter:2stems',
'type': str,
'action': 'store',
'help': 'JSON filename that contains params'
}
# -s opt specification (separate).
OPT_OFFSET = {
'dest': 'offset',
'type': float,
'default': 0.,
'help': 'Set the starting offset to separate audio from.'
}
# -d opt specification (separate).
OPT_DURATION = {
'dest': 'duration',
'type': float,
'default': 600.,
'help': (
'Set a maximum duration for processing audio '
'(only separate offset + duration first seconds of '
'the input file)')
}
# -w opt specification (separate)
OPT_STFT_BACKEND = {
'dest': 'stft_backend',
'type': str,
'choices' : ["tensorflow", "librosa", "auto"],
'default': "auto",
'help': 'Who should be in charge of computing the stfts. Librosa is faster than tensorflow on CPU and uses'
' less memory. "auto" will use tensorflow when GPU acceleration is available and librosa when not.'
}
# -c opt specification (separate).
OPT_CODEC = {
'dest': 'codec',
'choices': ('wav', 'mp3', 'ogg', 'm4a', 'wma', 'flac'),
'default': 'wav',
'help': 'Audio codec to be used for the separated output'
}
# -b opt specification (separate).
OPT_BITRATE = {
'dest': 'bitrate',
'default': '128k',
'help': 'Audio bitrate to be used for the separated output'
}
# -m opt specification (evaluate and separate).
OPT_MWF = {
'dest': 'MWF',
'action': 'store_const',
'const': True,
'default': False,
'help': 'Whether to use multichannel Wiener filtering for separation',
}
# --mus_dir opt specification (evaluate).
OPT_MUSDB = {
'dest': 'mus_dir',
'type': str,
'required': True,
'help': 'Path to the folder containing the musdb dataset'
}
# -d opt specification (train).
OPT_DATA = {
'dest': 'audio_path',
'type': str,
'required': True,
'help': 'Path of the folder containing audio data for training'
}
# -a opt specification (train, evaluate and separate).
OPT_ADAPTER = {
'dest': 'audio_adapter',
'type': str,
'help': 'Name of the audio adapter to use for audio I/O'
}
# --verbose opt specification (train, evaluate and separate).
OPT_VERBOSE = {
'action': 'store_true',
'help': 'Shows verbose logs'
}
def _add_common_options(parser):
""" Add common option to the given parser.
:param parser: Parser to add common opt to.
"""
parser.add_argument('-a', '--adapter', **OPT_ADAPTER)
parser.add_argument('-p', '--params_filename', **OPT_PARAMS)
parser.add_argument('--verbose', **OPT_VERBOSE)
def _create_train_parser(parser_factory):
""" Creates an argparser for training command
:param parser_factory: Factory to use to create parser instance.
:returns: Created and configured parser.
"""
parser = parser_factory('train', help='Train a source separation model')
_add_common_options(parser)
parser.add_argument('-d', '--data', **OPT_DATA)
return parser
def _create_evaluate_parser(parser_factory):
""" Creates an argparser for evaluation command
:param parser_factory: Factory to use to create parser instance.
:returns: Created and configured parser.
"""
parser = parser_factory(
'evaluate',
help='Evaluate a model on the musDB test dataset')
_add_common_options(parser)
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
parser.add_argument('--mus_dir', **OPT_MUSDB)
parser.add_argument('-m', '--mwf', **OPT_MWF)
parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
return parser
def _create_separate_parser(parser_factory):
""" Creates an argparser for separation command
:param parser_factory: Factory to use to create parser instance.
:returns: Created and configured parser.
"""
parser = parser_factory('separate', help='Separate audio files')
_add_common_options(parser)
parser.add_argument('-i', '--inputs', **OPT_INPUT)
parser.add_argument('-o', '--output_path', **OPT_OUTPUT)
parser.add_argument('-f', '--filename_format', **OPT_FORMAT)
parser.add_argument('-d', '--duration', **OPT_DURATION)
parser.add_argument('-s', '--offset', **OPT_OFFSET)
parser.add_argument('-c', '--codec', **OPT_CODEC)
parser.add_argument('-b', '--bitrate', **OPT_BITRATE)
parser.add_argument('-m', '--mwf', **OPT_MWF)
parser.add_argument('-B', '--stft-backend', **OPT_STFT_BACKEND)
return parser
def create_argument_parser():
""" Creates overall command line parser for Spleeter.
:returns: Created argument parser.
"""
parser = ArgumentParser(prog='spleeter')
subparsers = parser.add_subparsers()
subparsers.dest = 'command'
subparsers.required = True
_create_separate_parser(subparsers.add_parser)
_create_train_parser(subparsers.add_parser)
_create_evaluate_parser(subparsers.add_parser)
return parser
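# Example (illustrative only, hypothetical argument values): exercising the
# parser built above.
if __name__ == '__main__':
    _parser = create_argument_parser()
    _args = _parser.parse_args(
        ['separate', '-i', 'audio.mp3', '-o', '/tmp/separated'])
    print(_args.command, _args.inputs, _args.output_path)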
#!/usr/bin/env python
# coding: utf8
"""
Entrypoint provider for performing model evaluation.
Evaluation is performed against musDB dataset.
USAGE: python -m spleeter evaluate \
-p /path/to/params \
-o /path/to/output/dir \
[-m] \
--mus_dir /path/to/musdb
"""
import sys
import json
from argparse import Namespace
from glob import glob
from os.path import join, exists
# pylint: disable=import-error
import numpy as np
# pylint: enable=import-error
from .separate import entrypoint as separate_entrypoint
from ..utils.logging import get_logger
try:
import musdb
import museval
except ImportError:
logger = get_logger()
logger.error('Extra dependencies musdb and museval not found')
logger.error('Please install musdb and museval first, aborting')
sys.exit(1)
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
_SPLIT = 'test'
_MIXTURE = 'mixture.wav'
_AUDIO_DIRECTORY = 'audio'
_METRICS_DIRECTORY = 'metrics'
_INSTRUMENTS = ('vocals', 'drums', 'bass', 'other')
_METRICS = ('SDR', 'SAR', 'SIR', 'ISR')
def _separate_evaluation_dataset(arguments, musdb_root_directory, params):
""" Performs audio separation on the musdb dataset from
the given directory and params.
:param arguments: Entrypoint arguments.
:param musdb_root_directory: Directory to retrieve dataset from.
:param params: Spleeter configuration to apply to separation.
:returns: Separation output directory path.
"""
songs = glob(join(musdb_root_directory, _SPLIT, '*/'))
mixtures = [join(song, _MIXTURE) for song in songs]
audio_output_directory = join(
arguments.output_path,
_AUDIO_DIRECTORY)
separate_entrypoint(
Namespace(
audio_adapter=arguments.audio_adapter,
configuration=arguments.configuration,
inputs=mixtures,
output_path=join(audio_output_directory, _SPLIT),
filename_format='{foldername}/{instrument}.{codec}',
codec='wav',
duration=600.,
offset=0.,
bitrate='128k',
MWF=arguments.MWF,
verbose=arguments.verbose,
stft_backend=arguments.stft_backend),
params)
return audio_output_directory
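# Design note: _separate_evaluation_dataset (above) reuses the separate
# command by synthesizing an argparse.Namespace and delegating to its
# entrypoint, so evaluation and separation share the exact same code path.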
def _compute_musdb_metrics(
arguments,
musdb_root_directory,
audio_output_directory):
""" Generates musdb metrics fro previsouly computed audio estimation.
:param arguments: Entrypoint arguments.
:param audio_output_directory: Directory to get audio estimation from.
:returns: Path of generated metrics directory.
"""
metrics_output_directory = join(
arguments.output_path,
_METRICS_DIRECTORY)
get_logger().info('Starting musdb evaluation (this may take a while)...')
dataset = musdb.DB(
root=musdb_root_directory,
is_wav=True,
subsets=[_SPLIT])
museval.eval_mus_dir(
dataset=dataset,
estimates_dir=audio_output_directory,
output_dir=metrics_output_directory)
get_logger().info('musdb evaluation done')
return metrics_output_directory
def _compile_metrics(metrics_output_directory):
""" Compiles metrics from given directory and returns
results as dict.
:param metrics_output_directory: Directory to get metrics from.
:returns: Compiled metrics as dict.
"""
songs = glob(join(metrics_output_directory, 'test/*.json'))
metrics = {
instrument: {k: [] for k in _METRICS}
for instrument in _INSTRUMENTS}
for song in songs:
with open(song, 'r') as stream:
data = json.load(stream)
for target in data['targets']:
instrument = target['name']
for metric in _METRICS:
metric_median = np.median([
frame['metrics'][metric]
for frame in target['frames']
if not np.isnan(frame['metrics'][metric])])
metrics[instrument][metric].append(metric_median)
return metrics
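# The mapping returned above holds one list of per-song medians for each
# (instrument, metric) pair, e.g. (sketch):
#   {'vocals': {'SDR': [...], 'SAR': [...], 'SIR': [...], 'ISR': [...]}, ...}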
def entrypoint(arguments, params):
""" Command entrypoint.
:param arguments: Command line parsed argument as argparse.Namespace.
:param params: Deserialized JSON configuration file provided in CLI args.
"""
# Parse and check musdb directory.
musdb_root_directory = arguments.mus_dir
if not exists(musdb_root_directory):
raise IOError(f'musdb directory {musdb_root_directory} not found')
# Separate musdb sources.
audio_output_directory = _separate_evaluation_dataset(
arguments,
musdb_root_directory,
params)
# Compute metrics with musdb.
metrics_output_directory = _compute_musdb_metrics(
arguments,
musdb_root_directory,
audio_output_directory)
# Compute and pretty print median metrics.
metrics = _compile_metrics(metrics_output_directory)
for instrument, instrument_metrics in metrics.items():
get_logger().info('%s:', instrument)
for metric, values in instrument_metrics.items():
get_logger().info('%s: %s', metric, f'{np.median(values):.3f}')
return metrics
#!/usr/bin/env python
# coding: utf8
"""
Entrypoint provider for performing source separation.
USAGE: python -m spleeter separate \
-p /path/to/params \
-o /path/to/output/dir \
-i /path/to/audio1.wav /path/to/audio2.mp3
"""
from ..audio.adapter import get_audio_adapter
from ..separator import Separator
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
def entrypoint(arguments, params):
""" Command entrypoint.
:param arguments: Command line parsed argument as argparse.Namespace.
:param params: Deserialized JSON configuration file provided in CLI args.
"""
# TODO: check with output naming.
audio_adapter = get_audio_adapter(arguments.audio_adapter)
separator = Separator(
arguments.configuration,
MWF=arguments.MWF,
stft_backend=arguments.stft_backend)
for filename in arguments.inputs:
separator.separate_to_file(
filename,
arguments.output_path,
audio_adapter=audio_adapter,
offset=arguments.offset,
duration=arguments.duration,
codec=arguments.codec,
bitrate=arguments.bitrate,
filename_format=arguments.filename_format,
synchronous=False
)
separator.join()
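# Note: separate_to_file is invoked with synchronous=False, so exports are
# dispatched without blocking; the trailing separator.join() waits for all
# pending separations to finish before the command returns.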
#!/usr/bin/env python
# coding: utf8
"""
Entrypoint provider for performing model training.
USAGE: python -m spleeter train -p /path/to/params
"""
from functools import partial
# pylint: disable=import-error
import tensorflow as tf
# pylint: enable=import-error
from ..audio.adapter import get_audio_adapter
from ..dataset import get_training_dataset, get_validation_dataset
from ..model import model_fn
from ..model.provider import ModelProvider
from ..utils.logging import get_logger
__email__ = 'spleeter@deezer.com'
__author__ = 'Deezer Research'
__license__ = 'MIT License'
def _create_estimator(params):
""" Creates estimator.
:param params: TF params to build estimator from.
:returns: Built estimator.
"""
session_config = tf.compat.v1.ConfigProto()
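# Cap TensorFlow at ~45% of available GPU memory; presumably this leaves
# room for the evaluation session to share the same device (rationale
# assumed, not stated here).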
session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
estimator = tf.estimator.Estimator(
model_fn=model_fn,
model_dir=params['model_dir'],
params=params,
config=tf.estimator.RunConfig(
save_checkpoints_steps=params['save_checkpoints_steps'],
tf_random_seed=params['random_seed'],
save_summary_steps=params['save_summary_steps'],
session_config=session_config,
log_step_count_steps=10,
keep_checkpoint_max=2))
return estimator
def _create_train_spec(params, audio_adapter, audio_path):
""" Creates train spec.
:param params: TF params to build spec from.
:returns: Built train spec.
"""
input_fn = partial(get_training_dataset, params, audio_adapter, audio_path)
train_spec = tf.estimator.TrainSpec(
input_fn=input_fn,
max_steps=params['train_max_steps'])
return train_spec
def _create_evaluation_spec(params, audio_adapter, audio_path):
""" Setup eval spec evaluating ever n seconds
:param params: TF params to build spec from.
:returns: Built evaluation spec.
"""
input_fn = partial(
get_validation_dataset,
params,
audio_adapter,
audio_path)
evaluation_spec = tf.estimator.EvalSpec(
input_fn=input_fn,
steps=None,
throttle_secs=params['throttle_secs'])
return evaluation_spec
def entrypoint(arguments, params):
""" Command entrypoint.
:param arguments: Command line parsed argument as argparse.Namespace.
:param params: Deserialized JSON configuration file provided in CLI args.
"""
audio_adapter = get_audio_adapter(arguments.audio_adapter)
audio_path = arguments.audio_path
estimator = _create_estimator(params)
train_spec = _create_train_spec(params, audio_adapter, audio_path)
evaluation_spec = _create_evaluation_spec(
params,
audio_adapter,
audio_path)
get_logger().info('Start model training')
tf.estimator.train_and_evaluate(
estimator,
train_spec,
evaluation_spec)
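# Assumption based on the provider API's naming: writeProbe marks the model
# directory as containing a fully trained model so later runs can detect it.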
ModelProvider.writeProbe(params['model_dir'])
get_logger().info('Model training done')
......@@ -3,6 +3,6 @@
""" Packages that provides static resources file for the library. """
__email__ = "spleeter@deezer.com"
__author__ = "Deezer Research"
__license__ = "MIT License"
#!/usr/bin/env python
# coding: utf8
""" Custom types definition. """
from typing import Any, Tuple
# pyright: reportMissingImports=false
# pylint: disable=import-error
import numpy as np
# pylint: enable=import-error
AudioDescriptor: type = Any
Signal: type = Tuple[np.ndarray, float]
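# Illustrative sketch (not part of the library): a Signal pairs a waveform
# array with its sample rate, while an AudioDescriptor is any value an audio
# adapter can resolve (typically a file path).
if __name__ == '__main__':
    _waveform = np.zeros((44100, 2), dtype=np.float32)
    _signal: Signal = (_waveform, 44100.0)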