Unverified commit ad8856aa, authored by R ranchlai, committed by GitHub

Update docstrings with examples/shapes, controllable backends, and some bug fixes (#5324)

* added sound classification

* added license, clean code, add pre-commit

* update req

* moved to PaddlePaddle-models

* code re-structure

* update README.md

* update README.md

* Update README.md

* add audioset training

* default resample mode to kaiser_fast

* delete some comments

* precommit check

* sha->rev

* add config.ymal

* remove SoundClassification from paddlespeech, since it's in PaddleAudio now

* add labels

* remove old labels

* update code

* empty

* #5300

* add evaluate, etc

* remove trace

* import evaluate

* path update

* precommit check

* recover slowfast

* restore README.md to paddle:develop

* refactor

* update readme

* update README.md

* refactor

* refactor

* refactor

* refactor

* precommit fixed

* update README.md

* Update README.md

* Update README.md

* Update train.py

changed prefix, removed some comments

* add wav file for testing

* bug fixed in eval, new checkpoint mAP=0.416

* Update README.md

* added dcase task1b example

* update README.md

* code fixed for last review

* fixed level string formatting

* fixed according to PR reviews

* added wav2vec2.0

* restore datasets

* add license, remove scipy, move test_audio to cloud

* remove 3rd-party dependency:pathos

* add testing for wav2vec2

* update README.md

* updated README.md, added librispeech results

* Revert "updated README.md, added librispeech results"

This reverts commit da4012958e8e0bf2d7f4b608f74518583dd7d73b.

* code fixed from reviews

* add librispeech test

* remove pathos imports

* updated README.md

* update README.md

* minor-fix according to code reviews

* updated README_LP.MD

* fixed according to code review

* fixed according to code review

* added preprocessing example

* removed dcase2021_task1b from examples

* remove preprocessing from examples

* added amsoftmax to losses

* added eer/min_dcf to metrics

* updated __init__.py

* add stft,spectrogram, melspectrogram, log-melspectrogram

* add _internal, transforms, functional to imports

* add new module: functional

* add new module: window.py to _internal/

* add corresponding new unit tests for the new modules

* added ISTFT

* clean code and docstring, clean unit test

* clean code and docstring

* functional

* added back preprocessing

* add README.md

* remove preprocessing for now

* clean code, add doc

* change _internal to signal

* add new transforms

* add new functionals

* add eps to amsoftmax, return the prediction

* add ffmpeg backend

* remove dithering in depth-convert, add ffmpeg to backend

* add Mudecode/enccode/RandomCodec

* changed variable name, fixed bug

* use namedtuple for returning

* refactor utils

* refactor

* add melspectrogram/spectrogram, add doc string

* add doc string, clean code

* rename window to windowing

* updated docstring, minor bug fixed

* move losses.py to future examples

* remove mu_encode/decode

* refactor

* move metrics to future examples

* remove features/

* naming changes for mu law algorithms

* update test, add testing utils

* fixed import

* fixed import

* fixed duplicate output in logging

* add code examples, shape info, etc

* add doc for public functions

* make backend controllable

* fixed coding style in docstring
Parent a9cd9789
......@@ -13,6 +13,8 @@
# limitations under the License.
__all__ = [
'set_backend',
'get_backends',
'resample',
'to_mono',
'depth_convert',
......@@ -36,16 +38,39 @@ from ._ffmpeg import DecodingError, FFmpegAudioFile
NORMALMIZE_TYPES = ['linear', 'gaussian']
MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
SUPPORT_BACKENDS = ['ffmpeg', 'soundfile']
EPS = 1e-8
BACK_END = None
def set_backend(backend: Union[str, None] = 'ffmpeg'):
"""Set audio decoding backend.
Parameters:
backend(str|None): The name of the backend to use. If None, paddleaudio will
choose the optimal backend automatically.
Notes:
Use get_backends() to get available backends.
"""
global BACK_END
if backend and backend not in SUPPORT_BACKENDS:
raise ParameterError(f'Unsupported backend {backend}, ' +
f'supported backends are {SUPPORT_BACKENDS}')
BACK_END = backend
def get_backends():
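"""Return the list of supported audio backends."""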
return SUPPORT_BACKENDS
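A minimal usage sketch of the backend switch above. It assumes set_backend and get_backends are re-exported at the paddleaudio package level, as the __all__ list suggests; the exact import path is an assumption.

import paddleaudio

print(paddleaudio.get_backends())       # ['ffmpeg', 'soundfile']
paddleaudio.set_backend('soundfile')    # decode through libsndfile
paddleaudio.set_backend(None)           # let paddleaudio choose the backend automatically
# paddleaudio.set_backend('sox')        # would raise ParameterError: unsupported backend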
def _safe_cast(y: array, dtype: Union[type, str]) -> array:
"""Data type casting in a safe way, i.e., prevent overflow or underflow.
Notes:
This function is used internally.
"""
return np.clip(y, np.iinfo(dtype).min, np.iinfo(dtype).max).astype(dtype)
......@@ -80,8 +105,8 @@ def _sound_file_load(file: os.PathLike,
offset: Optional[float] = None,
dtype: str = 'int16',
duration: Optional[int] = None) -> Tuple[array, int]:
"""Load audio using soundfile library
This function load audio file using libsndfile.
"""Load audio using soundfile library.
This function loads audio file using libsndfile.
Reference:
http://www.mega-nerd.com/libsndfile/#Features
......@@ -102,8 +127,8 @@ def _sound_file_load(file: os.PathLike,
def _sox_file_load():
"""Load audio using sox library
This function load audio file using sox.
"""Load audio using sox library.
This function loads audio file using sox.
Reference:
http://sox.sourceforge.net/
......@@ -127,13 +152,13 @@ def depth_convert(y: array, dtype: Union[type, str]) -> array:
SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64']
if y.dtype not in SUPPORT_DTYPE:
raise ParameterError(
f'Unsupported audio dtype, '
'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')
f'Unsupported audio dtype, ' +
f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')
if dtype not in SUPPORT_DTYPE:
raise ParameterError(
f'Unsupported audio dtype, '
'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}')
f'Unsupported audio dtype, ' +
f'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}')
if dtype == y.dtype:
return y
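A small usage sketch for depth_convert, assuming it is importable from the paddleaudio package as the __all__ list above suggests.

import numpy as np
import paddleaudio

y = (0.1 * np.random.randn(16000)).astype('float32')   # float waveform, roughly in [-1, 1]
y_i16 = paddleaudio.depth_convert(y, 'int16')           # cast to 16-bit samples (no dithering)
print(y_i16.dtype)                                      # int16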
......@@ -171,21 +196,22 @@ def resample(y: array,
src_sr: int,
target_sr: int,
mode: str = 'kaiser_fast') -> array:
"""Apply resampling to the input audio array.
"""Apply audio resampling to the input audio array.
Notes:
1. This function uses resampy.resample to do the resampling.
2. The default mode is kaiser_fast. For better audio quality,
use mode = 'kaiser_fast'
use mode = 'kaiser_best'
"""
if mode == 'kaiser_best':
warnings.warn(
f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \
we recommend the mode kaiser_fast in large scale audio trainning')
f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. ' +
'This function is pretty slow; ' +
'we recommend the mode kaiser_fast in large-scale audio training')
if not isinstance(y, np.ndarray):
raise ParameterError(
'Only support numpy array, but received y in {type(y)}')
raise TypeError(
f'Only support numpy array, but received y in {type(y)}')
if mode not in RESAMPLE_MODES:
raise ParameterError(f'resample mode must in {RESAMPLE_MODES}')
......@@ -193,7 +219,7 @@ def resample(y: array,
return resampy.resample(y, src_sr, target_sr, filter=mode)
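A short resampling sketch using the default kaiser_fast filter; it assumes resample is exposed by the paddleaudio package and that resampy is installed.

import numpy as np
import paddleaudio

y = np.random.randn(32000).astype('float32')   # 1 second of audio at 32 kHz
y_16k = paddleaudio.resample(y, src_sr=32000, target_sr=16000, mode='kaiser_fast')
print(y_16k.shape)                             # (16000,)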
def to_mono(y: array, merge_type: str = 'average') -> array:
def to_mono(y: array, merge_type: str = 'ch0') -> array:
"""Convert stereo audio to mono audio.
Parameters:
y(array): the input audio array of shape [2,n], where n is the number of audio samples.
......@@ -229,17 +255,14 @@ def to_mono(y: array, merge_type: str = 'average') -> array:
# need to do averaging according to dtype
if y.dtype == 'float32':
y_out = (y[0] + y[1]) * 0.5
y_out = y.mean(0)
elif y.dtype == 'int16':
y_out = y.astype('int32')
y_out = (y_out[0] + y_out[1]) // 2
y_out = y.mean(0)
y_out = np.clip(y_out,
np.iinfo(y.dtype).min,
np.iinfo(y.dtype).max).astype(y.dtype)
elif y.dtype == 'int8':
y_out = y.astype('int16')
y_out = (y_out[0] + y_out[1]) // 2
y_out = y.mean(0)
y_out = np.clip(y_out,
np.iinfo(y.dtype).min,
np.iinfo(y.dtype).max).astype(y.dtype)
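A quick sketch of stereo-to-mono merging, assuming to_mono is exposed by the paddleaudio package; the expected input layout is [channels, samples].

import numpy as np
import paddleaudio

stereo = np.random.randn(2, 16000).astype('float32')      # [2, n] stereo signal
mono = paddleaudio.to_mono(stereo, merge_type='average')   # other choices: 'ch0', 'ch1', 'random'
print(mono.shape)                                          # (16000,)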
......@@ -293,6 +316,11 @@ def save_wav(y: array, sr: int, file: os.PathLike) -> None:
Notes:
The function only supports raw wav format.
"""
if y.ndim == 2 and y.shape[0] > y.shape[1]:
warnings.warn(
f'The audio array you are trying to save has {y.shape[0]} channels ' +
f'and a length of {y.shape[1]} samples. Is that what you mean? ' +
'If not, transpose the array before saving.')
if not file.endswith('.wav'):
raise ParameterError(
f'only .wav file supported, but dst file name is: {file}')
......@@ -309,7 +337,7 @@ def save_wav(y: array, sr: int, file: os.PathLike) -> None:
else:
y_out = y
wavfile.write(file, sr, y_out)
wavfile.write(file, sr, y_out.T)
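A hedged sketch of saving a stereo buffer in the [channels, samples] layout that the warning above checks for; save_wav is assumed to be exposed by paddleaudio and the output file name is only a placeholder.

import numpy as np
import paddleaudio

y = (0.1 * np.random.randn(2, 16000)).astype('float32')    # [channels, samples]
paddleaudio.save_wav(y, sr=16000, file='demo_stereo.wav')   # written as samples x channels via y_out.T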
def load(
......@@ -337,7 +365,7 @@ def load(
if it is originally stereo. See to_mono() for more details.
The default value is True.
merge_type(str): the merging algorithm. See to_mono() for more details.
The default value is 'average'.
The default value is 'ch0'.
normal(bool): whether to normalize the audio waveform. If True, the audio will be normalized using algorithm
specified in norm_type. See normalize() for more details.
The default value is True.
......@@ -360,19 +388,27 @@ def load(
DecodingError, if audio file is not supported
"""
try:
if BACK_END == 'ffmpeg':
y, r = _ffmpeg_load(file, offset=offset, duration=duration)
elif BACK_END == 'soundfile':
y, r = _sound_file_load(file,
offset=offset,
dtype=dtype,
duration=duration)
except FileNotFoundError:
raise FileNotFoundError(
f'Trying to load a file that does not exist: {file}')
except:
else:
try:
y, r = _ffmpeg_load(file, offset=offset, duration=duration)
except DecodingError:
raise DecodingError(f'Failed to load and decode file {file}')
y, r = _sound_file_load(file,
offset=offset,
dtype=dtype,
duration=duration)
except FileNotFoundError:
raise FileNotFoundError(
f'Trying to load a file that does not exist: {file}')
except:
try:
y, r = _ffmpeg_load(file, offset=offset, duration=duration)
except DecodingError:
raise DecodingError(f'Failed to load and decode file {file}')
if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)):
return np.array([], dtype=dtype) # return empty audio
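An end-to-end sketch of the dispatch above: pick a backend explicitly or leave it to the automatic soundfile/ffmpeg fallback. The package-level import and the file path are assumptions, so the decoding call is left commented out.

import paddleaudio

paddleaudio.set_backend('ffmpeg')          # or 'soundfile'; None restores the automatic fallback
# y, sr = paddleaudio.load('speech.flac')  # 'speech.flac' is a placeholder path
# print(y.shape, sr)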
......
......@@ -30,7 +30,7 @@ __all__ = [
'tukey',
'taylor',
]
_PI = 3.141592653589793
math.pi = 3.141592653589793
def _cat(a: List[Tensor], data_type: str) -> Tensor:
......@@ -93,11 +93,17 @@ def general_hamming(M: int, alpha: float, sym: bool = True) -> Tensor:
def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
"""Compute a Taylor window.
The Taylor window taper function approximates the Dolph-Chebyshev window's
constant sidelobe level for a parameterized number of near-in sidelobes.
This function is consistent with scipy.signal.windows.taylor().
Parameters:
M(int): window size
nbar, sil, norm: the window-specific parameter.
sym(bool):whether to return symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.taylor().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
......@@ -106,7 +112,7 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
# it in the calculation of B. To keep consistent with other methods we
# assume the sidelobe level parameter to be positive.
B = 10**(sll / 20)
A = _acosh(B) / _PI
A = _acosh(B) / math.pi
s2 = nbar**2 / (A**2 + (nbar - 0.5)**2)
ma = paddle.arange(1, nbar, dtype='float32')
......@@ -131,7 +137,7 @@ def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True) -> Tensor:
def W(n):
return 1 + 2 * paddle.matmul(
Fm.unsqueeze(0),
paddle.cos(2 * _PI * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M))
paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M))
w = W(paddle.arange(0, M, dtype='float32'))
......@@ -151,7 +157,7 @@ def general_cosine(M: int, a: float, sym: bool = True) -> Tensor:
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
M, needs_trunc = _extend(M, sym)
fac = paddle.linspace(-_PI, _PI, M)
fac = paddle.linspace(-math.pi, math.pi, M)
w = paddle.zeros((M, ), dtype='float32')
for k in range(len(a)):
w += a[k] * paddle.cos(k * fac)
......@@ -162,8 +168,14 @@ def hamming(M: int, sym: bool = True) -> Tensor:
"""Compute a Hamming window.
The Hamming window is a taper formed by using a raised cosine with
non-zero endpoints, optimized to minimize the nearest side lobe.
This function is consistent with scipy.signal.windows.hamming().
Parameters:
M(int): window size
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.hamming().
"""
return general_hamming(M, 0.54, sym)
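A tiny sketch of the scipy consistency claim in these docstrings; the scipy reference call is real, while the paddleaudio import path for the window functions is an assumption and is therefore left commented out.

import numpy as np
from scipy.signal.windows import hamming as scipy_hamming

ref = scipy_hamming(512, sym=True)            # scipy reference window, shape (512,)
# from paddleaudio.functional import hamming  # hypothetical import path
# np.testing.assert_allclose(hamming(512, sym=True).numpy(), ref, rtol=1e-5)
print(ref.shape)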
......@@ -172,8 +184,14 @@ def hann(M: int, sym: bool = True) -> Tensor:
"""Compute a Hann window.
The Hann window is a taper formed by using a raised cosine or sine-squared
with ends that touch zero.
This function is consistent with scipy.signal.windows.hann().
Parameters:
M(int): window size
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.hann().
"""
return general_hamming(M, 0.5, sym)
......@@ -181,8 +199,14 @@ def hann(M: int, sym: bool = True) -> Tensor:
def tukey(M: int, alpha=0.5, sym: bool = True) -> Tensor:
"""Compute a Tukey window.
The Tukey window is also known as a tapered cosine window.
This function is consistent with scipy.signal.windows.tukey().
Parameters:
M(int): window size
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.tukey().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
......@@ -200,10 +224,10 @@ def tukey(M: int, alpha=0.5, sym: bool = True) -> Tensor:
n2 = n[width + 1:M - width - 1]
n3 = n[M - width - 1:]
w1 = 0.5 * (1 + paddle.cos(_PI * (-1 + 2.0 * n1 / alpha / (M - 1))))
w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
w2 = paddle.ones(n2.shape, dtype='float32')
w3 = 0.5 * (1 + paddle.cos(_PI * (-2.0 / alpha + 1 + 2.0 * n3 / alpha /
(M - 1))))
w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha /
(M - 1))))
w = paddle.concat([w1, w2, w3])
return _truncate(w, needs_trunc)
......@@ -212,6 +236,15 @@ def tukey(M: int, alpha=0.5, sym: bool = True) -> Tensor:
def kaiser(M: int, beta: float, sym: bool = True) -> Tensor:
"""Compute a Kaiser window.
The Kaiser window is a taper formed by using a Bessel function.
Parameters:
M(int): window size.
beta(float): the window-specific parameter.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.kaiser().
"""
......@@ -222,7 +255,15 @@ def gaussian(M: int, std: float, sym: bool = True) -> Tensor:
"""Compute a Gaussian window.
The Gaussian window has a Gaussian shape defined by the standard deviation (std).
This function is consistent with scipy.signal.windows.gaussian().
Parameters:
M(int): window size.
std(float): the window-specific parameter.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.gaussian().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
......@@ -237,8 +278,15 @@ def gaussian(M: int, std: float, sym: bool = True) -> Tensor:
def exponential(M: int, center=None, tau=1., sym: bool = True) -> Tensor:
"""Compute an exponential (or Poisson) window.
This function is consistent with scipy.signal.windows.exponential().
Parameters:
M(int): window size.
tau(float): the window-specific parameter.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.exponential().
"""
if sym and center is not None:
raise ValueError("If sym==True, center must be None.")
......@@ -257,8 +305,14 @@ def exponential(M: int, center=None, tau=1., sym: bool = True) -> Tensor:
def triang(M: int, sym: bool = True) -> Tensor:
"""Compute a triangular window.
This function is consistent with scipy.signal.windows.triang().
Parameters:
M(int): window size.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.triang().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
......@@ -278,15 +332,22 @@ def triang(M: int, sym: bool = True) -> Tensor:
def bohman(M: int, sym: bool = True) -> Tensor:
"""Compute a Bohman window.
The Bohman window is the autocorrelation of a cosine window.
This function is consistent with scipy.signal.windows.bohman().
Parameters:
M(int): window size.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.bohman().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
M, needs_trunc = _extend(M, sym)
fac = paddle.abs(paddle.linspace(-1, 1, M)[1:-1])
w = (1 - fac) * paddle.cos(_PI * fac) + 1.0 / _PI * paddle.sin(_PI * fac)
w = (1 - fac) * paddle.cos(math.pi * fac) + 1.0 / math.pi * paddle.sin(
math.pi * fac)
w = _cat([0, w, 0], 'float32')
return _truncate(w, needs_trunc)
......@@ -299,19 +360,32 @@ def blackman(M: int, sym: bool = True) -> Tensor:
leakage possible. It is close to optimal, only slightly worse than a
Kaiser window.
This function is consistent with scipy.signal.windows.blackman().
Parameters:
M(int): window size.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.blackman().
"""
return general_cosine(M, [0.42, 0.50, 0.08], sym)
def cosine(M: int, sym: bool = True) -> Tensor:
"""Compute a window with a simple cosine shape.
This function is consistent with scipy.signal.windows.cosine().
Parameters:
M(int): window size.
sym(bool): whether to return a symmetric window.
The default value is True
Returns:
Tensor: the window tensor
Notes:
This function is consistent with scipy.signal.windows.cosine().
"""
if _len_guards(M):
return paddle.ones((M, ), dtype='float32')
M, needs_trunc = _extend(M, sym)
w = paddle.sin(_PI / M * (paddle.arange(0, M) + .5))
w = paddle.sin(math.pi / M * (paddle.arange(0, M) + .5))
return _truncate(w, needs_trunc)
......@@ -30,8 +30,8 @@ __all__ = [
'CenterPadding',
'RandomCropping',
'RandomMuLawCodec',
'MuLawDecoding',
'MuLawEncoding',
'MuLawDecoding',
]
......@@ -69,6 +69,19 @@ class STFT(nn.Layer):
The batch_size is set to 1 if the input signal x is a 1-D tensor.
Notes:
The result of the STFT transform is consistent with librosa.stft() for the default parameter setting.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
stft = T.STFT(n_fft=512)
x = paddle.randn((8, 16000,))
y = stft(x)
print(y.shape)
>> [8, 257, 126, 2]
"""
def __init__(self,
n_fft: int = 2048,
......@@ -186,8 +199,19 @@ class Spectrogram(nn.Layer):
The Spectrogram transform relies on STFT transform to compute the spectrogram.
By default, the weights are not learnable. To fine-tune the Fourier coefficients,
set stop_gradient=False before training.
For more information, see STFT().
For more information, see STFT().
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
spectrogram = T.Spectrogram(n_fft=512)
x = paddle.randn((8, 16000))
y = spectrogram(x)
print(y.shape)
>> [8, 257, 126]
"""
super(Spectrogram, self).__init__()
......@@ -259,6 +283,17 @@ class MelSpectrogram(nn.Layer):
By default, the Fourier coefficients are not learnable. To fine-tune the Fourier coefficients,
set stop_gradient=False before training. The fbank matrix is handcrafted and not learnable
regardless of the setting of stop_gradient.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
melspectrogram = T.MelSpectrogram(n_fft=512, n_mels=64)
x = paddle.randn((8, 16000,))
y = melspectrogram(x)
print(y.shape)
>> [8, 64, 126]
"""
super(MelSpectrogram, self).__init__()
......@@ -339,6 +374,18 @@ class LogMelSpectrogram(nn.Layer):
By default, the weights are not learnable. To fine-tune the Fourier coefficients,
set stop_gradient=False before training.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
melspectrogram = T.LogMelSpectrogram(n_fft=512, n_mels=64)
x = paddle.randn((8, 16000,))
y = melspectrogram(x)
print(y.shape)
>> [8, 64, 126]
"""
super(LogMelSpectrogram, self).__init__()
self._melspectrogram = MelSpectrogram(sr, n_fft, hop_length, win_length,
......@@ -388,6 +435,17 @@ class ISTFT(nn.Layer):
- output: the signal represented as a 2-D tensor with shape [batch_size, single_length]
The batch_size is set to 1 if the input signal x is a 1-D tensor.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
stft = T.STFT(n_fft=512)
istft = T.ISTFT(n_fft=512)
x = paddle.randn((8, 16000))
y = istft(stft(x))
print(y.shape)
>> [8, 16000]
"""
def __init__(self,
n_fft: int = 2048,
......@@ -479,6 +537,18 @@ class RandomMasking(nn.Layer):
Notes:
Please refer to paddleaudio.functional.random_masking() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
transform = T.RandomMasking(max_mask_count=10, max_mask_width=2, axis=1)
x = paddle.rand((64, 100))
x = transform(x)
print((x[0, :] == 0).astype('int32').sum())
>> Tensor(shape=[1], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
[8])
"""
def __init__(self,
max_mask_count: int = 3,
......@@ -508,6 +578,21 @@ class Compose():
Parameters:
transforms: a list of transforms.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.randn((2, 18000))
transform = T.Compose([
T.RandomCropping(target_size=16000),
T.MelSpectrogram(sr=16000, n_fft=256, n_mels=64),
T.RandomMasking()
])
y = transform(x)
print(y.shape)
>> [2, 64, 251]
"""
def __init__(self, transforms: List[Any]):
......@@ -537,6 +622,21 @@ class RandomCropping(nn.Layer):
Notes:
Please refer to paddleaudio.functional.RandomCropping() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.rand((64, 100))
transform = T.RandomCropping(target_size=8, axis=1)
y = transform(x)
print(y.shape)
>> [64, 8]
transform = T.RandomCropping(target_size=100, axis=1)
y = transform(x)
print(y.shape)
>> [64, 100]
"""
def __init__(self, target_size: int, axis: int = -1):
super(RandomCropping, self).__init__()
......@@ -562,6 +662,18 @@ class CenterPadding(nn.Layer):
Notes:
Please refer to paddleaudio.functional.center_padding() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.rand((8, 10))
transform = T.CenterPadding(target_size=12, axis=1)
y = transform(x)
print(y.shape)
>> [8, 12]
"""
def __init__(self, target_size: int, axis: int = -1):
super(CenterPadding, self).__init__()
......@@ -588,7 +700,21 @@ class MuLawEncoding(nn.Layer):
the result will be converted to integer in range [0,mu-1]. Otherwise, the
resulting signal is in range [-1,1]
Notes:
Please refer to paddleaudio.functional.mu_encode() for more details.
Please refer to paddleaudio.functional.mu_law_encode() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.randn((2,8))
transform = T.MuLawEncoding()
y = transform(x)
print(y)
>> Tensor(shape=[2, 8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
[[0 , 252, 77 , 250, 221, 34 , 51 , 0 ],
[227, 33 , 0 , 255, 11 , 213, 255, 10 ]])
"""
def __init__(self, mu: int = 256):
......@@ -597,7 +723,7 @@ class MuLawEncoding(nn.Layer):
self.mu = mu
def forward(self, x: Tensor) -> Tensor:
return F.mu_encode(x, mu=self.mu)
return F.mu_law_encode(x, mu=self.mu)
def __repr__(self, ):
return self.__class__.__name__ + f'(mu={self.mu})'
......@@ -613,7 +739,20 @@ class MuLawDecoding(nn.Layer):
quantized(bool): indicate whether the signal has been quantized. The value of quantized parameter should be
consistent with that used in MuLawEncoding.
Notes:
Please refer to paddleaudio.functional.mu_decode() for more details.
Please refer to paddleaudio.functional.mu_law_decode() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.randint(0, 255, shape=(2, 8))
transform = T.MuLawDecoding()
y = transform(x)
print(y)
>> Tensor(shape=[2, 8], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
[[-0.01151094, -0.02702747, 0.00796641, -0.91636580, 0.45497340, 0.49667698, 0.01151095, -0.24569811],
[0.21516445, -0.30633399, 0.01291343, -0.01991909, -0.00904676, 0.00105976, 0.03990653, -0.20584014]])
"""
def __init__(self, mu: int = 256):
......@@ -622,7 +761,7 @@ class MuLawDecoding(nn.Layer):
self.mu = mu
def forward(self, x: Tensor) -> Tensor:
return F.mu_decode(x, mu=self.mu)
return F.mu_law_decode(x, mu=self.mu)
def __repr__(self, ):
return self.__class__.__name__ + f'(mu={self.mu})'
......@@ -640,6 +779,20 @@ class RandomMuLawCodec(nn.Layer):
Notes:
Please refer to MuLawDecoding() and MuLawEncoding() for more details.
Examples:
.. code-block:: python
import paddle
import paddleaudio.transforms as T
x = paddle.randn((2, 8))
transform = T.RandomMuLawCodec()
y = transform(x)
print(y)
>> Tensor(shape=[2, 8], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
[[0.61542195, -0.35218054, 0.30605811, -0.12115669, -0.75794631, 0.03876950, -0.23082513, -0.49945647],
[-0.35218054, -0.87066686, -0.53548712, 1., -1., 0.49945661, 1., -0.93311179]])
"""
def __init__(self, min_mu: int = 63, max_mu: int = 255):
super(RandomMuLawCodec, self).__init__()
......
......@@ -46,6 +46,9 @@ def get_logger(name: Optional[str] = None,
if name is None:
name = __file__
def list_handlers(logger):
return {str(h) for h in logger.handlers}
logger = logging.getLogger(name)
logging_level = getattr(logging, 'INFO')
logger.setLevel(logging_level)
......@@ -55,8 +58,8 @@ def get_logger(name: Optional[str] = None,
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setLevel(logging_level)
stdout_handler.setFormatter(formatter)
logger.addHandler(stdout_handler)
if str(stdout_handler) not in list_handlers(logger):
logger.addHandler(stdout_handler)
if log_dir: #logging to file
if log_file_name is None:
log_file_name = 'log'
......@@ -64,15 +67,17 @@ def get_logger(name: Optional[str] = None,
fh = logging.FileHandler(log_file)
fh.setLevel(logging_level)
fh.setFormatter(formatter)
logger.addHandler(fh)
if str(fh) not in list_handlers(logger):
logger.addHandler(fh)
if use_error_log:
stderr_handler = logging.StreamHandler(sys.stderr)
stderr_handler.setLevel(logging.WARNING)
stderr_handler.setFormatter(formatter)
logger.addHandler(stderr_handler)
if str(stderr_handler) not in list_handlers(logger):
logger.addHandler(stderr_handler)
logger.propagate = False
logger.propagate = 0
return logger
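A self-contained sketch of the handler-deduplication idea used above, built only on the standard logging module: a handler is added only if an equivalent one (same class, stream and level, hence the same repr) is not already registered, so calling the factory twice does not duplicate output.

import logging
import sys

def get_demo_logger(name: str = 'paddleaudio-demo') -> logging.Logger:
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))
    # str(handler) encodes the handler class, stream name and level,
    # so two equivalent stdout handlers compare equal here.
    if str(handler) not in {str(h) for h in logger.handlers}:
        logger.addHandler(handler)
    logger.propagate = False
    return logger

logger = get_demo_logger()
logger = get_demo_logger()      # the second call does not add a duplicate handler
logger.info('logged once')      # printed a single time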
......