augmentation.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import paddle

from ..backends import depth_convert
from .utils import randint, weighted_sampling

__all__ = ['depth_augment', 'spect_augment', 'random_crop1d', 'random_crop2d']


# example y = depth_augment(y,['int8','int16'],[0.8,0.1])
def depth_augment(y, choices=['int8', 'int16'], probs=[0.5, 0.5]):
    assert len(probs) == len(choices), 'number of choices {} must be equal to size of probs {}'.format(
        len(choices), len(probs))
    k = weighted_sampling(probs)
    #k = randint(len(choices))
    src_depth = y.dtype
    y1 = depth_convert(y, choices[k])
    y2 = depth_convert(y1, src_depth)
    return y2


def adaptive_spect_augment(spect, tempo_axis=0, level=0.1):

    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
    else:
        nf, nt = spect.shape

    time_mask_width = int(nt * level * 0.5)
    freq_mask_width = int(nf * level * 0.5)

    num_time_mask = int(10 * level)
    num_freq_mask = int(10 * level)

    # num_zeros = num_time_mask*time_mask_width*nf + num_freq_mask*freq_mask_width*nt
    # factor = (nt*nf)/(nt*nf-num_zeros)

    if tempo_axis == 0:
        for i in range(num_time_mask):
            start = randint(nt - time_mask_width)
            spect[start:start + time_mask_width, :] = 0
        for i in range(num_freq_mask):
            start = randint(nf - freq_mask_width)
            spect[:, start:start + freq_mask_width] = 0
    else:
        for i in range(num_time_mask):
            start = randint(nt - time_mask_width)
            spect[:, start:start + time_mask_width] = 0
        for i in range(num_freq_mask):
            start = randint(nf - freq_mask_width)
            spect[start:start + freq_mask_width, :] = 0

    return spect


def spect_augment(
    spect,
    tempo_axis=0,
    max_time_mask=3,
    max_freq_mask=3,
    max_time_mask_width=30,
    max_freq_mask_width=20,
):

    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
    else:
        nf, nt = spect.shape

    num_time_mask = randint(max_time_mask)
    num_freq_mask = randint(max_freq_mask)

    time_mask_width = randint(max_time_mask_width)
    freq_mask_width = randint(max_freq_mask_width)

    #print(num_time_mask)
    #print(num_freq_mask)

    if tempo_axis == 0:
        for i in range(num_time_mask):
            start = randint(nt - time_mask_width)
            spect[start:start + time_mask_width, :] = 0
        for i in range(num_freq_mask):
            start = randint(nf - freq_mask_width)
            spect[:, start:start + freq_mask_width] = 0
    else:
        for i in range(num_time_mask):
            start = randint(nt - time_mask_width)
            spect[:, start:start + time_mask_width] = 0
        for i in range(num_freq_mask):
            start = randint(nf - freq_mask_width)
            spect[start:start + freq_mask_width, :] = 0

    return spect


def random_crop1d(y, crop_len):
    assert y.ndim == 1, 'only accept 1d tensor or numpy array'
    n = len(y)
    idx = randint(n - crop_len)
    return y[idx:idx + crop_len]


def random_crop2d(s, crop_len, tempo_axis=0):  # random crop according to temporal direction
    assert tempo_axis < s.ndim, 'axis out of range'
    n = s.shape[tempo_axis]
    idx = randint(high=n - crop_len)
    if type(s) == np.ndarray:
        sli = [slice(None) for i in range(s.ndim)]
        sli[tempo_axis] = slice(idx, idx + crop_len)
        out = s[tuple(sli)]
    else:
        out = paddle.index_select(s, paddle.Tensor(np.array([i for i in range(idx, idx + crop_len)])), axis=tempo_axis)
    return out