urban_sound.py 3.7 KB
Newer Older
K
KP 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import os
from typing import List, Tuple

from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset

__all__ = ['UrbanSound8K']


class UrbanSound8K(AudioClassificationDataset):
    """
    UrbanSound8K dataset contains 8732 labeled sound excerpts (<=4s) of urban
    sounds from 10 classes: air_conditioner, car_horn, children_playing, dog_bark,
    drilling, engine_idling, gun_shot, jackhammer, siren, and street_music. The
    classes are drawn from the urban sound taxonomy.

    Reference:
        A Dataset and Taxonomy for Urban Sound Research
        https://dl.acm.org/doi/10.1145/2647868.2655045
    """

    # NOTE: the attribute name keeps its original spelling ("archieves") because it
    # is part of the class's public surface; renaming it could break external users.
    archieves = [
        {
            'url': 'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz',
            'md5': '9aa69802bbf37fb986f71ec1483a196e',
        },
    ]
    # Human-readable class names.
    # NOTE(review): presumably indexed by the `class_id` column of the metadata
    # file -- confirm against the dataset's documentation.
    label_list = [
        "air_conditioner",
        "car_horn",
        "children_playing",
        "dog_bark",
        "drilling",
        "engine_idling",
        "gun_shot",
        "jackhammer",
        "siren",
        "street_music",
    ]
    # Paths below are relative to DATA_HOME.
    meta = os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv')
    # One record per metadata row; field order follows the CSV column order.
    meta_info = collections.namedtuple(
        'META_INFO', ('filename', 'fsid', 'start', 'end', 'salience', 'fold', 'class_id', 'label'))
    audio_path = os.path.join('UrbanSound8K', 'audio')

    def __init__(self, mode: str = 'train', split: int = 1, feat_type: str = 'raw', **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev). Any value other
                than `train` selects the dev subset.
            split (:obj:`int`, `optional`, defaults to 1):
                It specifies the fold used as the dev dataset; every other fold
                is used for training.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
            **kwargs:
                Extra keyword arguments forwarded unchanged to the parent class.
        """
        files, labels = self._get_data(mode, split)
        super(UrbanSound8K, self).__init__(files=files, labels=labels, feat_type=feat_type, **kwargs)

    def _get_meta_info(self) -> List[tuple]:
        """
        Parse the metadata CSV under DATA_HOME into a list of `meta_info` records.

        The first line (the header) is skipped and blank lines are ignored.
        All fields are kept as strings.
        """
        ret = []
        with open(os.path.join(DATA_HOME, self.meta), 'r', encoding='utf-8') as rf:
            # Drop the header row; the default keeps an empty file from raising.
            next(rf, None)
            for line in rf:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line would otherwise produce a
                    # one-field record and make the namedtuple constructor fail.
                    continue
                ret.append(self.meta_info(*line.split(',')))
        return ret

    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        """
        Collect audio file paths and integer labels for the requested subset.

        Downloads and decompresses the archive into DATA_HOME first if either the
        audio directory or the metadata file is missing.

        Args:
            mode: `train` selects every fold except `split`; any other value
                selects only fold `split`.
            split: Fold number held out from training.

        Returns:
            A `(files, labels)` pair of equal-length lists: absolute-under-DATA_HOME
            audio paths and the corresponding `class_id` values as ints.
        """
        audio_dir = os.path.join(DATA_HOME, self.audio_path)
        meta_file = os.path.join(DATA_HOME, self.meta)
        if not (os.path.isdir(audio_dir) and os.path.isfile(meta_file)):
            download_and_decompress(self.archieves, DATA_HOME)

        want_train = mode == 'train'

        files = []
        labels = []
        for sample in self._get_meta_info():
            in_held_out_fold = int(sample.fold) == split
            # Training takes everything outside the held-out fold; every other
            # mode takes exactly the held-out fold.
            if want_train == in_held_out_fold:
                continue
            files.append(os.path.join(audio_dir, f'fold{sample.fold}', sample.filename))
            labels.append(int(sample.class_id))

        return files, labels