# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import collections import os from typing import List from typing import Tuple from paddle.utils import download from paddle.dataset.common import DATA_HOME from .dataset import AudioClassificationDataset __all__ = [] class ESC50(AudioClassificationDataset): """ The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings suitable for benchmarking methods of environmental sound classification. The dataset consists of 5-second-long recordings organized into 50 semantical classes (with 40 examples per class) Reference: ESC: Dataset for Environmental Sound Classification http://dx.doi.org/10.1145/2733373.2806390 Args: mode (str, optional): It identifies the dataset mode (train or dev). Default:train. split (int, optional): It specify the fold of dev dataset. Default:1. feat_type (str, optional): It identifies the feature type that user wants to extrace of an audio file. Default:raw. archive(dict, optional): it tells where to download the audio archive. Default:None. Returns: :ref:`api_paddle_io_Dataset`. An instance of ESC50 dataset. Examples: .. code-block:: python import paddle mode = 'dev' esc50_dataset = paddle.audio.datasets.ESC50(mode=mode, feat_type='raw') for idx in range(5): audio, label = esc50_dataset[idx] # do something with audio, label print(audio.shape, label) # [audio_data_length] , label_id esc50_dataset = paddle.audio.datasets.ESC50(mode=mode, feat_type='mfcc', n_mfcc=40) for idx in range(5): audio, label = esc50_dataset[idx] # do something with mfcc feature, label print(audio.shape, label) # [feature_dim, length] , label_id """ archive = { 'url': 'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip', 'md5': '7771e4b9d86d0945acce719c7a59305a', } label_list = [ # Animals 'Dog', 'Rooster', 'Pig', 'Cow', 'Frog', 'Cat', 'Hen', 'Insects (flying)', 'Sheep', 'Crow', # Natural soundscapes & water sounds 'Rain', 'Sea waves', 'Crackling fire', 'Crickets', 'Chirping birds', 'Water drops', 'Wind', 'Pouring water', 'Toilet flush', 'Thunderstorm', # Human, non-speech sounds 'Crying baby', 'Sneezing', 'Clapping', 'Breathing', 'Coughing', 'Footsteps', 'Laughing', 'Brushing teeth', 'Snoring', 'Drinking, sipping', # Interior/domestic sounds 'Door knock', 'Mouse click', 'Keyboard typing', 'Door, wood creaks', 'Can opening', 'Washing machine', 'Vacuum cleaner', 'Clock alarm', 'Clock tick', 'Glass breaking', # Exterior/urban noises 'Helicopter', 'Chainsaw', 'Siren', 'Car horn', 'Engine', 'Train', 'Church bells', 'Airplane', 'Fireworks', 'Hand saw', ] meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv') meta_info = collections.namedtuple( 'META_INFO', ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'), ) audio_path = os.path.join('ESC-50-master', 'audio') def __init__( self, mode: str = 'train', split: int = 1, feat_type: str = 'raw', archive=None, **kwargs, ): assert split in range( 1, 6 ), f'The selected split should be integer, and 1 <= split <= 5, but got {split}' if archive is not None: self.archive = archive files, labels = self._get_data(mode, split) super(ESC50, self).__init__( files=files, labels=labels, feat_type=feat_type, **kwargs ) def _get_meta_info(self) -> List[collections.namedtuple]: ret = [] with open(os.path.join(DATA_HOME, self.meta), 'r') as rf: for line in rf.readlines()[1:]: ret.append(self.meta_info(*line.strip().split(','))) return ret def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]: if not os.path.isdir( os.path.join(DATA_HOME, self.audio_path) ) or not os.path.isfile(os.path.join(DATA_HOME, self.meta)): download.get_path_from_url( self.archive['url'], DATA_HOME, self.archive['md5'], decompress=True, ) meta_info = self._get_meta_info() files = [] labels = [] for sample in meta_info: filename, fold, target, _, _, _, _ = sample if mode == 'train' and int(fold) != split: files.append(os.path.join(DATA_HOME, self.audio_path, filename)) labels.append(int(target)) if mode != 'train' and int(fold) == split: files.append(os.path.join(DATA_HOME, self.audio_path, filename)) labels.append(int(target)) return files, labels