[Cherry-pick][audio] fix tess split fold (#47350)

* fix tess split fold
* format

[Cherry-pick][audio] fix tess split fold (#47350)
* fix tess split fold
* format
85094bce · YangZhou · GitHub · 12e6dfcf · 85094bce · 85094bce
Hide whitespace changes
Inline | Side-by-side

Showing 2 changed files with 57 additions and 43 deletions

python/paddle/audio/datasets/esc50.py +25 -17

python/paddle/audio/datasets/tess.py +32 -26

No files found.
--- a/python/paddle/audio/datasets/esc50.py
+++ b/python/paddle/audio/datasets/esc50.py
@@ -133,22 +133,27 @@ class ESC50(AudioClassificationDataset):
    meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
    meta_info = collections.namedtuple(
        'META_INFO',
-        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'))
+        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'),
+    )
    audio_path = os.path.join('ESC-50-master', 'audio')
-    def __init__(self,
+    def __init__(
-                 mode: str = 'train',
+        self,
-                 split: int = 1,
+        mode: str = 'train',
-                 feat_type: str = 'raw',
+        split: int = 1,
-                 archive=None,
+        feat_type: str = 'raw',
-                 **kwargs):
+        archive=None,
+        **kwargs,
+    ):
+        assert split in range(
+            1, 6
+        ), f'The selected split should be integer, and 1 <= split <= 5, but got {split}'
        if archive is not None:
            self.archive = archive
        files, labels = self._get_data(mode, split)
-        super(ESC50, self).__init__(files=files,
+        super(ESC50, self).__init__(
-                                    labels=labels,
+            files=files, labels=labels, feat_type=feat_type, **kwargs
-                                    feat_type=feat_type,
+        )
-                                    **kwargs)
    def _get_meta_info(self) -> List[collections.namedtuple]:
        ret = []
@@ -158,12 +163,15 @@ class ESC50(AudioClassificationDataset):
        return ret
    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
-        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
+        if not os.path.isdir(
-            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
+            os.path.join(DATA_HOME, self.audio_path)
-            download.get_path_from_url(self.archive['url'],
+        ) or not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-                                       DATA_HOME,
+            download.get_path_from_url(
-                                       self.archive['md5'],
+                self.archive['url'],
-                                       decompress=True)
+                DATA_HOME,
+                self.archive['md5'],
+                decompress=True,
+            )
        meta_info = self._get_meta_info()

--- a/python/paddle/audio/datasets/tess.py
+++ b/python/paddle/audio/datasets/tess.py
@@ -71,8 +71,7 @@ class TESS(AudioClassificationDataset):
    """
    archive = {
-        'url':
+        'url': 'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
-        'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
        'md5': '1465311b24d1de704c4c63e4ccc470c7',
    }
@@ -85,28 +84,32 @@ class TESS(AudioClassificationDataset):
        'ps',  # pleasant surprise
        'sad',
    ]
-    meta_info = collections.namedtuple('META_INFO',
+    meta_info = collections.namedtuple(
-                                       ('speaker', 'word', 'emotion'))
+        'META_INFO', ('speaker', 'word', 'emotion')
+    )
    audio_path = 'TESS_Toronto_emotional_speech_set'
-    def __init__(self,
+    def __init__(
-                 mode='train',
+        self,
-                 n_folds=5,
+        mode: str = 'train',
-                 split=1,
+        n_folds: int = 5,
-                 feat_type='raw',
+        split: int = 1,
-                 archive=None,
+        feat_type: str = 'raw',
-                 **kwargs):
+        archive=None,
-        """
+        **kwargs,
+    ):
-        """
+        assert isinstance(n_folds, int) and (
-        assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
+            n_folds >= 1
+        ), f'the n_folds should be integer and n_folds >= 1, but got {n_folds}'
+        assert split in range(
+            1, n_folds + 1
+        ), f'The selected split should be integer and should be 1 <= split <= {n_folds}, but got {split}'
        if archive is not None:
            self.archive = archive
        files, labels = self._get_data(mode, n_folds, split)
-        super(TESS, self).__init__(files=files,
+        super(TESS, self).__init__(
-                                   labels=labels,
+            files=files, labels=labels, feat_type=feat_type, **kwargs
-                                   feat_type=feat_type,
+        )
-                                   **kwargs)
    def _get_meta_info(self, files) -> List[collections.namedtuple]:
        ret = []
@@ -115,12 +118,16 @@ class TESS(AudioClassificationDataset):
            ret.append(self.meta_info(*basename_without_extend.split('_')))
        return ret
-    def _get_data(self, mode, n_folds, split) -> Tuple[List[str], List[int]]:
+    def _get_data(
+        self, mode: str, n_folds: int, split: int
+    ) -> Tuple[List[str], List[int]]:
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)):
-            download.get_path_from_url(self.archive['url'],
+            download.get_path_from_url(
-                                       DATA_HOME,
+                self.archive['url'],
-                                       self.archive['md5'],
+                DATA_HOME,
-                                       decompress=True)
+                self.archive['md5'],
+                decompress=True,
+            )
        wav_files = []
        for root, _, files in os.walk(os.path.join(DATA_HOME, self.audio_path)):
@@ -132,11 +139,10 @@ class TESS(AudioClassificationDataset):
        files = []
        labels = []
-        n_samples_per_fold = len(meta_info) // n_folds
        for idx, sample in enumerate(meta_info):
            _, _, emotion = sample
            target = self.label_list.index(emotion)
-            fold = idx // n_samples_per_fold + 1
+            fold = idx % n_folds + 1
            if mode == 'train' and int(fold) != split:
                files.append(wav_files[idx])