Add batch processing pipeline with xmap_reader.

f2898533 · Xinghai Sun · b56a548e · f2898533 · f2898533
隐藏空白更改
内联 / 并排

Showing 2 changed files with 9 additions and 4 deletions

deep_speech_2/data_utils/data.py deep_speech_2/data_utils/data.py +8 -3

deep_speech_2/train.py deep_speech_2/train.py +1 -1

未找到文件。
--- a/deep_speech_2/data_utils/data.py
+++ b/deep_speech_2/data_utils/data.py
@@ -186,13 +186,18 @@ class DataGenerator(object):
            for instance in instance_reader():
                batch.append(instance)
                if len(batch) == batch_size:
-                    yield self._padding_batch(batch, padding_to, flatten)
+                    yield batch
                    batch = []
            if len(batch) >= min_batch_size:
-                yield self._padding_batch(batch, padding_to, flatten)
+                yield batch
            self._epoch += 1

-        return batch_reader
+        return paddle.reader.xmap_readers(
+            lambda batch: self._padding_batch(batch, padding_to, flatten),
+            batch_reader,
+            process_num=1,
+            buffer_size=8,
+            order=True)

    @property
    def feeding(self):

--- a/deep_speech_2/train.py
+++ b/deep_speech_2/train.py
@@ -101,7 +101,7 @@ def train():
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        pretrained_model_path=args.init_model_path,
-        share_rnn_weights=args.share_weights)
+        share_rnn_weights=args.share_rnn_weights)
    ds2_model.train(
        train_batch_reader=train_batch_reader,
        dev_batch_reader=dev_batch_reader,