未验证 提交 0ad8918f 编写于 作者: Y Yang yaming 提交者: GitHub

Merge pull request #154 from loongw/fix148

Clean up worker processes at the end of every epoch.
@@ -11,7 +11,6 @@ import multiprocessing
 import numpy as np
 import paddle.v2 as paddle
 from threading import local
-import atexit
 from data_utils.utility import read_manifest
 from data_utils.utility import xmap_readers_mp
 from data_utils.augmentor.augmentation import AugmentationPipeline
@@ -194,15 +193,18 @@ class DataGenerator(object):
                     raise ValueError("Unknown shuffle method %s." %
                                      shuffle_method)
             # prepare batches
-            instance_reader = self._instance_reader_creator(manifest)
+            instance_reader, cleanup = self._instance_reader_creator(manifest)
             batch = []
-            for instance in instance_reader():
-                batch.append(instance)
-                if len(batch) == batch_size:
+            try:
+                for instance in instance_reader():
+                    batch.append(instance)
+                    if len(batch) == batch_size:
+                        yield self._padding_batch(batch, padding_to, flatten)
+                        batch = []
+                if len(batch) >= min_batch_size:
                     yield self._padding_batch(batch, padding_to, flatten)
-                    batch = []
-            if len(batch) >= min_batch_size:
-                yield self._padding_batch(batch, padding_to, flatten)
+            finally:
+                cleanup()
             self._epoch += 1
         return batch_reader

@@ -280,10 +282,7 @@ class DataGenerator(object):
             lambda instance: self.process_utterance(instance["audio_filepath"], instance["text"]),
             reader, self._num_threads, 4096)

-        # register callback to main process
-        atexit.register(cleanup_callback)
-
-        return reader
+        return reader, cleanup_callback

     def _padding_batch(self, batch, padding_to=-1, flatten=False):
         """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册