Paddle reader线程数过多
Created by: velconia
当前 xreader 的实现(节选自 paddle/reader/decorator.py)如下:
def xreader():
in_queue = Queue(buffer_size)
out_queue = Queue(buffer_size)
out_order = [0]
# start a read worker in a thread
target = order_read_worker if order else read_worker
t = Thread(target=target, args=(reader, in_queue))
t.daemon = True
t.start()
# start several handle_workers
target = order_handle_worker if order else handle_worker
args = (in_queue, out_queue, mapper, out_order) if order else (
in_queue, out_queue, mapper)
workers = []
for i in xrange(process_num):
worker = Thread(target=target, args=args)
worker.daemon = True
workers.append(worker)
for w in workers:
w.start()
这会导致每调用一次 xreader()(即每次重新开始遍历 reader 读取数据时),都会新建 1 个读线程和 process_num 个处理线程,而不是复用已有线程。1000 个 pass 之后,Thread 会非常多,在 PaddleCloud 上实测达到 2000 个,然后无法再申请新的 Thread,之后报错,Traceback 如下:
Traceback (most recent call last):
File "fluid_benchmark.py", line 387, in <module>
main()
File "fluid_benchmark.py", line 362, in main
train_parallel(*train_args)
File "fluid_benchmark.py", line 288, in train_parallel
batch_acc)
File "fluid_benchmark.py", line 107, in test
for batch_id, data in enumerate(test_reader()):
File "/usr/local/lib/python2.7/site-packages/paddle/batch.py", line 35, in batch_reader
for instance in r:
File "/usr/local/lib/python2.7/site-packages/paddle/reader/decorator.py", line 314, in xreader
w.start()
File "/usr/local/lib/python2.7/threading.py", line 745, in start
_start_new_thread(self.__bootstrap, ())