DeepSpeech2 core dump when training
Created by: jacquesqiao
root@202d9cb56687:/baidu/models/deep_speech_2# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u train.py --trainer_count 8 --train_manifest='data/librispeech/manifest.train-clean-100' --dev_manifest='data/librispeech/manifest.dev-clean'
----------- Configuration Arguments -----------
augment_conf_path: conf/augmentation.config
batch_size: 256
dev_manifest: data/librispeech/manifest.dev-clean
init_model_path: None
is_local: True
learning_rate: 0.0005
max_duration: 27.0
mean_std_path: data/librispeech/mean_std.npz
min_duration: 0.0
num_conv_layers: 2
num_iter_print: 100
num_passes: 200
num_proc_data: 12
num_rnn_layers: 3
output_model_dir: ./checkpoints/libri
rnn_layer_size: 2048
share_rnn_weights: True
shuffle_method: batch_shuffle_clipped
specgram_type: linear
train_manifest: data/librispeech/manifest.train-clean-100
trainer_count: 8
use_gpu: True
use_gru: False
use_sortagrad: True
vocab_path: data/librispeech/vocab.txt
------------------------------------------------
I0917 02:31:41.509305 862 Util.cpp:166] commandline: --use_gpu=True --trainer_count=8
[INFO 2017-09-17 02:31:45,061 layers.py:2539] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968
[INFO 2017-09-17 02:31:45,063 layers.py:3062] output for __batch_norm_0__: c = 32, h = 81, w = 54, size = 139968
[INFO 2017-09-17 02:31:45,064 layers.py:2539] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848
[INFO 2017-09-17 02:31:45,065 layers.py:3062] output for __batch_norm_1__: c = 32, h = 41, w = 54, size = 70848
[INFO 2017-09-17 02:31:45,064 layers.py:2539] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848
[INFO 2017-09-17 02:31:45,065 layers.py:3062] output for __batch_norm_1__: c = 32, h = 41, w = 54, size = 70848
I0917 02:31:45.082847 862 MultiGradientMachine.cpp:99] numLogicalDevices=1 numThreads=8 numDevices=8
I0917 02:31:45.264881 862 GradientMachine.cpp:85] Initing parameters..
I0917 02:31:47.380720 862 GradientMachine.cpp:92] Init parameters done.
*** Aborted at 1505615508 (unix time) try "date -d @1505615508" if you are using GNU date ***
PC: @ 0x0 (unknown)
*** SIGSEGV (@0x50) received by PID 862 (TID 0x7f2f477fe700) from PID 80; stack trace: ***
@ 0x7f302047d390 (unknown)
@ 0x7f302069573c (unknown)
@ 0x7f302069e851 (unknown)
@ 0x7f3020699564 (unknown)
@ 0x7f302069dda9 (unknown)
@ 0x7f30201e556d (unknown)
@ 0x7f3020699564 (unknown)
@ 0x7f30201e5624 __libc_dlopen_mode
@ 0x7f30201b7a45 (unknown)
@ 0x7f302047aa99 __pthread_once_slow
@ 0x7f30201b7b64 backtrace
@ 0x7f301e1d7ec3 check_callers.part.0
@ 0x7f301e1d8546 can_elide_temp_unary
@ 0x7f301e1c2f33 array_power
@ 0x55372c PyNumber_Power
@ 0x4c6050 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4de8b8 (unknown)
Segmentation fault (core dumped)