Deep Speech model core dumps when run under nvidia-docker
Created by: reyoung
The log is shown below. It seems that the same memory is being accessed by both C++ and Python.
root@53ac34831359:/ds2/examples/librispeech# bash run_train.sh
----------- Configuration Arguments -----------
augment_conf_path: conf/augmentation.config
batch_size: 64
dev_manifest: data/librispeech/manifest.dev
init_model_path: None
is_local: 1
learning_rate: 0.0005
max_duration: 27.0
mean_std_path: data/librispeech/mean_std.npz
min_duration: 0.0
num_conv_layers: 2
num_iter_print: 100
num_passes: 50
num_proc_data: 12
num_rnn_layers: 3
output_model_dir: ./checkpoints/libri
rnn_layer_size: 2048
share_rnn_weights: 1
shuffle_method: batch_shuffle_clipped
specgram_type: linear
train_manifest: data/librispeech/manifest.train
trainer_count: 1
use_gpu: 1
use_gru: 0
use_sortagrad: 1
vocab_path: data/librispeech/vocab.txt
------------------------------------------------
I0917 06:55:44.676560 8826 Util.cpp:166] commandline: --use_gpu=1 --trainer_count=1
[INFO 2017-09-17 06:55:53,917 layers.py:2539] output for __conv_0__: c = 32, h = 81, w = 54, size = 139968
[INFO 2017-09-17 06:55:53,918 layers.py:3062] output for __batch_norm_0__: c = 32, h = 81, w = 54, size = 139968
[INFO 2017-09-17 06:55:53,919 layers.py:2539] output for __conv_1__: c = 32, h = 41, w = 54, size = 70848
[INFO 2017-09-17 06:55:53,920 layers.py:3062] output for __batch_norm_1__: c = 32, h = 41, w = 54, size = 70848
/ds2/checkpoints/libri
I0917 06:55:53.998572 8826 GradientMachine.cpp:85] Initing parameters..
I0917 06:55:55.796345 8826 GradientMachine.cpp:92] Init parameters done.
...........*** Aborted at 1505631395 (unix time) try "date -d @1505631395" if you are using GNU date ***
PC: @ 0x0 (unknown)
*** SIGSEGV (@0x50) received by PID 8826 (TID 0x7f646527a700) from PID 80; stack trace: ***
@ 0x7f656ec1f390 (unknown)
@ 0x7f656ee3773c (unknown)
@ 0x7f656ee40851 (unknown)
@ 0x7f656ee3b564 (unknown)
@ 0x7f656ee3fda9 (unknown)
@ 0x7f656e98756d (unknown)
@ 0x7f656ee3b564 (unknown)
@ 0x7f656e987624 __libc_dlopen_mode
@ 0x7f656e959a45 (unknown)
@ 0x7f656ec1ca99 __pthread_once_slow
@ 0x7f656e959b64 backtrace
@ 0x7f656c945ec3 check_callers.part.0
@ 0x7f656c946546 can_elide_temp_unary
@ 0x7f656c930f33 array_power
@ 0x55372c PyNumber_Power
@ 0x4c6050 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca099 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4ca8d1 PyEval_EvalFrameEx
@ 0x4c2765 PyEval_EvalCodeEx
@ 0x4de8b8 (unknown)