Cannot resume single-device training from a ParallelDo checkpoint
Created by: kuke
Error message:
----------- Configuration Arguments -----------
batch_size: 32
checkpoints:
device: GPU
hidden_dim: 1024
infer_models:
init_model_path: ../deep_asr_models/pass.7.ckpt
learning_rate: 0.00016
mean_var: data/global_mean_var_search26kHr
minimum_batch_size: 1
parallel: False
pass_num: 100
print_per_batches: 10
proj_dim: 512
stacked_num: 5
train_feature_lst: data/local_feature.lst
train_label_lst: data/local_label.lst
val_feature_lst: data/val_feature.lst
val_label_lst: data/val_label.lst
------------------------------------------------
Traceback (most recent call last):
File "train.py", line 265, in <module>
train(args)
File "train.py", line 223, in train
return_numpy=False)
File "/home/disk1/liuyibing/envs/paddle_dev_latest/lib/python2.7/site-packages/paddle/fluid/executor.py", line 292, in run
self.executor.run(program.desc, scope, 0, True, True)
paddle.fluid.core.EnforceNotMet: enforce param_dims == ctx->GetInputDim("Moment1") failed, 512, 4096 != 512
Param and Moment1 input of AdamOp should have same dimension at [/home/disk1/liuyibing/paddle_work/Paddle/paddle/fluid/operators/adam_op.cc:63]
PaddlePaddle Call Stacks:
0 0x7f3c064b0916p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 486
1 0x7f3c06a2ac60p paddle::operators::AdamOp::InferShape(paddle::framework::InferShapeContext*) const + 2368
2 0x7f3c06eac968p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 88
3 0x7f3c065379bep paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool) + 1214
4 0x7f3c064c43cbp void pybind11::cpp_function::initialize<pybind11::cpp_function::initialize<void, paddle::framework::Executor, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(void (paddle::framework::Executor::*)(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool)#1}, void, paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(pybind11::cpp_function::initialize<void, paddle::framework::Executor, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, pybind11::name, pybind11::is_method, pybind11::sibling>(void (paddle::framework::Executor::*)(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool)#1}&&, void (*)(paddle::framework::Executor*, paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(pybind11::detail::function_call&)#3}::_FUN(pybind11::detail::function_call) + 555
5 0x7f3c064bde1ap pybind11::cpp_function::dispatcher(_object*, _object*, _object*) + 2602
6 0x7f3c54ebb3d4p PyEval_EvalFrameEx + 25956
7 0x7f3c54ebc120p PyEval_EvalCodeEx + 2240
8 0x7f3c54eba491p PyEval_EvalFrameEx + 22049
9 0x7f3c54ebc120p PyEval_EvalCodeEx + 2240
10 0x7f3c54eba491p PyEval_EvalFrameEx + 22049
11 0x7f3c54ebc120p PyEval_EvalCodeEx + 2240
12 0x7f3c54ebc232p PyEval_EvalCode + 50
13 0x7f3c54ed661cp
14 0x7f3c54ed66f0p PyRun_FileExFlags + 144
15 0x7f3c54ed7bfcp PyRun_SimpleFileExFlags + 220
16 0x7f3c54ee94bcp Py_Main + 3164
17 0x318ae1ecddp __libc_start_main + 253
18 0x400659p