V100 训练报错:terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
Created by: liushanshan07
在 PaddleCloud 上使用 V100 训练 CTC OCR 识别模型时,出现如下错误:
terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
what(): an illegal memory access was encountered at [/paddle/paddle/fluid/framework/details/op_handle_base.cc:37]
PaddlePaddle Call Stacks:
0 0x7f441b971d06p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 486
1 0x7f441ccb0fa2p paddle::framework::details::OpHandleBase::~OpHandleBase() + 402
2 0x7f441cc8a9e1p paddle::framework::details::FetchOpHandle::~FetchOpHandle() + 17
3 0x7f441cc4ca4ep std::vector<std::unique_ptr<paddle::framework::details::FetchOpHandle, std::default_delete<paddle::framework::details::FetchOpHandle> >, std::allocator<std::unique_ptr<paddle::framework::details::FetchOpHandle, std::default_delete<paddle::framework::details::FetchOpHandle> > > >::~vector() + 46
4 0x7f441cc4bb46p paddle::framework::details::ThreadedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&) + 4390
5 0x7f441cc4fb37p paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&) + 391
6 0x7f441ba520b9p paddle::framework::ParallelExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&, std::string const&) + 489
7 0x7f441b966540p
8 0x7f441b988414p pybind11::cpp_function::dispatcher(_object*, _object*, _object*) + 2596
9 0x7f44a72b4ddcp PyEval_EvalFrameEx + 19596
10 0x7f44a72b621dp PyEval_EvalCodeEx + 2061
11 0x7f44a72b44f1p PyEval_EvalFrameEx + 17313
12 0x7f44a72b621dp PyEval_EvalCodeEx + 2061
13 0x7f44a72b44f1p PyEval_EvalFrameEx + 17313
14 0x7f44a72b621dp PyEval_EvalCodeEx + 2061
15 0x7f44a72b44f1p PyEval_EvalFrameEx + 17313
16 0x7f44a72b497ep PyEval_EvalFrameEx + 18478
17 0x7f44a72b621dp PyEval_EvalCodeEx + 2061
18 0x7f44a72b6352p PyEval_EvalCode + 50
19 0x7f44a72e0f22p PyRun_FileExFlags + 146
20 0x7f44a72e2459p PyRun_SimpleFileExFlags + 217
21 0x7f44a72f7e9dp Py_Main + 3149
22 0x7f44a64f9bd5p __libc_start_main + 245
23 0x4007a1p
*** Aborted at 1541572190 (unix time) try "date -d @1541572190" if you are using GNU date ***
PC: @ 0x0 (unknown)
*** SIGABRT (@0x386) received by PID 902 (TID 0x7f44a79d1700) from PID 902; stack trace: ***
@ 0x7f44a6f9f160 (unknown)
@ 0x7f44a650d3f7 __GI_raise
@ 0x7f44a650e7d8 __GI_abort
@ 0x7f4435f62c65 __gnu_cxx::__verbose_terminate_handler()
@ 0x7f4435f60e06 __cxxabiv1::__terminate()
@ 0x7f4435f5fec9 __cxa_call_terminate
@ 0x7f4435f60a7a __gxx_personality_v0
@ 0x7f4436432853 _Unwind_RaiseException_Phase2
@ 0x7f4436432beb _Unwind_RaiseException
@ 0x7f4435f61045 __cxa_throw
@ 0x7f441ccb0fc0 paddle::framework::details::OpHandleBase::~OpHandleBase()
@ 0x7f441cc8a9e1 paddle::framework::details::FetchOpHandle::~FetchOpHandle()
@ 0x7f441cc4ca4e std::vector<>::~vector()
@ 0x7f441cc4bb46 paddle::framework::details::ThreadedSSAGraphExecutor::Run()
@ 0x7f441cc4fb37 paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run()
@ 0x7f441ba520b9 paddle::framework::ParallelExecutor::Run()
@ 0x7f441b966540 _ZZN8pybind1112cpp_function10initializeIZN6paddle6pybindL13pybind11_initEvEUlRNS2_9framework16ParallelExecutorERKSt6vectorISsSaISsEERKSsE102_vIS6_SB_SD_EINS_4nameENS_9is_methodENS_7siblingEEEEvOT_PFT0_DpT1_EDpRKT2_ENUlRNS_6detail13function_callEE1_4_FUNESV_
@ 0x7f441b988414 pybind11::cpp_function::dispatcher()
@ 0x7f44a72b4ddc PyEval_EvalFrameEx
@ 0x7f44a72b621d PyEval_EvalCodeEx
@ 0x7f44a72b44f1 PyEval_EvalFrameEx
@ 0x7f44a72b621d PyEval_EvalCodeEx
@ 0x7f44a72b44f1 PyEval_EvalFrameEx
@ 0x7f44a72b621d PyEval_EvalCodeEx
@ 0x7f44a72b44f1 PyEval_EvalFrameEx
@ 0x7f44a72b497e PyEval_EvalFrameEx
@ 0x7f44a72b621d PyEval_EvalCodeEx
@ 0x7f44a72b6352 PyEval_EvalCode
@ 0x7f44a72e0f22 PyRun_FileExFlags
@ 0x7f44a72e2459 PyRun_SimpleFileExFlags
@ 0x7f44a72f7e9d Py_Main
@ 0x7f44a64f9bd5 __libc_start_main
/root/paddlejob/run.sh: line 307: 902 Aborted (core dumped) python train.py