YOLO-v3 正常训练过程中频繁崩溃;此前已经训练了很多个 epoch,从中间 checkpoint 加载继续训练时崩溃尤其频繁。
Created by: HUSTLX
terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
what():
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::memory::detail::MetadataCache::load(paddle::memory::detail::MemoryBlock const*) const
2 paddle::memory::detail::MemoryBlock::type(paddle::memory::detail::MetadataCache const&) const
3 paddle::memory::detail::BuddyAllocator::Free(void*)
4 void paddle::memory::legacy::Free<paddle::platform::CPUPlace>(paddle::platform::CPUPlace const&, void*, unsigned long)
5 paddle::memory::allocation::NaiveBestFitAllocator::FreeImpl(paddle::memory::allocation::Allocation*)
6 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
7 paddle::operators::Yolov3LossKernel<float>::Compute(paddle::framework::ExecutionContext const&) const
8 std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CPUPlace, false, 0ul, paddle::operators::Yolov3LossKernel<float>, paddle::operators::Yolov3LossKernel<double> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
9 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const
10 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const
11 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&)
12 paddle::framework::details::ComputationOpHandle::RunImpl()
13 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*)
14 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue<unsigned long> > const&, unsigned long*)
15 std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<void>, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&)
16 std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&)
17 ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const
----------------------
Error Message Summary:
----------------------
PaddleCheckError: Expected desc->check_guards() == true, but received desc->check_guards():0 != true:1.
at [/paddle/paddle/fluid/memory/detail/meta_cache.cc:33]
W0907 15:58:50.885568 24554 init.cc:205] *** Aborted at 1599465530 (unix time) try "date -d @1599465530" if you are using GNU date ***
W0907 15:58:50.888680 24554 init.cc:205] PC: @ 0x0 (unknown)
W0907 15:58:50.888793 24554 init.cc:205] *** SIGABRT (@0x1f4000057e9) received by PID 22505 (TID 0x7fe887fff700) from PID 22505; stack trace: ***
W0907 15:58:50.891361 24554 init.cc:205] @ 0x7feb90638130 (unknown)
W0907 15:58:50.893832 24554 init.cc:205] @ 0x7feb8fb949d9 __GI_raise
W0907 15:58:50.896459 24554 init.cc:205] @ 0x7feb8fb960e8 __GI_abort
W0907 15:58:50.898330 24554 init.cc:205] @ 0x7feb7a2723df __gnu_cxx::__verbose_terminate_handler()
W0907 15:58:50.899927 24554 init.cc:205] @ 0x7feb7a270b16 __cxxabiv1::__terminate()
W0907 15:58:50.901532 24554 init.cc:205] @ 0x7feb7a26ff91 __cxa_call_terminate
W0907 15:58:50.903163 24554 init.cc:205] @ 0x7feb7a27079d __gxx_personality_v0
W0907 15:58:50.905349 24554 init.cc:205] @ 0x7feb88770f56 _Unwind_RaiseException_Phase2
W0907 15:58:50.907541 24554 init.cc:205] @ 0x7feb887713e9 _Unwind_Resume
W0907 15:58:50.910499 24554 init.cc:205] @ 0x7feb537d33cd paddle::memory::detail::BuddyAllocator::Free()
W0907 15:58:50.916157 24554 init.cc:205] @ 0x7feb537c2d65 paddle::memory::legacy::Free<>()
W0907 15:58:50.919320 24554 init.cc:205] @ 0x7feb537c3bf5 paddle::memory::allocation::NaiveBestFitAllocator::FreeImpl()
W0907 15:58:50.921888 24554 init.cc:205] @ 0x7feb50fb1619 std::_Sp_counted_base<>::_M_release()
W0907 15:58:50.926118 24554 init.cc:205] @ 0x7feb5307f313 paddle::operators::Yolov3LossKernel<>::Compute()
W0907 15:58:50.929284 24554 init.cc:205] @ 0x7feb5307ff63 _ZNSt17_Function_handlerIFvRKN6paddle9framework16ExecutionContextEEZNKS1_24OpKernelRegistrarFunctorINS0_8platform8CPUPlaceELb0ELm0EJNS0_9operators16Yolov3LossKernelIfEENSA_IdEEEEclEPKcSF_iEUlS4_E_E9_M_invokeERKSt9_Any_dataS4_
W0907 15:58:50.933338 24554 init.cc:205] @ 0x7feb5372e5eb paddle::framework::OperatorWithKernel::RunImpl()
W0907 15:58:50.943776 24554 init.cc:205] @ 0x7feb5372ebe1 paddle::framework::OperatorWithKernel::RunImpl()
W0907 15:58:50.948038 24554 init.cc:205] @ 0x7feb53728a1c paddle::framework::OperatorBase::Run()
W0907 15:58:50.952919 24554 init.cc:205] @ 0x7feb53512996 paddle::framework::details::ComputationOpHandle::RunImpl()
W0907 15:58:50.957610 24554 init.cc:205] @ 0x7feb534ccfb6 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync()
W0907 15:58:50.962818 24554 init.cc:205] @ 0x7feb534cbcff paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp()
W0907 15:58:50.964987 24554 init.cc:205] @ 0x7feb534cbfc4 _ZNSt17_Function_handlerIFvvESt17reference_wrapperISt12_Bind_simpleIFS1_ISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS6_12OpHandleBaseESt6atomicIiESt4hashISA_ESt8equal_toISA_ESaISt4pairIKSA_SC_EEESA_RKSt10shared_ptrINS5_13BlockingQueueImEEEEUlvE_vEEEvEEEE9_M_invokeERKSt9_Any_data
W0907 15:58:50.970392 24554 init.cc:205] @ 0x7feb51225213 std::_Function_handler<>::_M_invoke()
W0907 15:58:50.975973 24554 init.cc:205] @ 0x7feb5107fab7 std::__future_base::_State_base::_M_do_set()
W0907 15:58:50.978785 24554 init.cc:205] @ 0x7feb90635be0 __GI___pthread_once
W0907 15:58:50.980633 24554 init.cc:205] @ 0x7feb534c77a2 _ZNSt13__future_base11_Task_stateISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS4_12OpHandleBaseESt6atomicIiESt4hashIS8_ESt8equal_toIS8_ESaISt4pairIKS8_SA_EEES8_RKSt10shared_ptrINS3_13BlockingQueueImEEEEUlvE_vEESaIiEFvvEE6_M_runEv
W0907 15:58:50.985916 24554 init.cc:205] @ 0x7feb51081034 _ZZN10ThreadPoolC1EmENKUlvE_clEv
W0907 15:58:50.987867 24554 init.cc:205] @ 0x7feb7a28c678 execute_native_thread_routine_compat
W0907 15:58:50.990347 24554 init.cc:205] @ 0x7feb90630df3 start_thread
W0907 15:58:50.993029 24554 init.cc:205] @ 0x7feb8fc552cd __clone
W0907 15:58:50.995551 24554 init.cc:205] @ 0x0 (unknown)
train.sh: line 17: 22505 Aborted python train_local.py --model_save_dir=snapshot/ --data_dir=data/logo_data/ --class_num=23 --pretrain=snapshot/model_iter24000/ --input_size=608 --batch_size=20 --start_iter=24001 --snapshot_iter=2000