Created by: sneaxiy
Fixes https://github.com/PaddlePaddle/models/issues/3011.
The error message reported in that issue looks like the following:
Out of memory error on GPU 0. Cannot allocate 145.468994MB memory on GPU 0, available memory is only 30.312500MB.
Please check whether there is any other process using GPU 0.
1. If yes, please stop them, or start PaddlePaddle on another GPU.
2. If no, please try one of the following suggestions:
1) Decrease the batch size of your model.
2) FLAGS_fraction_of_gpu_memory_to_use is 0.92 now, please set it to a higher value but less than 1.0.
The command is `export FLAGS_fraction_of_gpu_memory_to_use=xxx`.
at [/Paddle/Paddle/paddle/fluid/memory/detail/system_allocator.cc:142]
PaddlePaddle Call Stacks:
0 0x7efcf79c0e0fp std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > paddle::platform::GetTraceBackString<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&&, char const*, int) + 1519
1 0x7efcf79e4443p paddle::memory::detail::GPUAllocator::Alloc(unsigned long*, unsigned long) + 1507
2 0x7efcf79d524dp paddle::memory::detail::BuddyAllocator::RefillPool(unsigned long) + 141
3 0x7efcf79d5a9bp paddle::memory::detail::BuddyAllocator::Alloc(unsigned long) + 763
4 0x7efcf79c425ep void* paddle::memory::legacy::Alloc<paddle::platform::CUDAPlace>(paddle::platform::CUDAPlace const&, unsigned long) + 62
5 0x7efcf79c4850p paddle::memory::allocation::NaiveBestFitAllocator::AllocateImpl(unsigned long) + 384
6 0x7efcf79c326cp paddle::memory::allocation::Allocator::Allocate(unsigned long) + 44
7 0x7efcf79c2e62p paddle::memory::allocation::RetryAllocator::AllocateImpl(unsigned long) + 770
8 0x7efcf79b8b87p paddle::memory::allocation::AllocatorFacade::Alloc(boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, unsigned long) + 247
9 0x7efcf79b8deap paddle::memory::allocation::AllocatorFacade::AllocShared(boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, unsigned long) + 42
10 0x7efcf73fe68bp paddle::memory::AllocShared(boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, unsigned long) + 59
11 0x7efcf79951aep paddle::framework::Tensor::mutable_data(boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_>, paddle::framework::proto::VarType_Type, unsigned long) + 238
12 0x7efcf7040f2dp paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, float>::Compute(paddle::framework::ExecutionContext const&) const + 477
13 0x7efcf7041d7fp std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, double>, paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, int>, paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, long>, paddle::operators::ElementwiseAddKernel<paddle::platform::CUDADeviceContext, paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&) + 47
14 0x7efcf791eed7p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const + 359
15 0x7efcf791fda7p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 487
16 0x7efcf79188cep paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 318
17 0x7efcf75c1dc6p paddle::framework::details::ComputationOpHandle::RunImpl() + 166
18 0x7efcf7564b56p paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*) + 358
19 0x7efcf75601c2p paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue<unsigned long> > const&, unsigned long*) + 50