Unverified commit 0a73f720, authored by Zeng Jinle, committed by GitHub

Add retry_allocator for gpu (#19409)

* add retry_allocator for gpu, test=develop

* follow chengduoZH's comments, test=develop

* follow huihuang's comments,test=develop

* change f,l in enforce.h to be file,line, test=develop

* increase code coverage by adding unittests, test=develop

* fix CMakeLists.txt, test=develop
Parent c756b5d2
@@ -46,7 +46,12 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy)
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator locked_allocator cpu_allocator)
 if (WITH_TESTING)
+  if (WITH_GPU)
+    target_link_libraries(retry_allocator_test cuda_allocator)
+  endif()
   set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+  set_property(TEST retry_allocator_test PROPERTY ENVIRONMENT GLOG_vmodule=retry_allocator=10)
 endif()
 cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade)
...
@@ -19,6 +19,7 @@
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/inlined_vector.h"
+#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 
 namespace paddle {
@@ -26,14 +27,14 @@ namespace memory {
 namespace allocation {
 
 // Exception when `Alloc`/`AllocShared` failed
-class BadAlloc : public std::exception {
- public:
-  inline explicit BadAlloc(std::string msg) : msg_(std::move(msg)) {}
-
-  inline const char* what() const noexcept override { return msg_.c_str(); }
-
- private:
-  std::string msg_;
+struct BadAlloc : public std::exception {
+  inline explicit BadAlloc(std::string err_msg, const char* file, int line)
+      : err_str_(platform::GetTraceBackString(std::move(err_msg), file, line)) {
+  }
+
+  const char* what() const noexcept override { return err_str_.c_str(); }
+
+  std::string err_str_;
 };
 
 class Allocator;
...
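Reviewer note: the hunk above replaces the plain message-only `BadAlloc` with one that records the throw site (and a backtrace) via `platform::GetTraceBackString`. Below is a minimal, self-contained sketch of the same idea; `MakeLocatedMessage` is an illustrative stand-in, not Paddle's API, and no real stack unwinding is done.

```cpp
// Sketch only: mimics the new BadAlloc layout (message + file + line folded
// into a single err_str_), without Paddle's backtrace machinery.
#include <exception>
#include <string>
#include <utility>

inline std::string MakeLocatedMessage(std::string what, const char* file,
                                      int line) {
  // Stand-in for platform::GetTraceBackString.
  return std::move(what) + " at [" + file + ":" + std::to_string(line) + "]";
}

struct BadAllocSketch : public std::exception {
  BadAllocSketch(std::string err_msg, const char* file, int line)
      : err_str_(MakeLocatedMessage(std::move(err_msg), file, line)) {}

  const char* what() const noexcept override { return err_str_.c_str(); }

  std::string err_str_;
};
```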
@@ -37,7 +37,7 @@
 #endif
 
 DEFINE_int64(
-    gpu_allocator_retry_time, 0,
+    gpu_allocator_retry_time, 100,
     "The retry time (milliseconds) when allocator fails "
     "to allocate memory. No retry if this value is not greater than 0");
@@ -80,6 +80,12 @@ class AllocatorFacadePrivate {
       }
     }
     InitZeroSizeAllocators();
+
+    if (FLAGS_gpu_allocator_retry_time > 0) {
+      WrapCUDARetryAllocator(FLAGS_gpu_allocator_retry_time);
+    }
+
+    CheckAllocThreadSafe();
   }
 
   inline const std::shared_ptr<Allocator>& GetAllocator(
@@ -118,6 +124,8 @@ class AllocatorFacadePrivate {
  public:
   explicit ZeroSizeAllocator(platform::Place place) : place_(place) {}
 
+  bool IsAllocThreadSafe() const override { return true; }
+
  protected:
   Allocation* AllocateImpl(size_t size) override {
     return new Allocation(nullptr, 0, place_);
@@ -145,6 +153,25 @@ class AllocatorFacadePrivate {
     }
   }
 
+  void CheckAllocThreadSafe() const {
+    for (auto& pair : allocators_) {
+      PADDLE_ENFORCE_EQ(pair.second->IsAllocThreadSafe(), true);
+    }
+
+    for (auto& pair : zero_size_allocators_) {
+      PADDLE_ENFORCE_EQ(pair.second->IsAllocThreadSafe(), true);
+    }
+  }
+
+  void WrapCUDARetryAllocator(size_t retry_time) {
+    PADDLE_ENFORCE_GT(retry_time, 0, "Retry time must be larger than 0");
+    for (auto& pair : allocators_) {
+      if (platform::is_gpu_place(pair.first)) {
+        pair.second = std::make_shared<RetryAllocator>(pair.second, retry_time);
+      }
+    }
+  }
+
  private:
   std::map<platform::Place, std::shared_ptr<Allocator>> allocators_;
   std::map<platform::Place, std::shared_ptr<Allocator>> zero_size_allocators_;
...
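The hunk above adds two post-construction steps: every GPU-place allocator is wrapped in a `RetryAllocator` when `FLAGS_gpu_allocator_retry_time > 0`, and all registered allocators are then asserted to be thread-safe. A simplified sketch of the wrapping step follows, using illustrative types (not Paddle's place or allocator classes).

```cpp
// Hedged sketch of the WrapCUDARetryAllocator shape: decorate only GPU-place
// allocators, keeping the original allocator alive through shared_ptr.
#include <cstddef>
#include <map>
#include <memory>

struct AllocatorBase {
  virtual ~AllocatorBase() = default;
};

struct RetryDecorator : AllocatorBase {
  RetryDecorator(std::shared_ptr<AllocatorBase> underlying, size_t retry_ms)
      : underlying_(std::move(underlying)), retry_ms_(retry_ms) {}
  std::shared_ptr<AllocatorBase> underlying_;
  size_t retry_ms_;
};

enum class PlaceKind { kCPU, kCUDA, kCUDAPinned };

void WrapGPUWithRetry(std::map<PlaceKind, std::shared_ptr<AllocatorBase>>* m,
                      size_t retry_ms) {
  for (auto& pair : *m) {
    if (pair.first == PlaceKind::kCUDA) {
      // Replace the mapped allocator with a retrying wrapper around it.
      pair.second = std::make_shared<RetryDecorator>(pair.second, retry_ms);
    }
  }
}
```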
@@ -150,8 +150,8 @@ Allocation* BestFitAllocator::AllocateImpl(size_t size) {
     }
   }
   if (UNLIKELY(highest_set_bit == free_chunks_.size())) {
-    throw BadAlloc(string::Sprintf(
-        "Cannot allocate %d, All fragments size is %d", size, FreeSize()));
+    PADDLE_THROW_BAD_ALLOC("Cannot allocate %d, All fragments size is %d", size,
+                           FreeSize());
   }
   auto chunk_it = SplitChunk(size, highest_set_bit, map_it);
   return new BestFitAllocation(this, chunk_it);
...
@@ -17,6 +17,7 @@
 #include <cuda_runtime.h>
 #include <string>
 #include "paddle/fluid/platform/cuda_device_guard.h"
+#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/gpu_info.h"
 
 namespace paddle {
@@ -36,9 +37,9 @@ Allocation* CUDAAllocator::AllocateImpl(size_t size) {
   void* ptr;
   auto status = cudaMalloc(&ptr, size);
   if (UNLIKELY(status != cudaSuccess)) {
-    throw BadAlloc(string::Sprintf(
-        "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
-        status, cudaGetErrorString(status)));
+    PADDLE_THROW_BAD_ALLOC("Cannot allocate %d on GPU %d, cuda status %d, %s",
+                           size, place_.device, status,
+                           cudaGetErrorString(status));
   }
   return new Allocation(ptr, size, platform::Place(place_));
 }
...
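The change above keeps the standard `cudaMalloc` error check but turns a non-`cudaSuccess` status into a `BadAlloc` carrying the size, device, status code, and `cudaGetErrorString` text. A standalone sketch of the same conversion, using a plain `std::runtime_error` instead of Paddle's exception and assuming only that the CUDA runtime is linked:

```cpp
// Sketch of the cudaMalloc failure-to-exception conversion; not Paddle's code.
#include <cuda_runtime.h>

#include <cstddef>
#include <stdexcept>
#include <string>

void* DeviceAlloc(int device, size_t size) {
  cudaSetDevice(device);  // analogous to CUDADeviceGuard in the real allocator
  void* ptr = nullptr;
  cudaError_t status = cudaMalloc(&ptr, size);
  if (status != cudaSuccess) {
    throw std::runtime_error(
        "Cannot allocate " + std::to_string(size) + " bytes on GPU " +
        std::to_string(device) + ", cuda status " + std::to_string(status) +
        ", " + cudaGetErrorString(status));
  }
  return ptr;
}
```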
@@ -28,6 +28,8 @@ class NaiveBestFitAllocator : public Allocator {
  public:
   explicit NaiveBestFitAllocator(const platform::Place &p) : place_(p) {}
 
+  bool IsAllocThreadSafe() const override { return true; }
+
  protected:
   Allocation *AllocateImpl(size_t size) override;
   void FreeImpl(Allocation *allocation) override;
...
@@ -13,14 +13,40 @@
 // limitations under the License.
 
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 
 namespace paddle {
 namespace memory {
 namespace allocation {
 
+class WaitedAllocateSizeGuard {
+ public:
+  WaitedAllocateSizeGuard(std::atomic<size_t>* waited_size,
+                          size_t requested_size)
+      : waited_size_(waited_size), requested_size_(requested_size) {
+    waited_size_->fetch_add(requested_size_,
+                            std::memory_order::memory_order_relaxed);
+  }
+
+  ~WaitedAllocateSizeGuard() {
+    waited_size_->fetch_sub(requested_size_,
+                            std::memory_order::memory_order_relaxed);
+  }
+
+ private:
+  std::atomic<size_t>* waited_size_;
+  size_t requested_size_;
+};
+
 void RetryAllocator::FreeImpl(Allocation* allocation) {
   // Delete underlying allocation first.
+  size_t size = allocation->size();
   underlying_allocator_->Free(allocation);
-  cv_.notify_all();
+  if (UNLIKELY(waited_allocate_size_)) {
+    VLOG(10) << "Free " << size << " bytes and notify all waited threads, "
+                "where waited_allocate_size_ = "
+             << waited_allocate_size_;
+    cv_.notify_all();
+  }
 }
 
 Allocation* RetryAllocator::AllocateImpl(size_t size) {
@@ -31,29 +57,38 @@ Allocation* RetryAllocator::AllocateImpl(size_t size) {
   // But it would add lock even when allocation success at the first time
   try {
     return alloc_func();
-  } catch (BadAlloc& bad_alloc) {
+  } catch (BadAlloc&) {
     {
+      WaitedAllocateSizeGuard guard(&waited_allocate_size_, size);
+      VLOG(10) << "Allocation failed when allocating " << size
+               << " bytes, waited_allocate_size_ = " << waited_allocate_size_;
       // We can just write allocation retry inside the predicate function of
-      // wait_until
-      // But it needs to acquire the lock when executing predicate function
-      // For better performance, we use loop here
+      // wait_until. But it needs to acquire the lock when executing predicate
+      // function. For better performance, we use loop here
       auto end_time = std::chrono::high_resolution_clock::now() + retry_time_;
       auto wait_until = [&, this] {
         std::unique_lock<std::mutex> lock(mutex_);
         return cv_.wait_until(lock, end_time);
       };
+
+      size_t retry_time = 0;
       while (wait_until() != std::cv_status::timeout) {
         try {
           return alloc_func();
-        } catch (BadAlloc& ex) {
-          bad_alloc = ex;
+        } catch (BadAlloc&) {
+          // do nothing when it is not timeout
+          ++retry_time;
+          VLOG(10) << "Allocation failed when retrying " << retry_time
+                   << " times when allocating " << size
+                   << " bytes. Wait still.";
         } catch (...) {
           throw;
         }
       }
-      throw;  // rethrow the original exception or throw the internal bad_alloc
     }
+    VLOG(10) << "Allocation failed because of timeout when allocating " << size
+             << " bytes.";
+    return alloc_func();  // If timeout, try last allocation request.
   } catch (...) {
     throw;
   }
...
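To make the retry control flow easier to follow outside the diff, here is a self-contained sketch of the same scheme: attempt once without locking, then wait on a condition variable (signalled by the free path) until a deadline, retry after each wake-up, and make one final attempt after the timeout so the last failure propagates to the caller. The function names and the use of `std::bad_alloc` are illustrative, not Paddle's API.

```cpp
// Minimal sketch of the retry-until-deadline pattern used by RetryAllocator.
#include <chrono>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <new>

void* AllocateWithRetry(const std::function<void*()>& alloc_func,
                        std::mutex& mutex, std::condition_variable& cv,
                        std::chrono::milliseconds retry_time) {
  try {
    return alloc_func();  // fast path: no locking when allocation succeeds
  } catch (const std::bad_alloc&) {
    auto end_time = std::chrono::high_resolution_clock::now() + retry_time;
    auto wait_until = [&] {
      std::unique_lock<std::mutex> lock(mutex);
      return cv.wait_until(lock, end_time);  // woken by Free() via notify_all
    };
    while (wait_until() != std::cv_status::timeout) {
      try {
        return alloc_func();
      } catch (const std::bad_alloc&) {
        // not a timeout yet: keep waiting for more memory to be freed
      }
    }
    return alloc_func();  // last attempt after timeout; rethrows on failure
  }
}
```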
@@ -14,12 +14,14 @@
 
 #pragma once
 
+#include <atomic>              // NOLINT
 #include <chrono>              // NOLINT
 #include <condition_variable>  // NOLINT
 #include <memory>
 #include <mutex>  // NOLINT
 #include <utility>
 #include "paddle/fluid/memory/allocation/allocator.h"
+#include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
 namespace memory {
@@ -48,9 +50,7 @@ class RetryAllocator : public Allocator {
   std::mutex mutex_;
   std::condition_variable cv_;
 
-  // For debug, We can add an atomic integer to record how many memory sizes are
-  // waited to allocate
-  // std::atomic<size_t> waited_allocate_size_{0};
+  std::atomic<size_t> waited_allocate_size_{0};
 };
 
 }  // namespace allocation
...
@@ -17,12 +17,16 @@
 #include <chrono>              // NOLINT
 #include <condition_variable>  // NOLINT
 #include <mutex>  // NOLINT
+#include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/memory/allocation/cuda_allocator.h"
+#endif
 
 namespace paddle {
 namespace memory {
@@ -93,6 +97,51 @@ TEST(RetryAllocator, RetryAllocator) {
   }
 }
 
+class DummyAllocator : public Allocator {
+ public:
+  bool IsAllocThreadSafe() const override { return true; }
+
+ protected:
+  Allocation *AllocateImpl(size_t size) override {
+    PADDLE_THROW_BAD_ALLOC("Always BadAlloc");
+  }
+
+  void FreeImpl(Allocation *) override {}
+};
+
+TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
+  size_t retry_ms = 10;
+  {
+    RetryAllocator allocator(std::make_shared<DummyAllocator>(), retry_ms);
+    try {
+      auto allocation = allocator.Allocate(100);
+      ASSERT_TRUE(false);
+      allocation.reset();
+    } catch (BadAlloc &ex) {
+      ASSERT_TRUE(std::string(ex.what()).find("Always BadAlloc") !=
+                  std::string::npos);
+    }
+  }
+
+#ifdef PADDLE_WITH_CUDA
+  {
+    platform::CUDAPlace p(0);
+    RetryAllocator allocator(std::make_shared<CUDAAllocator>(p), retry_ms);
+    size_t allocate_size = -1UL;  // Very large number
+    try {
+      auto allocation = allocator.Allocate(allocate_size);
+      ASSERT_TRUE(false);
+      allocation.reset();
+    } catch (BadAlloc &ex) {
+      ASSERT_TRUE(std::string(ex.what()).find(
+                      "Cannot allocate " + std::to_string(allocate_size) +
+                      " on GPU " + std::to_string(p.device)) !=
+                  std::string::npos);
+    }
+  }
+#endif
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
@@ -164,15 +164,6 @@ void BuddyAllocator::Free(void* p) {
           << block->total_size(cache_) << ")";
 
   pool_.insert(
       IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
-
-  if (FLAGS_free_idle_memory) {
-    // Clean up if existing too much free memory
-    // Prefer freeing fallback allocation first
-    CleanIdleFallBackAlloc();
-    // Free normal allocation
-    CleanIdleNormalAlloc();
-  }
 }
 
 size_t BuddyAllocator::Used() { return total_used_; }
@@ -225,12 +216,6 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
   static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
                                      allocate_bytes, nullptr, nullptr);
 
-  // gpu fallback allocation
-  if (system_allocator_->UseGpu() &&
-      static_cast<MemoryBlock*>(p)->index(cache_) == 1) {
-    fallback_alloc_count_++;
-  }
-
   total_free_ += allocate_bytes;
 
   // dump the block into pool
@@ -288,70 +273,6 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
   return block;
 }
 
-void BuddyAllocator::CleanIdleFallBackAlloc() {
-  // If fallback allocation does not exist, return directly
-  if (!fallback_alloc_count_) return;
-
-  for (auto pool = pool_.rbegin(); pool != pool_.rend();) {
-    // If free memory block less than max_chunk_size_, return directly
-    if (std::get<1>(*pool) < max_chunk_size_) return;
-
-    MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
-
-    // If no GPU fallback allocator, return
-    if (!system_allocator_->UseGpu() || block->index(cache_) == 0) {
-      return;
-    }
-
-    VLOG(10) << "Return block " << block << " to fallback allocator.";
-
-    system_allocator_->Free(block, block->size(cache_), block->index(cache_));
-    cache_.invalidate(block);
-
-    pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
-
-    total_free_ -= block->size(cache_);
-    fallback_alloc_count_--;
-
-    // If no fall allocation exists, return directly
-    if (!fallback_alloc_count_) return;
-  }
-}
-
-void BuddyAllocator::CleanIdleNormalAlloc() {
-  auto shall_free_alloc = [&]() -> bool {
-    // free all fallback allocations
-    if (fallback_alloc_count_ > 0) {
-      return true;
-    }
-    // keep 2x overhead if we haven't fallen back
-    if ((total_used_ + max_chunk_size_) * 2 < total_free_) {
-      return true;
-    }
-    return false;
-  };
-
-  if (!shall_free_alloc()) return;
-
-  for (auto pool = pool_.rbegin(); pool != pool_.rend();) {
-    // If free memory block less than max_chunk_size_, return directly
-    if (std::get<1>(*pool) < max_chunk_size_) return;
-
-    MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
-
-    VLOG(10) << "Return block " << block << " to base allocator.";
-
-    system_allocator_->Free(block, block->size(cache_), block->index(cache_));
-    cache_.invalidate(block);
-
-    pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
-
-    total_free_ -= block->size(cache_);
-
-    if (!shall_free_alloc()) return;
-  }
-}
-
 }  // namespace detail
 }  // namespace memory
 }  // namespace paddle
@@ -75,12 +75,6 @@ class BuddyAllocator {
   /*! \brief Find the existing chunk which used to allocation */
   PoolSet::iterator FindExistChunk(size_t size);
 
-  /*! \brief Clean idle fallback allocation */
-  void CleanIdleFallBackAlloc();
-
-  /*! \brief Clean idle normal allocation */
-  void CleanIdleNormalAlloc();
-
  private:
   size_t total_used_ = 0;  // the total size of used memory
   size_t total_free_ = 0;  // the total size of free memory
@@ -98,9 +92,6 @@ class BuddyAllocator {
    */
   PoolSet pool_;
 
-  /*! Record fallback allocation count for auto-scaling */
-  size_t fallback_alloc_count_ = 0;
-
  private:
   /*! Unify the metadata format between GPU and CPU allocations */
   MetadataCache cache_;
...
@@ -25,6 +25,7 @@ limitations under the License. */
 #include <algorithm>  // for std::max
 
 #include "gflags/gflags.h"
+#include "paddle/fluid/memory/allocation/allocator.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/gpu_info.h"
@@ -117,19 +118,19 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     gpu_alloc_size_ += size;
     return p;
   } else {
-    LOG(WARNING) << "Cannot malloc " << size / 1024.0 / 1024.0
-                 << " MB GPU memory. Please shrink "
-                    "FLAGS_fraction_of_gpu_memory_to_use or "
-                    "FLAGS_initial_gpu_memory_in_mb or "
-                    "FLAGS_reallocate_gpu_memory_in_mb"
-                    "environment variable to a lower value. "
-                 << "Current FLAGS_fraction_of_gpu_memory_to_use value is "
-                 << FLAGS_fraction_of_gpu_memory_to_use
-                 << ". Current FLAGS_initial_gpu_memory_in_mb value is "
-                 << FLAGS_initial_gpu_memory_in_mb
-                 << ". Current FLAGS_reallocate_gpu_memory_in_mb value is "
-                 << FLAGS_reallocate_gpu_memory_in_mb;
-    return nullptr;
+    PADDLE_THROW_BAD_ALLOC(
+        "Cannot malloc " + std::to_string(size / 1024.0 / 1024.0) +
+        " MB GPU memory. Please shrink "
+        "FLAGS_fraction_of_gpu_memory_to_use or "
+        "FLAGS_initial_gpu_memory_in_mb or "
+        "FLAGS_reallocate_gpu_memory_in_mb"
+        "environment variable to a lower value. " +
+        "Current FLAGS_fraction_of_gpu_memory_to_use value is " +
+        std::to_string(FLAGS_fraction_of_gpu_memory_to_use) +
+        ". Current FLAGS_initial_gpu_memory_in_mb value is " +
+        std::to_string(FLAGS_initial_gpu_memory_in_mb) +
+        ". Current FLAGS_reallocate_gpu_memory_in_mb value is " +
+        std::to_string(FLAGS_reallocate_gpu_memory_in_mb));
   }
 }
...
@@ -66,25 +66,61 @@ inline std::string demangle(std::string name) {
 inline std::string demangle(std::string name) { return name; }
 #endif
 
+template <typename StrType>
+inline std::string GetTraceBackString(StrType&& what, const char* file,
+                                      int line) {
+  static constexpr int TRACE_STACK_LIMIT = 100;
+  std::ostringstream sout;
+
+  sout << string::Sprintf("%s at [%s:%d]", std::forward<StrType>(what), file,
+                          line)
+       << std::endl;
+  sout << "PaddlePaddle Call Stacks: " << std::endl;
+#if !defined(_WIN32)
+  void* call_stack[TRACE_STACK_LIMIT];
+  auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
+  auto symbols = backtrace_symbols(call_stack, size);
+  Dl_info info;
+  for (int i = 0; i < size; ++i) {
+    if (dladdr(call_stack[i], &info) && info.dli_sname) {
+      auto demangled = demangle(info.dli_sname);
+      auto addr_offset = static_cast<char*>(call_stack[i]) -
+                         static_cast<char*>(info.dli_saddr);
+      sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, 2 + sizeof(void*) * 2,
+                              call_stack[i], demangled, addr_offset);
+    } else {
+      sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
+                              call_stack[i]);
+    }
+  }
+  free(symbols);
+#else
+  sout << "Windows not support stack backtrace yet.";
+#endif
+  return sout.str();
+}
+
 struct EnforceNotMet : public std::exception {
   std::string err_str_;
-  EnforceNotMet(std::exception_ptr e, const char* f, int l) {
+  EnforceNotMet(std::exception_ptr e, const char* file, int line) {
     try {
       std::rethrow_exception(e);
     } catch (std::exception& e) {
-      Init(e.what(), f, l);
+      err_str_ = GetTraceBackString(e.what(), file, line);
+      SaveErrorInformation(err_str_);
     }
   }
 
-  EnforceNotMet(const std::string& str, const char* f, int l) {
-    Init(str, f, l);
+  EnforceNotMet(const std::string& str, const char* file, int line)
+      : err_str_(GetTraceBackString(str, file, line)) {
+    SaveErrorInformation(err_str_);
   }
 
   const char* what() const noexcept override { return err_str_.c_str(); }
 
  private:
-  const std::string output_file_name{"paddle_err_info"};
-  void saveErrorInformation(const std::string& err) {
+  static void SaveErrorInformation(const std::string& err) {
+    const std::string output_file_name{"paddle_err_info"};
     std::stringstream ss;
     ss << output_file_name;
     std::time_t t = std::time(nullptr);
@@ -98,49 +134,15 @@ struct EnforceNotMet : public std::exception {
       err_file.close();
     }
   }
-
-  template <typename StrType>
-  inline void Init(StrType what, const char* f, int l) {
-    static constexpr int TRACE_STACK_LIMIT = 100;
-    std::ostringstream sout;
-
-    sout << string::Sprintf("%s at [%s:%d]", what, f, l) << std::endl;
-    sout << "PaddlePaddle Call Stacks: " << std::endl;
-#if !defined(_WIN32)
-    void* call_stack[TRACE_STACK_LIMIT];
-    auto size = backtrace(call_stack, TRACE_STACK_LIMIT);
-    auto symbols = backtrace_symbols(call_stack, size);
-    Dl_info info;
-    for (int i = 0; i < size; ++i) {
-      if (dladdr(call_stack[i], &info) && info.dli_sname) {
-        auto demangled = demangle(info.dli_sname);
-        auto addr_offset = static_cast<char*>(call_stack[i]) -
-                           static_cast<char*>(info.dli_saddr);
-        sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
-                                2 + sizeof(void*) * 2, call_stack[i], demangled,
-                                addr_offset);
-      } else {
-        sout << string::Sprintf("%-3d %*0p\n", i, 2 + sizeof(void*) * 2,
-                                call_stack[i]);
-      }
-    }
-    free(symbols);
-#else
-    sout << "Windows not support stack backtrace yet.";
-#endif
-    err_str_ = sout.str();
-    saveErrorInformation(err_str_);
-  }
 };
 
 struct EOFException : public std::exception {
   std::string err_str_;
-  EOFException(const char* err_msg, const char* f, int l) {
-    err_str_ = string::Sprintf("%s at [%s:%d]", err_msg, f, l);
+  EOFException(const char* err_msg, const char* file, int line) {
+    err_str_ = string::Sprintf("%s at [%s:%d]", err_msg, file, line);
   }
 
-  const char* what() const noexcept { return err_str_.c_str(); }
+  const char* what() const noexcept override { return err_str_.c_str(); }
 };
 
 // Because most enforce conditions would evaluate to true, we can use
@@ -329,6 +331,12 @@ DEFINE_CUDA_STATUS_TYPE(ncclResult_t, ncclSuccess);
                     __LINE__);                                   \
   } while (0)
 
+#define PADDLE_THROW_BAD_ALLOC(...)                                  \
+  do {                                                               \
+    throw ::paddle::memory::allocation::BadAlloc(                    \
+        ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \
+  } while (0)
+
 /*
  * Some enforce helpers here, usage:
  *  int a = 1;
...
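The PADDLE_THROW_BAD_ALLOC macro added above is what lets call sites throw BadAlloc without passing `__FILE__`/`__LINE__` by hand. A minimal, self-contained sketch of the macro pattern follows; the names are illustrative and there is no backtrace collection.

```cpp
#include <stdexcept>
#include <string>

// The exception itself only stores a finished message...
struct LocatedError : std::runtime_error {
  LocatedError(const std::string& msg, const char* file, int line)
      : std::runtime_error(msg + " at [" + file + ":" + std::to_string(line) +
                           "]") {}
};

// ...while the macro captures the throw site, so callers only supply the text.
#define THROW_LOCATED_ERROR(msg)                   \
  do {                                             \
    throw LocatedError((msg), __FILE__, __LINE__); \
  } while (0)

// Usage: THROW_LOCATED_ERROR("Cannot allocate 4096 bytes");
```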