未验证 提交 2c21d240 编写于 作者: L Leo Chen 提交者: GitHub

fix gpu callback (#40445)

* fix gpu context callback

* fix gpu callback

* fix callback early destruction problem
上级 bb801960
......@@ -654,10 +654,17 @@ struct GPUContext::Impl {
}
void AddStreamCallback(const std::function<void()>& callback) const {
// TODO(wilber): Do we need ThreadPool?
auto* func = new std::function<void()>([this, callback] {
// NOTE(zhiqiu): better use threadpool here, otherwise "std::async" may
// launch too many threads and result in thread oversubscription.
auto* callback_func = new std::function<void()>(std::move(callback));
auto* func = new std::function<void()>([this, callback_func] {
std::lock_guard<std::mutex> lock(stream_call_back_mtx_);
last_future_ = std::async(std::launch::deferred, [&]() { callback(); });
VLOG(4) << "Stream callback";
last_future_ = std::async(std::launch::async, [callback_func]() {
std::unique_ptr<std::function<void()>> releaser(callback_func);
(*callback_func)();
});
});
#ifdef PADDLE_WITH_HIP
......
......@@ -395,6 +395,8 @@ struct ConcatFunctor<phi::GPUContext, T> {
auto* data_alloc_released = data_alloc.release();
auto* col_alloc_released = col_alloc.release();
context.AddStreamCallback([data_alloc_released, col_alloc_released] {
VLOG(4) << "Delete cuda pinned at " << data_alloc_released;
VLOG(4) << "Delete cuda pinned at " << col_alloc_released;
paddle::memory::allocation::Allocator::AllocationDeleter(
data_alloc_released);
paddle::memory::allocation::Allocator::AllocationDeleter(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册