From 2c21d24038093084512763f073ddbc9cfa8749fe Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 14 Mar 2022 17:59:44 +0800 Subject: [PATCH] fix gpu callback (#40445) * fix gpu context callback * fix gpu callback * fix callback early destruct problem --- paddle/phi/backends/gpu/gpu_context.cc | 13 ++++++++++--- .../phi/kernels/funcs/concat_and_split_functor.cu | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index 09deb575f2..a3b2525985 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -654,10 +654,17 @@ struct GPUContext::Impl { } void AddStreamCallback(const std::function<void()>& callback) const { - // TODO(wilber): Do we need ThreadPool? - auto* func = new std::function<void()>([this, callback] { + // NOTE(zhiqiu): better use threadpool here, otherwise "std::async" may + // launch too + // many threads and result in thread oversubscription. + auto* callback_func = new std::function<void()>(std::move(callback)); + auto* func = new std::function<void()>([this, callback_func] { std::lock_guard lock(stream_call_back_mtx_); - last_future_ = std::async(std::launch::deferred, [&]() { callback(); }); + VLOG(4) << "Stream callback"; + last_future_ = std::async(std::launch::async, [callback_func]() { + std::unique_ptr<std::function<void()>> releaser(callback_func); + (*callback_func)(); + }); }); #ifdef PADDLE_WITH_HIP diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu index 840c8872f5..06be592dd9 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu @@ -395,6 +395,8 @@ struct ConcatFunctor { auto* data_alloc_released = data_alloc.release(); auto* col_alloc_released = col_alloc.release(); context.AddStreamCallback([data_alloc_released, col_alloc_released] { + VLOG(4) << "Delete cuda pinned at " << data_alloc_released; + VLOG(4) << 
"Delete cuda pinned at " << col_alloc_released; paddle::memory::allocation::Allocator::AllocationDeleter( data_alloc_released); paddle::memory::allocation::Allocator::AllocationDeleter( -- GitLab