未验证 提交 7f92e27e 编写于 作者: Z zyfncg 提交者: GitHub

Fix bug of zero_allocator in HostAlloc (#48108)

* fix bug of zero_allocator in host

* fix test compile bug

* add unittest

* update test
上级 058aa381
......@@ -427,6 +427,10 @@ void AnalysisPredictor::InitDeviceContexts() {
memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(place_)
.get());
gpu_context->SetHostZeroAllocator(
memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(platform::CPUPlace())
.get());
gpu_context->SetGenerator(
framework::DefaultCUDAGenerator(place_.GetDeviceId()).get());
gpu_context->SetHostGenerator(framework::DefaultCPUGenerator().get());
......
......@@ -38,6 +38,10 @@ class TensorRTEngineTest : public ::testing::Test {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(platform::CUDAPlace(0))
.get());
ctx_->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
ctx_->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......
......@@ -183,6 +183,10 @@ TEST(Malloc, GPUContextMultiThreadMultiStream) {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(place)
.get());
ctx->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
ctx->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......
......@@ -221,6 +221,10 @@ NCCLComm* NCCLCommContext::AssignNCCLComm(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(CUDAPlace(dev_id))
.get());
dev_ctx->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......@@ -364,6 +368,10 @@ BKCLComm* BKCLCommContext::AssignBKCLComm(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(XPUPlace(dev_id))
.get());
dev_ctx->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
BKCLCommImpl* c = new BKCLCommImpl;
c->set_ring_id(ring_id);
......
......@@ -136,6 +136,10 @@ struct NCCLContext {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(CUDAPlace(dev_id))
.get());
ctx_->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
ctx_->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......
......@@ -183,6 +183,9 @@ std::unique_ptr<DeviceContext> CreateDeviceContext(
dev_ctx->SetZeroAllocator(memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(p)
.get());
dev_ctx->SetHostZeroAllocator(memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(platform::CPUPlace())
.get());
return PtrType(dev_ctx);
}
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/core/dense_tensor.h"
TEST(Device, Init) {
using paddle::platform::CUDAPlace;
......@@ -38,6 +39,10 @@ TEST(Device, Init) {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(CUDAPlace(i))
.get());
device_context->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
device_context->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......@@ -69,6 +74,10 @@ TEST(Device, GPUContext) {
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(CUDAPlace(i))
.get());
device_context->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
device_context->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......@@ -92,6 +101,45 @@ TEST(Device, GPUContext) {
}
}
// Verifies that a zero-element HostAlloc goes through the host-side
// zero-size allocator and produces a CPU-placed tensor, both on a fully
// initialized context and on a context that only borrows the allocator.
TEST(Device, HostZeroAllocator) {
  using paddle::platform::CUDAPlace;

  auto& facade = paddle::memory::allocation::AllocatorFacade::Instance();

  // Fully wire up a GPU context from the global allocator facade.
  auto ctx = std::make_unique<phi::GPUContext>(CUDAPlace(0));
  ctx->SetAllocator(facade.GetAllocator(CUDAPlace(0), ctx->stream()).get());
  ctx->SetHostAllocator(
      facade.GetAllocator(paddle::platform::CPUPlace()).get());
  ctx->SetZeroAllocator(facade.GetZeroAllocator(CUDAPlace(0)).get());
  ctx->SetHostZeroAllocator(
      facade.GetZeroAllocator(paddle::platform::CPUPlace()).get());
  ctx->SetPinnedAllocator(
      facade.GetAllocator(paddle::platform::CUDAPinnedPlace()).get());
  ctx->PartialInitWithAllocator();

  // A tensor resized to zero elements must be host-allocated on CPU with
  // the requested dtype, even though no bytes are actually allocated.
  phi::DenseTensor tensor;
  tensor.Resize({0});
  ctx->HostAlloc<float>(&tensor);
  ASSERT_EQ(tensor.place().GetType(), phi::AllocationType::CPU);
  ASSERT_EQ(tensor.numel(), 0);
  ASSERT_EQ(tensor.dtype(), phi::DataType::FLOAT32);

  // A second context that only borrows the host zero allocator can still
  // serve the zero-size host allocation and keep CPU placement.
  phi::GPUContext gpu_context(CUDAPlace(0));
  gpu_context.SetHostZeroAllocator(&ctx->GetHostZeroAllocator());
  gpu_context.HostAlloc<float>(&tensor);
  ASSERT_EQ(tensor.place().GetType(), phi::AllocationType::CPU);
}
TEST(Device, DeviceContextPool) {
using paddle::platform::CPUPlace;
using paddle::platform::CUDAPlace;
......
......@@ -1325,6 +1325,10 @@ All parameter, weight, gradient are variables in Paddle.
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(place)
.get());
context->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
return context;
})
.def_static(
......@@ -1349,6 +1353,10 @@ All parameter, weight, gradient are variables in Paddle.
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(place)
.get());
context->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
return context;
#endif
})
......@@ -1410,6 +1418,10 @@ All parameter, weight, gradient are variables in Paddle.
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(place)
.get());
context->SetHostZeroAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetZeroAllocator(paddle::platform::CPUPlace())
.get());
context->SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
......
......@@ -54,6 +54,14 @@ struct DeviceContext::Impl {
zero_allocator_ = allocator;
}
  // Installs the allocator used for zero-size *host* allocations
  // (see HostAlloc: numel == 0 routes to host_zero_allocator_).
  // The pointer is borrowed, not owned; it must outlive this context.
  void SetHostZeroAllocator(const Allocator* allocator) {
    PADDLE_ENFORCE_NOT_NULL(
        allocator,
        phi::errors::InvalidArgument(
            "Required allocator shall not be nullptr, but received nullptr."));
    host_zero_allocator_ = allocator;
  }
void SetPinnedAllocator(const Allocator* allocator) {
PADDLE_ENFORCE_NOT_NULL(
allocator,
......@@ -106,6 +114,14 @@ struct DeviceContext::Impl {
return *zero_allocator_;
}
const Allocator& GetHostZeroAllocator() const {
PADDLE_ENFORCE_NOT_NULL(
host_zero_allocator_,
phi::errors::InvalidArgument("Required zero_allocator_ shall not be "
"nullptr, but received nullptr."));
return *host_zero_allocator_;
}
const Allocator& GetPinnedAllocator() const {
PADDLE_ENFORCE_NOT_NULL(
pinned_allocator_,
......@@ -172,7 +188,8 @@ struct DeviceContext::Impl {
if (tensor->initialized() && tensor->place() != CPUPlace()) {
ClearHolder(tensor);
}
auto* allocator = tensor->numel() == 0 ? zero_allocator_ : host_allocator_;
auto* allocator =
tensor->numel() == 0 ? host_zero_allocator_ : host_allocator_;
return tensor->AllocateFrom(
const_cast<Allocator*>(allocator), dtype, requested_size);
}
......@@ -234,6 +251,7 @@ struct DeviceContext::Impl {
const Allocator* device_allocator_{nullptr};
const Allocator* host_allocator_{nullptr};
const Allocator* zero_allocator_{nullptr};
const Allocator* host_zero_allocator_{nullptr};
const Allocator* pinned_allocator_{nullptr};
#ifdef PADDLE_WITH_CUDA
const Allocator* cuda_graph_allocator_{nullptr};
......@@ -248,6 +266,7 @@ DeviceContext::DeviceContext(const DeviceContext& other) {
impl_->SetHostAllocator(&other.GetHostAllocator());
impl_->SetAllocator(&other.GetAllocator());
impl_->SetZeroAllocator(&other.GetZeroAllocator());
impl_->SetHostZeroAllocator(&other.GetHostZeroAllocator());
impl_->SetPinnedAllocator(&other.GetPinnedAllocator());
impl_->SetHostGenerator(other.GetHostGenerator());
impl_->SetGenerator(other.GetGenerator());
......@@ -300,10 +319,18 @@ void DeviceContext::SetZeroAllocator(const Allocator* allocator) {
impl_->SetZeroAllocator(allocator);
}
// Forwards to Impl, which rejects a null allocator with InvalidArgument.
void DeviceContext::SetHostZeroAllocator(const Allocator* allocator) {
  impl_->SetHostZeroAllocator(allocator);
}
// Device-side allocator for zero-size allocations; Impl throws if unset.
const Allocator& DeviceContext::GetZeroAllocator() const {
  return impl_->GetZeroAllocator();
}
// Host-side allocator for zero-size allocations; Impl throws if unset.
const Allocator& DeviceContext::GetHostZeroAllocator() const {
  return impl_->GetHostZeroAllocator();
}
// Forwards to Impl, which validates the pointer is non-null.
void DeviceContext::SetPinnedAllocator(const Allocator* allocator) {
  impl_->SetPinnedAllocator(allocator);
}
......
......@@ -82,6 +82,13 @@ class PADDLE_API DeviceContext {
*/
void SetZeroAllocator(const Allocator*);
/**
* @brief Set the zero-size host Allocator object.
*
* @param allocator
*/
void SetHostZeroAllocator(const Allocator*);
/**
* @brief Set the zero-size Allocator object.
*
......@@ -105,6 +112,8 @@ class PADDLE_API DeviceContext {
const Allocator& GetZeroAllocator() const;
const Allocator& GetHostZeroAllocator() const;
const Allocator& GetPinnedAllocator() const;
#ifdef PADDLE_WITH_CUDA
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册