未验证 提交 cce2b94d 编写于 作者: H HongyuJia 提交者: GitHub

[GetCurrentCUDAStream] Add C++ API GetCurrentCUDAStream (#51027)

* polish code according to #50813

* [getCurrentCUDAStream] Add C++ API getCurrentCUDAStream

* change get->Get

* wrap with macro

* use Get instead of get
上级 72f34450
......@@ -25,6 +25,8 @@ namespace phi {
class DeviceContext;
class CPUContext;
class GPUContext;
class Allocator;
class CUDAStream;
} // namespace phi
namespace paddle {
......@@ -88,9 +90,18 @@ class PADDLE_API DeviceContextPool {
} // namespace experimental
} // namespace paddle
namespace phi {
class Allocator;
namespace paddle {
PADDLE_API Allocator* GetAllocator(const Place& place);
/**
* Get the Allocator for the passed place.
*/
PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place);
} // namespace phi
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
/**
* Get the current CUDA stream for the passed CUDA device.
*/
PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place);
#endif
} // namespace paddle
......@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/enforce.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/phi/core/cuda_stream.h"
#endif
#include "paddle/fluid/platform/init.h"
namespace paddle {
......@@ -52,12 +56,27 @@ phi::DeviceContext* DeviceContextPool::GetMutable(const Place& place) {
} // namespace experimental
} // namespace paddle
namespace phi {
namespace paddle {
PADDLE_API Allocator* GetAllocator(const Place& place) {
const DeviceContext* dev_ctx =
/**
 * Fetch the allocator bound to the device context registered for `place`.
 * The pool hands out const contexts, so the allocator pointer is
 * const_cast'ed back to mutable for callers that need to allocate.
 */
PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) {
  auto& ctx_pool = paddle::experimental::DeviceContextPool::Instance();
  const phi::DeviceContext* ctx = ctx_pool.Get(place);
  return const_cast<phi::Allocator*>(&ctx->GetAllocator());
}
} // namespace phi
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
/**
 * Return the CUDA/HIP stream currently associated with `place`.
 * Only GPU places are accepted; any other place type raises
 * InvalidArgument via PADDLE_ENFORCE.
 */
PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) {
  PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU,
                 phi::errors::InvalidArgument(
                     "GetCurrentCUDAStream only supports GPUPlace input. "
                     "However, your input is place=%s",
                     place));
  // The pool keeps one GPUContext per device; its stream is the current
  // stream for that device.
  auto& ctx_pool = paddle::experimental::DeviceContextPool::Instance();
  auto* gpu_ctx = static_cast<const phi::GPUContext*>(ctx_pool.Get(place));
  return gpu_ctx->cuda_stream();
}
#endif
} // namespace paddle
......@@ -14,7 +14,4 @@ limitations under the License. */
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/device_context.h"
namespace phi {} // namespace phi
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <functional>
#include <memory>
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/common/place.h"
namespace phi {
......
......@@ -9,6 +9,10 @@ if(WITH_GPU)
test_allocator
SRCS test_allocator.cu
DEPS memory place device_context context_pool)
nv_test(
test_cuda_stream
SRCS test_cuda_stream.cu
DEPS context_pool)
elseif(WITH_ROCM)
hip_test(
test_phi_tensor
......@@ -18,6 +22,10 @@ elseif(WITH_ROCM)
test_allocator
SRCS test_allocator.cu
DEPS memory place device_context context_pool)
hip_test(
test_cuda_stream
SRCS test_cuda_stream.cu
DEPS context_pool)
else()
cc_test(
test_phi_tensor
......
......@@ -22,8 +22,20 @@ limitations under the License. */
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/device_context.h"
using paddle::memory::Copy;
// Functor that multiplies its argument by a fixed factor captured at
// construction; callable on both host and device (HOSTDEVICE).
template <typename T>
class Scale {
 public:
  explicit Scale(const T& factor) : factor_(factor) {}
  // Apply the scaling: returns `value * factor_`.
  HOSTDEVICE T operator()(const T& value) const { return value * factor_; }

 private:
  T factor_;  // multiplicative factor
};
TEST(Allocator, CPU) {
phi::Allocator* allocator = phi::GetAllocator(phi::CPUPlace());
phi::Allocator* allocator = paddle::GetAllocator(phi::CPUPlace());
auto cpu_allocation = allocator->Allocate(sizeof(float) * 4);
float* cpu_buf = static_cast<float*>(cpu_allocation->ptr());
ASSERT_NE(cpu_buf, nullptr);
......@@ -39,23 +51,10 @@ TEST(Allocator, CPU) {
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
using paddle::memory::Copy;
template <typename T>
class Scale {
public:
explicit Scale(const T& scale) : scale_(scale) {}
HOSTDEVICE T operator()(const T& a) const { return a * scale_; }
private:
T scale_;
};
TEST(Allocator, GPU) {
phi::GPUPlace gpu0(0);
float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4};
phi::Allocator* allocator = phi::GetAllocator(gpu0);
phi::Allocator* allocator = paddle::GetAllocator(gpu0);
auto gpu_allocation = allocator->Allocate(sizeof(cpu_buf));
float* gpu_buf = static_cast<float*>(gpu_allocation->ptr());
......@@ -70,4 +69,3 @@ TEST(Allocator, GPU) {
ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5);
}
}
#endif
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/cuda_stream.h"
// Smoke test for the new paddle::GetCurrentCUDAStream C++ API: for device 0
// it must yield a non-null wrapper whose underlying gpuStream_t is also
// non-null. Uses EXPECT_* (not ASSERT_*) so both checks always run.
TEST(CUDAStream, GPU) {
  phi::GPUPlace gpu0(0);
  phi::CUDAStream* stream = paddle::GetCurrentCUDAStream(gpu0);
  EXPECT_TRUE(stream != nullptr);
  // raw_stream() exposes the driver-level handle wrapped by CUDAStream.
  gpuStream_t raw_stream = stream->raw_stream();
  EXPECT_TRUE(raw_stream != nullptr);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册