Unverified commit cce2b94d authored by HongyuJia, committed by GitHub

[GetCurrentCUDAStream] Add C++ API GetCurrentCUDAStream (#51027)

* polish code according to #50813

* [getCurrentCUDAStream] Add C++ API getCurrentCUDAStream

* change get->Get

* wrap with macro

* use Get instead of get
Parent 72f34450
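For orientation, a minimal usage sketch (not part of the diff) of how a caller reaches the new API after this change, assuming a CUDA-enabled build and device 0; it mirrors the unit test added below:

#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/cuda_stream.h"

void Demo() {
  phi::GPUPlace gpu0(0);
  // New C++ API from this commit: fetch the current stream bound to the
  // device context of the given GPU place.
  phi::CUDAStream* stream = paddle::GetCurrentCUDAStream(gpu0);
  gpuStream_t raw_stream = stream->raw_stream();  // raw CUDA/HIP handle
  (void)raw_stream;
}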
@@ -25,6 +25,8 @@ namespace phi {
 class DeviceContext;
 class CPUContext;
 class GPUContext;
+class Allocator;
+class CUDAStream;
 } // namespace phi
 namespace paddle {
@@ -88,9 +90,18 @@ class PADDLE_API DeviceContextPool {
 } // namespace experimental
 } // namespace paddle
-namespace phi {
-class Allocator;
-PADDLE_API Allocator* GetAllocator(const Place& place);
-} // namespace phi
+namespace paddle {
+/**
+ * Get the Allocator for the passed place.
+ */
+PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+/**
+ * Get the current CUDA stream for the passed CUDA device.
+ */
+PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place);
+#endif
+} // namespace paddle
@@ -18,6 +18,10 @@ limitations under the License. */
 #include "paddle/phi/core/allocator.h"
 #include "paddle/phi/core/enforce.h"
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+#include "paddle/phi/core/cuda_stream.h"
+#endif
 #include "paddle/fluid/platform/init.h"
 namespace paddle {
@@ -52,12 +56,27 @@ phi::DeviceContext* DeviceContextPool::GetMutable(const Place& place) {
 } // namespace experimental
 } // namespace paddle
-namespace phi {
-PADDLE_API Allocator* GetAllocator(const Place& place) {
-  const DeviceContext* dev_ctx =
+namespace paddle {
+PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) {
+  const phi::DeviceContext* dev_ctx =
       paddle::experimental::DeviceContextPool::Instance().Get(place);
   return const_cast<phi::Allocator*>(&dev_ctx->GetAllocator());
 }
-} // namespace phi
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) {
+  PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU,
+                 phi::errors::InvalidArgument(
+                     "GetCurrentCUDAStream only supports GPUPlace input. "
+                     "However, your input is place=%s",
+                     place));
+  auto& pool = paddle::experimental::DeviceContextPool::Instance();
+  const phi::GPUContext* dev_ctx =
+      static_cast<const phi::GPUContext*>(pool.Get(place));
+  return dev_ctx->cuda_stream();
+}
+#endif
+} // namespace paddle
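For illustration only (not part of the diff), a hedged sketch of how the returned stream can be used to enqueue device work; MyKernel is a hypothetical placeholder, not a Paddle API:

// Hypothetical example: launch a kernel on the stream returned by the new
// API. MyKernel stands in for any user-defined kernel.
__global__ void MyKernel(float* data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= 2.0f;
}

void LaunchOnCurrentStream(float* gpu_buf, int n) {
  phi::GPUPlace gpu0(0);
  phi::CUDAStream* stream = paddle::GetCurrentCUDAStream(gpu0);
  // raw_stream() exposes the underlying gpuStream_t handle (see the unit
  // test below), which can be passed directly to a kernel launch.
  MyKernel<<<(n + 255) / 256, 256, 0, stream->raw_stream()>>>(gpu_buf, n);
}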
@@ -14,7 +14,4 @@ limitations under the License. */
 #include "paddle/phi/core/allocator.h"
-#include "paddle/phi/api/include/context_pool.h"
-#include "paddle/phi/core/device_context.h"
 namespace phi {} // namespace phi
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <functional>
 #include <memory>
-#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/place.h"
 namespace phi {
...
@@ -9,6 +9,10 @@ if(WITH_GPU)
     test_allocator
     SRCS test_allocator.cu
     DEPS memory place device_context context_pool)
+  nv_test(
+    test_cuda_stream
+    SRCS test_cuda_stream.cu
+    DEPS context_pool)
 elseif(WITH_ROCM)
   hip_test(
     test_phi_tensor
@@ -18,6 +22,10 @@ elseif(WITH_ROCM)
     test_allocator
     SRCS test_allocator.cu
     DEPS memory place device_context context_pool)
+  hip_test(
+    test_cuda_stream
+    SRCS test_cuda_stream.cu
+    DEPS context_pool)
 else()
   cc_test(
     test_phi_tensor
...
@@ -22,8 +22,20 @@ limitations under the License. */
 #include "paddle/phi/core/allocator.h"
 #include "paddle/phi/core/device_context.h"
+using paddle::memory::Copy;
+
+template <typename T>
+class Scale {
+ public:
+  explicit Scale(const T& scale) : scale_(scale) {}
+  HOSTDEVICE T operator()(const T& a) const { return a * scale_; }
+
+ private:
+  T scale_;
+};
+
 TEST(Allocator, CPU) {
-  phi::Allocator* allocator = phi::GetAllocator(phi::CPUPlace());
+  phi::Allocator* allocator = paddle::GetAllocator(phi::CPUPlace());
   auto cpu_allocation = allocator->Allocate(sizeof(float) * 4);
   float* cpu_buf = static_cast<float*>(cpu_allocation->ptr());
   ASSERT_NE(cpu_buf, nullptr);
@@ -39,23 +51,10 @@ TEST(Allocator, CPU) {
   }
 }
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-using paddle::memory::Copy;
-
-template <typename T>
-class Scale {
- public:
-  explicit Scale(const T& scale) : scale_(scale) {}
-  HOSTDEVICE T operator()(const T& a) const { return a * scale_; }
-
- private:
-  T scale_;
-};
-
 TEST(Allocator, GPU) {
   phi::GPUPlace gpu0(0);
   float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4};
-  phi::Allocator* allocator = phi::GetAllocator(gpu0);
+  phi::Allocator* allocator = paddle::GetAllocator(gpu0);
   auto gpu_allocation = allocator->Allocate(sizeof(cpu_buf));
   float* gpu_buf = static_cast<float*>(gpu_allocation->ptr());
@@ -70,4 +69,3 @@ TEST(Allocator, GPU) {
     ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5);
   }
 }
-#endif
New file: test_cuda_stream.cu

/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/cuda_stream.h"
TEST(CUDAStream, GPU) {
phi::GPUPlace gpu0(0);
phi::CUDAStream* stream = paddle::GetCurrentCUDAStream(gpu0);
EXPECT_TRUE(stream != nullptr);
gpuStream_t raw_stream = stream->raw_stream();
EXPECT_TRUE(raw_stream != nullptr);
}
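As a usage note, a sketch (under stated assumptions, not part of the diff) combining the two context_pool APIs touched by this commit: it assumes a CUDA build, where raw_stream() yields a cudaStream_t, and uses the raw CUDA runtime call cudaMemcpyAsync purely for illustration.

// Sketch: allocate device memory via paddle::GetAllocator, then issue an
// async host-to-device copy on the current stream of device 0.
phi::GPUPlace gpu0(0);
phi::Allocator* allocator = paddle::GetAllocator(gpu0);
auto allocation = allocator->Allocate(sizeof(float) * 4);
float host_buf[4] = {0.1f, 0.2f, 0.3f, 0.4f};
cudaMemcpyAsync(allocation->ptr(), host_buf, sizeof(host_buf),
                cudaMemcpyHostToDevice,
                paddle::GetCurrentCUDAStream(gpu0)->raw_stream());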