From 22da1907e4b9b89cb6acd58b437e765a810f4b20 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Thu, 19 Aug 2021 09:46:38 +0800 Subject: [PATCH] Abstract DeviceEvent to manage cross-platform Event implementation (#34922) * add device_context * add gtest for device_event_gpu * Remvoe duplicate DeviceType * push for test * add unittest * fix macros * fix MSVC using usage --- paddle/fluid/platform/CMakeLists.txt | 5 + paddle/fluid/platform/device_context.h | 2 + paddle/fluid/platform/device_event.cc | 27 ++ paddle/fluid/platform/device_event.h | 277 +++++++++++++++++++++ paddle/fluid/platform/device_event_gpu.cc | 105 ++++++++ paddle/fluid/platform/device_event_test.cc | 78 ++++++ paddle/fluid/pybind/cuda_streams_py.cc | 2 +- 7 files changed, 495 insertions(+), 1 deletion(-) create mode 100644 paddle/fluid/platform/device_event.cc create mode 100644 paddle/fluid/platform/device_event.h create mode 100644 paddle/fluid/platform/device_event_gpu.cc create mode 100644 paddle/fluid/platform/device_event_test.cc diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 97c81568e67..fab0909c01c 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -151,11 +151,16 @@ endif() cc_test(init_test SRCS init_test.cc DEPS device_context) +cc_library(device_event SRCS device_event.cc DEPS place enforce device_context op_registry) +cc_library(device_event_gpu SRCS device_event_gpu.cc DEPS device_event) + + if(WITH_GPU) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) + nv_test(device_event_test SRCS device_event_test.cc DEPS device_event_gpu) endif() if(WITH_ROCM) diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index abac12ff266..87ce7c438b6 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -97,6 +97,8 @@ enum DeviceType { CUDA = 1, XPU = 2, NPU = 3, + + MAX_DEVICE_TYPES = 4, }; DeviceType Place2DeviceType(const platform::Place& place); diff --git a/paddle/fluid/platform/device_event.cc b/paddle/fluid/platform/device_event.cc new file mode 100644 index 00000000000..2c96de16379 --- /dev/null +++ b/paddle/fluid/platform/device_event.cc @@ -0,0 +1,27 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/device_event.h" + +namespace paddle { +namespace platform { + +EventCreateFunction DeviceEvent::event_creator_[MaxDeviceTypes]; +EventRecordFunction DeviceEvent::event_recorder_[MaxDeviceTypes]; +EventQueryFunction DeviceEvent::event_querier_[MaxDeviceTypes]; +EventFinishFunction DeviceEvent::event_finisher_[MaxDeviceTypes]; +EventWaitFunction DeviceEvent::event_waiter_[MaxDeviceTypes][MaxDeviceTypes]; + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h new file mode 100644 index 00000000000..c1f0acc00e6 --- /dev/null +++ b/paddle/fluid/platform/device_event.h @@ -0,0 +1,277 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace platform { + +class DeviceOption; +class DeviceEvent; + +constexpr int MaxDeviceTypes = + static_cast(platform::DeviceType::MAX_DEVICE_TYPES); + +typedef void (*EventCreateFunction)(DeviceEvent*, const DeviceOption&); +typedef void (*EventRecordFunction)(DeviceEvent*, const platform::Place&, + const DeviceContext*); +typedef bool (*EventQueryFunction)(const DeviceEvent*); +typedef void (*EventFinishFunction)(const DeviceEvent*); +typedef void (*EventWaitFunction)(const DeviceEvent*, DeviceContext*); + +inline int DeviceTypeToId(const DeviceType& device_type) { + return static_cast(device_type); +} + +class DeviceOption { + public: + explicit DeviceOption(int device_type) : device_type_(device_type) {} + + DeviceOption(int device_type, int device_id) + : device_type_(device_type), device_id_(device_id) {} + + int device_type() const { return device_type_; } + + int device_id() const { return device_id_; } + + private: + int device_type_; + int device_id_; +}; + +class DeviceEvent { + public: + explicit DeviceEvent(const DeviceOption& device_option) + : event_(), + type_(device_option.device_type()), + device_option_(device_option) { + PADDLE_ENFORCE_LT(type_, MaxDeviceTypes, + platform::errors::PreconditionNotMet( + "Required type < %d, but received type = %d", + MaxDeviceTypes, type_)); + PADDLE_ENFORCE_NOT_NULL( + event_creator_[type_], + platform::errors::Unavailable( + "event_creator_[%d] shall not be nullptr.", type_)); + event_creator_[type_](this, device_option_); + } + + ~DeviceEvent() {} + + void Record(const platform::Place& place, const DeviceContext* dev_ctx) { + PADDLE_ENFORCE_NOT_NULL( + event_recorder_[type_], + platform::errors::Unavailable( + "event_recorder_[%d] shall not be nullptr.", type_)); + event_recorder_[type_](this, place, dev_ctx); + } + + bool Query() { + PADDLE_ENFORCE_NOT_NULL( + event_querier_[type_], + platform::errors::Unavailable( + "event_querier_[%d] shall not be nullptr.", type_)); + return event_querier_[type_](this); + } + + void Finish() const { + PADDLE_ENFORCE_NOT_NULL( + event_finisher_[type_], + platform::errors::Unavailable( + "event_finisher_[%d] shall not be nullptr.", type_)); + event_finisher_[type_](this); + } + + void Wait(const DeviceType& waiter_type, DeviceContext* context) const { + auto waiter_idx = DeviceTypeToId(waiter_type); + PADDLE_ENFORCE_NOT_NULL( + event_waiter_[waiter_idx][type_], + platform::errors::Unavailable( + "event_waiter_[%d][%d] shall not be nullptr.", waiter_idx, type_)); + event_waiter_[waiter_idx][type_](this, context); + } + + void InitEvent(std::shared_ptr event) { event_ = event; } + + std::shared_ptr GetEvent() const { return event_; } + + private: + std::shared_ptr event_; + int type_; + DeviceOption device_option_; + + static EventCreateFunction event_creator_[MaxDeviceTypes]; + static EventRecordFunction event_recorder_[MaxDeviceTypes]; + static EventQueryFunction event_querier_[MaxDeviceTypes]; + static EventFinishFunction event_finisher_[MaxDeviceTypes]; + static EventWaitFunction event_waiter_[MaxDeviceTypes][MaxDeviceTypes]; + + template + friend struct EventCreateFunctionRegisterer; + + template + friend struct EventRecordFunctionRegisterer; + + template + friend struct EventQueryFunctionRegisterer; + + template + friend struct EventFinishFunctionRegisterer; + + template + friend struct EventWaitFunctionRegisterer; +}; + +/** + * check if MACRO is used in GLOBAL NAMESPACE. + */ +#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ + struct __test_global_namespace_##uniq_name##__ {}; \ + static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \ + __test_global_namespace_##uniq_name##__>::value, \ + msg) + +// =============== Register for Create =============== +template +struct EventCreateFunctionRegisterer : public framework::Registrar { + explicit EventCreateFunctionRegisterer(EventCreateFunction func) { + auto type_idx = DeviceTypeToId(device_type); + VLOG(3) << "register event_creator with type_id :" << type_idx; + DeviceEvent::event_creator_[type_idx] = func; + } +}; + +#define REGISTER_EVENT_CREATE_FUNCTION(device_type, func) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_event_creator__##device_type, \ + "REGISTER_EVENT_CREATE_FUNCTION must be called in global namespace"); \ + static ::paddle::platform::EventCreateFunctionRegisterer \ + __reg_event_create_##device_type##__(func); \ + int TouchDeviceEventCreate##device_type() { \ + __reg_event_create_##device_type##__.Touch(); \ + return 0; \ + } + +// =============== Register for Record =============== +template +struct EventRecordFunctionRegisterer : public framework::Registrar { + explicit EventRecordFunctionRegisterer(EventRecordFunction func) { + auto type_idx = DeviceTypeToId(device_type); + VLOG(3) << "register event_recorder with type_id :" << type_idx; + DeviceEvent::event_recorder_[type_idx] = func; + } +}; + +#define REGISTER_EVENT_RECORD_FUNCTION(device_type, func) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_event_recorder__##device_type, \ + "REGISTER_EVENT_RECORD_FUNCTION must be called in global namespace"); \ + static ::paddle::platform::EventRecordFunctionRegisterer \ + __reg_event_record_##device_type##__(func); \ + int TouchDeviceEventRecord##device_type() { \ + __reg_event_record_##device_type##__.Touch(); \ + return 0; \ + } + +// =============== Register for Query =============== +template +struct EventQueryFunctionRegisterer : public framework::Registrar { + explicit EventQueryFunctionRegisterer(EventQueryFunction func) { + auto type_idx = DeviceTypeToId(device_type); + VLOG(3) << "register event_querier with type_id :" << type_idx; + DeviceEvent::event_querier_[type_idx] = func; + } +}; + +#define REGISTER_EVENT_QUERY_FUNCTION(device_type, func) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_event_querier__##device_type, \ + "REGISTER_EVENT_QUERY_FUNCTION must be called in global namespace"); \ + static ::paddle::platform::EventQueryFunctionRegisterer \ + __reg_event_query_##device_type##__(func); \ + int TouchDeviceEventQuery##device_type() { \ + __reg_event_query_##device_type##__.Touch(); \ + return 0; \ + } + +// =============== Register for Finish =============== +template +struct EventFinishFunctionRegisterer : public framework::Registrar { + explicit EventFinishFunctionRegisterer(EventFinishFunction func) { + auto type_idx = DeviceTypeToId(device_type); + VLOG(3) << "register event_finisher with type_id :" << type_idx; + DeviceEvent::event_finisher_[type_idx] = func; + } +}; + +#define REGISTER_EVENT_FINISH_FUNCTION(device_type, func) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_event_finishier__##device_type, \ + "REGISTER_EVENT_FINISH_FUNCTION must be called in global namespace"); \ + static ::paddle::platform::EventFinishFunctionRegisterer \ + __reg_event_finish_##device_type##__(func); \ + int TouchDeviceEventFinish##device_type() { \ + __reg_event_finish_##device_type##__.Touch(); \ + return 0; \ + } + +// =============== Register for Wait =============== +template +struct EventWaitFunctionRegisterer : public framework::Registrar { + explicit EventWaitFunctionRegisterer(EventWaitFunction func) { + auto waiter_idx = DeviceTypeToId(waiter_type); + auto event_idx = DeviceTypeToId(event_type); + VLOG(3) << "register event_finisher with waiter_idx : " << waiter_idx + << ", event_idx : " << event_idx; + DeviceEvent::event_waiter_[waiter_idx][event_idx] = func; + } +}; + +#define REGISTER_EVENT_WAIT_FUNCTION(waiter_type, event_type, func) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_event_waiter__##waiter_type##event_type, \ + "REGISTER_EVENT_WAIT_FUNCTION must be called in global namespace"); \ + static ::paddle::platform::EventWaitFunctionRegisterer \ + __reg_event_wait_##waiter_type##event_type##__(func); \ + int TouchDeviceEventWait##waiter_type##event_type() { \ + __reg_event_wait_##waiter_type##event_type##__.Touch(); \ + return 0; \ + } + +#define USE_EVENT(device_type) \ + extern int TouchDeviceEventCreate##device_type(); \ + extern int TouchDeviceEventRecord##device_type(); \ + extern int TouchDeviceEventQuery##device_type(); \ + extern int TouchDeviceEventFinish##device_type(); \ + UNUSED static int use_event_creator_##device_type = \ + TouchDeviceEventCreate##device_type(); \ + UNUSED static int use_event_recorder_##device_type = \ + TouchDeviceEventRecord##device_type(); \ + UNUSED static int use_event_querier_##device_type = \ + TouchDeviceEventQuery##device_type(); \ + UNUSED static int use_event_finisher_##device_type = \ + TouchDeviceEventFinish##device_type(); + +#define USE_EVENT_WAIT(waiter_type, event_type) \ + extern int TouchDeviceEventWait##waiter_type##event_type(); \ + UNUSED static int use_event_waiter_##waiter_type##event_type = \ + TouchDeviceEventWait##waiter_type##event_type(); + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device_event_gpu.cc b/paddle/fluid/platform/device_event_gpu.cc new file mode 100644 index 00000000000..86bcfdad5ba --- /dev/null +++ b/paddle/fluid/platform/device_event_gpu.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/device_event.h" +#include "paddle/fluid/platform/event.h" + +#ifdef PADDLE_WITH_CUDA +namespace paddle { +namespace platform { +struct CUDADeviceEventWrapper { + explicit CUDADeviceEventWrapper(const DeviceOption& dev_opt) + : inner_event_() { + PADDLE_ENFORCE_EQ( + dev_opt.device_type(), static_cast(DeviceType::CUDA), + platform::errors::PreconditionNotMet( + "Required device type shall be CUDA, but received %d. ", + dev_opt.device_type())); + PADDLE_ENFORCE_GT( + dev_opt.device_id(), -1, + platform::errors::PreconditionNotMet( + "Required DeviceOption.device_id > -1, but received %d. ", + dev_opt.device_id())); + device_id_ = dev_opt.device_id(); + } + + CudaEvent inner_event_; + int device_id_; +}; + +void DeviceEventCreateCUDA(DeviceEvent* event, const DeviceOption& dev_opt) { + event->InitEvent(std::make_shared(dev_opt)); +} + +void DeviceEventRecordCUDA(DeviceEvent* event, const platform::Place& place, + const DeviceContext* context) { + auto* wrapper = static_cast(event->GetEvent().get()); + + auto* cuda_dev_ctx = + dynamic_cast(context); + PADDLE_ENFORCE_NOT_NULL( + cuda_dev_ctx, + platform::errors::PreconditionNotMet( + "Failed to dynamic_cast context into CUDADeviceContext.")); + + wrapper->inner_event_.Record(*cuda_dev_ctx->context()->Stream()); +} + +bool DeviceEventQueryCUDA(const DeviceEvent* event) { + auto* wrapper = static_cast(event->GetEvent().get()); + PADDLE_ENFORCE_NOT_NULL( + wrapper, + platform::errors::PreconditionNotMet( + "Failed to dynamic_cast event into CUDADeviceEventWrapper.")); + + return wrapper->inner_event_.Query(); +} + +void DeviceEventFinishCUDA(const DeviceEvent* event) { + auto* wrapper = static_cast(event->GetEvent().get()); + // calling cudaEventSynchronize + wrapper->inner_event_.Synchronize(); +} + +void DeviceEventCUDAWaitCUDA(const DeviceEvent* event, DeviceContext* context) { + auto* wrapper = static_cast(event->GetEvent().get()); + auto* cuda_dev_ctx = + dynamic_cast(context); + PADDLE_ENFORCE_NOT_NULL( + cuda_dev_ctx, + platform::errors::PreconditionNotMet( + "Failed to dynamic_cast context into CUDADeviceContext.")); + // calling cudaStreamWaitEvent(stream, event, 0) + cuda_dev_ctx->context()->Stream()->WaitEvent( + wrapper->inner_event_.GetRawCudaEvent()); +} + +void DeviceEventCPUWaitCUDA(const DeviceEvent* event, DeviceContext* context) { + DeviceEventFinishCUDA(event); +} + +} // namespace platform +} // namespace paddle + +using ::paddle::platform::kCUDA; +using ::paddle::platform::kCPU; +REGISTER_EVENT_CREATE_FUNCTION(kCUDA, paddle::platform::DeviceEventCreateCUDA) +REGISTER_EVENT_RECORD_FUNCTION(kCUDA, paddle::platform::DeviceEventRecordCUDA) +REGISTER_EVENT_QUERY_FUNCTION(kCUDA, paddle::platform::DeviceEventQueryCUDA) +REGISTER_EVENT_FINISH_FUNCTION(kCUDA, paddle::platform::DeviceEventFinishCUDA) +REGISTER_EVENT_WAIT_FUNCTION(kCUDA, kCUDA, + paddle::platform::DeviceEventCUDAWaitCUDA) +REGISTER_EVENT_WAIT_FUNCTION(kCPU, kCUDA, + paddle::platform::DeviceEventCPUWaitCUDA) +#endif diff --git a/paddle/fluid/platform/device_event_test.cc b/paddle/fluid/platform/device_event_test.cc new file mode 100644 index 00000000000..04288599c40 --- /dev/null +++ b/paddle/fluid/platform/device_event_test.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/platform/device_event.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#ifdef PADDLE_WITH_CUDA +#include +using ::paddle::platform::kCUDA; +using ::paddle::platform::kCPU; +USE_EVENT(kCUDA); +USE_EVENT_WAIT(kCUDA, kCUDA) +USE_EVENT_WAIT(kCPU, kCUDA) + +TEST(DeviceEvent, CUDA) { + VLOG(1) << "In Test"; + using paddle::platform::CUDAPlace; + using paddle::platform::DeviceOption; + using paddle::platform::DeviceEvent; + using paddle::platform::DeviceContextPool; + using paddle::platform::DeviceType; + + auto& pool = DeviceContextPool::Instance(); + auto place = CUDAPlace(0); + auto* context = + static_cast(pool.Get(place)); + int device_type = static_cast(DeviceType::CUDA); + DeviceOption dev_opt(device_type, place.device); + + ASSERT_NE(context, nullptr); + // case 1. test for event_creator + DeviceEvent event(dev_opt); + ASSERT_NE(event.GetEvent().get(), nullptr); + // case 2. test for event_recorder + event.Record(place, context); + bool status = event.Query(); + ASSERT_EQ(status, false); + // case 3. test for event_finisher + event.Finish(); + status = event.Query(); + ASSERT_EQ(status, true); + + // case 4. test for event_waiter + float *src_fp32, *dst_fp32; + int size = 1000000 * sizeof(float); + cudaMallocHost(reinterpret_cast(&src_fp32), size); + cudaMalloc(reinterpret_cast(&dst_fp32), size); + cudaMemcpyAsync(dst_fp32, src_fp32, size, cudaMemcpyHostToDevice, + context->stream()); + event.Record(place, context); // step 1. record it + status = event.Query(); + ASSERT_EQ(status, false); + + event.Wait(kCUDA, context); // step 2. add streamWaitEvent + status = event.Query(); + ASSERT_EQ(status, false); // async + + event.Wait(kCPU, context); // step 3. EventSynchornize + status = event.Query(); + ASSERT_EQ(status, true); // sync + + // release resource + cudaFree(dst_fp32); + cudaFreeHost(src_fp32); +} +#endif diff --git a/paddle/fluid/pybind/cuda_streams_py.cc b/paddle/fluid/pybind/cuda_streams_py.cc index 21c6e0a4f28..5ea0a2553f7 100644 --- a/paddle/fluid/pybind/cuda_streams_py.cc +++ b/paddle/fluid/pybind/cuda_streams_py.cc @@ -105,7 +105,7 @@ void BindCudaStream(py::module *m_ptr) { .def("wait_stream", [](paddle::platform::stream::CUDAStream &self, paddle::platform::stream::CUDAStream &stream) { - auto event = paddle::platform::CudaEvent(); + paddle::platform::CudaEvent event; event.Record(stream); self.WaitEvent(event.GetRawCudaEvent()); -- GitLab