From cc343a41e3062bdcc5a086dfe0fd019c8f7ac27c Mon Sep 17 00:00:00 2001 From: helen88 Date: Thu, 12 May 2022 15:31:49 +0800 Subject: [PATCH] add xpu buffer_reader, *test=kunlun (#42578) * add xpu buffer_reader, *test=kunlun * xpu buffer_reader, use XPUDeviceGuard, *test=kunlun * modify xpu.cmake, *test=kunlun * modify xpu.cmake, *test=kunlun * modify xpu.cmake, *test=kunlun * add xpu buffer_reader, *test=kunlun * add xpu buffer reader, *test=kunlun * add xpu buffer reader, *test=kunlun --- .../fluid/operators/reader/buffered_reader.cc | 73 +++++++++++++- .../fluid/operators/reader/buffered_reader.h | 13 ++- paddle/fluid/platform/CMakeLists.txt | 2 +- .../fluid/platform/device/xpu/CMakeLists.txt | 1 + paddle/fluid/platform/device/xpu/xpu_info.cc | 6 +- paddle/fluid/platform/device/xpu/xpu_info.h | 10 +- .../platform/device/xpu/xpu_resource_pool.cc | 98 +++++++++++++++++++ .../platform/device/xpu/xpu_resource_pool.h | 64 ++++++++++++ paddle/fluid/platform/device_context.h | 3 +- 9 files changed, 264 insertions(+), 6 deletions(-) create mode 100644 paddle/fluid/platform/device/xpu/xpu_resource_pool.cc create mode 100644 paddle/fluid/platform/device/xpu/xpu_resource_pool.h diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 4b6759ea16..db0f5758d2 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -14,6 +14,7 @@ #include "paddle/fluid/operators/reader/buffered_reader.h" #include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -85,10 +86,27 @@ BufferedReader::BufferedReader( stream_ = platform::MluStreamResourcePool::Instance().New(dev_idx); } #endif + +#ifdef PADDLE_WITH_XPU + if (platform::is_xpu_place(place_)) { + int dev_idx = place_.device; + compute_stream_ = + ((platform::XPUDeviceContext *)(platform::DeviceContextPool::Instance() + .Get(place_))) + ->stream(); + events_.resize(buffer_size); + for (auto &event : events_) { + event = platform::XpuEventResourcePool::Instance().New(dev_idx); + } + stream_ = platform::XpuStreamResourcePool::Instance().New(dev_idx); + } +#endif + cpu_buffer_.resize(buffer_size); cuda_buffer_.resize(buffer_size); npu_buffer_.resize(buffer_size); mlu_buffer_.resize(buffer_size); + xpu_buffer_.resize(buffer_size); ReadTillBufferFullAsync(); } @@ -322,6 +340,57 @@ void BufferedReader::ReadAsync(size_t i) { platform::MLUStreamSync(stream_.get()); } #endif + +#ifdef PADDLE_WITH_XPU + if (platform::is_xpu_place(place_)) { + TensorVec &xpu = xpu_buffer_[i]; + if (xpu.empty()) { + xpu.resize(cpu.size()); + } else { + PADDLE_ENFORCE_EQ( + xpu.size(), cpu.size(), + platform::errors::InvalidArgument( + "Input tensor number on XPU and CPU devices are not matched. 
" + "The number on XPU is %d, on CPU is %d", + xpu.size(), cpu.size())); + } + + std::vector xpu_ptrs; + xpu_ptrs.reserve(cpu.size()); + for (size_t i = 0; i < cpu.size(); ++i) { + xpu[i].Resize(cpu[i].dims()); + xpu[i].set_layout(cpu[i].layout()); + xpu_ptrs.emplace_back(xpu[i].mutable_data(place_, cpu[i].type())); + } + + platform::XPUDeviceGuard gurad(place_.device); + int r = xpu_event_record(events_[i].get(), compute_stream_); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record"); + r = xpu_stream_wait_event(stream_.get(), events_[i].get()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event"); + + platform::RecordEvent record_event("BufferedReader:MemoryCopy", + platform::TracerEventType::UserDefined, + 1); + for (size_t i = 0; i < cpu.size(); ++i) { + auto cpu_place = cpu[i].place(); + auto cpu_ptr = cpu[i].data(); + auto xpu_ptr = xpu_ptrs[i]; + auto size = + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); + // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe + // KL3 + if ((platform::is_xpu_place(cpu_place))) { + memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); + platform::XPUStreamSync(stream_.get()); + } else { + memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); + } + xpu[i].set_lod(cpu[i].lod()); + } + platform::XPUStreamSync(stream_.get()); + } +#endif return i; })); } @@ -359,6 +428,8 @@ void BufferedReader::ReadNextImpl(std::vector *out) { *out = std::move(npu_buffer_[i]); } else if (platform::is_mlu_place(place_)) { *out = std::move(mlu_buffer_[i]); + } else if (platform::is_xpu_place(place_)) { + *out = std::move(xpu_buffer_[i]); } else { *out = std::move(cpu_buffer_[i]); } diff --git a/paddle/fluid/operators/reader/buffered_reader.h b/paddle/fluid/operators/reader/buffered_reader.h index f0f3b6b7f9..52d3d8d699 100644 --- a/paddle/fluid/operators/reader/buffered_reader.h +++ b/paddle/fluid/operators/reader/buffered_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. 
All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -33,6 +33,10 @@ #include "paddle/fluid/platform/device/mlu/mlu_info.h" #include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h" #endif +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/device/xpu/xpu_info.h" +#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h" +#endif namespace paddle { namespace operators { @@ -76,6 +80,7 @@ class BufferedReader : public framework::DecoratedReader { std::vector cuda_buffer_; std::vector npu_buffer_; std::vector mlu_buffer_; + std::vector xpu_buffer_; size_t prev_pos_{-1UL}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) gpuStream_t compute_stream_; @@ -94,6 +99,12 @@ class BufferedReader : public framework::DecoratedReader { std::shared_ptr stream_; std::vector> events_; #endif + +#ifdef PADDLE_WITH_XPU + xpuStream compute_stream_; + std::shared_ptr stream_; + std::vector> events_; +#endif }; } // namespace reader diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index f29546c521..356b5ab2cd 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -125,7 +125,7 @@ cc_library(device_context SRCS device_context.cc DEPS simple_threadpool malloc x place phi_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS} ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} eigen3 cpu_context generator) if(WITH_XPU) - target_link_libraries(device_context xpu_context) + target_link_libraries(device_context xpu_context xpu_resource_pool) endif() cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce) diff --git 
a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt index b6a26f2554..3399fff087 100644 --- a/paddle/fluid/platform/device/xpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt @@ -7,5 +7,6 @@ set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl) cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place phi_xpu_info) cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context op_kernel_type) +cc_library(xpu_resource_pool SRCS xpu_resource_pool.cc DEPS xpu_info) add_subdirectory(tests) diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc index 2e960c1c0d..cdd7ee7f80 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.cc +++ b/paddle/fluid/platform/device/xpu/xpu_info.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -79,6 +79,10 @@ void MemcpySyncD2D(void* dst, const platform::XPUPlace& dst_place, *dev_ctx); } +void XPUStreamSync(xpuStream stream) { + PADDLE_ENFORCE_XDNN_SUCCESS(xpu_wait(stream), "xpu_wait"); +} + /**************************** Others **************************/ phi::backends::xpu::XPUVersion get_xpu_version(int dev_id) { diff --git a/paddle/fluid/platform/device/xpu/xpu_info.h b/paddle/fluid/platform/device/xpu/xpu_info.h index 33385f8e45..38b4defadc 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.h +++ b/paddle/fluid/platform/device/xpu/xpu_info.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -14,8 +14,13 @@ limitations under the License. */ #include #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/xpu/xpu_info.h" +#include "xpu/runtime.h" namespace paddle { + +using xpuStream = XPUStream; +using xpuEventHandle = XPUEvent; + namespace platform { /***** Version Management *****/ @@ -51,6 +56,9 @@ void MemcpySyncD2D(void *dst, const platform::XPUPlace &dst_place, const void *src, const platform::XPUPlace &src_place, size_t count); +//! Blocks until stream has completed all operations. +void XPUStreamSync(xpuStream stream); + using XPUDeviceGuard = phi::backends::xpu::XPUDeviceGuard; phi::backends::xpu::XPUVersion get_xpu_version(int dev_id); diff --git a/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc b/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc new file mode 100644 index 0000000000..af0d47c716 --- /dev/null +++ b/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc @@ -0,0 +1,98 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#if defined(PADDLE_WITH_XPU)
+#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
+
+namespace paddle {
+namespace platform {
+// Builds one stream ResourcePool per device reported by GetXPUDeviceCount().
+XpuStreamResourcePool::XpuStreamResourcePool() {
+  int dev_cnt = platform::GetXPUDeviceCount();
+  pool_.reserve(dev_cnt);
+  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
+    auto creator = [dev_idx] {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpuStream stream = nullptr;  // never hand back an indeterminate handle
+      xpu_stream_create(&stream);
+      return stream;
+    };
+    // Destruction runs under a guard for the device the stream was made on.
+    auto deleter = [dev_idx](xpuStream stream) {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpu_stream_destroy(stream);
+    };
+
+    pool_.emplace_back(ResourcePool<XpuStreamObject>::Create(creator, deleter));
+  }
+}
+// Returns the function-local static instance of the stream pool.
+XpuStreamResourcePool& XpuStreamResourcePool::Instance() {
+  static XpuStreamResourcePool pool;
+  return pool;
+}
+// Returns a stream from device dev_idx's pool; enforces 0 <= dev_idx < size.
+std::shared_ptr<XpuStreamObject> XpuStreamResourcePool::New(int dev_idx) {
+  PADDLE_ENFORCE_GE(
+      dev_idx, 0,
+      platform::errors::InvalidArgument(
+          "The dev_idx should be not less than 0, but got %d.", dev_idx));
+  PADDLE_ENFORCE_LT(
+      dev_idx, static_cast<int>(pool_.size()),
+      platform::errors::OutOfRange(
+          "The dev_idx should be less than device count %d, but got %d.",
+          static_cast<int>(pool_.size()), dev_idx));
+  return pool_[dev_idx]->New();
+}
+// Builds one event ResourcePool per device reported by GetXPUDeviceCount().
+XpuEventResourcePool::XpuEventResourcePool() {
+  int dev_cnt = platform::GetXPUDeviceCount();
+  pool_.reserve(dev_cnt);
+  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
+    auto creator = [dev_idx] {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpuEventHandle event = nullptr;  // never hand back an indeterminate handle
+      xpu_event_create(&event);
+      return event;
+    };
+    // Destruction runs under a guard for the device the event was made on.
+    auto deleter = [dev_idx](xpuEventHandle event) {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpu_event_destroy(event);
+    };
+
+    pool_.emplace_back(ResourcePool<XpuEventObject>::Create(creator, deleter));
+  }
+}
+// Returns the function-local static instance of the event pool.
+XpuEventResourcePool& XpuEventResourcePool::Instance() {
+  static XpuEventResourcePool pool;
+  return pool;
+}
+// Returns an event from device dev_idx's pool; enforces 0 <= dev_idx < size.
+std::shared_ptr<XpuEventObject> XpuEventResourcePool::New(int dev_idx) {
+  PADDLE_ENFORCE_GE(
+      dev_idx, 0,
+      platform::errors::InvalidArgument(
+          "The dev_idx should be not less than 0, but got %d.", dev_idx));
+  PADDLE_ENFORCE_LT(
+      dev_idx, static_cast<int>(pool_.size()),
+      platform::errors::OutOfRange(
+          "The dev_idx should be less than device count %d, but got %d.",
+          static_cast<int>(pool_.size()), dev_idx));
+  return pool_[dev_idx]->New();
+}
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/platform/device/xpu/xpu_resource_pool.h b/paddle/fluid/platform/device/xpu/xpu_resource_pool.h
new file mode 100644
index 0000000000..5c6ade8f6f
--- /dev/null
+++ b/paddle/fluid/platform/device/xpu/xpu_resource_pool.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#if defined(PADDLE_WITH_XPU)
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "paddle/fluid/platform/device/xpu/xpu_info.h"
+#include "paddle/fluid/platform/resource_pool.h"
+
+namespace paddle {
+namespace platform {
+
+using XpuStreamObject = std::remove_pointer<xpuStream>::type;
+using XpuEventObject = std::remove_pointer<xpuEventHandle>::type;
+
+// Per-device pools of XPU streams; reached through Instance() only.
+class XpuStreamResourcePool {
+ public:
+  // Returns a shared handle to a stream taken from device dev_idx's pool.
+  std::shared_ptr<XpuStreamObject> New(int dev_idx);
+
+  static XpuStreamResourcePool &Instance();
+
+ private:
+  XpuStreamResourcePool();
+  DISABLE_COPY_AND_ASSIGN(XpuStreamResourcePool);
+
+  std::vector<std::shared_ptr<ResourcePool<XpuStreamObject>>> pool_;
+};
+
+// Per-device pools of XPU events; reached through Instance() only.
+class XpuEventResourcePool {
+ public:
+  // Returns a shared handle to an event taken from device dev_idx's pool.
+  std::shared_ptr<XpuEventObject> New(int dev_idx);
+
+  static XpuEventResourcePool &Instance();
+
+ private:
+  XpuEventResourcePool();
+  DISABLE_COPY_AND_ASSIGN(XpuEventResourcePool);
+
+  std::vector<std::shared_ptr<ResourcePool<XpuEventObject>>> pool_;
+};
+
+}  // namespace platform
+}  // namespace paddle
+
+#endif
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 2c5f24d28c..2b53ecf86a 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -188,6 +188,7 @@ class XPUDeviceContext : public phi::XPUContext {
   explicit XPUDeviceContext(XPUPlace place);
   virtual ~XPUDeviceContext();
   Eigen::DefaultDevice* eigen_device() const { return nullptr; }
+  xpuStream stream() const { return XPUContext::x_context()->xpu_stream; }
 };
 
 template <>
-- 
GitLab