Unverified commit cc343a41, authored by z8hanghuan, committed by GitHub

add xpu buffer_reader, *test=kunlun (#42578)

* add xpu buffer_reader, *test=kunlun

* xpu buffer_reader, use XPUDeviceGuard, *test=kunlun

* modify xpu.cmake, *test=kunlun

* modify xpu.cmake, *test=kunlun

* modify xpu.cmake, *test=kunlun

* add xpu buffer_reader, *test=kunlun

* add xpu buffer reader, *test=kunlun

* add xpu buffer reader, *test=kunlun
Parent e906eb5b
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
 #include "paddle/fluid/operators/reader/buffered_reader.h"
 #include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
@@ -85,10 +86,27 @@ BufferedReader::BufferedReader(
     stream_ = platform::MluStreamResourcePool::Instance().New(dev_idx);
   }
 #endif
+#ifdef PADDLE_WITH_XPU
+  if (platform::is_xpu_place(place_)) {
+    int dev_idx = place_.device;
+    compute_stream_ =
+        ((platform::XPUDeviceContext *)(platform::DeviceContextPool::Instance()
+                                            .Get(place_)))
+            ->stream();
+    events_.resize(buffer_size);
+    for (auto &event : events_) {
+      event = platform::XpuEventResourcePool::Instance().New(dev_idx);
+    }
+    stream_ = platform::XpuStreamResourcePool::Instance().New(dev_idx);
+  }
+#endif
   cpu_buffer_.resize(buffer_size);
   cuda_buffer_.resize(buffer_size);
   npu_buffer_.resize(buffer_size);
   mlu_buffer_.resize(buffer_size);
+  xpu_buffer_.resize(buffer_size);
   ReadTillBufferFullAsync();
 }
@@ -322,6 +340,57 @@ void BufferedReader::ReadAsync(size_t i) {
       platform::MLUStreamSync(stream_.get());
     }
 #endif
+#ifdef PADDLE_WITH_XPU
+    if (platform::is_xpu_place(place_)) {
+      TensorVec &xpu = xpu_buffer_[i];
+      if (xpu.empty()) {
+        xpu.resize(cpu.size());
+      } else {
+        PADDLE_ENFORCE_EQ(
+            xpu.size(), cpu.size(),
+            platform::errors::InvalidArgument(
+                "The number of input tensors on XPU and CPU devices does not "
+                "match. The number on XPU is %d, on CPU is %d",
+                xpu.size(), cpu.size()));
+      }
+      std::vector<void *> xpu_ptrs;
+      xpu_ptrs.reserve(cpu.size());
+      for (size_t i = 0; i < cpu.size(); ++i) {
+        xpu[i].Resize(cpu[i].dims());
+        xpu[i].set_layout(cpu[i].layout());
+        xpu_ptrs.emplace_back(xpu[i].mutable_data(place_, cpu[i].type()));
+      }
+      platform::XPUDeviceGuard guard(place_.device);
+      int r = xpu_event_record(events_[i].get(), compute_stream_);
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record");
+      r = xpu_stream_wait_event(stream_.get(), events_[i].get());
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event");
+      platform::RecordEvent record_event("BufferedReader:MemoryCopy",
+                                         platform::TracerEventType::UserDefined,
+                                         1);
+      for (size_t i = 0; i < cpu.size(); ++i) {
+        auto cpu_place = cpu[i].place();
+        auto cpu_ptr = cpu[i].data();
+        auto xpu_ptr = xpu_ptrs[i];
+        auto size =
+            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
+        // TODO(zhanghuan): current hardware does not support
+        // xpu_memcpy_async (KL3 may); fall back to synchronous copies.
+        if (platform::is_xpu_place(cpu_place)) {
+          memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
+          platform::XPUStreamSync(stream_.get());
+        } else {
+          memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
+        }
+        xpu[i].set_lod(cpu[i].lod());
+      }
+      platform::XPUStreamSync(stream_.get());
+    }
+#endif
     return i;
   }));
 }
@@ -359,6 +428,8 @@ void BufferedReader::ReadNextImpl(std::vector<framework::LoDTensor> *out) {
     *out = std::move(npu_buffer_[i]);
   } else if (platform::is_mlu_place(place_)) {
     *out = std::move(mlu_buffer_[i]);
+  } else if (platform::is_xpu_place(place_)) {
+    *out = std::move(xpu_buffer_[i]);
   } else {
     *out = std::move(cpu_buffer_[i]);
   }
...
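Note on the synchronization above: the reader records an event on the compute stream, makes the copy stream wait on it, and only then copies, so the H2D transfer can never overtake in-flight compute. A condensed sketch of that handshake (illustrative only; compute_stream, copy_stream, event, and the buffer pointers are assumed to exist, and a single memory::Copy stands in for the loop body above):

    int r = xpu_event_record(event, compute_stream);   // mark current compute progress
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record");
    r = xpu_stream_wait_event(copy_stream, event);     // copy stream waits for that mark
    PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event");
    memory::Copy(xpu_place, xpu_ptr, cpu_place, cpu_ptr, size);  // synchronous H2D copy
    platform::XPUStreamSync(copy_stream);              // drain before handing the buffer out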
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -33,6 +33,10 @@
 #include "paddle/fluid/platform/device/mlu/mlu_info.h"
 #include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h"
 #endif
+#ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/platform/device/xpu/xpu_info.h"
+#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
+#endif
 namespace paddle {
 namespace operators {
@@ -76,6 +80,7 @@ class BufferedReader : public framework::DecoratedReader {
   std::vector<TensorVec> cuda_buffer_;
   std::vector<TensorVec> npu_buffer_;
   std::vector<TensorVec> mlu_buffer_;
+  std::vector<TensorVec> xpu_buffer_;
   size_t prev_pos_{-1UL};
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   gpuStream_t compute_stream_;
@@ -94,6 +99,12 @@ class BufferedReader : public framework::DecoratedReader {
   std::shared_ptr<platform::MluStreamObject> stream_;
   std::vector<std::shared_ptr<platform::MluEventObject>> events_;
 #endif
+#ifdef PADDLE_WITH_XPU
+  xpuStream compute_stream_;
+  std::shared_ptr<platform::XpuStreamObject> stream_;
+  std::vector<std::shared_ptr<platform::XpuEventObject>> events_;
+#endif
 };
 }  // namespace reader
...
@@ -125,7 +125,7 @@ cc_library(device_context SRCS device_context.cc DEPS simple_threadpool malloc x
   place phi_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
   ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} eigen3 cpu_context generator)
 if(WITH_XPU)
-  target_link_libraries(device_context xpu_context)
+  target_link_libraries(device_context xpu_context xpu_resource_pool)
 endif()
 cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
...
@@ -7,5 +7,6 @@ set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl)
 cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place phi_xpu_info)
 cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context op_kernel_type)
+cc_library(xpu_resource_pool SRCS xpu_resource_pool.cc DEPS xpu_info)
 add_subdirectory(tests)
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -79,6 +79,10 @@ void MemcpySyncD2D(void* dst, const platform::XPUPlace& dst_place,
                    *dev_ctx);
 }
+void XPUStreamSync(xpuStream stream) {
+  PADDLE_ENFORCE_XDNN_SUCCESS(xpu_wait(stream), "xpu_wait");
+}
 /**************************** Others **************************/
 phi::backends::xpu::XPUVersion get_xpu_version(int dev_id) {
...
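XPUStreamSync is a thin checked wrapper over the runtime's xpu_wait. A minimal, hypothetical call site (the stream handle is assumed to come from elsewhere, e.g. the resource pool added in this commit):

    // Hypothetical helper: block the host until everything queued on `stream`
    // has finished, raising a Paddle error if the wait fails.
    void WaitForReaderCopies(xpuStream stream) {
      platform::XPUStreamSync(stream);  // wraps xpu_wait(stream) with error enforcement
    }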
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -14,8 +14,13 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/backends/xpu/xpu_info.h"
+#include "xpu/runtime.h"
 namespace paddle {
+using xpuStream = XPUStream;
+using xpuEventHandle = XPUEvent;
 namespace platform {
 /***** Version Management *****/
@@ -51,6 +56,9 @@ void MemcpySyncD2D(void *dst, const platform::XPUPlace &dst_place,
                    const void *src, const platform::XPUPlace &src_place,
                    size_t count);
+//! Blocks until the stream has completed all operations.
+void XPUStreamSync(xpuStream stream);
 using XPUDeviceGuard = phi::backends::xpu::XPUDeviceGuard;
 phi::backends::xpu::XPUVersion get_xpu_version(int dev_id);
...
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(PADDLE_WITH_XPU)
#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
namespace paddle {
namespace platform {
XpuStreamResourcePool::XpuStreamResourcePool() {
  int dev_cnt = platform::GetXPUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::XPUDeviceGuard guard(dev_idx);
      xpuStream stream;
      xpu_stream_create(&stream);
      return stream;
    };
    auto deleter = [dev_idx](xpuStream stream) {
      platform::XPUDeviceGuard guard(dev_idx);
      xpu_stream_destroy(stream);
    };
    pool_.emplace_back(ResourcePool<XpuStreamObject>::Create(creator, deleter));
  }
}

XpuStreamResourcePool& XpuStreamResourcePool::Instance() {
  static XpuStreamResourcePool pool;
  return pool;
}

std::shared_ptr<XpuStreamObject> XpuStreamResourcePool::New(int dev_idx) {
  PADDLE_ENFORCE_GE(
      dev_idx, 0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx, pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(), dev_idx));
  return pool_[dev_idx]->New();
}

XpuEventResourcePool::XpuEventResourcePool() {
  int dev_cnt = platform::GetXPUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::XPUDeviceGuard guard(dev_idx);
      xpuEventHandle event;
      xpu_event_create(&event);
      return event;
    };
    auto deleter = [dev_idx](xpuEventHandle event) {
      platform::XPUDeviceGuard guard(dev_idx);
      xpu_event_destroy(event);
    };
    pool_.emplace_back(ResourcePool<XpuEventObject>::Create(creator, deleter));
  }
}

XpuEventResourcePool& XpuEventResourcePool::Instance() {
  static XpuEventResourcePool pool;
  return pool;
}

std::shared_ptr<XpuEventObject> XpuEventResourcePool::New(int dev_idx) {
  PADDLE_ENFORCE_GE(
      dev_idx, 0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx, pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(), dev_idx));
  return pool_[dev_idx]->New();
}

}  // namespace platform
}  // namespace paddle
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(PADDLE_WITH_XPU)
#include <memory>
#include <type_traits>
#include <vector>
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/resource_pool.h"
namespace paddle {
namespace platform {

using XpuStreamObject = std::remove_pointer<xpuStream>::type;
using XpuEventObject = std::remove_pointer<xpuEventHandle>::type;

class XpuStreamResourcePool {
 public:
  std::shared_ptr<XpuStreamObject> New(int dev_idx);

  static XpuStreamResourcePool &Instance();

 private:
  XpuStreamResourcePool();
  DISABLE_COPY_AND_ASSIGN(XpuStreamResourcePool);

 private:
  std::vector<std::shared_ptr<ResourcePool<XpuStreamObject>>> pool_;
};

class XpuEventResourcePool {
 public:
  std::shared_ptr<XpuEventObject> New(int dev_idx);

  static XpuEventResourcePool &Instance();

 private:
  XpuEventResourcePool();
  DISABLE_COPY_AND_ASSIGN(XpuEventResourcePool);

 private:
  std::vector<std::shared_ptr<ResourcePool<XpuEventObject>>> pool_;
};

}  // namespace platform
}  // namespace paddle
#endif
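Both pools are per-device singletons that hand out std::shared_ptr handles; when a handle is released, the stream or event goes back to the pool for reuse rather than being destroyed (mirroring the existing CUDA/MLU resource pools). A hedged usage sketch, with dev_idx as an illustrative device ordinal and compute_stream assumed to exist:

    int dev_idx = 0;  // illustrative device ordinal
    auto stream = platform::XpuStreamResourcePool::Instance().New(dev_idx);
    auto event = platform::XpuEventResourcePool::Instance().New(dev_idx);
    // Typical pairing, as in BufferedReader: record on the compute stream,
    // then make the pooled stream wait before using its results.
    xpu_event_record(event.get(), compute_stream);
    xpu_stream_wait_event(stream.get(), event.get());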
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -188,6 +188,7 @@ class XPUDeviceContext : public phi::XPUContext {
   explicit XPUDeviceContext(XPUPlace place);
   virtual ~XPUDeviceContext();
   Eigen::DefaultDevice* eigen_device() const { return nullptr; }
+  xpuStream stream() const { return XPUContext::x_context()->xpu_stream; }
 };
 template <>
...
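The new stream() accessor is what BufferedReader uses to reach the compute stream of the pooled device context. A condensed sketch of that lookup (place is assumed to be a valid XPUPlace):

    auto *dev_ctx = static_cast<platform::XPUDeviceContext *>(
        platform::DeviceContextPool::Instance().Get(place));
    xpuStream compute_stream = dev_ctx->stream();  // raw stream owned by the context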