未验证 提交 cc343a41 编写于 作者: z8hanghuan's avatar z8hanghuan 提交者: GitHub

add xpu buffer_reader, *test=kunlun (#42578)

* add xpu buffer_reader, *test=kunlun

* xpu buffer_reader, use XPUDeviceGuard, *test=kunlun

* modify xpu.cmake, *test=kunlun

* modify xpu.cmake, *test=kunlun

* modify xpu.cmake, *test=kunlun

* add xpu buffer_reader, *test=kunlun

* add xpu buffer reader, *test=kunlun

* add xpu buffer reader, *test=kunlun
上级 e906eb5b
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -14,6 +14,7 @@
#include "paddle/fluid/operators/reader/buffered_reader.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
......@@ -85,10 +86,27 @@ BufferedReader::BufferedReader(
stream_ = platform::MluStreamResourcePool::Instance().New(dev_idx);
}
#endif
#ifdef PADDLE_WITH_XPU
  if (platform::is_xpu_place(place_)) {
    int dev_idx = place_.device;
    // Borrow the compute stream from the global XPUDeviceContext so the
    // reader can order its copies against kernels already enqueued there.
    // NOTE(review): C-style cast — prefer static_cast<platform::XPUDeviceContext *>.
    compute_stream_ =
        ((platform::XPUDeviceContext *)(platform::DeviceContextPool::Instance()
                                            .Get(place_)))
            ->stream();
    // One event per buffer slot: each slot records on compute_stream_ and is
    // waited on by the copy stream (see ReadAsync).
    events_.resize(buffer_size);
    for (auto &event : events_) {
      event = platform::XpuEventResourcePool::Instance().New(dev_idx);
    }
    // Dedicated copy stream for this reader, leased from the per-device pool.
    stream_ = platform::XpuStreamResourcePool::Instance().New(dev_idx);
  }
#endif
cpu_buffer_.resize(buffer_size);
cuda_buffer_.resize(buffer_size);
npu_buffer_.resize(buffer_size);
mlu_buffer_.resize(buffer_size);
xpu_buffer_.resize(buffer_size);
ReadTillBufferFullAsync();
}
......@@ -322,6 +340,57 @@ void BufferedReader::ReadAsync(size_t i) {
platform::MLUStreamSync(stream_.get());
}
#endif
#ifdef PADDLE_WITH_XPU
    // XPU path: stage the CPU tensors of buffer slot `i` into xpu_buffer_[i].
    if (platform::is_xpu_place(place_)) {
      TensorVec &xpu = xpu_buffer_[i];
      if (xpu.empty()) {
        // First use of this slot: allocate one device tensor per CPU tensor.
        xpu.resize(cpu.size());
      } else {
        PADDLE_ENFORCE_EQ(
            xpu.size(), cpu.size(),
            platform::errors::InvalidArgument(
                "Input tensor number on XPU and CPU devices are not matched. "
                "The number on XPU is %d, on CPU is %d",
                xpu.size(), cpu.size()));
      }
      // Pre-allocate device memory for every tensor before any copy starts.
      std::vector<void *> xpu_ptrs;
      xpu_ptrs.reserve(cpu.size());
      // NOTE(review): this loop's `i` shadows the outer buffer-slot index `i`;
      // after the loop, `i` refers to the slot again (events_[i] below).
      for (size_t i = 0; i < cpu.size(); ++i) {
        xpu[i].Resize(cpu[i].dims());
        xpu[i].set_layout(cpu[i].layout());
        xpu_ptrs.emplace_back(xpu[i].mutable_data(place_, cpu[i].type()));
      }
      // NOTE(review): `gurad` is a typo for `guard` (local variable only).
      platform::XPUDeviceGuard gurad(place_.device);
      // Record an event on the compute stream and make the copy stream wait on
      // it, so copies into slot `i` start only after prior compute finished.
      int r = xpu_event_record(events_[i].get(), compute_stream_);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record");
      r = xpu_stream_wait_event(stream_.get(), events_[i].get());
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event");

      platform::RecordEvent record_event("BufferedReader:MemoryCopy",
                                         platform::TracerEventType::UserDefined,
                                         1);
      // Again shadows the slot index `i` with the per-tensor index.
      for (size_t i = 0; i < cpu.size(); ++i) {
        auto cpu_place = cpu[i].place();
        auto cpu_ptr = cpu[i].data();
        auto xpu_ptr = xpu_ptrs[i];
        auto size =
            cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype());
        // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe
        // KL3
        if ((platform::is_xpu_place(cpu_place))) {
          // Device-to-device copy: sync the copy stream right after, since the
          // copy is effectively synchronous on current hardware (see TODO).
          memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
          platform::XPUStreamSync(stream_.get());
        } else {
          memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
        }
        xpu[i].set_lod(cpu[i].lod());
      }
      // Make sure all copies for this slot are visible before the slot is
      // handed to the consumer.
      platform::XPUStreamSync(stream_.get());
    }
#endif
return i;
}));
}
......@@ -359,6 +428,8 @@ void BufferedReader::ReadNextImpl(std::vector<framework::LoDTensor> *out) {
*out = std::move(npu_buffer_[i]);
} else if (platform::is_mlu_place(place_)) {
*out = std::move(mlu_buffer_[i]);
} else if (platform::is_xpu_place(place_)) {
*out = std::move(xpu_buffer_[i]);
} else {
*out = std::move(cpu_buffer_[i]);
}
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -33,6 +33,10 @@
#include "paddle/fluid/platform/device/mlu/mlu_info.h"
#include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h"
#endif
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
#endif
namespace paddle {
namespace operators {
......@@ -76,6 +80,7 @@ class BufferedReader : public framework::DecoratedReader {
std::vector<TensorVec> cuda_buffer_;
std::vector<TensorVec> npu_buffer_;
std::vector<TensorVec> mlu_buffer_;
std::vector<TensorVec> xpu_buffer_;
size_t prev_pos_{-1UL};
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
gpuStream_t compute_stream_;
......@@ -94,6 +99,12 @@ class BufferedReader : public framework::DecoratedReader {
std::shared_ptr<platform::MluStreamObject> stream_;
std::vector<std::shared_ptr<platform::MluEventObject>> events_;
#endif
#ifdef PADDLE_WITH_XPU
  // Compute stream borrowed from the device context (not owned; do not free).
  xpuStream compute_stream_;
  // Copy stream leased from XpuStreamResourcePool; shared_ptr returns it.
  std::shared_ptr<platform::XpuStreamObject> stream_;
  // One event per buffer slot, used to order copies after compute work.
  std::vector<std::shared_ptr<platform::XpuEventObject>> events_;
#endif
};
} // namespace reader
......
......@@ -125,7 +125,7 @@ cc_library(device_context SRCS device_context.cc DEPS simple_threadpool malloc x
place phi_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} eigen3 cpu_context generator)
if(WITH_XPU)
target_link_libraries(device_context xpu_context)
target_link_libraries(device_context xpu_context xpu_resource_pool)
endif()
cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
......
......@@ -7,5 +7,6 @@ set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl)
cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place phi_xpu_info)
cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context op_kernel_type)
cc_library(xpu_resource_pool SRCS xpu_resource_pool.cc DEPS xpu_info)
add_subdirectory(tests)
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -79,6 +79,10 @@ void MemcpySyncD2D(void* dst, const platform::XPUPlace& dst_place,
*dev_ctx);
}
//! Blocks the calling thread until every operation queued on `stream` has
//! completed, raising on any XPU runtime failure.
void XPUStreamSync(xpuStream stream) {
  int ret = xpu_wait(stream);
  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "xpu_wait");
}
/**************************** Others **************************/
phi::backends::xpu::XPUVersion get_xpu_version(int dev_id) {
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -14,8 +14,13 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/backends/xpu/xpu_info.h"
#include "xpu/runtime.h"
namespace paddle {
using xpuStream = XPUStream;
using xpuEventHandle = XPUEvent;
namespace platform {
/***** Version Management *****/
......@@ -51,6 +56,9 @@ void MemcpySyncD2D(void *dst, const platform::XPUPlace &dst_place,
const void *src, const platform::XPUPlace &src_place,
size_t count);
//! Blocks until stream has completed all operations.
void XPUStreamSync(xpuStream stream);
using XPUDeviceGuard = phi::backends::xpu::XPUDeviceGuard;
phi::backends::xpu::XPUVersion get_xpu_version(int dev_id);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#if defined(PADDLE_WITH_XPU)
#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
namespace paddle {
namespace platform {
// Builds one lazily-filled stream pool per visible XPU device.  Streams are
// created on first lease and destroyed when the pool itself is destroyed.
XpuStreamResourcePool::XpuStreamResourcePool() {
  int dev_cnt = platform::GetXPUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::XPUDeviceGuard guard(dev_idx);
      xpuStream stream;
      // Check the runtime's return code: silently ignoring a failed
      // xpu_stream_create would hand out an uninitialized stream handle.
      int ret = xpu_stream_create(&stream);
      PADDLE_ENFORCE_EQ(ret, 0,
                        platform::errors::External(
                            "xpu_stream_create failed on device %d, return %d.",
                            dev_idx, ret));
      return stream;
    };
    auto deleter = [dev_idx](xpuStream stream) {
      platform::XPUDeviceGuard guard(dev_idx);
      // Intentionally not enforced: deleters may run during static teardown,
      // where throwing would terminate the process.
      xpu_stream_destroy(stream);
    };
    pool_.emplace_back(ResourcePool<XpuStreamObject>::Create(creator, deleter));
  }
}
// Process-wide singleton accessor (thread-safe magic-static initialization).
XpuStreamResourcePool& XpuStreamResourcePool::Instance() {
  static XpuStreamResourcePool instance;
  return instance;
}
// Leases a stream for device `dev_idx`; the returned shared_ptr gives the
// stream back to the per-device pool when released.
std::shared_ptr<XpuStreamObject> XpuStreamResourcePool::New(int dev_idx) {
  // Bounds-check the device index before indexing into pool_.
  PADDLE_ENFORCE_GE(
      dev_idx, 0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx, pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(), dev_idx));
  auto& dev_pool = pool_[dev_idx];
  return dev_pool->New();
}
// Builds one lazily-filled event pool per visible XPU device, mirroring
// XpuStreamResourcePool.
XpuEventResourcePool::XpuEventResourcePool() {
  int dev_cnt = platform::GetXPUDeviceCount();
  pool_.reserve(dev_cnt);
  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
    auto creator = [dev_idx] {
      platform::XPUDeviceGuard guard(dev_idx);
      xpuEventHandle event;
      // Check the runtime's return code: an ignored xpu_event_create failure
      // would hand out an uninitialized event handle.
      int ret = xpu_event_create(&event);
      PADDLE_ENFORCE_EQ(ret, 0,
                        platform::errors::External(
                            "xpu_event_create failed on device %d, return %d.",
                            dev_idx, ret));
      return event;
    };
    auto deleter = [dev_idx](xpuEventHandle event) {
      platform::XPUDeviceGuard guard(dev_idx);
      // Intentionally not enforced: deleters may run during static teardown,
      // where throwing would terminate the process.
      xpu_event_destroy(event);
    };
    pool_.emplace_back(ResourcePool<XpuEventObject>::Create(creator, deleter));
  }
}
// Process-wide singleton accessor (thread-safe magic-static initialization).
XpuEventResourcePool& XpuEventResourcePool::Instance() {
  static XpuEventResourcePool instance;
  return instance;
}
// Leases an event for device `dev_idx`; the returned shared_ptr gives the
// event back to the per-device pool when released.
std::shared_ptr<XpuEventObject> XpuEventResourcePool::New(int dev_idx) {
  // Bounds-check the device index before indexing into pool_.
  PADDLE_ENFORCE_GE(
      dev_idx, 0,
      platform::errors::InvalidArgument(
          "The dev_idx should be not less than 0, but got %d.", dev_idx));
  PADDLE_ENFORCE_LT(
      dev_idx, pool_.size(),
      platform::errors::OutOfRange(
          "The dev_idx should be less than device count %d, but got %d.",
          pool_.size(), dev_idx));
  auto& dev_pool = pool_[dev_idx];
  return dev_pool->New();
}
} // namespace platform
} // namespace paddle
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#if defined(PADDLE_WITH_XPU)
#include <memory>
#include <type_traits>
#include <vector>
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/resource_pool.h"
namespace paddle {
namespace platform {
using XpuStreamObject = std::remove_pointer<xpuStream>::type;
using XpuEventObject = std::remove_pointer<xpuEventHandle>::type;
// Per-device cache of raw XPU streams.  Streams are created lazily on first
// lease and recycled through ResourcePool, so callers share a bounded set of
// streams per device instead of creating and destroying one per use.
class XpuStreamResourcePool {
 public:
  // Leases a stream for device `dev_idx` (0 <= dev_idx < device count).
  // The returned shared_ptr hands the stream back to the pool on release.
  std::shared_ptr<XpuStreamObject> New(int dev_idx);

  // Process-wide singleton accessor.
  static XpuStreamResourcePool &Instance();

 private:
  XpuStreamResourcePool();
  DISABLE_COPY_AND_ASSIGN(XpuStreamResourcePool);

 private:
  // One ResourcePool per visible XPU device, indexed by device id.
  std::vector<std::shared_ptr<ResourcePool<XpuStreamObject>>> pool_;
};
// Per-device cache of raw XPU events, mirroring XpuStreamResourcePool:
// events are created lazily and recycled through ResourcePool.
class XpuEventResourcePool {
 public:
  // Leases an event for device `dev_idx` (0 <= dev_idx < device count).
  // The returned shared_ptr hands the event back to the pool on release.
  std::shared_ptr<XpuEventObject> New(int dev_idx);

  // Process-wide singleton accessor.
  static XpuEventResourcePool &Instance();

 private:
  XpuEventResourcePool();
  DISABLE_COPY_AND_ASSIGN(XpuEventResourcePool);

 private:
  // One ResourcePool per visible XPU device, indexed by device id.
  std::vector<std::shared_ptr<ResourcePool<XpuEventObject>>> pool_;
};
} // namespace platform
} // namespace paddle
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
......@@ -188,6 +188,7 @@ class XPUDeviceContext : public phi::XPUContext {
explicit XPUDeviceContext(XPUPlace place);
virtual ~XPUDeviceContext();
Eigen::DefaultDevice* eigen_device() const { return nullptr; }
  // Exposes the raw XPU stream of the underlying phi::XPUContext (non-owning;
  // BufferedReader uses it as the compute stream to synchronize copies against).
  xpuStream stream() const { return XPUContext::x_context()->xpu_stream; }
};
template <>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册