From b5a8a0d96b594ae16ae95b645aa38e3bbc78ec76 Mon Sep 17 00:00:00 2001 From: fwenguang <95677191+fwenguang@users.noreply.github.com> Date: Wed, 9 Mar 2022 11:22:21 +0800 Subject: [PATCH] [MLU] add mlu buffer reader (#40131) --- .../fluid/operators/reader/buffered_reader.cc | 68 +++++++++++++++++++ .../fluid/operators/reader/buffered_reader.h | 12 ++++ .../fluid/platform/stream_callback_manager.cc | 8 +-- 3 files changed, 84 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 21c23a7f602..4b6759ea165 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -70,9 +70,25 @@ BufferedReader::BufferedReader( stream_ = platform::NpuStreamResourcePool::Instance().New(dev_idx); } #endif + +#ifdef PADDLE_WITH_MLU + if (platform::is_mlu_place(place_)) { + int dev_idx = place_.device; + compute_stream_ = + ((platform::MLUDeviceContext *)(platform::DeviceContextPool::Instance() + .Get(place_))) + ->stream(); + events_.resize(buffer_size); + for (auto &event : events_) { + event = platform::MluEventResourcePool::Instance().New(dev_idx); + } + stream_ = platform::MluStreamResourcePool::Instance().New(dev_idx); + } +#endif cpu_buffer_.resize(buffer_size); cuda_buffer_.resize(buffer_size); npu_buffer_.resize(buffer_size); + mlu_buffer_.resize(buffer_size); ReadTillBufferFullAsync(); } @@ -256,6 +272,56 @@ void BufferedReader::ReadAsync(size_t i) { platform::NPUStreamSync(stream_.get()); } #endif + +#ifdef PADDLE_WITH_MLU + if (platform::is_mlu_place(place_)) { + TensorVec &mlu = mlu_buffer_[i]; + if (mlu.empty()) { + mlu.resize(cpu.size()); + } else { + PADDLE_ENFORCE_EQ( + mlu.size(), cpu.size(), + platform::errors::InvalidArgument( + "Input tensor number on MLU and CPU devices are not matched. " + "The number on MLU is %d, on CPU is %d", + mlu.size(), cpu.size())); + } + + std::vector mlu_ptrs; + mlu_ptrs.reserve(cpu.size()); + for (size_t i = 0; i < cpu.size(); ++i) { + mlu[i].Resize(cpu[i].dims()); + mlu[i].set_layout(cpu[i].layout()); + mlu_ptrs.emplace_back(mlu[i].mutable_data(place_, cpu[i].type())); + } + + platform::SetMLUDeviceId(place_.device); + PADDLE_ENFORCE_MLU_SUCCESS( + cnPlaceNotifier(events_[i].get(), compute_stream_)); + PADDLE_ENFORCE_MLU_SUCCESS(cnWaitNotifier(events_[i].get())); + + platform::RecordEvent record_event("BufferedReader:MemoryCopy", + platform::TracerEventType::UserDefined, + 1); + for (size_t i = 0; i < cpu.size(); ++i) { + auto cpu_place = cpu[i].place(); + auto cpu_ptr = cpu[i].data(); + auto mlu_ptr = mlu_ptrs[i]; + auto size = + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); + if ((platform::is_mlu_place(cpu_place))) { + memory::Copy(place_, mlu_ptr, cpu_place, cpu_ptr, size, + stream_.get()); + } else { + memory::Copy(place_, mlu_ptr, cpu_place, cpu_ptr, size, + stream_.get()); + platform::MLUStreamSync(stream_.get()); + } + mlu[i].set_lod(cpu[i].lod()); + } + platform::MLUStreamSync(stream_.get()); + } +#endif return i; })); } @@ -291,6 +357,8 @@ void BufferedReader::ReadNextImpl(std::vector *out) { *out = std::move(cuda_buffer_[i]); } else if (platform::is_npu_place(place_)) { *out = std::move(npu_buffer_[i]); + } else if (platform::is_mlu_place(place_)) { + *out = std::move(mlu_buffer_[i]); } else { *out = std::move(cpu_buffer_[i]); } diff --git a/paddle/fluid/operators/reader/buffered_reader.h b/paddle/fluid/operators/reader/buffered_reader.h index 3d42486c6df..f0f3b6b7f9f 100644 --- a/paddle/fluid/operators/reader/buffered_reader.h +++ b/paddle/fluid/operators/reader/buffered_reader.h @@ -29,6 +29,11 @@ #include "paddle/fluid/platform/device/npu/npu_info.h" #include "paddle/fluid/platform/device/npu/npu_resource_pool.h" #endif +#ifdef PADDLE_WITH_MLU +#include "paddle/fluid/platform/device/mlu/mlu_info.h" +#include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h" +#endif + namespace paddle { namespace operators { namespace reader { @@ -70,6 +75,7 @@ class BufferedReader : public framework::DecoratedReader { std::vector cpu_buffer_; std::vector cuda_buffer_; std::vector npu_buffer_; + std::vector mlu_buffer_; size_t prev_pos_{-1UL}; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) gpuStream_t compute_stream_; @@ -82,6 +88,12 @@ class BufferedReader : public framework::DecoratedReader { std::shared_ptr stream_; std::vector> events_; #endif + +#ifdef PADDLE_WITH_MLU + mluStream compute_stream_; + std::shared_ptr stream_; + std::vector> events_; +#endif }; } // namespace reader diff --git a/paddle/fluid/platform/stream_callback_manager.cc b/paddle/fluid/platform/stream_callback_manager.cc index 7fce0296d43..7148afee273 100644 --- a/paddle/fluid/platform/stream_callback_manager.cc +++ b/paddle/fluid/platform/stream_callback_manager.cc @@ -80,10 +80,10 @@ void StreamCallbackManager::AddCallback( #endif #if PADDLE_WITH_MLU - VLOG(3) << "MLULaunchCallback at stream: " << stream_; - LOG(ERROR) << "failed to call MLULaunchCallback, " - << "because mlu not support StreamAddCallback yet. " - << "function: " << func; + VLOG(3) << "MLULaunchCallback at stream: " << stream_ + << " Failed to call MLULaunchCallback, " + << "because mlu not support StreamAddCallback yet. " + << "function: " << func; #endif } -- GitLab