From d1a4c53eeec15ee739cf9fdc1afdde5aba79c480 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Wed, 24 Mar 2021 16:25:41 +0800
Subject: [PATCH] [NPU] support default stream (#31510)

---
 paddle/fluid/operators/npu_op_runner.cc | 23 ++++++++++++++++++++---
 paddle/fluid/operators/npu_op_runner.h  |  4 +++-
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/npu_op_runner.cc b/paddle/fluid/operators/npu_op_runner.cc
index dc503a0a96..ce2359db40 100644
--- a/paddle/fluid/operators/npu_op_runner.cc
+++ b/paddle/fluid/operators/npu_op_runner.cc
@@ -64,6 +64,16 @@ aclFormat ConvertToNpuFormat(DataLayout layout) {
   return iter->second;
 }
 
+// Returns the stream owned by the device context of the current NPU device.
+aclrtStream GetCurrentNPUStream() {
+  int device_id = platform::GetCurrentNPUDeviceId();
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  // Downcast: stream() belongs to the NPU-specific device context.
+  auto *dev_ctx = static_cast<platform::NPUDeviceContext *>(
+      pool.Get(platform::NPUPlace(device_id)));
+  return dev_ctx->stream();
+}
+
 NpuOpRunner::NpuOpRunner(std::string op_type) : op_type_(op_type) {
   attr_ = aclopCreateAttr();
 }
@@ -249,7 +259,7 @@ aclTensorDesc *NpuOpRunner::CreateTensorDesc(Tensor tensor) {
   auto format = ConvertToNpuFormat(tensor.layout());
   auto dims = framework::vectorize(tensor.dims());
 
-  VLOG(4) << "dtype:" << dtype << " "
+  VLOG(4) << "NPU dtype:" << dtype << " "
           << "rank:" << dims.size() << " dims:" << tensor.dims()
           << " format:" << format;
 
@@ -264,7 +274,7 @@ aclTensorDesc *NpuOpRunner::CreateTensorDesc(Tensor tensor) {
 
 aclDataBuffer *NpuOpRunner::CreateDataBuffer(Tensor tensor) {
   void *ptr = tensor.data();
-  VLOG(4) << "ptr: " << ptr << ", size: " << tensor.memory_size();
+  VLOG(4) << "NPU ptr: " << ptr << ", size: " << tensor.memory_size();
   auto *buffer = aclCreateDataBuffer(ptr, tensor.memory_size());
   PADDLE_ENFORCE_NOT_NULL(
       buffer, platform::errors::External("Call aclCreateDataBuffer failed."));
@@ -272,11 +282,18 @@ aclDataBuffer *NpuOpRunner::CreateDataBuffer(Tensor tensor) {
 }
 
 void NpuOpRunner::Run(aclrtStream stream) {
+  // A null stream selects the stream of the current NPU device.
+  if (!stream) {
+    stream = GetCurrentNPUStream();
+    VLOG(4) << "Run with default current npu stream: " << stream;
+  }
+
   VLOG(4) << "op_type: " << op_type_;
   VLOG(4) << "input_desc.size: " << input_descs_.size();
   VLOG(4) << "output_desc.size: " << output_descs_.size();
-  VLOG(4) << "stream: " << stream;
   VLOG(4) << "attr: " << attr_;
+  VLOG(4) << "stream: " << stream;
+
   aclError ret = aclopCompileAndExecute(
       op_type_.c_str(), input_descs_.size(), input_descs_.data(),
       input_buffers_.data(), output_descs_.size(), output_descs_.data(),
diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h
index 2ead0d5b54..81d5826951 100644
--- a/paddle/fluid/operators/npu_op_runner.h
+++ b/paddle/fluid/operators/npu_op_runner.h
@@ -69,7 +69,9 @@ class NpuOpRunner {
 
   std::vector &GetOutputBuffers();
 
-  void Run(aclrtStream stream);
+  // Runs the op on `stream`; a null stream falls back to the current NPU
+  // device's stream.
+  void Run(aclrtStream stream = nullptr);
 
  private:
   aclTensorDesc *CreateTensorDesc(Tensor tensor);
--
GitLab