Commit 3c0591dc authored by mindspore-ci-bot, committed by Gitee

!4653 support multi in/out for subgraph_opencl

Merge pull request !4653 from wandongdong/master
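The core of this change is that one subgraph boundary tensor may now be connected to several kernels, so the boundary kernels are grouped per tensor as `std::vector<std::vector<kernel::LiteKernel *>>`, and a new helper `GetKernelFromToTensor` builds that grouping before `GenToFormatOp` runs. Below is a minimal, self-contained sketch of the grouping idea only; `Tensor`, `Kernel`, and `GroupKernelsByTensor` are hypothetical stand-ins for illustration, not the actual MindSpore Lite classes or APIs.

```cpp
// Sketch of the per-tensor kernel grouping idea (hypothetical types, not the real
// lite::tensor::Tensor / kernel::LiteKernel classes).
#include <set>
#include <vector>

struct Tensor {};

struct Kernel {
  std::vector<Tensor *> in_tensors;   // tensors this kernel reads
  std::vector<Tensor *> out_tensors;  // tensors this kernel writes
};

// For every boundary tensor, collect all kernels that reference it, so a single
// tensor can feed (or be produced by) more than one kernel.
std::vector<std::vector<Kernel *>> GroupKernelsByTensor(const std::vector<Tensor *> &tensors,
                                                        const std::vector<Kernel *> &kernels,
                                                        bool match_inputs) {
  // One lookup set per kernel, built once up front.
  std::vector<std::set<Tensor *>> kernel_tensor_sets;
  for (auto *kernel : kernels) {
    const auto &tens = match_inputs ? kernel->in_tensors : kernel->out_tensors;
    kernel_tensor_sets.emplace_back(tens.begin(), tens.end());
  }
  std::vector<std::vector<Kernel *>> grouped;
  for (auto *tensor : tensors) {
    std::vector<Kernel *> users;
    for (size_t j = 0; j < kernels.size(); ++j) {
      if (kernel_tensor_sets[j].count(tensor) != 0) {
        users.push_back(kernels[j]);
      }
    }
    grouped.emplace_back(users);  // one (possibly multi-element) kernel list per tensor
  }
  return grouped;
}

int main() {
  Tensor t;
  Kernel a{{&t}, {}};  // two kernels consume the same subgraph input tensor
  Kernel b{{&t}, {}};
  std::vector<Kernel *> kernels{&a, &b};
  auto groups = GroupKernelsByTensor({&t}, kernels, /*match_inputs=*/true);
  // groups[0] holds both kernels, so one ToFormat conversion can fan out to both.
  return groups[0].size() == 2 ? 0 : 1;
}
```

With such a grouping in place, `GenToFormatOp` only needs one format-conversion node per boundary tensor and can wire it to every kernel in the group, which is what the added inner loops in the diff below do.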
......@@ -15,6 +15,7 @@
*/
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include <set>
#include "src/runtime/opencl/opencl_executor.h"
#include "src/runtime/opencl/opencl_runtime.h"
#include "src/runtime/kernel/opencl/utils.h"
......@@ -26,23 +27,41 @@ namespace mindspore::kernel {
SubGraphOpenCLKernel::~SubGraphOpenCLKernel() { UnInit(); }
int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> in_kernels,
const std::vector<std::vector<kernel::LiteKernel *>> in_kernels,
std::vector<lite::tensor::Tensor *> *out_tensors,
std::vector<OpenCLToFormatParameter *> *out_parameters,
std::vector<LiteKernel *> *out_convert_ops, OpenCLMemType mem_type) {
out_tensors->clear();
out_parameters->clear();
out_convert_ops->clear();
MS_ASSERT(in_tensors.size() == to_kernels.size());
MS_ASSERT(in_tensors.size() == from_kernels.size());
for (auto &iv : in_kernels) {
for (auto &jv : iv) {
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(jv);
schema::Format ori_format = cur_opencl_op->GetOriFormat();
auto tens = cur_opencl_op->out_tensors();
if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() &&
tens[0]->GetFormat() == ori_format) {
continue;
}
if (mem_type == OpenCLMemType::IMG) {
jv->set_in_tensors({});
} else {
jv->set_out_tensors({});
}
}
}
for (size_t i = 0; i < in_tensors.size(); ++i) {
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i]);
OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]);
schema::Format ori_format = cur_opencl_op->GetOriFormat();
if (mem_type == OpenCLMemType::BUF && mem_type == cur_opencl_op->GetMemType() &&
in_tensors[i]->GetFormat() == ori_format) {
continue;
}
auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i]->out_tensors()[0]->GetFormat() : ori_format;
auto dst_format = (mem_type == OpenCLMemType::IMG) ? in_kernels[i][0]->out_tensors()[0]->GetFormat() : ori_format;
auto src_format =
(mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i]->out_tensors()[0]->GetFormat();
(mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i][0]->out_tensors()[0]->GetFormat();
lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor();
MS_ASSERT(new_tensor);
if (new_tensor == nullptr) {
......@@ -62,7 +81,7 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
std::vector<int> dst_shape{shape[0], shape[2], shape[3], shape[1]};
new_tensor->set_shape(shape);
}
new_tensor->SetFormat(dst_format);
new_tensor->SetFormat(in_kernels[i][0]->out_tensors()[0]->GetFormat());
out_tensors->emplace_back(new_tensor);
#ifdef ENABLE_FP16
KernelKey desc{kGPU, kNumberTypeFloat16, schema::PrimitiveType_ToFormat};
......@@ -94,13 +113,17 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
}
auto in_opencl_op = reinterpret_cast<OpenCLKernel *>(in_convert_op);
if (mem_type == OpenCLMemType::IMG) {
in_opencl_op->AddOutKernel(in_kernels[i]);
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->SetInKernel({in_convert_op});
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->set_in_tensors({new_tensor});
for (auto &iv : in_kernels[i]) {
in_opencl_op->AddOutKernel(iv);
reinterpret_cast<OpenCLKernel *>(iv)->SetInKernel({in_convert_op});
reinterpret_cast<OpenCLKernel *>(iv)->set_in_tensors({new_tensor});
}
} else {
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->SetOutKernel({in_convert_op});
reinterpret_cast<OpenCLKernel *>(in_kernels[i])->set_out_tensors({new_tensor});
in_convert_op->AddInKernel(in_kernels[i]);
for (auto &iv : in_kernels[i]) {
reinterpret_cast<OpenCLKernel *>(iv)->SetOutKernel({in_convert_op});
reinterpret_cast<OpenCLKernel *>(iv)->set_out_tensors({new_tensor});
in_convert_op->AddInKernel(iv);
}
}
out_convert_ops->emplace_back(in_convert_op);
}
......@@ -116,13 +139,19 @@ int SubGraphOpenCLKernel::Init() {
for (const auto tensor : out_tensors_) {
tensor->set_allocator(allocator_);
}
int ret = GenToFormatOp(in_tensors_, in_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_,
std::vector<std::vector<kernel::LiteKernel *>> from_kernels_;
GetKernelFromToTensor(in_tensors_, in_kernels_, &from_kernels_, true);
int ret = GenToFormatOp(in_tensors_, from_kernels_, &in_convert_tensors_, &in_parameters_, &in_convert_ops_,
OpenCLMemType::IMG);
if (ret != RET_OK) {
return RET_ERROR;
}
nodes_.insert(nodes_.begin(), in_convert_ops_.begin(), in_convert_ops_.end());
ret = GenToFormatOp(out_tensors_, out_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_,
std::vector<std::vector<kernel::LiteKernel *>> to_kernels_;
GetKernelFromToTensor(out_tensors_, out_kernels_, &to_kernels_, false);
ret = GenToFormatOp(out_tensors_, to_kernels_, &out_convert_tensors_, &out_parameters_, &out_convert_ops_,
OpenCLMemType::BUF);
if (ret != RET_OK) {
return RET_ERROR;
......@@ -144,6 +173,7 @@ int SubGraphOpenCLKernel::Init() {
}
return RET_OK;
}
int SubGraphOpenCLKernel::MallocTensorWithReuse() {
kernel::LiteKernelUtil::InitTensorRefCount(nodes_);
for (auto *kernel : nodes_) {
......@@ -195,10 +225,30 @@ int SubGraphOpenCLKernel::MallocTensorWithReuse() {
return RET_OK;
}
int SubGraphOpenCLKernel::UnInit() {
for (auto &tensor : out_tensors_) {
allocator_->UnmapBuffer(tensor->Data());
int SubGraphOpenCLKernel::GetKernelFromToTensor(const std::vector<lite::tensor::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> &in_kernels,
std::vector<std::vector<kernel::LiteKernel *>> *out_kernels,
bool is_from) {
std::vector<std::set<lite::tensor::Tensor *>> ksets;
for (auto jv : in_kernels) {
auto tens = is_from ? jv->in_tensors() : jv->out_tensors();
std::set<lite::tensor::Tensor *> kset;
kset.insert(tens.begin(), tens.end());
ksets.emplace_back(kset);
}
for (size_t i = 0; i < in_tensors.size(); ++i) {
std::vector<kernel::LiteKernel *> kvec;
for (size_t j = 0; j < in_kernels.size(); ++j) {
if (ksets[j].count(in_tensors[i])) {
kvec.emplace_back(in_kernels[j]);
}
}
out_kernels->emplace_back(kvec);
}
return RET_OK;
}
int SubGraphOpenCLKernel::UnInit() {
for (const auto tensor : in_tensors_) {
if (tensor != nullptr) {
tensor->FreeData();
......@@ -206,12 +256,10 @@ int SubGraphOpenCLKernel::UnInit() {
}
for (const auto tensor : out_tensors_) {
if (tensor != nullptr) {
allocator_->UnmapBuffer(tensor->Data());
tensor->FreeData();
}
}
for (auto &tensor : out_tensors_) {
allocator_->UnmapBuffer(tensor->Data());
}
for (const auto tensor : in_convert_tensors_) {
if (tensor != nullptr) {
tensor->FreeData();
......
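For the rewiring half of the change (the new inner loops over `in_kernels[i]` in `GenToFormatOp`), a minimal sketch of the fan-out pattern follows, again with hypothetical stand-in types rather than the real kernel classes.

```cpp
// Hypothetical sketch: insert one converter node per boundary tensor and connect
// it to every kernel that shares that tensor (mirrors the IMG branch above, which
// calls AddOutKernel on the converter and SetInKernel on each consumer).
#include <vector>

struct Node {
  std::vector<Node *> in_nodes;   // upstream nodes
  std::vector<Node *> out_nodes;  // downstream nodes
};

void FanOutConverter(Node *converter, const std::vector<Node *> &consumers) {
  for (auto *kernel : consumers) {
    converter->out_nodes.push_back(kernel);  // converter feeds every consumer
    kernel->in_nodes = {converter};          // each consumer now reads from the converter
  }
}
```

The BUF branch in the diff is symmetric: every producer kernel in the group is pointed at the converter instead, via `SetOutKernel` on the producer and `AddInKernel` on the converter.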
......@@ -36,7 +36,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
const std::vector<kernel::LiteKernel *> inKernels,
const std::vector<kernel::LiteKernel *> outKernels,
const std::vector<kernel::LiteKernel *> nodes)
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, nullptr, nullptr) {}
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, nullptr, nullptr) {}
~SubGraphOpenCLKernel() override;
int Init() override;
......@@ -48,10 +48,13 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
protected:
int MallocTensorWithReuse();
int GenToFormatOp(const std::vector<lite::tensor::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> in_kernels,
const std::vector<std::vector<kernel::LiteKernel *>> in_kernels,
std::vector<lite::tensor::Tensor *> *out_tensors,
std::vector<OpenCLToFormatParameter *> *out_parameters, std::vector<LiteKernel *> *out_convert_ops,
OpenCLMemType mem_type);
int GetKernelFromToTensor(const std::vector<lite::tensor::Tensor *> &in_tensors,
const std::vector<kernel::LiteKernel *> &in_kernels,
std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from);
private:
SubGraphOpenCLParameter *subgraph_ocl_parameter_;
......
......@@ -345,7 +345,7 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t>
internal_global_ws[i] = ROUND_UP(global[i], local[i]);
}
MS_LOG(INFO) << "global size: " << global.size() << ", local size: " << local.size();
MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size();
for (size_t i = 0; i < global.size(); i++) {
MS_LOG(DEBUG) << "global[" << i << "] = " << global[i];
}
......@@ -367,7 +367,7 @@ int OpenCLRuntime::RunKernel(const cl_kernel &kernel, const std::vector<size_t>
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(error);
return 1;
}
MS_LOG(INFO) << "RunKernel success!";
MS_LOG(DEBUG) << "RunKernel success!";
return 0;
}
......@@ -383,7 +383,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
internal_global_ws[i] = ROUND_UP(global[i], local[i]);
}
MS_LOG(INFO) << "global size: " << global.size() << ", local size: " << local.size();
MS_LOG(DEBUG) << "global size: " << global.size() << ", local size: " << local.size();
for (size_t i = 0; i < global.size(); i++) {
MS_LOG(DEBUG) << "global[" << i << "] = " << global[i];
}
......@@ -412,7 +412,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
local_range = cl::NDRange(local[0], local[1], local[2]);
}
} else {
MS_LOG(INFO) << "Not supported NDRange!";
MS_LOG(ERROR) << "Not supported NDRange!";
return 1;
}
......@@ -422,7 +422,7 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t>
MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(err);
return 1;
}
MS_LOG(INFO) << "RunKernel success!";
MS_LOG(DEBUG) << "RunKernel success!";
#if MS_OPENCL_PROFILE
event.wait();
cl_ulong time_start;
......