Unverified commit e6b3d883, authored by Qi Li, committed by GitHub

[NPU] apply npu cache offline model to other devices, test=develop (#3925)

* [NPU] apply npu cache offline model to other devices, test=develop

* [NPU] address review comments, test=develop
Parent: b85bc6e5
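Every backend touched in the diff below follows the same pattern: BuildDeviceProgram() and LaunchDeviceProgram() switch from int status codes to bool, the origin program is built lazily inside BuildDeviceProgram() when origin_program_ is empty, and the explicit engine_->Build() call is dropped from PrepareForRun() in favour of a single engine_->Run(). A minimal, self-contained sketch of the run loop this is presumably moved into the shared subgraph::Engine base class (MockEngine and its members are hypothetical, not the Paddle-Lite API):

#include <cstdint>
#include <iostream>
#include <vector>

class MockEngine {
 public:
  bool Run(const std::vector<int64_t>& input_shape) {
    // Build lazily on the first run, and rebuild when the input shape changes.
    if (first_run_ || input_shape != last_shape_) {
      device_program_ready_ = BuildDeviceProgram();
      last_shape_ = input_shape;
      first_run_ = false;
    }
    // Fall back to the origin (CPU) program if the device program is unusable.
    return device_program_ready_ ? LaunchDeviceProgram() : LaunchOriginProgram();
  }

 private:
  bool BuildDeviceProgram() { return true; }  // stand-in for a backend build
  bool LaunchDeviceProgram() { std::cout << "run on device\n"; return true; }
  bool LaunchOriginProgram() { std::cout << "run origin program\n"; return true; }

  bool first_run_{true};
  bool device_program_ready_{false};
  std::vector<int64_t> last_shape_;
};

int main() {
  MockEngine engine;
  engine.Run({1, 3, 224, 224});  // first call: builds, then runs on the device
  engine.Run({1, 3, 224, 224});  // same shape: reuses the built device program
  return 0;
}

The rebuild-on-shape-change and fall-back-to-origin behaviour mirrors the Launch() logic removed from the MLU header further down; the exact base-class implementation is not part of this diff.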
@@ -28,7 +28,7 @@ namespace lite {
 namespace kernels {
 namespace apu {

-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::BuildDeviceProgram() {
   unsigned int version;
   Neuron_getVersion(&version);
   VLOG(3) << "Neuron Adapter version: " << version;
@@ -38,7 +38,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   int neuron_errCode = NeuronModel_create(&model_);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to create model";
-    return subgraph::FAILED;
+    return false;
   }
   graph.set_model(model_);
   graph.set_input_names(input_names_);
@@ -46,6 +46,9 @@ int SubgraphEngine::BuildDeviceProgram() {
   // Convert all of ops and their input vars and weights and added into the APU
   // NIR graph
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   const auto& bridges = subgraph::Registry::Instance();
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
@@ -54,7 +57,7 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kAPU))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
@@ -63,7 +66,7 @@ int SubgraphEngine::BuildDeviceProgram() {
                                          const_cast<OpLite*>(op),
                                          const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
@@ -84,7 +87,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       VLOG(3) << "input idx: " << graph.Get(input_names_[i])->index();
     } else {
       LOG(WARNING) << "Fail to find input: " << input_names_[i];
-      return subgraph::FAILED;
+      return false;
     }
   }
@@ -105,7 +108,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       VLOG(3) << "output idx: " << graph.Get(output_names_[i])->index();
     } else {
       LOG(WARNING) << "Fail to find output: " << output_names_[i];
-      return subgraph::FAILED;
+      return false;
     }
   }
@@ -116,7 +119,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   neuron_errCode = NeuronModel_finish(model_);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to create NIR model:" << neuron_errCode;
-    return subgraph::FAILED;
+    return false;
   }
   VLOG(3) << "[APU] APU NIR model created!";
@@ -129,15 +132,14 @@ int SubgraphEngine::BuildDeviceProgram() {
   compilation_ = lite::apu::Device::Global().Build(model_);
   if (compilation_ == nullptr) {
     LOG(WARNING) << "[APU] Build APU DLA model failed!";
-    return subgraph::FAILED;
+    return false;
   }
   VLOG(3) << "[APU] APU DLA model created, Build cost "
           << GetCurrentUS() - start_time << " us";
-  return status;
+  return true;
 }

-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   auto GetCurrentUS = []() -> double {
     struct timeval time;
     gettimeofday(&time, NULL);
@@ -149,7 +151,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   int neuron_errCode = NeuronExecution_create(compilation_, &run);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "[APU] Build APU runtime failed!";
-    return subgraph::FAILED;
+    return false;
   }

   // Set input buffer
@@ -177,7 +179,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   neuron_errCode = NeuronExecution_compute(run);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to run execution!" << neuron_errCode;
-    return subgraph::FAILED;
+    return false;
   }

   for (size_t i = 0; i < origin_otensors_.size(); i++) {
@@ -190,7 +192,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   }
   NeuronExecution_free(run);
   VLOG(3) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
-  return 0;
+  return true;
 }

 SubgraphEngine::~SubgraphEngine() {
@@ -211,12 +213,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }

 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }

 }  // namespace apu
......
@@ -41,8 +41,8 @@ class SubgraphEngine : public subgraph::Engine {
   ~SubgraphEngine();

  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;

   NeuronModel *model_;
   NeuronCompilation *compilation_;
......
@@ -28,12 +28,35 @@ namespace lite {
 namespace kernels {
 namespace bm {

-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_inputs_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_inputs_[i].reset(new hiai::AiTensor);
+    CHECK(device_inputs_[i]);
+  }
+  device_outputs_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_outputs_[i].reset(new hiai::AiTensor);
+    CHECK(device_outputs_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   subgraph::bm::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
   graph.CreateCompilerHandle();
   auto& ctx = this->ctx_->template As<BMContext>();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -42,7 +65,7 @@ int SubgraphEngine::BuildDeviceProgram() {
     std::string op_type = op->op_info()->Type();
     LOG(INFO) << op_type;
     if (!bridges.Exists(op_type, TARGET(kBM))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
     status |=
@@ -50,7 +73,7 @@ int SubgraphEngine::BuildDeviceProgram() {
                                          const_cast<OpLite*>(op),
                                          const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   std::string net_name = "bmnetc_f32umodel";
@@ -63,7 +86,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   graph.UnlockCompilerMutex();
   bmrt_hd_ = bmrt_create(bm_hd_);
   if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
-    return subgraph::FAILED;
+    return false;
   }
   bmrt_get_network_names(bmrt_hd_, &net_names_);
   net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
@@ -116,10 +139,10 @@ int SubgraphEngine::BuildDeviceProgram() {
                        net_info_->output_dtypes[i],
                        stage.output_shapes[i]);
   }
-  return status;
+  return true;
 }

-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_inputs_.size(); i++) {
     bm_memcpy_s2d(bm_hd_,
                   device_inputs_[i].device_mem,
@@ -143,7 +166,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
       out_index++;
     }
   }
-  return 0;
+  return true;
 }

 void SubgraphCompute::PrepareForRun() {
@@ -155,12 +178,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }

 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }

 }  // namespace bm
......
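The PrepareWorkspaceForDeviceProgram() hook added above splits device-tensor setup into two phases: placeholder tensor objects are allocated once when the kernel is prepared, and their dimensions are bound later when the device program is (re)built. A short sketch of that split, using a hypothetical DeviceTensor type instead of the backend-specific tensor classes in the diff:

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for a backend device tensor (hiai::AiTensor, DLTensor, ...).
struct DeviceTensor {
  std::vector<int64_t> dims;  // left empty until the program is built
};

class Workspace {
 public:
  // Phase 1, at PrepareForRun time: allocate one placeholder per input name,
  // but leave the shapes untouched -- they may still change before the first run.
  bool Prepare(const std::vector<std::string>& input_names) {
    device_inputs_.resize(input_names.size());
    for (auto& t : device_inputs_) t.reset(new DeviceTensor);
    return true;
  }

  // Phase 2, at BuildDeviceProgram time: bind the now-known shapes to the
  // placeholders before compiling the device program.
  bool Build(const std::vector<std::vector<int64_t>>& input_shapes) {
    if (input_shapes.size() != device_inputs_.size()) return false;
    for (size_t i = 0; i < input_shapes.size(); ++i) {
      device_inputs_[i]->dims = input_shapes[i];
    }
    return true;
  }

 private:
  std::vector<std::unique_ptr<DeviceTensor>> device_inputs_;
};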
@@ -44,8 +44,9 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}

  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;

  private:
   void *bmrt_hd_;
......
@@ -62,32 +62,6 @@ class SubgraphEngine : public subgraph::Engine {
     }
   }

-  int Build() {
-    // In order to attach all of the ops of the block desc, we need to build
-    // the original program firstly.
-    BuildOriginProgram();
-    // Run InferShape() of all of ops, and convert Paddle ops to MLU IR graph
-    build_device_program_status_ = BuildDeviceProgram();
-    return build_device_program_status_;
-  }
-
-  int Launch() {
-    // Rebuild device program when the shapes of input tensors have been
-    // changed.
-    if (subgraph::CHECK_SUCCESS(build_device_program_status_) &&
-        subgraph::CHECK_REBUILD_WHEN_SHAPE_CHANGED(
-            build_device_program_status_) &&
-        InputShapeChanged()) {
-      Build();
-    }
-    if (subgraph::CHECK_FAILED(build_device_program_status_)) {
-      LaunchOriginProgram();
-    } else {
-      LaunchDeviceProgram();
-    }
-    return 0;
-  }
-
   bool InputShapeChanged() {
     std::vector<std::vector<int64_t>> new_shape;
     // used in batch changable situation
@@ -127,7 +101,10 @@ class SubgraphEngine : public subgraph::Engine {
   }

  protected:
-  int BuildDeviceProgram() override {
+  bool BuildDeviceProgram() override {
+    if (origin_program_.empty()) {
+      BuildOriginProgram();
+    }
     if (!error_compile_batch_size_changeable_ &&
         !disable_batch_size_changeable_) {
       int status = BuildDeviceProgramImpl();
@@ -142,7 +119,7 @@ class SubgraphEngine : public subgraph::Engine {
     return BuildDeviceProgramImpl();
   }

-  int BuildDeviceProgramImpl() {
+  bool BuildDeviceProgramImpl() {
     int status = 0;
     auto graph = std::make_shared<paddle::lite::subgraph::mlu::Graph>();
     graph->SetFPType(fp_type_);
@@ -197,13 +174,16 @@ class SubgraphEngine : public subgraph::Engine {
         status |= subgraph::FAILED;
         VLOG(4) << "[MLU] found unsupported batch_size changeable op type: "
                 << op_type;
-        return status;
+        if (subgraph::CHECK_FAILED(status)) {
+          return false;
+        }
+        return true;
       }
       op->CheckShape();
       const_cast<OpLite*>(op)->InferShape();
       if (!bridges.Exists(op_type, TARGET(kMLU))) {
         LOG(INFO) << "MLU bridges doesn't support op_type: " << op_type;
-        return subgraph::FAILED;
+        return false;
       }
       auto kernel = inst.kernel();
       status |= bridges.Select(op_type, TARGET(kMLU))(
@@ -211,7 +191,7 @@ class SubgraphEngine : public subgraph::Engine {
           const_cast<OpLite*>(op),
           const_cast<KernelBase*>(kernel));
       if (subgraph::CHECK_FAILED(status)) {
-        return subgraph::FAILED;
+        return false;
       }
     }
     // Obtain the output nodes of the MLU IR graph and build the graph to MLU
@@ -242,7 +222,7 @@ class SubgraphEngine : public subgraph::Engine {
     if (GetBoolFromEnv("PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL")) {
       graph->GenOfflineModel(GetOfflineModName());
     }
-    return status;
+    return true;
   }

   std::string TrimStrings(const std::string& origin_str) {
@@ -329,7 +309,7 @@ class SubgraphEngine : public subgraph::Engine {
     }
   }

-  int LaunchDeviceProgram() override {
+  bool LaunchDeviceProgram() override {
     // prepare input and output memory
     auto& mlu_context = this->ctx_->template As<MLUContext>();
     auto exec_queue = mlu_context.exec_queue();
@@ -453,7 +433,7 @@ class SubgraphEngine : public subgraph::Engine {
       // =========== DUMP END ================
     }
-    return 0;
+    return true;
   }

   paddle::lite_api::PrecisionType fp_type_;
@@ -501,12 +481,11 @@ class SubgraphCompute
                                      param.scope,
                                      this->precision()));
     CHECK(engine_);
-    engine_->Build();
   }

   void Run() override {
     CHECK(engine_);
-    engine_->Launch();
+    engine_->Run();
   }

   virtual ~SubgraphCompute() = default;
......
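In the MLU hunk above, the old code returned the accumulated status bitmask directly; with a bool return type, any non-zero status (including non-fatal bits such as the rebuild-on-shape-change flag) would silently convert to true, so the new code checks subgraph::CHECK_FAILED(status) explicitly before returning. A minimal sketch of that bit-flag convention, with illustrative enum values and helper names that are assumptions rather than copies of the Paddle-Lite definitions:

#include <cstdio>

// Assumed bit-flag status convention (values are illustrative only).
enum Status : int {
  SUCCESS = 0,
  FAILED = 1,                      // fatal: fall back to the origin program
  REBUILD_WHEN_SHAPE_CHANGED = 2,  // non-fatal: rebuild on new input shapes
};

inline bool CheckFailed(int status) { return (status & FAILED) != 0; }

int main() {
  int status = SUCCESS;
  status |= REBUILD_WHEN_SHAPE_CHANGED;  // op is supported, but shape-sensitive
  // Returning `status` from a bool function would also yield true here,
  // losing the distinction; an explicit failure check keeps the meaning.
  std::printf("failed: %s\n", CheckFailed(status) ? "yes" : "no");  // prints "no"
  return 0;
}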
@@ -28,13 +28,36 @@ namespace lite {
 namespace kernels {
 namespace rknpu {

-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_itensors_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_itensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_itensors_[i]);
+  }
+  device_otensors_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_otensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_otensors_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   LOG(INFO) << "[RKNPU]:BuildDeviceProgram";
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the NPU
   // RKNPU IR graph
   subgraph::rknpu::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -42,13 +65,13 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kRKNPU))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
     status |= bridges.Select(op_type, TARGET(kRKNPU))(
         reinterpret_cast<void*>(&graph), op, const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   // Collect the valid input and output nodes in the RKNPU IR graph and update
@@ -91,7 +114,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       model_name_, graph.GetHandle(), device_itensors_, device_otensors_);
   if (device_program_ == nullptr) {
     LOG(WARNING) << "[RKNPU] Build model failed!";
-    return subgraph::FAILED;
+    return false;
   }

   // input
@@ -165,10 +188,10 @@ int SubgraphEngine::BuildDeviceProgram() {
       break;
     }
   }
-  return status;
+  return true;
 }

-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   LOG(INFO) << "[RKNPU]:LaunchDeviceProgram";
   std::vector<rk::nn::InputInfo> inputs;
   std::vector<rk::nn::OutputInfo> outputs;
@@ -195,7 +218,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   device_program_->SetInputs(inputs);
   device_program_->Run();
   device_program_->GetOutputs(outputs);
-  return 0;
+  return true;
 }

 void SubgraphCompute::PrepareForRun() {
@@ -208,13 +231,12 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }

 void SubgraphCompute::Run() {
   LOG(INFO) << "[RKNPU]:Run";
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }

 }  // namespace rknpu
......
@@ -42,14 +42,15 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}

  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;

   std::string model_name_;
   std::vector<std::string> device_inames_;
   std::vector<std::string> device_onames_;
-  std::vector<std::shared_ptr<rk::nn::Tensor>> device_itensors_;
-  std::vector<std::shared_ptr<rk::nn::Tensor>> device_otensors_;
+  std::vector<std::shared_ptr<rk::nn::Tensor>> device_itensors_{};
+  std::vector<std::shared_ptr<rk::nn::Tensor>> device_otensors_{};
   std::unique_ptr<rk::nn::Exection> device_program_{nullptr};
 };
......
@@ -27,12 +27,35 @@ namespace lite {
 namespace kernels {
 namespace xpu {

-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_itensors_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_itensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_itensors_[i]);
+  }
+  device_otensors_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_otensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_otensors_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the XPU
   // IR graph
   subgraph::xpu::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -40,13 +63,13 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kXPU))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
     status |= bridges.Select(op_type, TARGET(kXPU))(
         reinterpret_cast<void*>(&graph), op, const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   // Obtain the output nodes of the XPU IR graph and build the graph to the XPU
@@ -86,7 +109,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       &graph.builder_, &graph.params_, &device_onodes);
   if (device_program_ == nullptr) {
     LOG(WARNING) << "[XPU] Build model failed!";
-    return subgraph::FAILED;
+    return false;
   }

   // Query and check the dimensions of input and output tensors
@@ -166,10 +189,10 @@ int SubgraphEngine::BuildDeviceProgram() {
     device_otensors_[i].strides = nullptr;
     device_otensors_[i].byte_offset = 0;
   }
-  return status;
+  return true;
 }

-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_itensors_.size(); i++) {
     // Update the data pointer of DLTensor to track the origin input tensors
     device_itensors_[i].data =
@@ -191,7 +214,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
         const_cast<void*>(origin_otensors_[i]->raw_data());
     device_program_->CopyOutputTo(i, &device_otensors_[i]);
   }
-  return 0;
+  return true;
 }

 void SubgraphCompute::PrepareForRun() {
@@ -203,12 +226,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }

 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }

 }  // namespace xpu
......
@@ -39,13 +39,14 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}

  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;

   std::vector<std::string> device_inames_;
   std::vector<std::string> device_onames_;
-  std::vector<DLTensor> device_itensors_;
-  std::vector<DLTensor> device_otensors_;
+  std::vector<DLTensor> device_itensors_{};
+  std::vector<DLTensor> device_otensors_{};
   std::unique_ptr<xtcl::network::xRuntimeInstance> device_program_{nullptr};
 };
......