// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/npu/graph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <cstring>
#include <string>
#include <vector>
#include "ai_ddk_lib/include/HiAiModelManagerService.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace npu {

void GraphCompute::PrepareForRun() {
  auto& ctx = this->ctx_->template As<NPUContext>();
  auto& param = this->Param<param_t>();

  // Load the HiAI model that was serialized into the weight tensor.
  CHECK(param.weight);
  CHECK(lite::npu::LoadModel(*param.weight, &model_client_, &model_name_));
  // TODO(hong19860320): find a good way to free the model data.
  // No interface exists to free the data of a tensor, so resize the dim to 1
  // and change the target to force a realloc of a small piece of memory.
  param.weight->Resize({1});
  param.weight->mutable_data<int8_t>(TargetType::kARM);
  CHECK(model_client_);

  // Query the input/output tensor dims of the loaded model.
  int ret =
      model_client_->GetModelIOTensorDim(model_name_, npu_idims_, npu_odims_);
  CHECK_EQ(ret, hiai::AI_SUCCESS) << "[NPU] Get dims failed.";

  npu_itensors_.resize(npu_idims_.size());
  npu_otensors_.resize(npu_odims_.size());

  // Create the HiAI input tensors and check that their dims match the lite
  // input tensors.
  for (size_t i = 0; i < npu_idims_.size(); ++i) {
    VLOG(3) << "npu_idims[" << i << "]: " << npu_idims_[i].GetNumber() << ","
            << npu_idims_[i].GetChannel() << "," << npu_idims_[i].GetHeight()
            << "," << npu_idims_[i].GetWidth();
    VLOG(3) << "lite_idims[" << i << "]: " << param.inputs[i].second->dims();
    CHECK_EQ(param.inputs[i].second->dims().production(),
             npu_idims_[i].GetNumber() * npu_idims_[i].GetChannel() *
                 npu_idims_[i].GetHeight() * npu_idims_[i].GetWidth());
    npu_itensors_[i].reset(new hiai::AiTensor);
    npu_itensors_[i]->Init(&(npu_idims_[i]));
  }

  // Create the HiAI output tensors and resize the lite output tensors to the
  // dims reported by the model if they don't match.
  for (size_t i = 0; i < npu_odims_.size(); ++i) {
    VLOG(3) << "npu_odims[" << i << "]: " << npu_odims_[i].GetNumber() << ","
            << npu_odims_[i].GetChannel() << "," << npu_odims_[i].GetHeight()
            << "," << npu_odims_[i].GetWidth();
    VLOG(3) << "lite_odims[" << i << "]: " << param.outputs[i].second->dims();
    auto out_size = npu_odims_[i].GetNumber() * npu_odims_[i].GetChannel() *
                    npu_odims_[i].GetHeight() * npu_odims_[i].GetWidth();
    if (param.outputs[i].second->dims().production() != out_size) {
      param.outputs[i].second->Resize({npu_odims_[i].GetNumber(),
                                       npu_odims_[i].GetChannel(),
                                       npu_odims_[i].GetHeight(),
                                       npu_odims_[i].GetWidth()});
    }
    LOG(INFO) << param.outputs[i].second->dims();
    npu_otensors_[i].reset(new hiai::AiTensor);
    npu_otensors_[i]->Init(&(npu_odims_[i]));
  }
}

// Returns true if any input tensor's dims differ from the dims recorded from
// the model in PrepareForRun.
bool GraphCompute::input_dims_changed() const {
  auto& param = this->Param<param_t>();
  CHECK_EQ(param.inputs.size(), npu_idims_.size());
  for (size_t i = 0; i < param.inputs.size(); ++i) {
    auto param_idims = param.inputs[i].second->dims();
    CHECK(!param_idims.empty());
    CHECK_EQ(param_idims.size(), 4);
    std::vector<int64_t> idims{
        static_cast<int64_t>(npu_idims_[i].GetNumber()),
        static_cast<int64_t>(npu_idims_[i].GetChannel()),
        static_cast<int64_t>(npu_idims_[i].GetHeight()),
        static_cast<int64_t>(npu_idims_[i].GetWidth())};
    for (size_t j = 0; j < 4; ++j) {
      if (param_idims[j] != idims[j]) {
        return true;
      }
    }
  }
  return false;
}

void GraphCompute::Run() {
  CHECK(!input_dims_changed())
      << "When NPU is enabled, the input shape cannot be changed yet.";
  auto& param = this->Param<param_t>();
  CHECK_EQ(param.inputs.size(), npu_itensors_.size());
  CHECK_EQ(param.outputs.size(), npu_otensors_.size());

  // Copy the lite input tensors into the HiAI input tensors.
  for (size_t i = 0; i < param.inputs.size(); ++i) {
    auto* itensor = param.inputs[i].second;
    CHECK(itensor);
    const auto* i_data = itensor->data<float>();
    std::memcpy(
        npu_itensors_[i]->GetBuffer(),
        i_data,
        sizeof(float) * static_cast<size_t>(itensor->dims().production()));
  }

  // Note: the key seems to be required to be "model_name".
  std::string key = "model_name";
  model_context_.AddPara(key, model_name_);

  // Run the model and measure the latency in microseconds.
  auto GetCurrentUS = []() -> double {
    struct timeval time;
    gettimeofday(&time, NULL);
    return 1e+6 * time.tv_sec + time.tv_usec;
  };
  int istamp;
  auto start_time = GetCurrentUS();
  CHECK_EQ(hiai::AI_SUCCESS,
           model_client_->Process(
               model_context_, npu_itensors_, npu_otensors_, 1000, istamp));
  VLOG(3) << "[NPU] Process cost " << GetCurrentUS() - start_time << " us";

  // Copy the HiAI output tensors back into the lite output tensors.
  for (size_t i = 0; i < param.outputs.size(); ++i) {
    auto* otensor = param.outputs[i].second;
    CHECK(otensor);
    auto* o_data = otensor->mutable_data<float>();
    auto* npu_obuffer = static_cast<float*>(npu_otensors_[i]->GetBuffer());
    std::memcpy(
        o_data,
        npu_obuffer,
        sizeof(float) * static_cast<size_t>(otensor->dims().production()));
  }
}

}  // namespace npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(graph_op,
                     kNPU,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::npu::GraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();