From d42cc8d10d65921b6412e4f7b2fe38caa57de8db Mon Sep 17 00:00:00 2001 From: zhupengyang Date: Thu, 7 May 2020 13:53:45 +0800 Subject: [PATCH] [NPU] init device tensor when needed (#3552) --- lite/kernels/npu/bridges/engine.cc | 3 ++ lite/kernels/npu/bridges/engine.h | 1 + lite/kernels/npu/subgraph_compute.cc | 44 +++++++++++++++++++--------- lite/kernels/npu/subgraph_compute.h | 4 +++ lite/tools/ci_build.sh | 2 +- 5 files changed, 39 insertions(+), 15 deletions(-) diff --git a/lite/kernels/npu/bridges/engine.cc b/lite/kernels/npu/bridges/engine.cc index 6e639a37ba..8ca8357710 100644 --- a/lite/kernels/npu/bridges/engine.cc +++ b/lite/kernels/npu/bridges/engine.cc @@ -95,6 +95,8 @@ int Engine::Build() { return build_device_program_status_; } +void Engine::InitDeviceTensor() { return; } + bool Engine::InputShapeChanged() { for (size_t i = 0; i < origin_itensors_.size(); i++) { if (origin_itensors_[i]->dims() != origin_idims_[i]) { @@ -110,6 +112,7 @@ int Engine::Launch() { CHECK_REBUILD_WHEN_SHAPE_CHANGED(build_device_program_status_) && InputShapeChanged()) { Build(); + InitDeviceTensor(); } if (CHECK_FAILED(build_device_program_status_)) { LaunchOriginProgram(); diff --git a/lite/kernels/npu/bridges/engine.h b/lite/kernels/npu/bridges/engine.h index 61a4e12cf3..34ec923889 100644 --- a/lite/kernels/npu/bridges/engine.h +++ b/lite/kernels/npu/bridges/engine.h @@ -55,6 +55,7 @@ class Engine { virtual int BuildOriginProgram(); virtual int LaunchOriginProgram(); + virtual void InitDeviceTensor(); virtual bool InputShapeChanged(); KernelContext *ctx_{nullptr}; diff --git a/lite/kernels/npu/subgraph_compute.cc b/lite/kernels/npu/subgraph_compute.cc index 1baa5a0de4..1a991bfc74 100644 --- a/lite/kernels/npu/subgraph_compute.cc +++ b/lite/kernels/npu/subgraph_compute.cc @@ -195,18 +195,6 @@ int SubgraphEngine::LaunchDeviceProgram() { // Copy the data of origin input tensors to the buffer of input HiAI tensors // init device_itensors_, device_otensors_, origin_otensors_ 
auto device_program = device_program_map_[inputs_shape_]; - for (size_t i = 0; i < device_itensors_.size(); i++) { - device_itensors_[i]->Init(&(device_program->device_idims[i])); - std::memcpy(device_itensors_[i]->GetBuffer(), - origin_itensors_[i]->raw_data(), - origin_itensors_[i]->memory_size()); - } - for (size_t i = 0; i < device_otensors_.size(); i++) { - device_otensors_[i]->Init(&(device_program->device_odims[i])); - } - for (size_t i = 0; i < origin_otensors_.size(); i++) { - origin_otensors_[i]->Resize(device_program->origin_odims[i]); - } // Run the HiAI model by name std::string key = "model_name"; // Note: key seems must be model_name @@ -233,15 +221,43 @@ int SubgraphEngine::LaunchDeviceProgram() { return 0; } +int SubgraphEngine::Build() { + if (device_program_map_.count(inputs_shape_) > 0) { + return subgraph::SUCCESS; + } + // In order to attach all of the ops of the block desc, we need to build the + // original program firstly. + BuildOriginProgram(); + // Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph + build_device_program_status_ = BuildDeviceProgram(); + return build_device_program_status_; +} + +void SubgraphEngine::InitDeviceTensor() { + auto device_program = device_program_map_[inputs_shape_]; + for (size_t i = 0; i < device_itensors_.size(); i++) { + device_itensors_[i]->Init(&(device_program->device_idims[i])); + std::memcpy(device_itensors_[i]->GetBuffer(), + origin_itensors_[i]->raw_data(), + origin_itensors_[i]->memory_size()); + } + for (size_t i = 0; i < device_otensors_.size(); i++) { + device_otensors_[i]->Init(&(device_program->device_odims[i])); + } + for (size_t i = 0; i < origin_otensors_.size(); i++) { + origin_otensors_[i]->Resize(device_program->origin_odims[i]); + } +} + bool SubgraphEngine::InputShapeChanged() { std::vector<std::vector<int64_t>> new_shape; for (auto origin_itensor : origin_itensors_) { new_shape.push_back(origin_itensor->dims().Vectorize()); } - inputs_shape_ = new_shape; - if 
(device_program_map_.count(inputs_shape_) > 0) { + if (inputs_shape_ == new_shape) { return false; } + inputs_shape_ = new_shape; return true; } diff --git a/lite/kernels/npu/subgraph_compute.h b/lite/kernels/npu/subgraph_compute.h index 801f61b036..db84fc1883 100644 --- a/lite/kernels/npu/subgraph_compute.h +++ b/lite/kernels/npu/subgraph_compute.h @@ -49,9 +49,13 @@ class SubgraphEngine : public subgraph::Engine { std::vector<hiai::TensorDimension> device_odims{}; }; + int Build() override; + protected: int BuildDeviceProgram() override; int LaunchDeviceProgram() override; + + void InitDeviceTensor() override; bool InputShapeChanged() override; std::string model_name_{"model.om"}; diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh index 270c3cf79c..cda8bbd4e0 100755 --- a/lite/tools/ci_build.sh +++ b/lite/tools/ci_build.sh @@ -396,7 +396,7 @@ function test_arm_android { adb -s ${device} push ${testpath} ${adb_work_dir} adb -s ${device} shell "cd ${adb_work_dir} && ./${test_name}" - adb -s ${device} shell "rm ${adb_work_dir}/${test_name}" + adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}" } # test_npu -- GitLab