NPU subgraph engine: initialize device tensors only when the input shape changes

Summary of the change carried by this patch:
  * Engine gains a virtual InitDeviceTensor() hook. Engine::Launch() calls it
    right after the Build() that is triggered by InputShapeChanged().
  * SubgraphEngine overrides Build() to reuse a device program that is already
    cached in device_program_map_ for the current inputs_shape_.
  * SubgraphEngine::InitDeviceTensor() takes over the Init()/Resize() calls
    that LaunchDeviceProgram() used to perform on every launch.
  * SubgraphEngine::InputShapeChanged() now compares against the last seen
    shape instead of probing the program cache.
  * ci_build.sh removes the pushed test binary with `rm -f`.

Review notes (differences from the patch as originally captured):
  * The capture had lost its line breaks; the unified-diff structure below was
    rebuilt so that the untouched hunks match their original line counts.
  * The input memcpy stays in LaunchDeviceProgram(). InitDeviceTensor() is
    only reached on a shape change, so copying there alone would leave stale
    input data on every later launch with an unchanged shape.
  * SubgraphEngine::InitDeviceTensor() looks the program up with find() and
    returns early when none exists, instead of dereferencing whatever
    operator[] inserts after a failed Build().
  * Hunk offsets/counts in the two .cc files were recomputed for the above.
    The post-image blob ids on their `index` lines come from the original
    patch and are therefore stale.
  * NOTE(review): angle-bracketed text was stripped by the capture. The
    template arguments on the `new_shape` and `device_odims` context lines were
    restored as std::vector<std::vector<int64_t>> and
    std::vector<hiai::TensorDimension> - confirm against the tree. The trailing
    `# test_npu` context line in ci_build.sh may also be truncated.

diff --git a/lite/kernels/npu/bridges/engine.cc b/lite/kernels/npu/bridges/engine.cc
index 6e639a37badf45e4a01f542011f0149e93e06772..8ca8357710e1f36a7c3f21417d7633e47f18c59a 100644
--- a/lite/kernels/npu/bridges/engine.cc
+++ b/lite/kernels/npu/bridges/engine.cc
@@ -95,6 +95,10 @@ int Engine::Build() {
   return build_device_program_status_;
 }
 
+// Hook that Launch() calls right after a shape-triggered Build(). The base
+// implementation is intentionally empty; device engines override it.
+void Engine::InitDeviceTensor() {}
+
 bool Engine::InputShapeChanged() {
   for (size_t i = 0; i < origin_itensors_.size(); i++) {
     if (origin_itensors_[i]->dims() != origin_idims_[i]) {
@@ -110,6 +114,7 @@ int Engine::Launch() {
       CHECK_REBUILD_WHEN_SHAPE_CHANGED(build_device_program_status_) &&
       InputShapeChanged()) {
     Build();
+    InitDeviceTensor();
   }
   if (CHECK_FAILED(build_device_program_status_)) {
     LaunchOriginProgram();
diff --git a/lite/kernels/npu/bridges/engine.h b/lite/kernels/npu/bridges/engine.h
index 61a4e12cf3ad6e3eab608a585f165fde9dec081d..34ec9238892448f57298fee6693a0820b9c7e091 100644
--- a/lite/kernels/npu/bridges/engine.h
+++ b/lite/kernels/npu/bridges/engine.h
@@ -55,6 +55,7 @@ class Engine {
   virtual int BuildOriginProgram();
   virtual int LaunchOriginProgram();
 
+  virtual void InitDeviceTensor();
   virtual bool InputShapeChanged();
 
   KernelContext *ctx_{nullptr};
diff --git a/lite/kernels/npu/subgraph_compute.cc b/lite/kernels/npu/subgraph_compute.cc
index 1baa5a0de44d71356cabd505fb0cdfe388a0bae3..1a991bfc7494db76553ec20a9a6d831abcb5c5fe 100644
--- a/lite/kernels/npu/subgraph_compute.cc
+++ b/lite/kernels/npu/subgraph_compute.cc
@@ -195,18 +195,12 @@ int SubgraphEngine::LaunchDeviceProgram() {
   // Copy the data of origin input tensors to the buffer of input HiAI tensors
-  // init device_itensors_, device_otensors_, origin_otensors_
+  // on every launch. Tensor initialization lives in InitDeviceTensor() and
+  // only runs after a shape change, but fresh input data arrives on each run.
   auto device_program = device_program_map_[inputs_shape_];
   for (size_t i = 0; i < device_itensors_.size(); i++) {
-    device_itensors_[i]->Init(&(device_program->device_idims[i]));
     std::memcpy(device_itensors_[i]->GetBuffer(),
                 origin_itensors_[i]->raw_data(),
                 origin_itensors_[i]->memory_size());
   }
-  for (size_t i = 0; i < device_otensors_.size(); i++) {
-    device_otensors_[i]->Init(&(device_program->device_odims[i]));
-  }
-  for (size_t i = 0; i < origin_otensors_.size(); i++) {
-    origin_otensors_[i]->Resize(device_program->origin_odims[i]);
-  }
 
   // Run the HiAI model by name
   std::string key = "model_name";  // Note: key seems must be model_name
@@ -233,15 +227,52 @@ int SubgraphEngine::LaunchDeviceProgram() {
   return 0;
 }
 
+// Builds the device program for the current inputs_shape_, unless one is
+// already cached in device_program_map_.
+int SubgraphEngine::Build() {
+  if (device_program_map_.count(inputs_shape_) > 0) {
+    return subgraph::SUCCESS;
+  }
+  // In order to attach all of the ops of the block desc, we need to build the
+  // original program firstly.
+  BuildOriginProgram();
+  // Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph
+  build_device_program_status_ = BuildDeviceProgram();
+  return build_device_program_status_;
+}
+
+// (Re)initializes the device input/output tensors and resizes the origin
+// output tensors for the program matching inputs_shape_. Input data is not
+// copied here; LaunchDeviceProgram() does that on every run.
+void SubgraphEngine::InitDeviceTensor() {
+  // Use find() rather than operator[]: operator[] would insert an empty entry
+  // when Build() failed, and Build() would later treat that entry as a cached
+  // program.
+  auto it = device_program_map_.find(inputs_shape_);
+  if (it == device_program_map_.end() || it->second == nullptr) {
+    return;
+  }
+  auto device_program = it->second;
+  for (size_t i = 0; i < device_itensors_.size(); i++) {
+    device_itensors_[i]->Init(&(device_program->device_idims[i]));
+  }
+  for (size_t i = 0; i < device_otensors_.size(); i++) {
+    device_otensors_[i]->Init(&(device_program->device_odims[i]));
+  }
+  for (size_t i = 0; i < origin_otensors_.size(); i++) {
+    origin_otensors_[i]->Resize(device_program->origin_odims[i]);
+  }
+}
+
 bool SubgraphEngine::InputShapeChanged() {
   std::vector<std::vector<int64_t>> new_shape;
   for (auto origin_itensor : origin_itensors_) {
     new_shape.push_back(origin_itensor->dims().Vectorize());
   }
-  inputs_shape_ = new_shape;
-  if (device_program_map_.count(inputs_shape_) > 0) {
+  if (inputs_shape_ == new_shape) {
     return false;
   }
+  inputs_shape_ = new_shape;
   return true;
 }
 
diff --git a/lite/kernels/npu/subgraph_compute.h b/lite/kernels/npu/subgraph_compute.h
index 801f61b0365c03d59c36e2a62ac3c2bb61f46607..db84fc18835e836e7d234b92c4acedbc8846a48c 100644
--- a/lite/kernels/npu/subgraph_compute.h
+++ b/lite/kernels/npu/subgraph_compute.h
@@ -49,9 +49,13 @@ class SubgraphEngine : public subgraph::Engine {
     std::vector<hiai::TensorDimension> device_odims{};
   };
 
+  int Build() override;
+
  protected:
   int BuildDeviceProgram() override;
   int LaunchDeviceProgram() override;
+
+  void InitDeviceTensor() override;
   bool InputShapeChanged() override;
 
   std::string model_name_{"model.om"};
diff --git a/lite/tools/ci_build.sh b/lite/tools/ci_build.sh
index 270c3cf79c0dc498f0f792a32442130822545635..cda8bbd4e08c7c5e774f0d872b00aaa5d2d7afd1 100755
--- a/lite/tools/ci_build.sh
+++ b/lite/tools/ci_build.sh
@@ -396,7 +396,7 @@ function test_arm_android {
 
     adb -s ${device} push ${testpath} ${adb_work_dir}
     adb -s ${device} shell "cd ${adb_work_dir} && ./${test_name}"
-    adb -s ${device} shell "rm ${adb_work_dir}/${test_name}"
+    adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}"
 }
 
 # test_npu