Unverified commit d42cc8d1, authored by zhupengyang, committed by GitHub

[NPU] init device tensor when needed (#3552)
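In effect, the change turns device-tensor setup into a virtual hook: the base Engine gets a no-op InitDeviceTensor(), Engine::Launch() calls it right after Build() whenever the input shape changed, and the NPU SubgraphEngine overrides it to rebind the HiAI tensors of the device program cached for that shape. A minimal, self-contained sketch of that control flow follows; the simplified names (NpuLikeEngine, ShapeKey, device_programs_) are illustrative stand-ins, not the actual Paddle-Lite classes.

// Sketch only: hypothetical simplified types modeling the hook introduced by
// this commit, not the real Paddle-Lite classes.
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

using ShapeKey = std::vector<std::vector<int64_t>>;

class Engine {
 public:
  virtual ~Engine() = default;
  void Launch() {
    if (InputShapeChanged()) {  // rebuild only when the inputs changed
      Build();
      InitDeviceTensor();       // new hook: refresh device-side tensors
    }
    Run();
  }

 protected:
  virtual void Build() {}
  virtual void InitDeviceTensor() {}  // no-op default in the base class
  virtual bool InputShapeChanged() { return true; }
  virtual void Run() { std::cout << "run device program" << std::endl; }
};

class NpuLikeEngine : public Engine {
 public:
  void set_input_shape(const ShapeKey &shape) { current_shape_ = shape; }

 protected:
  void Build() override {
    if (device_programs_.count(inputs_shape_) > 0) return;  // cached per shape
    device_programs_[inputs_shape_] = next_id_++;           // "compile" once
  }
  void InitDeviceTensor() override {
    // Re-initialize device tensors against the program cached for this shape.
    std::cout << "init tensors for program " << device_programs_[inputs_shape_]
              << std::endl;
  }
  bool InputShapeChanged() override {
    if (inputs_shape_ == current_shape_) return false;
    inputs_shape_ = current_shape_;
    return true;
  }

 private:
  ShapeKey current_shape_;
  ShapeKey inputs_shape_;
  std::map<ShapeKey, int> device_programs_;
  int next_id_{0};
};

int main() {
  NpuLikeEngine engine;
  engine.set_input_shape({{1, 3, 224, 224}});
  engine.Launch();  // shape changed: Build() + InitDeviceTensor() run
  engine.Launch();  // same shape: neither runs, cached program is reused
  engine.set_input_shape({{2, 3, 224, 224}});
  engine.Launch();  // new shape: builds and initializes a second program
  return 0;
}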

Parent 17a0837f
@@ -95,6 +95,8 @@ int Engine::Build() {
   return build_device_program_status_;
 }
 
+void Engine::InitDeviceTensor() { return; }
+
 bool Engine::InputShapeChanged() {
   for (size_t i = 0; i < origin_itensors_.size(); i++) {
     if (origin_itensors_[i]->dims() != origin_idims_[i]) {
@@ -110,6 +112,7 @@ int Engine::Launch() {
       CHECK_REBUILD_WHEN_SHAPE_CHANGED(build_device_program_status_) &&
       InputShapeChanged()) {
     Build();
+    InitDeviceTensor();
   }
   if (CHECK_FAILED(build_device_program_status_)) {
     LaunchOriginProgram();
......
@@ -55,6 +55,7 @@ class Engine {
   virtual int BuildOriginProgram();
   virtual int LaunchOriginProgram();
+  virtual void InitDeviceTensor();
   virtual bool InputShapeChanged();
 
   KernelContext *ctx_{nullptr};
......
@@ -195,18 +195,6 @@ int SubgraphEngine::LaunchDeviceProgram() {
   // Copy the data of origin input tensors to the buffer of input HiAI tensors
   // init device_itensors_, device_otensors_, origin_otensors_
   auto device_program = device_program_map_[inputs_shape_];
-  for (size_t i = 0; i < device_itensors_.size(); i++) {
-    device_itensors_[i]->Init(&(device_program->device_idims[i]));
-    std::memcpy(device_itensors_[i]->GetBuffer(),
-                origin_itensors_[i]->raw_data(),
-                origin_itensors_[i]->memory_size());
-  }
-  for (size_t i = 0; i < device_otensors_.size(); i++) {
-    device_otensors_[i]->Init(&(device_program->device_odims[i]));
-  }
-  for (size_t i = 0; i < origin_otensors_.size(); i++) {
-    origin_otensors_[i]->Resize(device_program->origin_odims[i]);
-  }
 
   // Run the HiAI model by name
   std::string key = "model_name";  // Note: key seems must be model_name
@@ -233,15 +221,43 @@ int SubgraphEngine::LaunchDeviceProgram() {
   return 0;
 }
 
+int SubgraphEngine::Build() {
+  if (device_program_map_.count(inputs_shape_) > 0) {
+    return subgraph::SUCCESS;
+  }
+  // In order to attach all of the ops of the block desc, we need to build the
+  // original program firstly.
+  BuildOriginProgram();
+  // Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph
+  build_device_program_status_ = BuildDeviceProgram();
+  return build_device_program_status_;
+}
+
+void SubgraphEngine::InitDeviceTensor() {
+  auto device_program = device_program_map_[inputs_shape_];
+  for (size_t i = 0; i < device_itensors_.size(); i++) {
+    device_itensors_[i]->Init(&(device_program->device_idims[i]));
+    std::memcpy(device_itensors_[i]->GetBuffer(),
+                origin_itensors_[i]->raw_data(),
+                origin_itensors_[i]->memory_size());
+  }
+  for (size_t i = 0; i < device_otensors_.size(); i++) {
+    device_otensors_[i]->Init(&(device_program->device_odims[i]));
+  }
+  for (size_t i = 0; i < origin_otensors_.size(); i++) {
+    origin_otensors_[i]->Resize(device_program->origin_odims[i]);
+  }
+}
+
 bool SubgraphEngine::InputShapeChanged() {
   std::vector<std::vector<int64_t>> new_shape;
   for (auto origin_itensor : origin_itensors_) {
     new_shape.push_back(origin_itensor->dims().Vectorize());
   }
-  inputs_shape_ = new_shape;
-  if (device_program_map_.count(inputs_shape_) > 0) {
+  if (inputs_shape_ == new_shape) {
     return false;
   }
+  inputs_shape_ = new_shape;
   return true;
 }
......
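Taken together, these changes move the per-shape setup (HiAI tensor dimension Init(), the input-buffer memcpy, and the origin output Resize()) out of LaunchDeviceProgram() into the new InitDeviceTensor(), which Launch() only invokes after a (re)build. SubgraphEngine::Build() now returns early when device_program_map_ already holds a program for the current input shape, so InputShapeChanged() reduces to a plain comparison that updates inputs_shape_ only when the shapes actually differ.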
@@ -49,9 +49,13 @@ class SubgraphEngine : public subgraph::Engine {
     std::vector<hiai::TensorDimension> device_odims{};
   };
 
+  int Build() override;
+
  protected:
   int BuildDeviceProgram() override;
   int LaunchDeviceProgram() override;
+  void InitDeviceTensor() override;
   bool InputShapeChanged() override;
 
   std::string model_name_{"model.om"};
......
@@ -396,7 +396,7 @@ function test_arm_android {
     adb -s ${device} push ${testpath} ${adb_work_dir}
     adb -s ${device} shell "cd ${adb_work_dir} && ./${test_name}"
-    adb -s ${device} shell "rm ${adb_work_dir}/${test_name}"
+    adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}"
 }
 
 # test_npu <some_test_name> <adb_port_number>
......
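The ci_build.sh tweak switches the post-test cleanup to rm -f, so the adb shell command still succeeds when the pushed test binary is already gone.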