提交 be0ef8ac 编写于 作者: Z zhupengyang 提交者: GitHub

[NPU] init device tensor when needed (#3552)

上级 1c7707e0
......@@ -95,6 +95,8 @@ int Engine::Build() {
return build_device_program_status_;
}
// Default device-tensor initialization hook. The base engine has nothing to
// prepare, so this is intentionally empty; it is declared virtual so that
// device-specific engines can supply their own implementation.
void Engine::InitDeviceTensor() {}
bool Engine::InputShapeChanged() {
for (size_t i = 0; i < origin_itensors_.size(); i++) {
if (origin_itensors_[i]->dims() != origin_idims_[i]) {
......@@ -110,6 +112,7 @@ int Engine::Launch() {
CHECK_REBUILD_WHEN_SHAPE_CHANGED(build_device_program_status_) &&
InputShapeChanged()) {
Build();
InitDeviceTensor();
}
if (CHECK_FAILED(build_device_program_status_)) {
LaunchOriginProgram();
......
......@@ -55,6 +55,7 @@ class Engine {
// Builds the original program. NOTE(review): the meaning of the returned int
// is not visible here -- confirm against the definition.
virtual int BuildOriginProgram();
// Runs the original program; Launch() falls back to this when the device
// program build status indicates a failure.
virtual int LaunchOriginProgram();
// Hook invoked by Launch() right after a rebuild. The base implementation
// does nothing; engines override it to set up their device tensors.
virtual void InitDeviceTensor();
// Reports whether the input shapes differ from the recorded ones. The base
// version compares each origin input tensor's dims() with origin_idims_.
virtual bool InputShapeChanged();
KernelContext *ctx_{nullptr};
......
......@@ -195,18 +195,6 @@ int SubgraphEngine::LaunchDeviceProgram() {
// Copy the data of origin input tensors to the buffer of input HiAI tensors
// init device_itensors_, device_otensors_, origin_otensors_
auto device_program = device_program_map_[inputs_shape_];
for (size_t i = 0; i < device_itensors_.size(); i++) {
device_itensors_[i]->Init(&(device_program->device_idims[i]));
std::memcpy(device_itensors_[i]->GetBuffer(),
origin_itensors_[i]->raw_data(),
origin_itensors_[i]->memory_size());
}
for (size_t i = 0; i < device_otensors_.size(); i++) {
device_otensors_[i]->Init(&(device_program->device_odims[i]));
}
for (size_t i = 0; i < origin_otensors_.size(); i++) {
origin_otensors_[i]->Resize(device_program->origin_odims[i]);
}
// Run the HiAI model by name
std::string key = "model_name"; // Note: key seems must be model_name
......@@ -233,15 +221,43 @@ int SubgraphEngine::LaunchDeviceProgram() {
return 0;
}
// Builds the device program for the current inputs_shape_ unless one is
// already stored in device_program_map_.
//
// Returns subgraph::SUCCESS when a program for this shape already exists;
// otherwise returns the status produced by BuildDeviceProgram(), which is also
// stored in build_device_program_status_.
int SubgraphEngine::Build() {
  // Nothing to do when a program was already built for this shape.
  const bool already_built = device_program_map_.count(inputs_shape_) != 0;
  if (already_built) return subgraph::SUCCESS;

  // The original program must be built first so that every op of the block
  // desc is attached before the device program is generated.
  BuildOriginProgram();

  // Runs InferShape() for all ops and converts the Paddle ops into the
  // NPU/XPU IR graph; the resulting status is remembered for Launch().
  build_device_program_status_ = BuildDeviceProgram();
  return build_device_program_status_;
}
void SubgraphEngine::InitDeviceTensor() {
auto device_program = device_program_map_[inputs_shape_];
for (size_t i = 0; i < device_itensors_.size(); i++) {
device_itensors_[i]->Init(&(device_program->device_idims[i]));
std::memcpy(device_itensors_[i]->GetBuffer(),
origin_itensors_[i]->raw_data(),
origin_itensors_[i]->memory_size());
}
for (size_t i = 0; i < device_otensors_.size(); i++) {
device_otensors_[i]->Init(&(device_program->device_odims[i]));
}
for (size_t i = 0; i < origin_otensors_.size(); i++) {
origin_otensors_[i]->Resize(device_program->origin_odims[i]);
}
}
bool SubgraphEngine::InputShapeChanged() {
std::vector<std::vector<int64_t>> new_shape;
for (auto origin_itensor : origin_itensors_) {
new_shape.push_back(origin_itensor->dims().Vectorize());
}
inputs_shape_ = new_shape;
if (device_program_map_.count(inputs_shape_) > 0) {
if (inputs_shape_ == new_shape) {
return false;
}
inputs_shape_ = new_shape;
return true;
}
......
......@@ -49,9 +49,13 @@ class SubgraphEngine : public subgraph::Engine {
std::vector<hiai::TensorDimension> device_odims{};
};
// Builds the device program for the current input shapes, skipping the work
// when device_program_map_ already holds a program for them.
int Build() override;
protected:
// Produces the device program; its result is stored by Build() as the build
// status. NOTE(review): body not visible here -- confirm details.
int BuildDeviceProgram() override;
// Runs the device program selected by inputs_shape_ (the HiAI model is run by
// name).
int LaunchDeviceProgram() override;
// Initializes the device input/output tensors from the selected device
// program, copies the origin input data, and resizes the origin outputs.
void InitDeviceTensor() override;
// Collects the current dims of the origin input tensors and reports whether
// they require a change. NOTE(review): exact criterion to be confirmed
// against the definition.
bool InputShapeChanged() override;
std::string model_name_{"model.om"};
......
......@@ -396,7 +396,7 @@ function test_arm_android {
adb -s ${device} push ${testpath} ${adb_work_dir}
adb -s ${device} shell "cd ${adb_work_dir} && ./${test_name}"
adb -s ${device} shell "rm ${adb_work_dir}/${test_name}"
adb -s ${device} shell "rm -f ${adb_work_dir}/${test_name}"
}
# test_npu <some_test_name> <adb_port_number>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册