diff --git a/lite/kernels/apu/subgraph_compute.cc b/lite/kernels/apu/subgraph_compute.cc
index 6009e71e05c33f6dedfd995020612e112c888d36..9e2ef7538675486072d43913c1a3973971277a23 100644
--- a/lite/kernels/apu/subgraph_compute.cc
+++ b/lite/kernels/apu/subgraph_compute.cc
@@ -28,7 +28,7 @@ namespace lite {
 namespace kernels {
 namespace apu {
 
-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::BuildDeviceProgram() {
   unsigned int version;
   Neuron_getVersion(&version);
   VLOG(3) << "Neuron Adapter version: " << version;
@@ -38,7 +38,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   int neuron_errCode = NeuronModel_create(&model_);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to create model";
-    return subgraph::FAILED;
+    return false;
   }
   graph.set_model(model_);
   graph.set_input_names(input_names_);
@@ -46,6 +46,9 @@ int SubgraphEngine::BuildDeviceProgram() {
 
   // Convert all of ops and their input vars and weights and added into the APU
   // NIR graph
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   const auto& bridges = subgraph::Registry::Instance();
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
@@ -54,7 +57,7 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kAPU))) {
-      return subgraph::FAILED;
+      return false;
     }
 
     auto kernel = inst.kernel();
@@ -63,7 +66,7 @@ int SubgraphEngine::BuildDeviceProgram() {
         const_cast<OpLite*>(op),
         const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
 
@@ -84,7 +87,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       VLOG(3) << "input idx: " << graph.Get(input_names_[i])->index();
     } else {
       LOG(WARNING) << "Fail to find input: " << input_names_[i];
-      return subgraph::FAILED;
+      return false;
     }
   }
 
@@ -105,7 +108,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       VLOG(3) << "output idx: " << graph.Get(output_names_[i])->index();
     } else {
       LOG(WARNING) << "Fail to find output: " << output_names_[i];
-      return subgraph::FAILED;
+      return false;
     }
   }
 
@@ -116,7 +119,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   neuron_errCode = NeuronModel_finish(model_);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to create NIR model:" << neuron_errCode;
-    return subgraph::FAILED;
+    return false;
   }
   VLOG(3) << "[APU] APU NIR model created!";
 
@@ -129,15 +132,14 @@ int SubgraphEngine::BuildDeviceProgram() {
   compilation_ = lite::apu::Device::Global().Build(model_);
   if (compilation_ == nullptr) {
     LOG(WARNING) << "[APU] Build APU DLA model failed!";
-    return subgraph::FAILED;
+    return false;
   }
   VLOG(3) << "[APU] APU DLA model created, Build cost "
           << GetCurrentUS() - start_time << " us";
-
-  return status;
+  return true;
 }
 
-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   auto GetCurrentUS = []() -> double {
     struct timeval time;
     gettimeofday(&time, NULL);
@@ -149,7 +151,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   int neuron_errCode = NeuronExecution_create(compilation_, &run);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "[APU] Build APU runtime failed!";
-    return subgraph::FAILED;
+    return false;
   }
 
   // Set input buffer
@@ -180,7 +182,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   neuron_errCode = NeuronExecution_compute(run);
   if (NEURON_NO_ERROR != neuron_errCode) {
     LOG(WARNING) << "Fail to run execution!" << neuron_errCode;
-    return subgraph::FAILED;
+    return false;
   }
 
   for (size_t i = 0; i < origin_otensors_.size(); i++) {
@@ -192,7 +194,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   }
   NeuronExecution_free(run);
   VLOG(3) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
-  return 0;
+  return true;
 }
 
 SubgraphEngine::~SubgraphEngine() {
@@ -213,12 +215,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }
 
 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }
 
 }  // namespace apu
diff --git a/lite/kernels/apu/subgraph_compute.h b/lite/kernels/apu/subgraph_compute.h
index ecd8a38343cd1f62bb5a3bf8e948384b90cfe826..beb582b8cc16e456491c28ace5e2d1695143216a 100644
--- a/lite/kernels/apu/subgraph_compute.h
+++ b/lite/kernels/apu/subgraph_compute.h
@@ -41,8 +41,8 @@ class SubgraphEngine : public subgraph::Engine {
   ~SubgraphEngine();
 
  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;
 
   NeuronModel *model_;
   NeuronCompilation *compilation_;
diff --git a/lite/kernels/bm/subgraph_compute.cc b/lite/kernels/bm/subgraph_compute.cc
index c6059461d1e790064407009cfc0aa3cfcdec8935..868481f4b8419c39131c145eb85ff450686482a8 100644
--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -28,12 +28,35 @@ namespace lite {
 namespace kernels {
 namespace bm {
 
-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_inputs_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_inputs_[i].reset(new hiai::AiTensor);
+    CHECK(device_inputs_[i]);
+  }
+  device_outputs_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_outputs_[i].reset(new hiai::AiTensor);
+    CHECK(device_outputs_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   subgraph::bm::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
   graph.CreateCompilerHandle();
   auto& ctx = this->ctx_->template As();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -41,7 +64,7 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kBM))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
     status |=
@@ -49,7 +72,7 @@ int SubgraphEngine::BuildDeviceProgram() {
                                              const_cast<OpLite*>(op),
                                              const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   std::string net_name = "bmnetc_f32umodel";
@@ -61,7 +84,7 @@ int SubgraphEngine::BuildDeviceProgram() {
   finish_bmcompiler_data(graph.GetCompilerHandle(), &bmodel_data, &data_size);
   bmrt_hd_ = bmrt_create(bm_hd_);
   if (false == bmrt_load_bmodel_data(bmrt_hd_, bmodel_data, data_size)) {
-    return subgraph::FAILED;
+    return false;
   }
   bmrt_get_network_names(bmrt_hd_, &net_names_);
   net_info_ = bmrt_get_network_info(bmrt_hd_, net_names_[0]);
@@ -114,10 +137,10 @@ int SubgraphEngine::BuildDeviceProgram() {
                    net_info_->output_dtypes[i],
                    stage.output_shapes[i]);
   }
-  return status;
+  return true;
 }
 
-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_inputs_.size(); i++) {
     bm_memcpy_s2d(bm_hd_,
                   device_inputs_[i].device_mem,
@@ -141,7 +164,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
       out_index++;
     }
   }
-  return 0;
+  return true;
 }
 
 void SubgraphCompute::PrepareForRun() {
@@ -153,12 +176,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }
 
 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }
 
 }  // namespace bm
diff --git a/lite/kernels/bm/subgraph_compute.h b/lite/kernels/bm/subgraph_compute.h
index 60f7661c7990d90020dbfc7ec3a6e0d178dceb70..7a5b2552ff95681da09346ba11f40f1a6acb7f01 100644
--- a/lite/kernels/bm/subgraph_compute.h
+++ b/lite/kernels/bm/subgraph_compute.h
@@ -44,8 +44,9 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}
 
  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;
 
  private:
  void *bmrt_hd_;
diff --git a/lite/kernels/mlu/subgraph_compute.h b/lite/kernels/mlu/subgraph_compute.h
index 3bfba33f4d7e8fd86f7aaf276da2ca4a8b0bd7cf..dbd055fe226aa1853bc8e33de7b4db17666558cc 100644
--- a/lite/kernels/mlu/subgraph_compute.h
+++ b/lite/kernels/mlu/subgraph_compute.h
@@ -46,34 +46,8 @@ class SubgraphEngine : public subgraph::Engine {
     graph_.SetFPType(type);
   }
 
-  int Build() {
-    // In order to attach all of the ops of the block desc, we need to build
-    // the original program firstly.
-    BuildOriginProgram();
-    // Run InferShape() of all of ops, and convert Paddle ops to MLU IR graph
-    build_device_program_status_ = BuildDeviceProgram();
-    return build_device_program_status_;
-  }
-
-  int Launch() {
-    // Rebuild device program when the shapes of input tensors have been
-    // changed.
-    if (subgraph::CHECK_SUCCESS(build_device_program_status_) &&
-        subgraph::CHECK_REBUILD_WHEN_SHAPE_CHANGED(
-            build_device_program_status_) &&
-        InputShapeChanged()) {
-      Build();
-    }
-    if (subgraph::CHECK_FAILED(build_device_program_status_)) {
-      LaunchOriginProgram();
-    } else {
-      LaunchDeviceProgram();
-    }
-    return 0;
-  }
-
  protected:
-  int BuildDeviceProgram() override {
+  bool BuildDeviceProgram() override {
     int status = 0;
     // Convert all of input data vars and added into the MLU IR graph
     for (auto& input_name : input_names_) {
@@ -94,6 +68,9 @@ class SubgraphEngine : public subgraph::Engine {
     LOG(INFO) << "START TO CONVERT ";
     // Convert all of ops and its weights and added into the MLU IR graph
     const auto& bridges = subgraph::Registry::Instance();
+    if (origin_program_.empty()) {
+      BuildOriginProgram();
+    }
     for (auto& inst : origin_program_) {
       auto op = inst.op();
       CHECK(op);
@@ -102,7 +79,7 @@ class SubgraphEngine : public subgraph::Engine {
       const_cast<OpLite*>(op)->InferShape();
       if (!bridges.Exists(op_type, TARGET(kMLU))) {
         LOG(INFO) << "MLU bridges doesn't support op_type: " << op_type;
-        return subgraph::FAILED;
+        return false;
       }
       auto kernel = inst.kernel();
       status |= bridges.Select(op_type, TARGET(kMLU))(
@@ -110,7 +87,7 @@ class SubgraphEngine : public subgraph::Engine {
           const_cast<OpLite*>(op),
           const_cast<KernelBase*>(kernel));
       if (subgraph::CHECK_FAILED(status)) {
-        return subgraph::FAILED;
+        return false;
       }
     }
     // Obtain the output nodes of the MLU IR graph and build the graph to MLU
@@ -138,10 +115,10 @@ class SubgraphEngine : public subgraph::Engine {
     auto core_version = mlu_context.MLUCoreVersion();
     auto core_number = mlu_context.MLUCoreNumber();
     graph_.Compile(core_version, core_number);
-    return status;
+    return true;
   }
 
-  int LaunchDeviceProgram() override {
+  bool LaunchDeviceProgram() override {
     auto& mlu_context = this->ctx_->template As();
     auto exec_queue = mlu_context.exec_queue();
     u32_t affinity = mlu_context.affinity();
@@ -151,7 +128,7 @@ class SubgraphEngine : public subgraph::Engine {
     forward_param.affinity = &affinity;
     forward_param.end = CNRT_PARAM_END;
     graph_.Compute(forward_param, exec_queue);
-    return 0;
+    return true;
   }
 
   paddle::lite::subgraph::mlu::Graph graph_;
@@ -174,12 +151,11 @@ class SubgraphCompute
                                     param.scope,
                                     this->precision()));
     CHECK(engine_);
-    engine_->Build();
   }
 
   void Run() override {
     CHECK(engine_);
-    engine_->Launch();
+    engine_->Run();
   }
 
   virtual ~SubgraphCompute() = default;
diff --git a/lite/kernels/rknpu/subgraph_compute.cc b/lite/kernels/rknpu/subgraph_compute.cc
index e0b63205705609b6899918ce8e254ccdf6cbad47..a50505c38c0740f762256cd71e006caf9249838e 100644
--- a/lite/kernels/rknpu/subgraph_compute.cc
+++ b/lite/kernels/rknpu/subgraph_compute.cc
@@ -28,13 +28,36 @@ namespace lite {
 namespace kernels {
 namespace rknpu {
 
-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_itensors_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_itensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_itensors_[i]);
+  }
+  device_otensors_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_otensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_otensors_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   LOG(INFO) << "[RKNPU]:BuildDeviceProgram";
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the NPU
   // RKNPU IR graph
   subgraph::rknpu::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -42,13 +65,13 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kRKNPU))) {
-      return subgraph::FAILED;
+      return false;
    }
     auto kernel = inst.kernel();
     status |= bridges.Select(op_type, TARGET(kRKNPU))(
        reinterpret_cast<void*>(&graph), op, const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   // Collect the valid input and output nodes in the RKNPU IR graph and update
@@ -91,7 +114,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       model_name_, graph.GetHandle(), device_itensors_, device_otensors_);
   if (device_program_ == nullptr) {
     LOG(WARNING) << "[RKNPU] Build model failed!";
-    return subgraph::FAILED;
+    return false;
   }
 
   // input
@@ -165,10 +188,10 @@ int SubgraphEngine::BuildDeviceProgram() {
       break;
     }
   }
-  return status;
+  return true;
 }
 
-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   LOG(INFO) << "[RKNPU]:LaunchDeviceProgram";
   std::vector inputs;
   std::vector outputs;
@@ -195,7 +218,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
   device_program_->SetInputs(inputs);
   device_program_->Run();
   device_program_->GetOutputs(outputs);
-  return 0;
+  return true;
 }
 
 void SubgraphCompute::PrepareForRun() {
@@ -208,13 +231,12 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }
 
 void SubgraphCompute::Run() {
   LOG(INFO) << "[RKNPU]:Run";
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }
 
 }  // namespace rknpu
diff --git a/lite/kernels/rknpu/subgraph_compute.h b/lite/kernels/rknpu/subgraph_compute.h
index 863e6aef39ad54f0e9d94d4b507c6fca4128ebb8..a4bdadc658a81decd8107072f7b5948613d0c68a 100644
--- a/lite/kernels/rknpu/subgraph_compute.h
+++ b/lite/kernels/rknpu/subgraph_compute.h
@@ -42,14 +42,15 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}
 
  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;
 
   std::string model_name_;
   std::vector device_inames_;
   std::vector device_onames_;
-  std::vector> device_itensors_;
-  std::vector> device_otensors_;
+  std::vector> device_itensors_{};
+  std::vector> device_otensors_{};
   std::unique_ptr device_program_{nullptr};
 };
 
diff --git a/lite/kernels/xpu/subgraph_compute.cc b/lite/kernels/xpu/subgraph_compute.cc
index 9c2191331c85a7f99ffb5a2e9662ed5831cb1dda..981922f8eacab57da4638e1fdcdd3df72465b379 100644
--- a/lite/kernels/xpu/subgraph_compute.cc
+++ b/lite/kernels/xpu/subgraph_compute.cc
@@ -27,12 +27,35 @@ namespace lite {
 namespace kernels {
 namespace xpu {
 
-int SubgraphEngine::BuildDeviceProgram() {
+bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
+  // Obtain the origin input tensors, and create the origin output
+  // tensors(Don't try to access them before launch the device program or the
+  // origin program)
+  PrepareWorkspaceForOriginProgram();
+  // Create the device input and output tensors, but don't initialize them
+  // with the dimensions
+  device_itensors_.resize(input_names_.size());
+  for (int i = 0; i < input_names_.size(); i++) {
+    device_itensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_itensors_[i]);
+  }
+  device_otensors_.resize(output_names_.size());
+  for (int i = 0; i < output_names_.size(); i++) {
+    device_otensors_[i].reset(new hiai::AiTensor);
+    CHECK(device_otensors_[i]);
+  }
+  return true;
+}
+
+bool SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the XPU
   // IR graph
   subgraph::xpu::Graph graph;
   const auto& bridges = subgraph::Registry::Instance();
+  if (origin_program_.empty()) {
+    BuildOriginProgram();
+  }
   for (auto& inst : origin_program_) {
     auto op = const_cast<OpLite*>(inst.op());
     CHECK(op);
@@ -40,13 +63,13 @@ int SubgraphEngine::BuildDeviceProgram() {
     op->InferShape();
     std::string op_type = op->op_info()->Type();
     if (!bridges.Exists(op_type, TARGET(kXPU))) {
-      return subgraph::FAILED;
+      return false;
     }
     auto kernel = inst.kernel();
     status |= bridges.Select(op_type, TARGET(kXPU))(
        reinterpret_cast<void*>(&graph), op, const_cast<KernelBase*>(kernel));
     if (subgraph::CHECK_FAILED(status)) {
-      return subgraph::FAILED;
+      return false;
     }
   }
   // Obtain the output nodes of the XPU IR graph and build the graph to the XPU
@@ -86,7 +109,7 @@ int SubgraphEngine::BuildDeviceProgram() {
       &graph.builder_, &graph.params_, &device_onodes);
   if (device_program_ == nullptr) {
     LOG(WARNING) << "[XPU] Build model failed!";
-    return subgraph::FAILED;
+    return false;
   }
 
   // Query and check the dimensions of input and output tensors
@@ -166,10 +189,10 @@ int SubgraphEngine::BuildDeviceProgram() {
     device_otensors_[i].strides = nullptr;
     device_otensors_[i].byte_offset = 0;
   }
-  return status;
+  return true;
 }
 
-int SubgraphEngine::LaunchDeviceProgram() {
+bool SubgraphEngine::LaunchDeviceProgram() {
   for (size_t i = 0; i < device_itensors_.size(); i++) {
     // Update the data pointer of DLTensor to track the origin input tensors
     device_itensors_[i].data =
@@ -191,7 +214,7 @@ int SubgraphEngine::LaunchDeviceProgram() {
         const_cast<void*>(origin_otensors_[i]->raw_data());
     device_program_->CopyOutputTo(i, &device_otensors_[i]);
   }
-  return 0;
+  return true;
 }
 
 void SubgraphCompute::PrepareForRun() {
@@ -203,12 +226,11 @@ void SubgraphCompute::PrepareForRun() {
                                   param.output_data_names,
                                   param.scope));
   CHECK(engine_);
-  engine_->Build();
 }
 
 void SubgraphCompute::Run() {
   CHECK(engine_);
-  engine_->Launch();
+  engine_->Run();
 }
 
 }  // namespace xpu
diff --git a/lite/kernels/xpu/subgraph_compute.h b/lite/kernels/xpu/subgraph_compute.h
index 601c8821bc826e350c233573bf7eff89cdf5c1f5..f09a06a85d5382c72e9efb20cede8bea1922f2da 100644
--- a/lite/kernels/xpu/subgraph_compute.h
+++ b/lite/kernels/xpu/subgraph_compute.h
@@ -39,13 +39,14 @@ class SubgraphEngine : public subgraph::Engine {
             ctx, block_idx, block_desc, input_names, output_names, scope) {}
 
  protected:
-  int BuildDeviceProgram() override;
-  int LaunchDeviceProgram() override;
+  bool PrepareWorkspaceForDeviceProgram() override;
+  bool BuildDeviceProgram() override;
+  bool LaunchDeviceProgram() override;
 
   std::vector device_inames_;
   std::vector device_onames_;
-  std::vector device_itensors_;
-  std::vector device_otensors_;
+  std::vector device_itensors_{};
+  std::vector device_otensors_{};
   std::unique_ptr device_program_{nullptr};
 };
 