Commit 56a0a238 authored by hong19860320, committed by GitHub

[APU] Refine the dynamic loading of libneuron_adapter.so (#3544)

Parent 87497b9c
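This change replaces the ad-hoc, per-call dlopen/dlsym plumbing with a process-wide NeuronAdapter singleton that loads libneuron_adapter.so once, resolves every Neuron symbol up front, and exposes plain C wrappers with the stock API names. A minimal sketch of that pattern, using hypothetical names (libfoo.so, foo_version) rather than the actual commit code:

#include <dlfcn.h>

class FooAdapter {
 public:
  using foo_version_fn = int (*)(unsigned int*);  // hypothetical symbol type

  static FooAdapter* Global() {
    static FooAdapter adapter;  // loaded once for the whole process
    return &adapter;
  }
  foo_version_fn foo_version() const { return foo_version_; }

 private:
  FooAdapter() {
    // The real code probes several paths and CHECK-fails if none can be opened.
    handle_ = dlopen("libfoo.so", RTLD_LAZY);
    if (handle_ != nullptr) {
      foo_version_ =
          reinterpret_cast<foo_version_fn>(dlsym(handle_, "foo_version"));
    }
  }
  void* handle_{nullptr};
  foo_version_fn foo_version_{nullptr};
};

// C-style trampoline: call sites keep using the plain API name.
int foo_version(unsigned int* version) {
  return FooAdapter::Global()->foo_version()(version);
}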
......@@ -32,34 +32,3 @@ endif()
message(STATUS "APU_DDK_INC: ${APU_DDK_INC}")
include_directories("${APU_DDK_ROOT}/include")
set(APU_SUB_LIB_PATH "lib64")
if(ARM_TARGET_ARCH_ABI STREQUAL "armv8")
set(APU_SUB_LIB_PATH "lib64")
endif()
find_library(APU_NEURON_FILE NAMES neuron
PATHS ${APU_DDK_ROOT}/${APU_SUB_LIB_PATH})
find_library(APU_NEURON_ADAPTER_FILE NAMES neuron_adapter
PATHS ${APU_DDK_ROOT}/${APU_SUB_LIB_PATH})
if(NOT APU_NEURON_FILE)
message(FATAL_ERROR "Can not find APU_NEURON_FILE in ${APU_DDK_ROOT}")
else()
message(STATUS "Found APU NEURON Library: ${APU_NEURON_FILE}")
add_library(apu_neuron SHARED IMPORTED GLOBAL)
set_property(TARGET apu_neuron PROPERTY IMPORTED_LOCATION ${APU_NEURON_FILE})
endif()
if(NOT APU_NEURON_ADAPTER_FILE)
message(FATAL_ERROR "Can not find APU_NEURON_ADAPTER_FILE in ${APU_DDK_ROOT}")
else()
message(STATUS "Found APU NEURON ADAPTER Library: ${APU_NEURON_ADAPTER_FILE}")
add_library(apu_neuron_adapter SHARED IMPORTED GLOBAL)
set_property(TARGET apu_neuron_adapter PROPERTY IMPORTED_LOCATION ${APU_NEURON_ADAPTER_FILE})
endif()
set(apu_runtime_libs apu_neuron apu_neuron_adapter CACHE INTERNAL "apu runtime libs")
message(STATUS "${apu_runtime_libs}")
......@@ -2,4 +2,5 @@ if(NOT LITE_WITH_APU)
return()
endif()
lite_cc_library(device_apu SRCS device.cc)
lite_cc_library(neuron_adapter SRCS neuron_adapter.cc)
lite_cc_library(device_apu SRCS device.cc DEPS neuron_adapter)
......@@ -20,48 +20,19 @@ namespace paddle {
namespace lite {
namespace apu {
inline void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
NeuronCompilation* Device::Build(void* libHandle, NeuronModel* model) {
typedef int (*NeuronCompilation_create)(NeuronModel * model,
NeuronCompilation * *compilation);
typedef void (*NeuronCompilation_free)(NeuronCompilation * compilation);
typedef int (*NeuronCompilation_finish)(NeuronCompilation * compilation);
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
LOAD_FUNCTIONS(libHandle, NeuronCompilation_create, neuron_compilation_create)
LOAD_FUNCTIONS(libHandle, NeuronCompilation_free, neuron_compilation_free)
LOAD_FUNCTIONS(libHandle, NeuronCompilation_finish, neuron_compilation_finish)
#undef LOAD_FUNCTIONS
int neuron_errCode = 0;
NeuronCompilation* compilation = NULL;
NeuronCompilation* Device::Build(NeuronModel* model) {
VLOG(3) << "[APU] Compile model";
neuron_errCode = (*neuron_compilation_create)(model, &compilation);
NeuronCompilation* compilation = NULL;
int neuron_errCode = NeuronCompilation_create(model, &compilation);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] create compile failed! " << neuron_errCode;
return nullptr;
}
neuron_errCode = (*neuron_compilation_finish)(compilation);
neuron_errCode = NeuronCompilation_finish(compilation);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] compile failed! " << neuron_errCode;
return nullptr;
}
VLOG(3) << "[APU] Build done";
return compilation;
}
......
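The refactored Device::Build() is now just the standard create-then-finish compilation flow on top of the adapter-backed API. A hypothetical caller (illustrative only; subgraph_compute.cc below does the same through its compilation_ member) pairs it with NeuronCompilation_free:

// `model` is assumed to be a finished NeuronModel.
NeuronCompilation* compilation =
    paddle::lite::apu::Device::Global().Build(model);
if (compilation != nullptr) {
  // ... create a NeuronExecution, set I/O buffers, compute ...
  NeuronCompilation_free(compilation);
}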
......@@ -18,7 +18,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include "NeuronAdapter.h" // NOLINT
#include "lite/backends/apu/neuron_adapter.h"
namespace paddle {
namespace lite {
......@@ -32,7 +32,7 @@ class Device {
}
Device() {}
NeuronCompilation* Build(void* libHandle, NeuronModel* model);
NeuronCompilation* Build(NeuronModel* model);
};
} // namespace apu
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/backends/apu/neuron_adapter.h"
#include <dlfcn.h>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
NeuronAdapter* NeuronAdapter::Global() {
static NeuronAdapter adapter;
return &adapter;
}
NeuronAdapter::NeuronAdapter() {
CHECK(InitHandle()) << "Failed to initialize the Neuron Adapter library!";
InitFunctions();
}
bool NeuronAdapter::InitHandle() {
const std::vector<std::string> paths = {
"libneuron_adapter.so",
#if defined(__aarch64__)
"/vendor/lib64/libneuron_adapter.so",
"/system/lib64/libneuron_adapter.so",
"/system/vendor/lib64/libneuron_adapter.so",
#else
"/vendor/lib/libneuron_adapter.so",
"/system/lib/libneuron_adapter.so",
"/system/vendor/lib/libneuron_adapter.so",
#endif
};
std::string target_lib = "Unknown";
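// Try the bare soname first (resolved via the default library search path),
// then fall back to the absolute vendor/system locations listed above.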
for (const auto& path : paths) {
handle_ = dlopen(path.c_str(), RTLD_LAZY);
if (handle_ != nullptr) {
target_lib = path;
break;
}
}
VLOG(4) << "Load the Neuron Adapter library from " << target_lib;
return handle_ != nullptr;
}
void NeuronAdapter::InitFunctions() {
CHECK(handle_ != nullptr) << "The library handle can't be null!";
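// Resolve every required Neuron symbol once at startup; a missing symbol
// aborts immediately via LOG(FATAL) instead of failing on first use.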
#define PADDLE_DLSYM(neuron_adapter_func) \
do { \
neuron_adapter_func##_ = \
(neuron_adapter_func##_Type)dlsym(handle_, #neuron_adapter_func); \
if (neuron_adapter_func##_ == nullptr) { \
LOG(FATAL) << "Cannot find the " << #neuron_adapter_func \
<< " symbol in libneuron_adapter.so!"; \
break; \
} \
VLOG(4) << "Loaded the " << #neuron_adapter_func \
<< " symbol successfully."; \
} while (false)
PADDLE_DLSYM(Neuron_getVersion);
PADDLE_DLSYM(NeuronModel_create);
PADDLE_DLSYM(NeuronModel_free);
PADDLE_DLSYM(NeuronModel_finish);
PADDLE_DLSYM(NeuronModel_addOperand);
PADDLE_DLSYM(NeuronModel_setOperandValue);
PADDLE_DLSYM(NeuronModel_setOperandSymmPerChannelQuantParams);
PADDLE_DLSYM(NeuronModel_addOperation);
PADDLE_DLSYM(NeuronModel_identifyInputsAndOutputs);
PADDLE_DLSYM(NeuronCompilation_create);
PADDLE_DLSYM(NeuronCompilation_free);
PADDLE_DLSYM(NeuronCompilation_finish);
PADDLE_DLSYM(NeuronExecution_create);
PADDLE_DLSYM(NeuronExecution_free);
PADDLE_DLSYM(NeuronExecution_setInput);
PADDLE_DLSYM(NeuronExecution_setOutput);
PADDLE_DLSYM(NeuronExecution_compute);
#undef PADDLE_DLSYM
}
} // namespace lite
} // namespace paddle
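// C-style trampolines exposing the stock Neuron API names: existing call
// sites stay unchanged, and each call forwards to the pointer cached in
// the NeuronAdapter singleton.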
int Neuron_getVersion(uint32_t* version) {
return paddle::lite::NeuronAdapter::Global()->Neuron_getVersion()(version);
}
int NeuronModel_create(NeuronModel** model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_create()(model);
}
void NeuronModel_free(NeuronModel* model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_free()(model);
}
int NeuronModel_finish(NeuronModel* model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_finish()(model);
}
int NeuronModel_addOperand(NeuronModel* model, const NeuronOperandType* type) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_addOperand()(model,
type);
}
int NeuronModel_setOperandValue(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_setOperandValue()(
model, index, buffer, length);
}
int NeuronModel_setOperandSymmPerChannelQuantParams(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_setOperandSymmPerChannelQuantParams()(
model, index, channelQuant);
}
int NeuronModel_addOperation(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_addOperation()(
model, type, inputCount, inputs, outputCount, outputs);
}
int NeuronModel_identifyInputsAndOutputs(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_identifyInputsAndOutputs()(
model, inputCount, inputs, outputCount, outputs);
}
int NeuronCompilation_create(NeuronModel* model,
NeuronCompilation** compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_create()(
model, compilation);
}
void NeuronCompilation_free(NeuronCompilation* compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_free()(
compilation);
}
int NeuronCompilation_finish(NeuronCompilation* compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_finish()(
compilation);
}
int NeuronExecution_create(NeuronCompilation* compilation,
NeuronExecution** execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_create()(
compilation, execution);
}
void NeuronExecution_free(NeuronExecution* execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_free()(
execution);
}
int NeuronExecution_setInput(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_setInput()(
execution, index, type, buffer, length);
}
int NeuronExecution_setOutput(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_setOutput()(
execution, index, type, buffer, length);
}
int NeuronExecution_compute(NeuronExecution* execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_compute()(
execution);
}
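With the trampolines above in place, any call site can keep using the stock Neuron API names while resolution goes through the singleton. A hypothetical call site, not part of this commit:

NeuronModel* model = nullptr;
if (NeuronModel_create(&model) == NEURON_NO_ERROR) {
  // ... build the model, then NeuronModel_finish(model) ...
  NeuronModel_free(model);
}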
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "NeuronAdapter.h" // NOLINT
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
class NeuronAdapter final {
public:
static NeuronAdapter *Global();
// Platform APIs
using Neuron_getVersion_Type = int (*)(uint32_t *);
using NeuronModel_create_Type = int (*)(NeuronModel **);
using NeuronModel_free_Type = void (*)(NeuronModel *);
using NeuronModel_finish_Type = int (*)(NeuronModel *);
using NeuronModel_addOperand_Type = int (*)(NeuronModel *,
const NeuronOperandType *);
using NeuronModel_setOperandValue_Type = int (*)(NeuronModel *,
int32_t,
const void *,
size_t);
using NeuronModel_setOperandSymmPerChannelQuantParams_Type =
int (*)(NeuronModel *, int32_t, const NeuronSymmPerChannelQuantParams *);
using NeuronModel_addOperation_Type = int (*)(NeuronModel *,
NeuronOperationType,
uint32_t,
const uint32_t *,
uint32_t,
const uint32_t *);
using NeuronModel_identifyInputsAndOutputs_Type = int (*)(
NeuronModel *, uint32_t, const uint32_t *, uint32_t, const uint32_t *);
using NeuronCompilation_create_Type = int (*)(NeuronModel *,
NeuronCompilation **);
using NeuronCompilation_free_Type = void (*)(NeuronCompilation *);
using NeuronCompilation_finish_Type = int (*)(NeuronCompilation *);
using NeuronExecution_create_Type = int (*)(NeuronCompilation *,
NeuronExecution **);
using NeuronExecution_free_Type = void (*)(NeuronExecution *);
using NeuronExecution_setInput_Type = int (*)(NeuronExecution *,
int32_t,
const NeuronOperandType *,
const void *,
size_t);
using NeuronExecution_setOutput_Type = int (*)(
NeuronExecution *, int32_t, const NeuronOperandType *, void *, size_t);
using NeuronExecution_compute_Type = int (*)(NeuronExecution *);
Neuron_getVersion_Type Neuron_getVersion() {
CHECK(Neuron_getVersion_ != nullptr) << "Cannot load Neuron_getVersion!";
return Neuron_getVersion_;
}
NeuronModel_create_Type NeuronModel_create() {
CHECK(NeuronModel_create_ != nullptr) << "Cannot load NeuronModel_create!";
return NeuronModel_create_;
}
NeuronModel_free_Type NeuronModel_free() {
CHECK(NeuronModel_free_ != nullptr) << "Cannot load NeuronModel_free!";
return NeuronModel_free_;
}
NeuronModel_finish_Type NeuronModel_finish() {
CHECK(NeuronModel_finish_ != nullptr) << "Cannot load NeuronModel_finish!";
return NeuronModel_finish_;
}
NeuronModel_addOperand_Type NeuronModel_addOperand() {
CHECK(NeuronModel_addOperand_ != nullptr)
<< "Cannot load NeuronModel_addOperand!";
return NeuronModel_addOperand_;
}
NeuronModel_setOperandValue_Type NeuronModel_setOperandValue() {
CHECK(NeuronModel_setOperandValue_ != nullptr)
<< "Cannot load NeuronModel_setOperandValue!";
return NeuronModel_setOperandValue_;
}
NeuronModel_setOperandSymmPerChannelQuantParams_Type
NeuronModel_setOperandSymmPerChannelQuantParams() {
CHECK(NeuronModel_setOperandSymmPerChannelQuantParams_ != nullptr)
<< "Cannot load NeuronModel_setOperandSymmPerChannelQuantParams!";
return NeuronModel_setOperandSymmPerChannelQuantParams_;
}
NeuronModel_addOperation_Type NeuronModel_addOperation() {
CHECK(NeuronModel_addOperation_ != nullptr)
<< "Cannot load NeuronModel_addOperation!";
return NeuronModel_addOperation_;
}
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs() {
CHECK(NeuronModel_identifyInputsAndOutputs_ != nullptr)
<< "Cannot load NeuronModel_identifyInputsAndOutputs!";
return NeuronModel_identifyInputsAndOutputs_;
}
NeuronCompilation_create_Type NeuronCompilation_create() {
CHECK(NeuronCompilation_create_ != nullptr)
<< "Cannot load NeuronCompilation_create!";
return NeuronCompilation_create_;
}
NeuronCompilation_free_Type NeuronCompilation_free() {
CHECK(NeuronCompilation_free_ != nullptr)
<< "Cannot load NeuronCompilation_free!";
return NeuronCompilation_free_;
}
NeuronCompilation_finish_Type NeuronCompilation_finish() {
CHECK(NeuronCompilation_finish_ != nullptr)
<< "Cannot load NeuronCompilation_finish!";
return NeuronCompilation_finish_;
}
NeuronExecution_create_Type NeuronExecution_create() {
CHECK(NeuronExecution_create_ != nullptr)
<< "Cannot load NeuronExecution_create!";
return NeuronExecution_create_;
}
NeuronExecution_free_Type NeuronExecution_free() {
CHECK(NeuronExecution_free_ != nullptr)
<< "Cannot load NeuronExecution_free!";
return NeuronExecution_free_;
}
NeuronExecution_setInput_Type NeuronExecution_setInput() {
CHECK(NeuronExecution_setInput_ != nullptr)
<< "Cannot loadcl NeuronExecution_setInput!";
return NeuronExecution_setInput_;
}
NeuronExecution_setOutput_Type NeuronExecution_setOutput() {
CHECK(NeuronExecution_setOutput_ != nullptr)
<< "Cannot load NeuronExecution_setOutput!";
return NeuronExecution_setOutput_;
}
NeuronExecution_compute_Type NeuronExecution_compute() {
CHECK(NeuronExecution_compute_ != nullptr)
<< "Cannot load NeuronExecution_compute!";
return NeuronExecution_compute_;
}
private:
NeuronAdapter();
NeuronAdapter(const NeuronAdapter &) = delete;
NeuronAdapter &operator=(const NeuronAdapter &) = delete;
bool InitHandle();
void InitFunctions();
void *handle_{nullptr};
Neuron_getVersion_Type Neuron_getVersion_{nullptr};
NeuronModel_create_Type NeuronModel_create_{nullptr};
NeuronModel_free_Type NeuronModel_free_{nullptr};
NeuronModel_finish_Type NeuronModel_finish_{nullptr};
NeuronModel_addOperand_Type NeuronModel_addOperand_{nullptr};
NeuronModel_setOperandValue_Type NeuronModel_setOperandValue_{nullptr};
NeuronModel_setOperandSymmPerChannelQuantParams_Type
NeuronModel_setOperandSymmPerChannelQuantParams_{nullptr};
NeuronModel_addOperation_Type NeuronModel_addOperation_{nullptr};
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs_{nullptr};
NeuronCompilation_create_Type NeuronCompilation_create_{nullptr};
NeuronCompilation_free_Type NeuronCompilation_free_{nullptr};
NeuronCompilation_finish_Type NeuronCompilation_finish_{nullptr};
NeuronExecution_create_Type NeuronExecution_create_{nullptr};
NeuronExecution_free_Type NeuronExecution_free_{nullptr};
NeuronExecution_setInput_Type NeuronExecution_setInput_{nullptr};
NeuronExecution_setOutput_Type NeuronExecution_setOutput_{nullptr};
NeuronExecution_compute_Type NeuronExecution_compute_{nullptr};
};
} // namespace lite
} // namespace paddle
add_subdirectory(bridges)
add_kernel(subgraph_compute_apu APU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_apu subgraph_bridge_engine ${apu_subgraph_bridges})
add_kernel(subgraph_compute_apu APU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_apu neuron_adapter subgraph_bridge_engine ${apu_subgraph_bridges})
......@@ -3,7 +3,7 @@ if(NOT LITE_WITH_APU)
endif()
lite_cc_library(subgraph_bridge_utility_apu SRCS utility.cc DEPS tensor)
lite_cc_library(subgraph_bridge_utility_apu SRCS utility.cc DEPS tensor neuron_adapter)
lite_cc_library(subgraph_bridge_graph_apu SRCS graph.cc DEPS subgraph_bridge_utility_apu)
set(apu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_utility_apu subgraph_bridge_graph_apu)
......
......@@ -33,16 +33,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto op_type = op_info->Type();
auto scope = op->scope();
int neuron_errCode;
VLOG(3) << "[APU] Converting [" << op_type << "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
LOAD_FUNCTIONS(libHandle,
NeuronModel_setOperandSymmPerChannelQuantParams,
neuron_model_setOperandSymmPerChannelQuantParams)
// Get input and output vars and op attributes
auto input_name = op_info->Input("Input").front();
......@@ -167,7 +158,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
input_node = graph->Get(input_name);
if (input_node == nullptr) return subgraph::FAILED;
} else {
(*neuron_model_addOperand)(model, &inType); // input
NeuronModel_addOperand(model, &inType); // input
input_node = graph->Add(input_name, dims_in);
}
}
......@@ -253,7 +244,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> filter_node = nullptr;
if (1 == weight_scale.size()) {
(*neuron_model_addOperand)(model, &filterType); // 1: filter
NeuronModel_addOperand(model, &filterType); // 1: filter
filter_node = graph->Add(filter_name, dims_filter);
VLOG(3) << "filter node idx: " << filter_node->index() << "w_scale[0]"
<< weight_scale[0] << ": filterType: " << filterType.dimensions[0]
......@@ -262,14 +253,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(filter->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
filter->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, filter_node->index(), filter->raw_data(), filter->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set filter operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
} else {
(*neuron_model_addOperand)(model, &channelFilterType); // 1: filter
NeuronModel_addOperand(model, &channelFilterType); // 1: filter
filter_node = graph->Add(filter_name, dims_filter);
VLOG(3) << "chennel filter node idx: " << filter_node->index()
<< " ,scale_count:" << weight_scale.size()
......@@ -281,13 +272,13 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(filter->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
filter->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, filter_node->index(), filter->raw_data(), filter->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set filter operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
neuron_errCode = (*neuron_model_setOperandSymmPerChannelQuantParams)(
neuron_errCode = NeuronModel_setOperandSymmPerChannelQuantParams(
model, filter_node->index(), &symmPerChannelQuantParams);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set per channel filter params fail:" << neuron_errCode;
......@@ -315,7 +306,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
for (int i = 0; i < bias_dims.size(); i++)
dims_bias.push_back(bias_dims[i]);
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "node idx" << bias_node->index() << ": Bias name: " << bias_name
<< " ,bias scale: " << biasType.scale
......@@ -324,7 +315,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = 1;
dims_bias = {(uint32_t)output_dims[1]};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(filter_name + "_default_bias", dims_bias);
VLOG(3) << "node idx" << bias_node->index() << ": Bias name: default_bias "
<< " ,bias scale: " << biasType.scale
......@@ -337,37 +328,37 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::vector<uint32_t> dims_int32 = {1};
std::shared_ptr<Node> paddingL_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 3: padding left
NeuronModel_addOperand(model, &int32Type); // 3: padding left
paddingL_node = graph->Add(filter_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 4: padding right
NeuronModel_addOperand(model, &int32Type); // 4: padding right
paddingR_node = graph->Add(filter_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 5: padding top
NeuronModel_addOperand(model, &int32Type); // 5: padding top
paddingT_node = graph->Add(filter_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 6: padding bottom
NeuronModel_addOperand(model, &int32Type); // 6: padding bottom
paddingB_node = graph->Add(filter_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 7: stride width
NeuronModel_addOperand(model, &int32Type); // 7: stride width
strideW_node = graph->Add(filter_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 8: stride height
NeuronModel_addOperand(model, &int32Type); // 8: stride height
strideH_node = graph->Add(filter_name + "_stride_height", dims_int32);
std::shared_ptr<Node> dm_node = nullptr;
if (is_depthwise_mode) {
(*neuron_model_addOperand)(model, &int32Type); // 9: depthwise multiplier
NeuronModel_addOperand(model, &int32Type); // 9: depthwise multiplier
dm_node = graph->Add(filter_name + "_dm", dims_int32);
}
std::shared_ptr<Node> fuse_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 9/10: fuse
NeuronModel_addOperand(model, &int32Type); // 9/10: fuse
fuse_node = graph->Add(filter_name + "_fuse", dims_int32);
// Add output tensor type
......@@ -390,10 +381,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
} else {
// add output operand
if (graph->IsOutput(output_name)) {
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
output_node = graph->Add("transpose_" + output_name, dims_out);
} else {
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
output_node = graph->Add(output_name, dims_out);
}
}
......@@ -415,7 +406,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "int32_bias_data: " << int32_bias_data[0] << " : "
<< int32_bias_data[1] << " : " << int32_bias_data[2] << " : "
<< int32_bias_data[3];
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, bias_node->index(), bias->raw_data(), bias->memory_size());
} else {
auto int32_bias = std::make_shared<Tensor>();
......@@ -423,10 +414,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int32_bias->mutable_data<int32_t>();
VLOG(3) << "bais_default: " << int32_bias->memory_size();
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size());
neuron_errCode = NeuronModel_setOperandValue(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size());
bias_node->set_data(int32_bias);
}
if (NEURON_NO_ERROR != neuron_errCode) {
......@@ -439,16 +430,16 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add padding value
int32_t padding_val[1];
padding_val[0] = paddings[2];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingL_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[3];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingR_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[0];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingT_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[1];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingB_node->index(), padding_val, sizeof(int32_t) * 1);
VLOG(3) << " stride width:" << strides[1] << " height:" << strides[0];
......@@ -456,10 +447,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
// Add fuse
......@@ -478,12 +469,12 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (is_depthwise_mode) {
int32_t dm = oc / ic;
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, dm_node->index(), &dm, sizeof(int32_t) * 1);
VLOG(3) << "depthwise multiplier:" << dm;
// Depthwise conv
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
input_node->index(), // 0: input
......@@ -499,14 +490,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuse_node->index()}; // 10 : fuse
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_DEPTHWISE_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_DEPTHWISE_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
} else {
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
input_node->index(), // 0: input
......@@ -521,12 +512,12 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuse_node->index()}; // 9: fuse
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
}
if (NEURON_NO_ERROR != neuron_errCode) {
......
......@@ -31,12 +31,6 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
......@@ -95,7 +89,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "Graph has " << input_name << ",index: " << in_node->index();
} else {
// add input operand
(*neuron_model_addOperand)(model, &inType); // 0: input
NeuronModel_addOperand(model, &inType); // 0: input
in_node = graph->Add(input_name, dims_in);
}
VLOG(3) << "input_scale: " << input_scale
......@@ -110,7 +104,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
wType.dimensionCount = w_dims.size();
std::vector<uint32_t> dims_w = {(uint32_t)w_dims[1], (uint32_t)w_dims[0]};
wType.dimensions = &dims_w[0];
(*neuron_model_addOperand)(model, &wType); // 1: weight
NeuronModel_addOperand(model, &wType); // 1: weight
std::shared_ptr<Node> w_node = nullptr;
w_node = graph->Add(w_name, dims_w);
VLOG(3) << "w_scale size: " << w_scale.size() << ",w_scale: " << w_scale[0]
......@@ -132,7 +126,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = bias_dims.size();
std::vector<uint32_t> dims_bias = {(uint32_t)bias_dims[0]};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "Bias name: " << bias_name << ", bias dims: " << bias_dims
<< ", bias scale: " << biasType.scale
......@@ -141,7 +135,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = 1;
std::vector<uint32_t> dims_bias = {(uint32_t)n};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(w_name + "_default_bias", dims_bias);
}
......@@ -150,7 +144,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuseType.type = NEURON_INT32;
fuseType.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {0};
(*neuron_model_addOperand)(model, &fuseType); // 3: fuse
NeuronModel_addOperand(model, &fuseType); // 3: fuse
std::shared_ptr<Node> fuse_node = nullptr;
fuse_node = graph->Add(w_name + "_fuse", dims_int32);
......@@ -165,7 +159,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "out_scale: " << out_scale
<< ", outType: " << outType.dimensions[0] << " : "
<< outType.dimensions[1];
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_out);
......@@ -181,7 +175,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(w->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
w->memory_size());
int neuron_errCode = (*neuron_model_setOperandValue)(
int neuron_errCode = NeuronModel_setOperandValue(
model, w_node->index(), w->raw_data(), w->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set W operand value fail:" << neuron_errCode
......@@ -200,10 +194,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << int32_bias_data[0] << ":" << int32_bias_data[1] << ":"
<< int32_bias_data[2] << ":" << int32_bias_data[3];
neuron_errCode =
(*neuron_model_setOperandValue)(model,
bias_node->index(),
bias->raw_data(),
bias->memory_size()); // 2: bias
NeuronModel_setOperandValue(model,
bias_node->index(),
bias->raw_data(),
bias->memory_size()); // 2: bias
} else {
auto int32_bias = std::make_shared<Tensor>();
int32_bias->Resize({1, out_dims[1]});
......@@ -211,15 +205,15 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
VLOG(3) << "default: " << int32_bias->memory_size();
neuron_errCode =
(*neuron_model_setOperandValue)(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size()); // 2: bias
NeuronModel_setOperandValue(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size()); // 2: bias
bias_node->set_data(int32_bias);
}
// Add fuse value
int32_t fuse_val[1] = {0};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1); // 3: fuse
std::vector<uint32_t> addInIndex = {in_node->index(),
......@@ -227,12 +221,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias_node->index(),
fuse_node->index()};
std::vector<uint32_t> addOutIndex = {out_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_FULLY_CONNECTED,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_FULLY_CONNECTED,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
......
......@@ -19,7 +19,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "NeuronAdapter.h"
#include "lite/backends/apu/neuron_adapter.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
......@@ -64,9 +64,6 @@ class Graph {
void set_model(NeuronModel* model) { model_ = model; }
NeuronModel* model() { return model_; }
void set_libHandle(void* libHandle) { libHandle_ = libHandle; }
void* libHandle() { return libHandle_; }
void set_input_names(const std::vector<std::string> input_names) {
input_names_ = input_names;
}
......@@ -99,7 +96,6 @@ class Graph {
}
private:
void* libHandle_;
NeuronModel* model_;
std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>> nodes_;
int32_t operandIdx_ = 0;
......
......@@ -32,12 +32,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "] ";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -127,7 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
x_node = graph->Get(x_name);
} else {
// add input operand
(*neuron_model_addOperand)(model, &xType); // 0: x
NeuronModel_addOperand(model, &xType); // 0: x
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "x_scale: " << x_scale << ", xType: " << xType.dimensions[0] << ":"
......@@ -140,39 +134,39 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::vector<uint32_t> dims_int32 = {0};
std::shared_ptr<Node> paddingL_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 1: padding left
NeuronModel_addOperand(model, &int32Type); // 1: padding left
paddingL_node = graph->Add(x_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 2: padding right
NeuronModel_addOperand(model, &int32Type); // 2: padding right
paddingR_node = graph->Add(x_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 3: padding top
NeuronModel_addOperand(model, &int32Type); // 3: padding top
paddingT_node = graph->Add(x_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 4: padding bottom
NeuronModel_addOperand(model, &int32Type); // 4: padding bottom
paddingB_node = graph->Add(x_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 5: stride width
NeuronModel_addOperand(model, &int32Type); // 5: stride width
strideW_node = graph->Add(x_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 6: stride height
NeuronModel_addOperand(model, &int32Type); // 6: stride height
strideH_node = graph->Add(x_name + "_stride_height", dims_int32);
std::shared_ptr<Node> filterW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 7: filter width
NeuronModel_addOperand(model, &int32Type); // 7: filter width
filterW_node = graph->Add(x_name + "_filter_width", dims_int32);
std::shared_ptr<Node> filterH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 8: filter height
NeuronModel_addOperand(model, &int32Type); // 8: filter height
filterH_node = graph->Add(x_name + "_filter_height", dims_int32);
std::shared_ptr<Node> fuse_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 9: fuse
NeuronModel_addOperand(model, &int32Type); // 9: fuse
fuse_node = graph->Add(x_name + "_fuse", dims_int32);
// Add out type
......@@ -191,7 +185,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (graph->Has(out_name)) {
out_node = graph->Get(out_name);
} else {
(*neuron_model_addOperand)(model, &outType); // out
NeuronModel_addOperand(model, &outType); // out
out_node = graph->Add(out_name, dims_out);
}
VLOG(3) << "output_scale: " << x_scale
......@@ -202,39 +196,39 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add padding value
int32_t padding_val[1];
padding_val[0] = paddings[2];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingL_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[3];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingR_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[0];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingT_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[1];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingB_node->index(), padding_val, sizeof(int32_t) * 1);
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
// Add filter
int32_t filter_val[1];
filter_val[0] = global_pooling ? x_dims[3] : ksize[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, filterW_node->index(), filter_val, sizeof(int32_t) * 1);
filter_val[0] = global_pooling ? x_dims[2] : ksize[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, filterH_node->index(), filter_val, sizeof(int32_t) * 1);
// Add fuse
int32_t fuse_val[1] = {0};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {x_node->index(),
......@@ -251,19 +245,19 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int neuron_errCode;
if (pooling_type == "max") {
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_MAX_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_MAX_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
} else {
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_AVERAGE_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_AVERAGE_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
}
return REBUILD_WHEN_SHAPE_CHANGED;
......
......@@ -31,12 +31,6 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -84,7 +78,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "Graph has " << x_name << ",index: " << x_node->index();
} else {
// add input operand
(*neuron_model_addOperand)(model, &xType); // 0: input
NeuronModel_addOperand(model, &xType); // 0: input
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "input_scale size: " << input_scale
......@@ -95,7 +89,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType betaType;
betaType.type = NEURON_FLOAT32;
betaType.dimensionCount = 0;
(*neuron_model_addOperand)(model, &betaType); // 1: beta
NeuronModel_addOperand(model, &betaType); // 1: beta
std::shared_ptr<Node> beta_node = nullptr;
beta_node = graph->Add(x_name + "_beta", dims_int32);
......@@ -103,7 +97,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType axisType;
axisType.type = NEURON_INT32;
axisType.dimensionCount = 0;
(*neuron_model_addOperand)(model, &axisType); // 2: axis
NeuronModel_addOperand(model, &axisType); // 2: axis
std::shared_ptr<Node> axis_node = nullptr;
axis_node = graph->Add(x_name + "_axis", dims_int32);
......@@ -114,28 +108,28 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
outType.zeroPoint = 128;
outType.dimensionCount = x_dims.size();
outType.dimensions = &dims_x[0];
(*neuron_model_addOperand)(model, &outType); // 3: output
NeuronModel_addOperand(model, &outType); // 3: output
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_x);
VLOG(3) << "output_scale: " << out_scale;
float beta_val[] = {1.0f};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, beta_node->index(), beta_val, sizeof(float) * 1);
int32_t axis_val[1];
axis_val[0] = axis;
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, axis_node->index(), axis_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
x_node->index(), beta_node->index(), axis_node->index()};
std::vector<uint32_t> addOutIndex = {out_node->index()};
int neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_SOFTMAX,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
int neuron_errCode = NeuronModel_addOperation(model,
NEURON_SOFTMAX,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
return FAILED;
......
......@@ -21,58 +21,6 @@ namespace lite {
namespace subgraph {
namespace apu {
// typedef to the build functions pointer signatures
typedef int (*Neuron_getVersion)(uint32_t* version);
typedef int (*NeuronModel_create)(NeuronModel** model);
typedef void (*NeuronModel_free)(NeuronModel* model);
typedef int (*NeuronModel_finish)(NeuronModel* model);
typedef int (*NeuronModel_addOperand)(NeuronModel* model,
const NeuronOperandType* type);
typedef int (*NeuronModel_setOperandValue)(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length);
typedef int (*NeuronModel_addOperation)(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_setOperandSymmPerChannelQuantParams)(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant);
typedef int (*NeuronExecution_create)(NeuronCompilation* compilation,
NeuronExecution** execution);
typedef void (*NeuronExecution_free)(NeuronExecution* execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution* execution);
void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
const std::string& argname) {
......@@ -102,11 +50,6 @@ void insert_transpose_node(void* ctx,
int neuron_errCode;
auto graph = static_cast<Graph*>(ctx);
auto model = graph->model();
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Add input
NeuronOperandType inType;
......@@ -121,7 +64,7 @@ void insert_transpose_node(void* ctx,
VLOG(3) << "Has " << input_name;
input_node = graph->Get(input_name);
} else {
neuron_errCode = (*neuron_model_addOperand)(model, &inType); // input
neuron_errCode = NeuronModel_addOperand(model, &inType); // input
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
return;
......@@ -137,7 +80,7 @@ void insert_transpose_node(void* ctx,
uint32_t dims_perms[1] = {4};
permsType.dimensions = dims_perms;
neuron_errCode = (*neuron_model_addOperand)(model, &permsType); // perm
neuron_errCode = NeuronModel_addOperand(model, &permsType); // perm
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
return;
......@@ -148,7 +91,7 @@ void insert_transpose_node(void* ctx,
VLOG(3) << "axis :" << axis[0] << ":" << axis[1] << ":" << axis[2] << ":"
<< axis[3];
// &axis[0], sizeof(int32_t) * axis.size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, perms_node->index(), &axis[0], sizeof(int32_t) * axis.size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
......@@ -163,7 +106,7 @@ void insert_transpose_node(void* ctx,
outType.dimensionCount = output_shape.size();
outType.dimensions = &output_shape[0];
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
std::shared_ptr<Node> output_node = nullptr;
output_node = graph->Add(output_name, output_shape);
......@@ -172,12 +115,12 @@ void insert_transpose_node(void* ctx,
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_TRANSPOSE,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_TRANSPOSE,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
......
......@@ -20,7 +20,6 @@
#include <string>
#include <unordered_map>
#include <vector>
#include "NeuronAdapter.h"
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
......@@ -29,53 +28,6 @@ namespace lite {
namespace subgraph {
namespace apu {
// typedef to the build functions pointer signatures
typedef int (*Neuron_getVersion)(uint32_t* version);
typedef int (*NeuronModel_create)(NeuronModel** model);
typedef void (*NeuronModel_free)(NeuronModel* model);
typedef int (*NeuronModel_finish)(NeuronModel* model);
typedef int (*NeuronModel_addOperand)(NeuronModel* model,
const NeuronOperandType* type);
typedef int (*NeuronModel_setOperandValue)(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length);
typedef int (*NeuronModel_addOperation)(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_setOperandSymmPerChannelQuantParams)(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant);
typedef int (*NeuronExecution_create)(NeuronCompilation* compilation,
NeuronExecution** execution);
typedef void (*NeuronExecution_free)(NeuronExecution* execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution* execution);
void* LoadFunc(void* libHandle, const char* name);
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
// Type/tensor converters for converting Paddle type/tensor to HiAI type/tensor
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
......
......@@ -28,58 +28,18 @@ namespace lite {
namespace kernels {
namespace apu {
inline void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
int SubgraphEngine::BuildDeviceProgram() {
typedef int (*Neuron_getVersion)(uint32_t * version);
typedef int (*NeuronModel_create)(NeuronModel * *model);
typedef void (*NeuronModel_free)(NeuronModel * model);
typedef int (*NeuronModel_finish)(NeuronModel * model);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel * model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
// Open the share library
libHandle_ = dlopen("libneuron_adapter.so", RTLD_LAZY);
if (libHandle_ == nullptr) {
LOG(WARNING) << "Failed to open libneuron_adapter.so. " << dlerror();
return subgraph::FAILED;
}
LOAD_FUNCTIONS(libHandle_, Neuron_getVersion, neuron_getVersion)
LOAD_FUNCTIONS(libHandle_, NeuronModel_create, neuron_model_create)
LOAD_FUNCTIONS(libHandle_, NeuronModel_finish, neuron_model_finish)
LOAD_FUNCTIONS(libHandle_,
NeuronModel_identifyInputsAndOutputs,
neuron_model_identifyInputsAndOutputs)
unsigned int version;
(*neuron_getVersion)(&version);
Neuron_getVersion(&version);
VLOG(3) << "Neuron Adapter version: " << version;
int status = 0;
subgraph::apu::Graph graph;
int neuron_errCode = (*neuron_model_create)(&model_);
int neuron_errCode = NeuronModel_create(&model_);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to create model";
return subgraph::FAILED;
}
graph.set_libHandle(libHandle_);
graph.set_model(model_);
graph.set_input_names(input_names_);
graph.set_output_names(output_names_);
......@@ -151,9 +111,9 @@ int SubgraphEngine::BuildDeviceProgram() {
VLOG(3) << "ins size: " << ins.size() << " outs size:" << outs.size();
// Set subgraph input/output
(*neuron_model_identifyInputsAndOutputs)(
NeuronModel_identifyInputsAndOutputs(
model_, ins.size(), &ins[0], outs.size(), &outs[0]);
neuron_errCode = (*neuron_model_finish)(model_);
neuron_errCode = NeuronModel_finish(model_);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to create NIR model:" << neuron_errCode;
return subgraph::FAILED;
......@@ -166,7 +126,7 @@ int SubgraphEngine::BuildDeviceProgram() {
return 1e+6 * time.tv_sec + time.tv_usec;
};
auto start_time = GetCurrentUS();
compilation_ = lite::apu::Device::Global().Build(libHandle_, model_);
compilation_ = lite::apu::Device::Global().Build(model_);
if (compilation_ == nullptr) {
LOG(WARNING) << "[APU] Build APU DLA model failed!";
return subgraph::FAILED;
......@@ -178,30 +138,6 @@ int SubgraphEngine::BuildDeviceProgram() {
}
int SubgraphEngine::LaunchDeviceProgram() {
typedef int (*NeuronExecution_create)(NeuronCompilation * compilation,
NeuronExecution * *execution);
typedef void (*NeuronExecution_free)(NeuronExecution * execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution * execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution * execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution * execution);
LOAD_FUNCTIONS(libHandle_, NeuronExecution_create, neuron_execution_create)
LOAD_FUNCTIONS(libHandle_, NeuronExecution_free, neuron_execution_free)
LOAD_FUNCTIONS(
libHandle_, NeuronExecution_setInput, neuron_execution_setInput)
LOAD_FUNCTIONS(
libHandle_, NeuronExecution_setOutput, neuron_execution_setOutput)
LOAD_FUNCTIONS(libHandle_, NeuronExecution_compute, neuron_execution_compute)
NeuronExecution* run1 = NULL;
auto GetCurrentUS = []() -> double {
struct timeval time;
gettimeofday(&time, NULL);
......@@ -209,7 +145,8 @@ int SubgraphEngine::LaunchDeviceProgram() {
};
auto start_time = GetCurrentUS();
int neuron_errCode = (*neuron_execution_create)(compilation_, &run1);
NeuronExecution* run = NULL;
int neuron_errCode = NeuronExecution_create(compilation_, &run);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] Build APU runtime failed!";
return subgraph::FAILED;
......@@ -226,21 +163,21 @@ int SubgraphEngine::LaunchDeviceProgram() {
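// Shift the int8 input data into the uint8 domain by adding 128 to each
// element before handing the buffer to the APU runtime.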
for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
input_data[j] += (uint8_t)128;
}
(*neuron_execution_setInput)(
run1, i, NULL, input_data, origin_itensors_[i]->memory_size());
NeuronExecution_setInput(
run, i, NULL, input_data, origin_itensors_[i]->memory_size());
}
// Set output buffer
for (size_t i = 0; i < origin_otensors_.size(); i++) {
(*neuron_execution_setOutput)(
run1,
NeuronExecution_setOutput(
run,
i,
NULL,
reinterpret_cast<void*>(origin_otensors_[i]->raw_data()),
origin_otensors_[i]->memory_size());
}
neuron_errCode = (*neuron_execution_compute)(run1);
neuron_errCode = NeuronExecution_compute(run);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to run execution!" << neuron_errCode;
return subgraph::FAILED;
......@@ -253,11 +190,20 @@ int SubgraphEngine::LaunchDeviceProgram() {
output_data[j] -= (int8_t)128;
}
}
(*neuron_execution_free)(run1);
NeuronExecution_free(run);
VLOG(3) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
return 0;
}
SubgraphEngine::~SubgraphEngine() {
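// Release resources in reverse order of creation: the compilation first,
// then the model it was built from.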
if (compilation_) {
NeuronCompilation_free(compilation_);
}
if (model_) {
NeuronModel_free(model_);
}
}
void SubgraphCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
engine_.reset(new SubgraphEngine(ctx_.get(),
......
......@@ -38,12 +38,12 @@ class SubgraphEngine : public subgraph::Engine {
: subgraph::Engine(
ctx, block_idx, block_desc, input_names, output_names, scope) {}
~SubgraphEngine();
protected:
int BuildDeviceProgram() override;
int LaunchDeviceProgram() override;
std::string model_name_;
void *libHandle_;
NeuronModel *model_;
NeuronCompilation *compilation_;
};
......