Commit 56a0a238 authored by hong19860320, committed by GitHub

[APU] Refine the dynamic loading of libneuron_adapter.so (#3544)

Parent 87497b9c
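This change replaces the ad-hoc, per-call dlopen/dlsym plumbing with a process-wide NeuronAdapter singleton that loads libneuron_adapter.so once, resolves every Neuron symbol up front, and exposes plain C wrappers with the stock API names. A minimal sketch of that pattern, using hypothetical names (libfoo.so, foo_version) rather than the actual commit code:

#include <dlfcn.h>

class FooAdapter {
 public:
  using foo_version_fn = int (*)(unsigned int*);  // hypothetical symbol type

  static FooAdapter* Global() {
    static FooAdapter adapter;  // loaded once for the whole process
    return &adapter;
  }
  foo_version_fn foo_version() const { return foo_version_; }

 private:
  FooAdapter() {
    // The real code probes several paths and CHECK-fails if none can be opened.
    handle_ = dlopen("libfoo.so", RTLD_LAZY);
    if (handle_ != nullptr) {
      foo_version_ =
          reinterpret_cast<foo_version_fn>(dlsym(handle_, "foo_version"));
    }
  }
  void* handle_{nullptr};
  foo_version_fn foo_version_{nullptr};
};

// C-style trampoline: call sites keep using the plain API name.
int foo_version(unsigned int* version) {
  return FooAdapter::Global()->foo_version()(version);
}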
......@@ -32,34 +32,3 @@ endif()
message(STATUS "APU_DDK_INC: ${APU_DDK_INC}")
include_directories("${APU_DDK_ROOT}/include")
set(APU_SUB_LIB_PATH "lib64")
if(ARM_TARGET_ARCH_ABI STREQUAL "armv8")
set(APU_SUB_LIB_PATH "lib64")
endif()
find_library(APU_NEURON_FILE NAMES neuron
PATHS ${APU_DDK_ROOT}/${APU_SUB_LIB_PATH})
find_library(APU_NEURON_ADAPTER_FILE NAMES neuron_adapter
PATHS ${APU_DDK_ROOT}/${APU_SUB_LIB_PATH})
if(NOT APU_NEURON_FILE)
message(FATAL_ERROR "Can not find APU_NEURON_FILE in ${APU_DDK_ROOT}")
else()
message(STATUS "Found APU NEURON Library: ${APU_NEURON_FILE}")
add_library(apu_neuron SHARED IMPORTED GLOBAL)
set_property(TARGET apu_neuron PROPERTY IMPORTED_LOCATION ${APU_NEURON_FILE})
endif()
if(NOT APU_NEURON_ADAPTER_FILE)
message(FATAL_ERROR "Can not find APU_NEURON_ADAPTER_FILE in ${APU_DDK_ROOT}")
else()
message(STATUS "Found APU NEURON ADAPTER Library: ${APU_NEURON_ADAPTER_FILE}")
add_library(apu_neuron_adapter SHARED IMPORTED GLOBAL)
set_property(TARGET apu_neuron_adapter PROPERTY IMPORTED_LOCATION ${APU_NEURON_ADAPTER_FILE})
endif()
set(apu_runtime_libs apu_neuron apu_neuron_adapter CACHE INTERNAL "apu runtime libs")
message(STATUS "${apu_runtime_libs}")
......@@ -2,4 +2,5 @@ if(NOT LITE_WITH_APU)
return()
endif()
lite_cc_library(device_apu SRCS device.cc)
lite_cc_library(neuron_adapter SRCS neuron_adapter.cc)
lite_cc_library(device_apu SRCS device.cc DEPS neuron_adapter)
......@@ -20,48 +20,19 @@ namespace paddle {
namespace lite {
namespace apu {
inline void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
NeuronCompilation* Device::Build(void* libHandle, NeuronModel* model) {
typedef int (*NeuronCompilation_create)(NeuronModel * model,
NeuronCompilation * *compilation);
typedef void (*NeuronCompilation_free)(NeuronCompilation * compilation);
typedef int (*NeuronCompilation_finish)(NeuronCompilation * compilation);
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
LOAD_FUNCTIONS(libHandle, NeuronCompilation_create, neuron_compilation_create)
LOAD_FUNCTIONS(libHandle, NeuronCompilation_free, neuron_compilation_free)
LOAD_FUNCTIONS(libHandle, NeuronCompilation_finish, neuron_compilation_finish)
#undef LOAD_FUNCTIONS
int neuron_errCode = 0;
NeuronCompilation* compilation = NULL;
NeuronCompilation* Device::Build(NeuronModel* model) {
VLOG(3) << "[APU] Compile model";
neuron_errCode = (*neuron_compilation_create)(model, &compilation);
NeuronCompilation* compilation = NULL;
int neuron_errCode = NeuronCompilation_create(model, &compilation);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] create compile failed! " << neuron_errCode;
return nullptr;
}
neuron_errCode = (*neuron_compilation_finish)(compilation);
neuron_errCode = NeuronCompilation_finish(compilation);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] compile failed! " << neuron_errCode;
return nullptr;
}
VLOG(3) << "[APU] Build done";
return compilation;
}
......
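The refactored Device::Build() is now just the standard create-then-finish compilation flow on top of the adapter-backed API. A hypothetical caller (illustrative only; subgraph_compute.cc below does the same through its compilation_ member) pairs it with NeuronCompilation_free:

// `model` is assumed to be a finished NeuronModel.
NeuronCompilation* compilation =
    paddle::lite::apu::Device::Global().Build(model);
if (compilation != nullptr) {
  // ... create a NeuronExecution, set I/O buffers, compute ...
  NeuronCompilation_free(compilation);
}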
......@@ -18,7 +18,7 @@
#include <string>
#include <unordered_map>
#include <vector>
#include "NeuronAdapter.h" // NOLINT
#include "lite/backends/apu/neuron_adapter.h"
namespace paddle {
namespace lite {
......@@ -32,7 +32,7 @@ class Device {
}
Device() {}
NeuronCompilation* Build(void* libHandle, NeuronModel* model);
NeuronCompilation* Build(NeuronModel* model);
};
} // namespace apu
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/backends/apu/neuron_adapter.h"
#include <dlfcn.h>
#include <string>
#include <vector>
namespace paddle {
namespace lite {
NeuronAdapter* NeuronAdapter::Global() {
static NeuronAdapter adapter;
return &adapter;
}
NeuronAdapter::NeuronAdapter() {
CHECK(InitHandle()) << "Failed to initialize the Neuron Adapter library!";
InitFunctions();
}
bool NeuronAdapter::InitHandle() {
const std::vector<std::string> paths = {
"libneuron_adapter.so",
#if defined(__aarch64__)
"/vendor/lib64/libneuron_adapter.so",
"/system/lib64/libneuron_adapter.so",
"/system/vendor/lib64/libneuron_adapter.so",
#else
"/vendor/lib/libneuron_adapter.so",
"/system/lib/libneuron_adapter.so",
"/system/vendor/lib/libneuron_adapter.so",
#endif
};
std::string target_lib = "Unknown";
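// Try the bare soname first (resolved via the default library search path),
// then fall back to the absolute vendor/system locations listed above.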
for (const auto& path : paths) {
handle_ = dlopen(path.c_str(), RTLD_LAZY);
if (handle_ != nullptr) {
target_lib = path;
break;
}
}
VLOG(4) << "Load the Neuron Adapter library from " << target_lib;
return handle_ != nullptr;
}
void NeuronAdapter::InitFunctions() {
CHECK(handle_ != nullptr) << "The library handle can't be null!";
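// Resolve every required Neuron symbol once at startup; a missing symbol
// aborts immediately via LOG(FATAL) instead of failing on first use.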
#define PADDLE_DLSYM(neuron_adapter_func) \
do { \
neuron_adapter_func##_ = \
(neuron_adapter_func##_Type)dlsym(handle_, #neuron_adapter_func); \
if (neuron_adapter_func##_ == nullptr) { \
LOG(FATAL) << "Cannot find the " << #neuron_adapter_func \
<< " symbol in libneuron_adapter.so!"; \
break; \
} \
VLOG(4) << "Loaded the " << #neuron_adapter_func \
<< " symbol successfully."; \
} while (false)
PADDLE_DLSYM(Neuron_getVersion);
PADDLE_DLSYM(NeuronModel_create);
PADDLE_DLSYM(NeuronModel_free);
PADDLE_DLSYM(NeuronModel_finish);
PADDLE_DLSYM(NeuronModel_addOperand);
PADDLE_DLSYM(NeuronModel_setOperandValue);
PADDLE_DLSYM(NeuronModel_setOperandSymmPerChannelQuantParams);
PADDLE_DLSYM(NeuronModel_addOperation);
PADDLE_DLSYM(NeuronModel_identifyInputsAndOutputs);
PADDLE_DLSYM(NeuronCompilation_create);
PADDLE_DLSYM(NeuronCompilation_free);
PADDLE_DLSYM(NeuronCompilation_finish);
PADDLE_DLSYM(NeuronExecution_create);
PADDLE_DLSYM(NeuronExecution_free);
PADDLE_DLSYM(NeuronExecution_setInput);
PADDLE_DLSYM(NeuronExecution_setOutput);
PADDLE_DLSYM(NeuronExecution_compute);
#undef PADDLE_DLSYM
}
} // namespace lite
} // namespace paddle
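// C-style trampolines exposing the stock Neuron API names: existing call
// sites stay unchanged, and each call forwards to the pointer cached in
// the NeuronAdapter singleton.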
int Neuron_getVersion(uint32_t* version) {
return paddle::lite::NeuronAdapter::Global()->Neuron_getVersion()(version);
}
int NeuronModel_create(NeuronModel** model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_create()(model);
}
void NeuronModel_free(NeuronModel* model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_free()(model);
}
int NeuronModel_finish(NeuronModel* model) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_finish()(model);
}
int NeuronModel_addOperand(NeuronModel* model, const NeuronOperandType* type) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_addOperand()(model,
type);
}
int NeuronModel_setOperandValue(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_setOperandValue()(
model, index, buffer, length);
}
int NeuronModel_setOperandSymmPerChannelQuantParams(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_setOperandSymmPerChannelQuantParams()(
model, index, channelQuant);
}
int NeuronModel_addOperation(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs) {
return paddle::lite::NeuronAdapter::Global()->NeuronModel_addOperation()(
model, type, inputCount, inputs, outputCount, outputs);
}
int NeuronModel_identifyInputsAndOutputs(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs) {
return paddle::lite::NeuronAdapter::Global()
->NeuronModel_identifyInputsAndOutputs()(
model, inputCount, inputs, outputCount, outputs);
}
int NeuronCompilation_create(NeuronModel* model,
NeuronCompilation** compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_create()(
model, compilation);
}
void NeuronCompilation_free(NeuronCompilation* compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_free()(
compilation);
}
int NeuronCompilation_finish(NeuronCompilation* compilation) {
return paddle::lite::NeuronAdapter::Global()->NeuronCompilation_finish()(
compilation);
}
int NeuronExecution_create(NeuronCompilation* compilation,
NeuronExecution** execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_create()(
compilation, execution);
}
void NeuronExecution_free(NeuronExecution* execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_free()(
execution);
}
int NeuronExecution_setInput(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_setInput()(
execution, index, type, buffer, length);
}
int NeuronExecution_setOutput(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_setOutput()(
execution, index, type, buffer, length);
}
int NeuronExecution_compute(NeuronExecution* execution) {
return paddle::lite::NeuronAdapter::Global()->NeuronExecution_compute()(
execution);
}
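With the trampolines above in place, any call site can keep using the stock Neuron API names while resolution goes through the singleton. A hypothetical call site, not part of this commit:

NeuronModel* model = nullptr;
if (NeuronModel_create(&model) == NEURON_NO_ERROR) {
  // ... build the model, then NeuronModel_finish(model) ...
  NeuronModel_free(model);
}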
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "NeuronAdapter.h" // NOLINT
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
class NeuronAdapter final {
public:
static NeuronAdapter *Global();
// Platform APIs
using Neuron_getVersion_Type = int (*)(uint32_t *);
using NeuronModel_create_Type = int (*)(NeuronModel **);
using NeuronModel_free_Type = void (*)(NeuronModel *);
using NeuronModel_finish_Type = int (*)(NeuronModel *);
using NeuronModel_addOperand_Type = int (*)(NeuronModel *,
const NeuronOperandType *);
using NeuronModel_setOperandValue_Type = int (*)(NeuronModel *,
int32_t,
const void *,
size_t);
using NeuronModel_setOperandSymmPerChannelQuantParams_Type =
int (*)(NeuronModel *, int32_t, const NeuronSymmPerChannelQuantParams *);
using NeuronModel_addOperation_Type = int (*)(NeuronModel *,
NeuronOperationType,
uint32_t,
const uint32_t *,
uint32_t,
const uint32_t *);
using NeuronModel_identifyInputsAndOutputs_Type = int (*)(
NeuronModel *, uint32_t, const uint32_t *, uint32_t, const uint32_t *);
using NeuronCompilation_create_Type = int (*)(NeuronModel *,
NeuronCompilation **);
using NeuronCompilation_free_Type = void (*)(NeuronCompilation *);
using NeuronCompilation_finish_Type = int (*)(NeuronCompilation *);
using NeuronExecution_create_Type = int (*)(NeuronCompilation *,
NeuronExecution **);
using NeuronExecution_free_Type = void (*)(NeuronExecution *);
using NeuronExecution_setInput_Type = int (*)(NeuronExecution *,
int32_t,
const NeuronOperandType *,
const void *,
size_t);
using NeuronExecution_setOutput_Type = int (*)(
NeuronExecution *, int32_t, const NeuronOperandType *, void *, size_t);
using NeuronExecution_compute_Type = int (*)(NeuronExecution *);
Neuron_getVersion_Type Neuron_getVersion() {
CHECK(Neuron_getVersion_ != nullptr) << "Cannot load Neuron_getVersion!";
return Neuron_getVersion_;
}
NeuronModel_create_Type NeuronModel_create() {
CHECK(NeuronModel_create_ != nullptr) << "Cannot load NeuronModel_create!";
return NeuronModel_create_;
}
NeuronModel_free_Type NeuronModel_free() {
CHECK(NeuronModel_free_ != nullptr) << "Cannot load NeuronModel_free!";
return NeuronModel_free_;
}
NeuronModel_finish_Type NeuronModel_finish() {
CHECK(NeuronModel_finish_ != nullptr) << "Cannot load NeuronModel_finish!";
return NeuronModel_finish_;
}
NeuronModel_addOperand_Type NeuronModel_addOperand() {
CHECK(NeuronModel_addOperand_ != nullptr)
<< "Cannot load NeuronModel_addOperand!";
return NeuronModel_addOperand_;
}
NeuronModel_setOperandValue_Type NeuronModel_setOperandValue() {
CHECK(NeuronModel_setOperandValue_ != nullptr)
<< "Cannot load NeuronModel_setOperandValue!";
return NeuronModel_setOperandValue_;
}
NeuronModel_setOperandSymmPerChannelQuantParams_Type
NeuronModel_setOperandSymmPerChannelQuantParams() {
CHECK(NeuronModel_setOperandSymmPerChannelQuantParams_ != nullptr)
<< "Cannot load NeuronModel_setOperandSymmPerChannelQuantParams!";
return NeuronModel_setOperandSymmPerChannelQuantParams_;
}
NeuronModel_addOperation_Type NeuronModel_addOperation() {
CHECK(NeuronModel_addOperation_ != nullptr)
<< "Cannot load NeuronModel_addOperation!";
return NeuronModel_addOperation_;
}
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs() {
CHECK(NeuronModel_identifyInputsAndOutputs_ != nullptr)
<< "Cannot load NeuronModel_identifyInputsAndOutputs!";
return NeuronModel_identifyInputsAndOutputs_;
}
NeuronCompilation_create_Type NeuronCompilation_create() {
CHECK(NeuronCompilation_create_ != nullptr)
<< "Cannot load NeuronCompilation_create!";
return NeuronCompilation_create_;
}
NeuronCompilation_free_Type NeuronCompilation_free() {
CHECK(NeuronCompilation_free_ != nullptr)
<< "Cannot load NeuronCompilation_free!";
return NeuronCompilation_free_;
}
NeuronCompilation_finish_Type NeuronCompilation_finish() {
CHECK(NeuronCompilation_finish_ != nullptr)
<< "Cannot load NeuronCompilation_finish!";
return NeuronCompilation_finish_;
}
NeuronExecution_create_Type NeuronExecution_create() {
CHECK(NeuronExecution_create_ != nullptr)
<< "Cannot load NeuronExecution_create!";
return NeuronExecution_create_;
}
NeuronExecution_free_Type NeuronExecution_free() {
CHECK(NeuronExecution_free_ != nullptr)
<< "Cannot load NeuronExecution_free!";
return NeuronExecution_free_;
}
NeuronExecution_setInput_Type NeuronExecution_setInput() {
CHECK(NeuronExecution_setInput_ != nullptr)
<< "Cannot loadcl NeuronExecution_setInput!";
return NeuronExecution_setInput_;
}
NeuronExecution_setOutput_Type NeuronExecution_setOutput() {
CHECK(NeuronExecution_setOutput_ != nullptr)
<< "Cannot load NeuronExecution_setOutput!";
return NeuronExecution_setOutput_;
}
NeuronExecution_compute_Type NeuronExecution_compute() {
CHECK(NeuronExecution_compute_ != nullptr)
<< "Cannot load NeuronExecution_compute!";
return NeuronExecution_compute_;
}
private:
NeuronAdapter();
NeuronAdapter(const NeuronAdapter &) = delete;
NeuronAdapter &operator=(const NeuronAdapter &) = delete;
bool InitHandle();
void InitFunctions();
void *handle_{nullptr};
Neuron_getVersion_Type Neuron_getVersion_{nullptr};
NeuronModel_create_Type NeuronModel_create_{nullptr};
NeuronModel_free_Type NeuronModel_free_{nullptr};
NeuronModel_finish_Type NeuronModel_finish_{nullptr};
NeuronModel_addOperand_Type NeuronModel_addOperand_{nullptr};
NeuronModel_setOperandValue_Type NeuronModel_setOperandValue_{nullptr};
NeuronModel_setOperandSymmPerChannelQuantParams_Type
NeuronModel_setOperandSymmPerChannelQuantParams_{nullptr};
NeuronModel_addOperation_Type NeuronModel_addOperation_{nullptr};
NeuronModel_identifyInputsAndOutputs_Type
NeuronModel_identifyInputsAndOutputs_{nullptr};
NeuronCompilation_create_Type NeuronCompilation_create_{nullptr};
NeuronCompilation_free_Type NeuronCompilation_free_{nullptr};
NeuronCompilation_finish_Type NeuronCompilation_finish_{nullptr};
NeuronExecution_create_Type NeuronExecution_create_{nullptr};
NeuronExecution_free_Type NeuronExecution_free_{nullptr};
NeuronExecution_setInput_Type NeuronExecution_setInput_{nullptr};
NeuronExecution_setOutput_Type NeuronExecution_setOutput_{nullptr};
NeuronExecution_compute_Type NeuronExecution_compute_{nullptr};
};
} // namespace lite
} // namespace paddle
add_subdirectory(bridges)
add_kernel(subgraph_compute_apu APU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_apu subgraph_bridge_engine ${apu_subgraph_bridges})
add_kernel(subgraph_compute_apu APU basic SRCS subgraph_compute.cc DEPS ${lite_kernel_deps} device_apu neuron_adapter subgraph_bridge_engine ${apu_subgraph_bridges})
......@@ -3,7 +3,7 @@ if(NOT LITE_WITH_APU)
endif()
lite_cc_library(subgraph_bridge_utility_apu SRCS utility.cc DEPS tensor)
lite_cc_library(subgraph_bridge_utility_apu SRCS utility.cc DEPS tensor neuron_adapter)
lite_cc_library(subgraph_bridge_graph_apu SRCS graph.cc DEPS subgraph_bridge_utility_apu)
set(apu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_utility_apu subgraph_bridge_graph_apu)
......
......@@ -33,16 +33,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto op_type = op_info->Type();
auto scope = op->scope();
int neuron_errCode;
VLOG(3) << "[APU] Converting [" << op_type << "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
LOAD_FUNCTIONS(libHandle,
NeuronModel_setOperandSymmPerChannelQuantParams,
neuron_model_setOperandSymmPerChannelQuantParams)
// Get input and output vars and op attributes
auto input_name = op_info->Input("Input").front();
......@@ -167,7 +158,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
input_node = graph->Get(input_name);
if (input_node == nullptr) return subgraph::FAILED;
} else {
(*neuron_model_addOperand)(model, &inType); // input
NeuronModel_addOperand(model, &inType); // input
input_node = graph->Add(input_name, dims_in);
}
}
......@@ -253,7 +244,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::shared_ptr<Node> filter_node = nullptr;
if (1 == weight_scale.size()) {
(*neuron_model_addOperand)(model, &filterType); // 1: filter
NeuronModel_addOperand(model, &filterType); // 1: filter
filter_node = graph->Add(filter_name, dims_filter);
VLOG(3) << "filter node idx: " << filter_node->index() << "w_scale[0]"
<< weight_scale[0] << ": filterType: " << filterType.dimensions[0]
......@@ -262,14 +253,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(filter->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
filter->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, filter_node->index(), filter->raw_data(), filter->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set filter operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
} else {
(*neuron_model_addOperand)(model, &channelFilterType); // 1: filter
NeuronModel_addOperand(model, &channelFilterType); // 1: filter
filter_node = graph->Add(filter_name, dims_filter);
VLOG(3) << "chennel filter node idx: " << filter_node->index()
<< " ,scale_count:" << weight_scale.size()
......@@ -281,13 +272,13 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(filter->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
filter->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, filter_node->index(), filter->raw_data(), filter->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set filter operand value fail:" << neuron_errCode;
return subgraph::FAILED;
}
neuron_errCode = (*neuron_model_setOperandSymmPerChannelQuantParams)(
neuron_errCode = NeuronModel_setOperandSymmPerChannelQuantParams(
model, filter_node->index(), &symmPerChannelQuantParams);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set per channel filter params fail:" << neuron_errCode;
......@@ -315,7 +306,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
for (int i = 0; i < bias_dims.size(); i++)
dims_bias.push_back(bias_dims[i]);
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "node idx" << bias_node->index() << ": Bias name: " << bias_name
<< " ,bias scale: " << biasType.scale
......@@ -324,7 +315,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = 1;
dims_bias = {(uint32_t)output_dims[1]};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(filter_name + "_default_bias", dims_bias);
VLOG(3) << "node idx" << bias_node->index() << ": Bias name: default_bias "
<< " ,bias scale: " << biasType.scale
......@@ -337,37 +328,37 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::vector<uint32_t> dims_int32 = {1};
std::shared_ptr<Node> paddingL_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 3: padding left
NeuronModel_addOperand(model, &int32Type); // 3: padding left
paddingL_node = graph->Add(filter_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 4: padding right
NeuronModel_addOperand(model, &int32Type); // 4: padding right
paddingR_node = graph->Add(filter_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 5: padding top
NeuronModel_addOperand(model, &int32Type); // 5: padding top
paddingT_node = graph->Add(filter_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 6: padding bottom
NeuronModel_addOperand(model, &int32Type); // 6: padding bottom
paddingB_node = graph->Add(filter_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 7: stride width
NeuronModel_addOperand(model, &int32Type); // 7: stride width
strideW_node = graph->Add(filter_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 8: stride height
NeuronModel_addOperand(model, &int32Type); // 8: stride height
strideH_node = graph->Add(filter_name + "_stride_height", dims_int32);
std::shared_ptr<Node> dm_node = nullptr;
if (is_depthwise_mode) {
(*neuron_model_addOperand)(model, &int32Type); // 9: depthwise multiplier
NeuronModel_addOperand(model, &int32Type); // 9: depthwise multiplier
dm_node = graph->Add(filter_name + "_dm", dims_int32);
}
std::shared_ptr<Node> fuse_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 9/10: fuse
NeuronModel_addOperand(model, &int32Type); // 9/10: fuse
fuse_node = graph->Add(filter_name + "_fuse", dims_int32);
// Add output tensor type
......@@ -390,10 +381,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
} else {
// add output operand
if (graph->IsOutput(output_name)) {
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
output_node = graph->Add("transpose_" + output_name, dims_out);
} else {
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
output_node = graph->Add(output_name, dims_out);
}
}
......@@ -415,7 +406,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "int32_bias_data: " << int32_bias_data[0] << " : "
<< int32_bias_data[1] << " : " << int32_bias_data[2] << " : "
<< int32_bias_data[3];
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, bias_node->index(), bias->raw_data(), bias->memory_size());
} else {
auto int32_bias = std::make_shared<Tensor>();
......@@ -423,10 +414,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int32_bias->mutable_data<int32_t>();
VLOG(3) << "bais_default: " << int32_bias->memory_size();
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
neuron_errCode = (*neuron_model_setOperandValue)(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size());
neuron_errCode = NeuronModel_setOperandValue(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size());
bias_node->set_data(int32_bias);
}
if (NEURON_NO_ERROR != neuron_errCode) {
......@@ -439,16 +430,16 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add padding value
int32_t padding_val[1];
padding_val[0] = paddings[2];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingL_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[3];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingR_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[0];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingT_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[1];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingB_node->index(), padding_val, sizeof(int32_t) * 1);
VLOG(3) << " stride width:" << strides[1] << " height:" << strides[0];
......@@ -456,10 +447,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
// Add fuse
......@@ -478,12 +469,12 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (is_depthwise_mode) {
int32_t dm = oc / ic;
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, dm_node->index(), &dm, sizeof(int32_t) * 1);
VLOG(3) << "depthwise multiplier:" << dm;
// Depthwise conv
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
input_node->index(), // 0: input
......@@ -499,14 +490,14 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuse_node->index()}; // 10 : fuse
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_DEPTHWISE_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_DEPTHWISE_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
} else {
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
input_node->index(), // 0: input
......@@ -521,12 +512,12 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuse_node->index()}; // 9: fuse
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_CONV_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
}
if (NEURON_NO_ERROR != neuron_errCode) {
......
......@@ -31,12 +31,6 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
auto input_name = op_info->Input("Input").front();
auto input = scope->FindMutableTensor(input_name);
auto input_dims = input->dims();
......@@ -95,7 +89,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "Graph has " << input_name << ",index: " << in_node->index();
} else {
// add input operand
(*neuron_model_addOperand)(model, &inType); // 0: input
NeuronModel_addOperand(model, &inType); // 0: input
in_node = graph->Add(input_name, dims_in);
}
VLOG(3) << "input_scale: " << input_scale
......@@ -110,7 +104,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
wType.dimensionCount = w_dims.size();
std::vector<uint32_t> dims_w = {(uint32_t)w_dims[1], (uint32_t)w_dims[0]};
wType.dimensions = &dims_w[0];
(*neuron_model_addOperand)(model, &wType); // 1: weight
NeuronModel_addOperand(model, &wType); // 1: weight
std::shared_ptr<Node> w_node = nullptr;
w_node = graph->Add(w_name, dims_w);
VLOG(3) << "w_scale size: " << w_scale.size() << ",w_scale: " << w_scale[0]
......@@ -132,7 +126,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = bias_dims.size();
std::vector<uint32_t> dims_bias = {(uint32_t)bias_dims[0]};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(bias_name, dims_bias);
VLOG(3) << "Bias name: " << bias_name << ", bias dims: " << bias_dims
<< ", bias scale: " << biasType.scale
......@@ -141,7 +135,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
biasType.dimensionCount = 1;
std::vector<uint32_t> dims_bias = {(uint32_t)n};
biasType.dimensions = &dims_bias[0];
(*neuron_model_addOperand)(model, &biasType); // 2: bias
NeuronModel_addOperand(model, &biasType); // 2: bias
bias_node = graph->Add(w_name + "_default_bias", dims_bias);
}
......@@ -150,7 +144,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
fuseType.type = NEURON_INT32;
fuseType.dimensionCount = 0;
std::vector<uint32_t> dims_int32 = {0};
(*neuron_model_addOperand)(model, &fuseType); // 3: fuse
NeuronModel_addOperand(model, &fuseType); // 3: fuse
std::shared_ptr<Node> fuse_node = nullptr;
fuse_node = graph->Add(w_name + "_fuse", dims_int32);
......@@ -165,7 +159,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "out_scale: " << out_scale
<< ", outType: " << outType.dimensions[0] << " : "
<< outType.dimensions[1];
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_out);
......@@ -181,7 +175,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memcpy(w->mutable_data<int8_t>(),
transpose_filter.mutable_data<uint8_t>(),
w->memory_size());
int neuron_errCode = (*neuron_model_setOperandValue)(
int neuron_errCode = NeuronModel_setOperandValue(
model, w_node->index(), w->raw_data(), w->memory_size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Set W operand value fail:" << neuron_errCode
......@@ -200,10 +194,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << int32_bias_data[0] << ":" << int32_bias_data[1] << ":"
<< int32_bias_data[2] << ":" << int32_bias_data[3];
neuron_errCode =
(*neuron_model_setOperandValue)(model,
bias_node->index(),
bias->raw_data(),
bias->memory_size()); // 2: bias
NeuronModel_setOperandValue(model,
bias_node->index(),
bias->raw_data(),
bias->memory_size()); // 2: bias
} else {
auto int32_bias = std::make_shared<Tensor>();
int32_bias->Resize({1, out_dims[1]});
......@@ -211,15 +205,15 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
VLOG(3) << "default: " << int32_bias->memory_size();
neuron_errCode =
(*neuron_model_setOperandValue)(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size()); // 2: bias
NeuronModel_setOperandValue(model,
bias_node->index(),
int32_bias->raw_data(),
int32_bias->memory_size()); // 2: bias
bias_node->set_data(int32_bias);
}
// Add fuse value
int32_t fuse_val[1] = {0};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1); // 3: fuse
std::vector<uint32_t> addInIndex = {in_node->index(),
......@@ -227,12 +221,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
bias_node->index(),
fuse_node->index()};
std::vector<uint32_t> addOutIndex = {out_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_FULLY_CONNECTED,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_FULLY_CONNECTED,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
......
......@@ -19,7 +19,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "NeuronAdapter.h"
#include "lite/backends/apu/neuron_adapter.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
......@@ -64,9 +64,6 @@ class Graph {
void set_model(NeuronModel* model) { model_ = model; }
NeuronModel* model() { return model_; }
void set_libHandle(void* libHandle) { libHandle_ = libHandle; }
void* libHandle() { return libHandle_; }
void set_input_names(const std::vector<std::string> input_names) {
input_names_ = input_names;
}
......@@ -99,7 +96,6 @@ class Graph {
}
private:
void* libHandle_;
NeuronModel* model_;
std::unordered_map<std::string, std::vector<std::shared_ptr<Node>>> nodes_;
int32_t operandIdx_ = 0;
......
......@@ -32,12 +32,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "] ";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -127,7 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
x_node = graph->Get(x_name);
} else {
// add input operand
(*neuron_model_addOperand)(model, &xType); // 0: x
NeuronModel_addOperand(model, &xType); // 0: x
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "x_scale: " << x_scale << ", xType: " << xType.dimensions[0] << ":"
......@@ -140,39 +134,39 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std::vector<uint32_t> dims_int32 = {0};
std::shared_ptr<Node> paddingL_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 1: padding left
NeuronModel_addOperand(model, &int32Type); // 1: padding left
paddingL_node = graph->Add(x_name + "_padding_left", dims_int32);
std::shared_ptr<Node> paddingR_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 2: padding right
NeuronModel_addOperand(model, &int32Type); // 2: padding right
paddingR_node = graph->Add(x_name + "_padding_right", dims_int32);
std::shared_ptr<Node> paddingT_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 3: padding top
NeuronModel_addOperand(model, &int32Type); // 3: padding top
paddingT_node = graph->Add(x_name + "_padding_top", dims_int32);
std::shared_ptr<Node> paddingB_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 4: padding bottom
NeuronModel_addOperand(model, &int32Type); // 4: padding bottom
paddingB_node = graph->Add(x_name + "_padding_bottom", dims_int32);
std::shared_ptr<Node> strideW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 5: stride width
NeuronModel_addOperand(model, &int32Type); // 5: stride width
strideW_node = graph->Add(x_name + "_stride_width", dims_int32);
std::shared_ptr<Node> strideH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 6: stride height
NeuronModel_addOperand(model, &int32Type); // 6: stride height
strideH_node = graph->Add(x_name + "_stride_height", dims_int32);
std::shared_ptr<Node> filterW_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 7: filter width
NeuronModel_addOperand(model, &int32Type); // 7: filter width
filterW_node = graph->Add(x_name + "_filter_width", dims_int32);
std::shared_ptr<Node> filterH_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 8: filter height
NeuronModel_addOperand(model, &int32Type); // 8: filter height
filterH_node = graph->Add(x_name + "_filter_height", dims_int32);
std::shared_ptr<Node> fuse_node = nullptr;
(*neuron_model_addOperand)(model, &int32Type); // 9: fuse
NeuronModel_addOperand(model, &int32Type); // 9: fuse
fuse_node = graph->Add(x_name + "_fuse", dims_int32);
// Add out type
......@@ -191,7 +185,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
if (graph->Has(out_name)) {
out_node = graph->Get(out_name);
} else {
(*neuron_model_addOperand)(model, &outType); // out
NeuronModel_addOperand(model, &outType); // out
out_node = graph->Add(out_name, dims_out);
}
VLOG(3) << "output_scale: " << x_scale
......@@ -202,39 +196,39 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add padding value
int32_t padding_val[1];
padding_val[0] = paddings[2];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingL_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[3];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingR_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[0];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingT_node->index(), padding_val, sizeof(int32_t) * 1);
padding_val[0] = paddings[1];
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, paddingB_node->index(), padding_val, sizeof(int32_t) * 1);
// Add Stride
int32_t stride_val[1];
stride_val[0] = strides[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideW_node->index(), stride_val, sizeof(int32_t) * 1);
stride_val[0] = strides[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, strideH_node->index(), stride_val, sizeof(int32_t) * 1);
// Add filter
int32_t filter_val[1];
filter_val[0] = global_pooling ? x_dims[3] : ksize[1]; // width
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, filterW_node->index(), filter_val, sizeof(int32_t) * 1);
filter_val[0] = global_pooling ? x_dims[2] : ksize[0]; // height
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, filterH_node->index(), filter_val, sizeof(int32_t) * 1);
// Add fuse
int32_t fuse_val[1] = {0};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {x_node->index(),
......@@ -251,19 +245,19 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int neuron_errCode;
if (pooling_type == "max") {
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_MAX_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_MAX_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
} else {
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_AVERAGE_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_AVERAGE_POOL_2D,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
}
return REBUILD_WHEN_SHAPE_CHANGED;
......
......@@ -31,12 +31,6 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto scope = op->scope();
VLOG(3) << "[APU] Converting [" + op_type + "]";
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Get input and output vars and op attributes
auto x_name = op_info->Input("X").front();
auto x = scope->FindMutableTensor(x_name);
......@@ -84,7 +78,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
VLOG(3) << "Graph has " << x_name << ",index: " << x_node->index();
} else {
// add input operand
(*neuron_model_addOperand)(model, &xType); // 0: input
NeuronModel_addOperand(model, &xType); // 0: input
x_node = graph->Add(x_name, dims_x);
}
VLOG(3) << "input_scale size: " << input_scale
......@@ -95,7 +89,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType betaType;
betaType.type = NEURON_FLOAT32;
betaType.dimensionCount = 0;
(*neuron_model_addOperand)(model, &betaType); // 1: beta
NeuronModel_addOperand(model, &betaType); // 1: beta
std::shared_ptr<Node> beta_node = nullptr;
beta_node = graph->Add(x_name + "_beta", dims_int32);
......@@ -103,7 +97,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType axisType;
axisType.type = NEURON_INT32;
axisType.dimensionCount = 0;
(*neuron_model_addOperand)(model, &axisType); // 2: axis
NeuronModel_addOperand(model, &axisType); // 2: axis
std::shared_ptr<Node> axis_node = nullptr;
axis_node = graph->Add(x_name + "_axis", dims_int32);
......@@ -114,28 +108,28 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
outType.zeroPoint = 128;
outType.dimensionCount = x_dims.size();
outType.dimensions = &dims_x[0];
(*neuron_model_addOperand)(model, &outType); // 3: output
NeuronModel_addOperand(model, &outType); // 3: output
std::shared_ptr<Node> out_node = nullptr;
out_node = graph->Add(out_name, dims_x);
VLOG(3) << "output_scale: " << out_scale;
float beta_val[] = {1.0f};
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, beta_node->index(), beta_val, sizeof(float) * 1);
int32_t axis_val[1];
axis_val[0] = axis;
(*neuron_model_setOperandValue)(
NeuronModel_setOperandValue(
model, axis_node->index(), axis_val, sizeof(int32_t) * 1);
std::vector<uint32_t> addInIndex = {
x_node->index(), beta_node->index(), axis_node->index()};
std::vector<uint32_t> addOutIndex = {out_node->index()};
int neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_SOFTMAX,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
int neuron_errCode = NeuronModel_addOperation(model,
NEURON_SOFTMAX,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Add op fail:" << op_type;
return FAILED;
......
......@@ -21,58 +21,6 @@ namespace lite {
namespace subgraph {
namespace apu {
// typedef to the build functions pointer signatures
typedef int (*Neuron_getVersion)(uint32_t* version);
typedef int (*NeuronModel_create)(NeuronModel** model);
typedef void (*NeuronModel_free)(NeuronModel* model);
typedef int (*NeuronModel_finish)(NeuronModel* model);
typedef int (*NeuronModel_addOperand)(NeuronModel* model,
const NeuronOperandType* type);
typedef int (*NeuronModel_setOperandValue)(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length);
typedef int (*NeuronModel_addOperation)(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_setOperandSymmPerChannelQuantParams)(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant);
typedef int (*NeuronExecution_create)(NeuronCompilation* compilation,
NeuronExecution** execution);
typedef void (*NeuronExecution_free)(NeuronExecution* execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution* execution);
void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
const std::string& argname) {
......@@ -102,11 +50,6 @@ void insert_transpose_node(void* ctx,
int neuron_errCode;
auto graph = static_cast<Graph*>(ctx);
auto model = graph->model();
auto libHandle = graph->libHandle();
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
LOAD_FUNCTIONS(
libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)
// Add input
NeuronOperandType inType;
......@@ -121,7 +64,7 @@ void insert_transpose_node(void* ctx,
VLOG(3) << "Has " << input_name;
input_node = graph->Get(input_name);
} else {
neuron_errCode = (*neuron_model_addOperand)(model, &inType); // input
neuron_errCode = NeuronModel_addOperand(model, &inType); // input
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
return;
......@@ -137,7 +80,7 @@ void insert_transpose_node(void* ctx,
uint32_t dims_perms[1] = {4};
permsType.dimensions = dims_perms;
neuron_errCode = (*neuron_model_addOperand)(model, &permsType); // perm
neuron_errCode = NeuronModel_addOperand(model, &permsType); // perm
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
return;
......@@ -148,7 +91,7 @@ void insert_transpose_node(void* ctx,
VLOG(3) << "axis :" << axis[0] << ":" << axis[1] << ":" << axis[2] << ":"
<< axis[3];
// &axis[0], sizeof(int32_t) * axis.size());
neuron_errCode = (*neuron_model_setOperandValue)(
neuron_errCode = NeuronModel_setOperandValue(
model, perms_node->index(), &axis[0], sizeof(int32_t) * axis.size());
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
......@@ -163,7 +106,7 @@ void insert_transpose_node(void* ctx,
outType.dimensionCount = output_shape.size();
outType.dimensions = &output_shape[0];
(*neuron_model_addOperand)(model, &outType); // output
NeuronModel_addOperand(model, &outType); // output
std::shared_ptr<Node> output_node = nullptr;
output_node = graph->Add(output_name, output_shape);
......@@ -172,12 +115,12 @@ void insert_transpose_node(void* ctx,
std::vector<uint32_t> addOutIndex = {output_node->index()};
neuron_errCode = (*neuron_model_addOperation)(model,
NEURON_TRANSPOSE,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
neuron_errCode = NeuronModel_addOperation(model,
NEURON_TRANSPOSE,
addInIndex.size(),
&addInIndex[0],
addOutIndex.size(),
&addOutIndex[0]);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Insert transpose op fail!";
......
......@@ -20,7 +20,6 @@
#include <string>
#include <unordered_map>
#include <vector>
#include "NeuronAdapter.h"
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
......@@ -29,53 +28,6 @@ namespace lite {
namespace subgraph {
namespace apu {
// typedef to the build functions pointer signatures
typedef int (*Neuron_getVersion)(uint32_t* version);
typedef int (*NeuronModel_create)(NeuronModel** model);
typedef void (*NeuronModel_free)(NeuronModel* model);
typedef int (*NeuronModel_finish)(NeuronModel* model);
typedef int (*NeuronModel_addOperand)(NeuronModel* model,
const NeuronOperandType* type);
typedef int (*NeuronModel_setOperandValue)(NeuronModel* model,
int32_t index,
const void* buffer,
size_t length);
typedef int (*NeuronModel_addOperation)(NeuronModel* model,
NeuronOperationType type,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel* model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
typedef int (*NeuronModel_setOperandSymmPerChannelQuantParams)(
NeuronModel* model,
int32_t index,
const NeuronSymmPerChannelQuantParams* channelQuant);
typedef int (*NeuronExecution_create)(NeuronCompilation* compilation,
NeuronExecution** execution);
typedef void (*NeuronExecution_free)(NeuronExecution* execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution* execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution* execution);
void* LoadFunc(void* libHandle, const char* name);
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
// Type/tensor converters for converting Paddle type/tensor to HiAI type/tensor
bool HasInputArg(const OpInfo* op_info,
const Scope* scope,
......
......@@ -28,58 +28,18 @@ namespace lite {
namespace kernels {
namespace apu {
inline void* LoadFunc(void* libHandle, const char* name) {
CHECK(libHandle != nullptr);
CHECK(name != nullptr);
void* fn = dlsym(libHandle, name);
if (fn == nullptr) {
LOG(WARNING) << "Unable to open Neuron Runtime function [" << name
<< "] Because " << dlerror();
}
return fn;
}
#define LOAD_FUNCTIONS(libHandle, FUNC_NAME, VARIABLE_NAME) \
FUNC_NAME VARIABLE_NAME = \
reinterpret_cast<FUNC_NAME>(LoadFunc(libHandle, #FUNC_NAME));
int SubgraphEngine::BuildDeviceProgram() {
typedef int (*Neuron_getVersion)(uint32_t * version);
typedef int (*NeuronModel_create)(NeuronModel * *model);
typedef void (*NeuronModel_free)(NeuronModel * model);
typedef int (*NeuronModel_finish)(NeuronModel * model);
typedef int (*NeuronModel_identifyInputsAndOutputs)(NeuronModel * model,
uint32_t inputCount,
const uint32_t* inputs,
uint32_t outputCount,
const uint32_t* outputs);
// Open the share library
libHandle_ = dlopen("libneuron_adapter.so", RTLD_LAZY);
if (libHandle_ == nullptr) {
LOG(WARNING) << "Failed to open libneuron_adapter.so. " << dlerror();
return subgraph::FAILED;
}
LOAD_FUNCTIONS(libHandle_, Neuron_getVersion, neuron_getVersion)
LOAD_FUNCTIONS(libHandle_, NeuronModel_create, neuron_model_create)
LOAD_FUNCTIONS(libHandle_, NeuronModel_finish, neuron_model_finish)
LOAD_FUNCTIONS(libHandle_,
NeuronModel_identifyInputsAndOutputs,
neuron_model_identifyInputsAndOutputs)
unsigned int version;
(*neuron_getVersion)(&version);
Neuron_getVersion(&version);
VLOG(3) << "Neuron Adapter version: " << version;
int status = 0;
subgraph::apu::Graph graph;
int neuron_errCode = (*neuron_model_create)(&model_);
int neuron_errCode = NeuronModel_create(&model_);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to create model";
return subgraph::FAILED;
}
graph.set_libHandle(libHandle_);
graph.set_model(model_);
graph.set_input_names(input_names_);
graph.set_output_names(output_names_);
......@@ -151,9 +111,9 @@ int SubgraphEngine::BuildDeviceProgram() {
VLOG(3) << "ins size: " << ins.size() << " outs size:" << outs.size();
// Set subgraph input/output
(*neuron_model_identifyInputsAndOutputs)(
NeuronModel_identifyInputsAndOutputs(
model_, ins.size(), &ins[0], outs.size(), &outs[0]);
neuron_errCode = (*neuron_model_finish)(model_);
neuron_errCode = NeuronModel_finish(model_);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to create NIR model:" << neuron_errCode;
return subgraph::FAILED;
......@@ -166,7 +126,7 @@ int SubgraphEngine::BuildDeviceProgram() {
return 1e+6 * time.tv_sec + time.tv_usec;
};
auto start_time = GetCurrentUS();
compilation_ = lite::apu::Device::Global().Build(libHandle_, model_);
compilation_ = lite::apu::Device::Global().Build(model_);
if (compilation_ == nullptr) {
LOG(WARNING) << "[APU] Build APU DLA model failed!";
return subgraph::FAILED;
......@@ -178,30 +138,6 @@ int SubgraphEngine::BuildDeviceProgram() {
}
int SubgraphEngine::LaunchDeviceProgram() {
typedef int (*NeuronExecution_create)(NeuronCompilation * compilation,
NeuronExecution * *execution);
typedef void (*NeuronExecution_free)(NeuronExecution * execution);
typedef int (*NeuronExecution_setInput)(NeuronExecution * execution,
int32_t index,
const NeuronOperandType* type,
const void* buffer,
size_t length);
typedef int (*NeuronExecution_setOutput)(NeuronExecution * execution,
int32_t index,
const NeuronOperandType* type,
void* buffer,
size_t length);
typedef int (*NeuronExecution_compute)(NeuronExecution * execution);
LOAD_FUNCTIONS(libHandle_, NeuronExecution_create, neuron_execution_create)
LOAD_FUNCTIONS(libHandle_, NeuronExecution_free, neuron_execution_free)
LOAD_FUNCTIONS(
libHandle_, NeuronExecution_setInput, neuron_execution_setInput)
LOAD_FUNCTIONS(
libHandle_, NeuronExecution_setOutput, neuron_execution_setOutput)
LOAD_FUNCTIONS(libHandle_, NeuronExecution_compute, neuron_execution_compute)
NeuronExecution* run1 = NULL;
auto GetCurrentUS = []() -> double {
struct timeval time;
gettimeofday(&time, NULL);
......@@ -209,7 +145,8 @@ int SubgraphEngine::LaunchDeviceProgram() {
};
auto start_time = GetCurrentUS();
int neuron_errCode = (*neuron_execution_create)(compilation_, &run1);
NeuronExecution* run = NULL;
int neuron_errCode = NeuronExecution_create(compilation_, &run);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "[APU] Build APU runtime failed!";
return subgraph::FAILED;
......@@ -226,21 +163,21 @@ int SubgraphEngine::LaunchDeviceProgram() {
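// Shift the int8 input data into the uint8 domain by adding 128 to each
// element before handing the buffer to the APU runtime.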
for (int j = 0; j < origin_itensors_[i]->data_size(); j++) {
input_data[j] += (uint8_t)128;
}
(*neuron_execution_setInput)(
run1, i, NULL, input_data, origin_itensors_[i]->memory_size());
NeuronExecution_setInput(
run, i, NULL, input_data, origin_itensors_[i]->memory_size());
}
// Set output buffer
for (size_t i = 0; i < origin_otensors_.size(); i++) {
(*neuron_execution_setOutput)(
run1,
NeuronExecution_setOutput(
run,
i,
NULL,
reinterpret_cast<void*>(origin_otensors_[i]->raw_data()),
origin_otensors_[i]->memory_size());
}
neuron_errCode = (*neuron_execution_compute)(run1);
neuron_errCode = NeuronExecution_compute(run);
if (NEURON_NO_ERROR != neuron_errCode) {
LOG(WARNING) << "Fail to run execution!" << neuron_errCode;
return subgraph::FAILED;
......@@ -253,11 +190,20 @@ int SubgraphEngine::LaunchDeviceProgram() {
output_data[j] -= (int8_t)128;
}
}
(*neuron_execution_free)(run1);
NeuronExecution_free(run);
VLOG(3) << "[APU] Process cost " << GetCurrentUS() - start_time << " us";
return 0;
}
SubgraphEngine::~SubgraphEngine() {
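// Release resources in reverse order of creation: the compilation first,
// then the model it was built from.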
if (compilation_) {
NeuronCompilation_free(compilation_);
}
if (model_) {
NeuronModel_free(model_);
}
}
void SubgraphCompute::PrepareForRun() {
auto& param = this->Param<param_t>();
engine_.reset(new SubgraphEngine(ctx_.get(),
......
......@@ -38,12 +38,12 @@ class SubgraphEngine : public subgraph::Engine {
: subgraph::Engine(
ctx, block_idx, block_desc, input_names, output_names, scope) {}
~SubgraphEngine();
protected:
int BuildDeviceProgram() override;
int LaunchDeviceProgram() override;
std::string model_name_;
void *libHandle_;
NeuronModel *model_;
NeuronCompilation *compilation_;
};
......