Commit 4e0c9ad3 authored by Megvii Engine Team, committed by Xu Xinran

feat(mgb/external): extern-c-opr dumper and loader for MACE

GitOrigin-RevId: bfe5420b1de97eae9ee943c4d3adaf652d795e03
Parent ca52a93e
@@ -727,6 +727,8 @@ This software has been modified by Megvii Inc.
4. FlatBuffers
Copyright 2014 Google Inc. All rights reserved.
5. MACE
Copyright 2018 Xiaomi Inc. All rights reserved.
Terms of Apache License Version 2.0
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
ifeq ($(SDKPATH),)
CURDIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
SDKPATH = $(CURDIR)/arm64-v8a
endif
TARGET := libmace_loader.so
all: $(TARGET)
CXX = aarch64-linux-android-g++
CXXFLAGS = -std=c++14 -I$(SDKPATH)/include \
-Dmgb_mace_loader_init=mgb_c_opr_init
DEBUG ?= 0
ifeq ($(DEBUG), 1)
CXXFLAGS += -O0 -g
else
CXXFLAGS += -O2
endif
LDFLAGS = -L$(SDKPATH)/lib \
-lmace_static -lcore -lgenerated_version \
-lport_android -lport_linux_base -lport_posix -lport_base \
-lproto -lutils -lgenerated_opencl_kernel \
-lops -lcore -lprotobuf-lite -llog \
-lpthread -l:rpcmem.a
LDLIBS = -shared -fPIC
$(TARGET): mace_loader.cpp extern_c_opr.h
$(CXX) $(CXXFLAGS) $(LDLIBS) -o $@ $< $(LDFLAGS)
clean:
@rm -f $(TARGET)
# Extern-C-Opr with MACE
### Build MegEngine `load_and_run` for arm64-v8a
```bash
cd $MEGENGINE_HOME
./scripts/cmake-build/cross_build_android_arm_inference.sh -a arm64-v8a
```
After the build succeeds, `load_and_run` will be located in `$MEGENGINE_HOME/build_dir/android/arm64-v8a/Release/install/bin`
### Build MACE libraries for arm64-v8a with GPU runtime
```bash
cd $MACE_HOME
RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh
cp -r $MACE_HOME/build/cmake-build/arm64-v8a/install $MEGENGINE_HOME/sdk/c-opr-loaders/mace/arm64-v8a
```
### Build MACE loader for MegEngine
```bash
SDKPATH=/path/to/mace-sdk make
```
If `SDKPATH` is not set, it defaults to `./arm64-v8a`
You can build in debug mode by adding `DEBUG=1` to the make command, which will show more runtime information
### Prepare a MACE model (for example, resnet_50) and wrap it with MegEngine extern c opr
```bash
python3 dump_model.py path/to/resnet_50.pb path/to/resnet_50.data path/to/resnet_50.mdl path/to/resnet_50.yml
```
The `*.pb` file holds the model structure; the `*.data` file holds the model parameters
Check [here](https://github.com/XiaoMi/mace-models) to learn how to write yml config files for MACE
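`dump_model.py` reads only a handful of fields from the yml. A minimal sketch of the structure it expects, written as the Python object `yaml.safe_load` returns (model name, tensor names and shapes are illustrative):
```python
# What dump_model.py expects after yaml.safe_load(); values are examples.
configs = {
    "models": {
        "resnet_50": {
            "subgraphs": [{
                "input_tensors": ["input"],       # MACE input tensor names
                "input_shapes": ["1,3,224,224"],  # parsed by str2tuple()
                "output_tensors": ["output"],
                "output_shapes": ["1,1000"],
                # optional: "check_tensors"/"check_shapes" override outputs
            }],
        },
    },
}
```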
### Run with load_and_run
First, push all the required files to the target device:
- load_and_run
- resnet_50.mdl
- libmace_loader.so
- the OpenCL library (e.g. libOpenCL.so, libmali.so or libEGL.so) if you want to run on GPU
```bash
RUNTIME=GPU OPENCLPATH=/path/to/opencl DATAFORMAT=NCHW /path/to/load_and_run /path/to/resnet_50.mdl --c-opr-lib /path/to/libmace_loader.so
```
`RUNTIME` candidates:
- CPU
- GPU

Running with the GPU runtime on Android requires an OpenCL library; point the `OPENCLPATH` environment variable at it.
MegEngine mainly uses the NCHW data format; if your model is NHWC, set `DATAFORMAT=NHWC`.
Running with the HEXAGON runtime requires extra setup; see [here](https://mace.readthedocs.io/en/latest/faq.html#why-is-mace-not-working-on-dsp).
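### Payload layout
`dump_model.py` packs everything the loader needs into a single little-endian buffer: input/output counts and names, output shapes, then the model and parameter blobs. A minimal parsing sketch for reference, assuming `payload` holds the raw bytes the script assembles (`parse_mace_payload` is an illustrative helper, not part of the SDK):
```python
import struct

def parse_mace_payload(payload: bytes):
    """Unpack the little-endian buffer assembled by dump_model.py."""
    off = 0

    def u32():
        nonlocal off
        (v,) = struct.unpack_from("<I", payload, off)
        off += 4
        return v

    def name():
        nonlocal off
        n = u32()
        s = payload[off:off + n].decode()
        off += n
        return s

    n_inputs, n_outputs = u32(), u32()
    input_names = [name() for _ in range(n_inputs)]
    output_names = [name() for _ in range(n_outputs)]
    assert u32() == n_outputs  # the shape section repeats the output count
    # per output: ndim, then ndim dims
    output_shapes = [[u32() for _ in range(u32())] for _ in range(n_outputs)]
    model_size = u32()
    model = payload[off:off + model_size]
    off += model_size
    param_size = u32()
    param = payload[off:off + param_size]
    return input_names, output_names, output_shapes, model, param
```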
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import argparse
import megengine._internal as mgb
import numpy as np
import yaml
# "1,3,224,224" -> (1,3,224,224)
def str2tuple(x):
    x = x.split(",")
    x = [int(a) for a in x]
    x = tuple(x)
    return x
def main():
    parser = argparse.ArgumentParser(
        description="load a .pb model and convert it to a corresponding "
        "load-and-run model"
    )
    parser.add_argument("input", help="mace model file")
    parser.add_argument("param", help="mace param file")
    parser.add_argument(
        "output", help="converted model that can be fed to dump_with_testcase_mge.py"
    )
    parser.add_argument("config", help="config file in yaml format")
    args = parser.parse_args()
    with open(args.config, "r") as f:
        # safe_load avoids executing arbitrary tags and the PyYAML warning
        # about calling yaml.load without an explicit Loader
        configs = yaml.safe_load(f)
    for model_name in configs["models"]:
        # only the first subgraph is used; other sub models are ignored for now
        sub_model = configs["models"][model_name]["subgraphs"][0]
        # input shapes
        isizes = [str2tuple(x) for x in sub_model["input_shapes"]]
        # input names
        input_names = sub_model["input_tensors"]
        # output names/shapes: prefer check_tensors when present
        if "check_tensors" in sub_model:
            output_names = sub_model["check_tensors"]
            osizes = [str2tuple(x) for x in sub_model["check_shapes"]]
        else:
            output_names = sub_model["output_tensors"]
            osizes = [str2tuple(x) for x in sub_model["output_shapes"]]
        with open(args.input, "rb") as fin:
            raw_model = fin.read()
        with open(args.param, "rb") as fin:
            raw_param = fin.read()
        model_size = len(raw_model).to_bytes(4, byteorder="little")
        param_size = len(raw_param).to_bytes(4, byteorder="little")
        n_inputs = len(input_names).to_bytes(4, byteorder="little")
        n_outputs = len(output_names).to_bytes(4, byteorder="little")
        names_buffer = n_inputs + n_outputs
        for iname in input_names:
            names_buffer += len(iname).to_bytes(4, byteorder="little")
            names_buffer += str.encode(iname)
        for oname in output_names:
            names_buffer += len(oname).to_bytes(4, byteorder="little")
            names_buffer += str.encode(oname)
        shapes_buffer = n_outputs
        for oshape in osizes:
            shapes_buffer += len(oshape).to_bytes(4, byteorder="little")
            for oi in oshape:
                shapes_buffer += oi.to_bytes(4, byteorder="little")
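        # payload layout (little-endian uint32 fields), mirrored by the
        # C++ loader in mace_loader.cpp:
        #   n_inputs | n_outputs
        #   per input:  name_len | name
        #   per output: name_len | name
        #   n_outputs | per output: ndim | dim[0..ndim)
        #   model_size | model | param_size | param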
        wk_raw_content = (
            names_buffer
            + shapes_buffer
            + model_size
            + raw_model
            + param_size
            + raw_param
        )
        # xpux: let MegEngine pick whichever comp node is available
        cn = mgb.comp_node("xpux")
        cg = mgb.comp_graph()
        inp = [
            mgb.make_shared(
                comp_node=cn,
                comp_graph=cg,
                shape=isizes[i],
                name=input_names[i],
                dtype=np.float32,
            )
            for i in range(len(isizes))
        ]
        oup = mgb.opr.extern_c_opr_placeholder(
            inp, osizes, dump_name="mace", dump_data=wk_raw_content,
        )
        mgb.serialize_comp_graph_to_file(args.output, oup)
if __name__ == "__main__":
    main()
../../../src/serialization/include/megbrain/serialization/extern_c_opr.h
\ No newline at end of file
/**
* \file sdk/c-opr-loaders/mace/mace_loader.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "mace/public/mace.h"

#include "extern_c_opr.h"
#define ASSERT(x, msg)                                                    \
    do {                                                                  \
        if (!(x)) {                                                       \
            printf("error at %s:%d %s\n", __FILE__, __LINE__,             \
                   __FUNCTION__);                                         \
            printf("%s\n", msg);                                          \
            __builtin_trap();                                             \
        }                                                                 \
    } while (0)
class MGBOprDescImpl {
struct UserData {
std::shared_ptr<mace::MaceEngine> engine;
size_t nr_inputs, nr_outputs;
std::vector<std::vector<int64_t>> output_shapes;
std::vector<std::string> input_names, output_names;
};
static UserData* user_data(const MGBOprDesc* self) {
return static_cast<UserData*>(self->user_data);
}
static void release(MGBOprDesc* self) {
// free all data buffers
delete user_data(self);
delete self;
}
static size_t hash(const MGBOprDesc* self) {
return reinterpret_cast<size_t>(self);
}
static int is_same(const MGBOprDesc* self, const MGBOprDesc* rhs) {
return self == rhs;
}
static void infer_shape(const MGBOprDesc* self, const MGBTensorShape* input,
MGBTensorShape* output) {
auto ud = user_data(self);
// infer output shape from user data
for (size_t i = 0; i < ud->nr_outputs; i++) {
output[i].ndim = ud->output_shapes[i].size();
for (size_t j = 0; j < output[i].ndim; j++) {
output[i].shape[j] = ud->output_shapes[i][j];
}
}
}
    static void infer_dtype(const MGBOprDesc* self, const MGBDType* input,
                            MGBDType* output) {
        ASSERT(input[0] == MGB_DTYPE_FLOAT32, "input dtype must be float32");
        // all outputs are float32, matching the MACE tensors we create
        for (size_t i = 0; i < user_data(self)->nr_outputs; i++) {
            output[i] = MGB_DTYPE_FLOAT32;
        }
    }
static void execute(const MGBOprDesc* self, const MGBTensor* input,
const MGBTensor* output) {
auto ud = user_data(self);
// create input and output tensor buffers
std::map<std::string, mace::MaceTensor> mace_inputs;
std::map<std::string, mace::MaceTensor> mace_outputs;
        // layout defaults to NCHW; DATAFORMAT=NHWC switches it
        auto mace_data_format = mace::DataFormat::NCHW;
        const char* data_format = getenv("DATAFORMAT");
        if (data_format && !strcmp(data_format, "NHWC")) {
            mace_data_format = mace::DataFormat::NHWC;
        }
for (size_t i = 0; i < ud->nr_inputs; ++i) {
// allocate input
uint32_t ndim = input[i].layout.shape.ndim;
auto input_shape = std::vector<int64_t>(input[i].layout.shape.shape,
input[i].layout.shape.shape + ndim);
            int64_t input_size =
                    std::accumulate(input_shape.begin(), input_shape.end(),
                                    static_cast<int64_t>(1),
                                    std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size],
std::default_delete<float[]>());
memcpy(buffer_in.get(), input[i].data, input_size * sizeof(float));
mace_inputs[ud->input_names[i]] =
mace::MaceTensor(input_shape, buffer_in, mace_data_format);
}
for (size_t i = 0; i < ud->nr_outputs; ++i) {
// allocate output
uint32_t ndim = output[i].layout.shape.ndim;
auto output_shape = std::vector<int64_t>(output[i].layout.shape.shape,
output[i].layout.shape.shape + ndim);
            int64_t output_size =
                    std::accumulate(output_shape.begin(), output_shape.end(),
                                    static_cast<int64_t>(1),
                                    std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<float>(new float[output_size],
std::default_delete<float[]>());
mace_outputs[ud->output_names[i]] =
mace::MaceTensor(output_shape, buffer_out, mace_data_format);
}
// run the model
auto status = (ud->engine)->Run(mace_inputs, &mace_outputs);
ASSERT(status == mace::MaceStatus::MACE_SUCCESS,
"Error in running mace engine");
        // copy computed outputs back into the MGB output tensors, following
        // the graph's output order
        for (size_t i = 0; i < ud->nr_outputs; ++i) {
            auto& mace_out = mace_outputs.at(ud->output_names[i]);
            auto& shape = output[i].layout.shape;
            size_t nr_elems = 1;
            for (size_t j = 0; j < shape.ndim; j++) {
                nr_elems *= shape.shape[j];
            }
            memcpy(output[i].data, mace_out.data().get(),
                   nr_elems * sizeof(float));
        }
}
public:
static MGBOprDesc* make(size_t nr_input, const void *buf, size_t buf_len) {
auto ud = std::make_unique<UserData>();
std::shared_ptr<mace::MaceEngine> engine;
        // RUNTIME=GPU selects the GPU runtime; default is CPU
        mace::DeviceType device_type = mace::DeviceType::CPU;
        const char* runtime_mode = getenv("RUNTIME");
        if (runtime_mode && !strcmp(runtime_mode, "GPU")) {
            device_type = mace::DeviceType::GPU;
        }
mace::MaceEngineConfig config(device_type);
        // set up the GPU context, mainly the OpenCL storage path
if (device_type == mace::DeviceType::GPU) {
std::shared_ptr<mace::GPUContext> gpu_context;
char *opencl_path = getenv("OPENCLPATH");
ASSERT(opencl_path, "Please set opencl library path");
std::string storage_path(opencl_path);
gpu_context = mace::GPUContextBuilder()
.SetStoragePath(storage_path)
.Finalize();
config.SetGPUContext(gpu_context);
        config.SetGPUHints(mace::GPUPerfHint::PERF_HIGH,
                           mace::GPUPriorityHint::PRIORITY_HIGH);
}
std::vector<std::string> input_names, output_names;
// extract all information from buf
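        // buf layout (little-endian uint32 fields), as assembled by
        // dump_model.py:
        //   n_inputs | n_outputs
        //   per input:  name_len | name
        //   per output: name_len | name
        //   n_outputs | per output: ndim | dim[0..ndim)
        //   model_size | model | param_size | param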
void *buffer = const_cast<void *>(buf);
ud->nr_inputs = *reinterpret_cast<uint32_t*>(buffer);
ud->nr_outputs = *(reinterpret_cast<uint32_t*>(buffer) + 1);
// interpret input names
char *name_buf = reinterpret_cast<char*>(buffer) + 8;
for (size_t i = 0; i < ud->nr_inputs; i++) {
size_t ilen = *reinterpret_cast<uint32_t*>(name_buf);
input_names.push_back(std::string(name_buf + 4, ilen));
name_buf += (ilen + 4);
}
// interpret output names
buffer = name_buf;
name_buf = reinterpret_cast<char*>(buffer);
for (size_t i = 0; i < ud->nr_outputs; i++) {
size_t olen = *reinterpret_cast<uint32_t*>(name_buf);
output_names.push_back(std::string(name_buf + 4, olen));
name_buf += (olen + 4);
}
ud->input_names = input_names;
ud->output_names = output_names;
// interpret output shapes
buffer = name_buf;
        // skip the repeated n_outputs field at the head of the shape section
        uint32_t* shape_buf = reinterpret_cast<uint32_t*>(buffer) + 1;
        for (size_t i = 0; i < ud->nr_outputs; i++) {
            size_t olen = *shape_buf;
            ud->output_shapes.push_back(
                    std::vector<int64_t>(shape_buf + 1, shape_buf + olen + 1));
            shape_buf += (olen + 1);
        }
        buffer = shape_buf;
        const size_t model_buf_len = *reinterpret_cast<uint32_t*>(buffer);
        unsigned char* model_buf = reinterpret_cast<unsigned char*>(buffer) + 4;
        const size_t param_buf_len =
                *reinterpret_cast<uint32_t*>(model_buf + model_buf_len);
        unsigned char* param_buf = model_buf + model_buf_len + 4;
// create mace engine
auto create_engine_status = mace::CreateMaceEngineFromProto(
model_buf,
model_buf_len,
param_buf,
param_buf_len,
input_names,
output_names,
config,
&engine
);
ASSERT(create_engine_status == mace::MaceStatus::MACE_SUCCESS,
"Error in creating mace engine");
ud->engine = engine;
auto ret = std::make_unique<MGBOprDesc>();
mgb_init_opr_desc(ret.get(), ud->nr_outputs, "mace");
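        // MGB_OPR_DESC_FOREACH_MEM_FN enumerates the required callbacks
        // (release/hash/is_same/infer_shape/execute); infer_dtype is not in
        // that list and is wired up explicitly below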
#define a(n) ret->n = &n;
MGB_OPR_DESC_FOREACH_MEM_FN(a);
a(infer_dtype);
#undef a
ret->user_data = ud.release();
return ret.release();
}
};
class MGBOprLoaderImpl {
static MGBOprDesc* create_desc(size_t nr_input, const void *buf,
size_t buf_len)
{
return MGBOprDescImpl::make(nr_input, buf, buf_len);
}
public:
static MGBOprLoader make() {
return {"mace", create_desc};
}
};
extern "C" {
// public interface
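// called by the host (e.g. load_and_run via --c-opr-lib) after the .so is
// loaded; registers the "mace" loader so graphs dumped with dump_name="mace"
// can be deserialized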
__attribute__((visibility("default")))
void MGB_C_OPR_INIT_FUNC(const MGBExternCOprApi* (*get_api)(int))
{
const MGBExternCOprApi* api = get_api(MGB_EXTERN_C_OPR_VERSION);
ASSERT(api, "Create api failed");
MGBOprLoader loader = MGBOprLoaderImpl::make();
api->register_loader(&loader);
}
} // extern "C"