提交 43b33191 编写于 作者: L liyin 提交者: 李寅

Refactor model run: Support CMake everything

上级 8d64715c
bazel-*
build/
build/*
cmake-build/
cmake-build-debug/
docs/_build/
......
......@@ -42,15 +42,15 @@ build_docs:
cmake_build_android-armeabi-v7a:
stage: build
script:
- sh tools/cmake-build-android-armeabi-v7a-full.sh
- LIBMACE32_FULL_SIZE=`stat -c%s cmake-build/android-armeabi-v7a-full/install/lib/libmace.so`
- RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
- LIBMACE32_FULL_SIZE=`stat -c%s build/cmake-build/armeabi-v7a/install/lib/libmace.so`
- if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size too large"; exit 1; fi
cmake_build_android-arm64-v8:
stage: build
script:
- sh tools/cmake-build-android-arm64-v8a-full.sh
- LIBMACE64_FULL_SIZE=`stat -c%s cmake-build/android-arm64-v8a-full/install/lib/libmace.so`
- RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh
- LIBMACE64_FULL_SIZE=`stat -c%s build/cmake-build/arm64-v8a/install/lib/libmace.so`
- if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size too large"; exit 1; fi
bazel_build:
......
......@@ -15,6 +15,7 @@ option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF)
option(MACE_ENABLE_OPT_SIZE "whether to build with optimized binary size" ON)
option(MACE_ENABLE_OBFUSCATE "whether to build with code obfuscation" ON)
option(MACE_ENABLE_CCACHE "whether to build with ccache" ON)
option(MACE_ENABLE_CODE_MODE "whether to use code mode" OFF)
message("CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
......@@ -40,11 +41,15 @@ if(MACE_ENABLE_OPT_SIZE)
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -fno-rtti -fno-exceptions -DGOOGLE_PROTOBUF_NO_RTTI -DPROTOBUF_USE_EXCEPTIONS=0")
endif(MACE_ENABLE_OPT_SIZE)
if(MACE_ENABLE_CODE_MODE)
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -DMODEL_GRAPH_FORMAT_CODE")
endif(MACE_ENABLE_CODE_MODE)
# flags apply only to mace code (third_party excluded)
# -Wno-error=unused-command-line-argument: official Android toolchain contains
# unsupported argument and will break ccache preprocessor
if(ANDROID)
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror -Wno-error=unused-command-line-argument")
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror -Wno-error=unused-command-line-argument -Wno-error=unevaluated-expression -Wno-error=tautological-compare")
else(ANDROID)
set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror")
endif(ANDROID)
......
......@@ -31,3 +31,21 @@ add_dependencies(generated_opencl_kernel opencl_kernel_src)
install(TARGETS generated_version ARCHIVE DESTINATION lib)
install(TARGETS generated_opencl_kernel ARCHIVE DESTINATION lib)
# Placeholder source so the model targets always have at least one file,
# even before any model code has been generated into models/**/code/.
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/null.cc "")
# NOTE(review): file(GLOB) is evaluated at configure time only — newly
# generated model sources require a CMake re-configure to be picked up.
file(GLOB CODEGEN_MODELS ${CMAKE_CURRENT_BINARY_DIR}/null.cc models/**/code/*.cc)
# Static library of generated model code (used in code mode).
add_library(model STATIC ${CODEGEN_MODELS})
target_link_libraries(model PRIVATE core proto utils port)
install(TARGETS model ARCHIVE DESTINATION lib)
# Shared counterpart of the generated-model library.
add_library(model_shared SHARED ${CODEGEN_MODELS})
target_link_libraries(model_shared PRIVATE core proto utils port)
if(NOT APPLE)
  # Apple's linker does not support --version-script; restrict exported
  # symbols via the version script on other platforms only.
  set_target_properties(model_shared PROPERTIES LINK_FLAGS
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/model_version_script.lds")
endif(NOT APPLE)
install(TARGETS model_shared DESTINATION lib)
# Install generated model headers (incl. the engine factory helper) so
# applications can create engines from compiled-in models.
file(GLOB MODEL_HEADERS engine/mace_engine_factory.h models/**/code/*.h)
install(FILES ${MODEL_HEADERS} DESTINATION include/models)
/* Linker version script for the model_shared library: export only the
 * generated model entry points; hide every other symbol to reduce the
 * binary's visible surface. */
mace {
  global:
    /* Wildcards because each generated model gets its own mangled
     * variant of these functions. */
    *LoadModelData*;
    *CreateNet*;
    *ModelName*;
    *ModelChecksum*;
    *ModelBuildTime*;
    *ModelBuildOptions*;
  local:
    /* Everything else stays internal. */
    *;
};
......@@ -603,9 +603,11 @@ MaceEngine::Impl::~Impl() {
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
if (device_type_ == HEXAGON || device_type_ == HTA) {
if (VLOG_IS_ON(2)) {
hexagon_controller_->GetPerfInfo();
hexagon_controller_->PrintLog();
}
if (VLOG_IS_ON(1)) {
hexagon_controller_->GetPerfInfo();
}
MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error");
MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error");
}
......
set(MACE_PROTO_PROTOS mace.proto)
set(MACE_PROTO_SRCS)
set(MACE_PROTO_HDRS)
set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto)
foreach(proto_file ${MACE_PROTO_PROTOS})
get_filename_component(proto_file_abs ${proto_file} ABSOLUTE)
......@@ -17,10 +18,20 @@ foreach(proto_file ${MACE_PROTO_PROTOS})
DEPENDS protoc_bin
VERBATIM
)
set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py)
add_custom_command(
OUTPUT ${PROTO_GENERATED_PY_FILES}
COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs}
COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}"
DEPENDS protoc_bin
VERBATIM
)
endforeach()
add_custom_target(mace_proto_src DEPENDS ${MACE_PROTO_SRCS}
COMMENT "Checking if re-generation is required" )
add_custom_target(mace_proto_src DEPENDS ${PROTO_GENERATED_FILES}
COMMENT "Checking if re-generation is required")
add_custom_target(mace_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES})
add_library(proto ${MACE_PROTO_SRCS})
target_link_libraries(proto libprotobuf_lite)
......
......@@ -4,6 +4,7 @@ file(GLOB MACE_RUN_SRCS
add_executable(mace_run ${MACE_RUN_SRCS})
target_link_libraries(mace_run PUBLIC
mace_static
model
gflags
)
......
......@@ -548,10 +548,16 @@ int Main(int argc, char **argv) {
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy;
LOG(INFO) << "limit_opencl_kernel_time: "
<< getenv("MACE_LIMIT_OPENCL_KERNEL_TIME");
LOG(INFO) << "opencl_queue_window_size: "
<< getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE");
auto limit_opencl_kernel_time = getenv("MACE_LIMIT_OPENCL_KERNEL_TIME");
if (limit_opencl_kernel_time) {
LOG(INFO) << "limit_opencl_kernel_time: "
<< limit_opencl_kernel_time;
}
auto opencl_queue_window_size = getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE");
if (opencl_queue_window_size) {
LOG(INFO) << "opencl_queue_window_size: "
<< getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE");
}
std::vector<std::string> input_shapes = Split(FLAGS_input_shape, ':');
std::vector<std::string> output_shapes = Split(FLAGS_output_shape, ':');
......@@ -584,14 +590,12 @@ int Main(int argc, char **argv) {
for (size_t i = 0; i < output_count; ++i) {
output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
}
float cpu_float32_performance = 0.0f;
if (FLAGS_input_dir.empty()) {
// get cpu capability
Capability cpu_capability = GetCapability(DeviceType::CPU);
cpu_float32_performance = cpu_capability.float32_performance.exec_time;
}
bool ret = false;
for (int i = 0; i < FLAGS_restart_round; ++i) {
VLOG(0) << "restart round " << i;
......
# Generate Python protobuf bindings (caffe_pb2.py) for the third-party
# Caffe proto; the bindings are consumed by the model converter tools.
set(CAFFE_PROTO_PROTOS ${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.proto)
# NOTE(review): output goes into the source tree (tools/python/py_proto),
# not the binary dir — intentional so the Python tools can import it,
# but it means configure/build writes into the checkout.
set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto)
foreach(proto_file ${CAFFE_PROTO_PROTOS})
  get_filename_component(proto_file_abs ${proto_file} ABSOLUTE)
  get_filename_component(basename ${proto_file} NAME_WE)
  # e.g. caffe.proto -> ${MACE_PROTO_PYTHON_DIR}/caffe_pb2.py
  set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py)
  add_custom_command(
    OUTPUT ${PROTO_GENERATED_PY_FILES}
    COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${PROJECT_SOURCE_DIR}/third_party/caffe ${proto_file_abs}
    COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}"
    DEPENDS protoc_bin
    VERBATIM
  )
endforeach()
# ALL so the bindings are regenerated as part of the default build.
add_custom_target(caffe_proto_src ALL DEPENDS ${PROTO_GENERATED_PY_FILES})
......@@ -50,6 +50,7 @@ include(${PROJECT_SOURCE_DIR}/third_party/opencl-clhpp/opencl-clhpp.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/opencl-headers/opencl-headers.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/protobuf/protobuf.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/tflite/tflite.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.cmake)
if(MACE_ENABLE_HEXAGON_DSP)
include(${PROJECT_SOURCE_DIR}/third_party/nnlib/nnlib.cmake)
......
#!/usr/bin/env bash
# Clear the MACE workspace: remove generated model/engine/OpenCL codegen
# outputs and every build artifact EXCEPT the CMake build directories
# (build/cmake-build*), which are kept warm for incremental builds.
rm -rf mace/codegen/models
rm -rf mace/codegen/engine
rm -rf mace/codegen/opencl
for d in build/*; do
  # BUGFIX: the glob on the right-hand side of [[ != ]] must be
  # unquoted; quoting it made this a literal string comparison, so the
  # cmake-build directories were deleted along with everything else.
  if [[ "$d" != build/cmake-build* ]]; then
    rm -rf "$d"
  fi
done
#!/usr/bin/env bash
# BUGFIX: this script uses bash-only [[ ]] tests, so the shebang must be
# bash, not sh (dash and other POSIX shells fail on [[ ]]).
set -e

# Build MACE for ARM Linux aarch64 (cross-compiled).
# Environment:
#   BUILD_DIR - output directory (default: build/cmake-build/aarch64-linux-gnu)
#   RUNTIME   - set to GPU to enable the OpenCL runtime
#   RUNMODE   - set to code to compile models into the engine (code mode)
#   LINARO_AARCH64_LINUX_GNU - root of the Linaro cross toolchain
if [[ -z "$BUILD_DIR" ]]; then
  BUILD_DIR=build/cmake-build/aarch64-linux-gnu
fi

MACE_ENABLE_OPENCL=OFF
if [[ "$RUNTIME" == "GPU" ]]; then
  MACE_ENABLE_OPENCL=ON
fi

MACE_ENABLE_CODE_MODE=OFF
if [[ "$RUNMODE" == "code" ]]; then
  MACE_ENABLE_CODE_MODE=ON
fi

mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
cmake -DCROSSTOOL_ROOT=${LINARO_AARCH64_LINUX_GNU} \
      -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/aarch64-linux-gnu.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DMACE_ENABLE_NEON=ON \
      -DMACE_ENABLE_QUANTIZE=ON \
      -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
      -DMACE_ENABLE_OPT_SIZE=ON \
      -DMACE_ENABLE_OBFUSCATE=ON \
      -DMACE_ENABLE_TESTS=ON \
      -DMACE_ENABLE_BENCHMARKS=ON \
      -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
      -DCMAKE_INSTALL_PREFIX=install \
      ../../..
make -j6 VERBOSE=1 && make install
cd ../../..
#!/usr/bin/env bash
# BUGFIX: this script uses bash-only [[ ]] tests, so the shebang must be
# bash, not sh (dash and other POSIX shells fail on [[ ]]).
set -e

# Build MACE for ARM Linux gnueabihf (cross-compiled).
# Environment:
#   BUILD_DIR - output directory (default: build/cmake-build/arm-linux-gnueabihf)
#   RUNTIME   - set to GPU to enable the OpenCL runtime
#   RUNMODE   - set to code to compile models into the engine (code mode)
#   LINARO_ARM_LINUX_GNUEABIHF - root of the Linaro cross toolchain
if [[ -z "$BUILD_DIR" ]]; then
  BUILD_DIR=build/cmake-build/arm-linux-gnueabihf
fi

MACE_ENABLE_CODE_MODE=OFF
# Quote $RUNMODE for consistency with the sibling build scripts.
if [[ "$RUNMODE" == "code" ]]; then
  MACE_ENABLE_CODE_MODE=ON
fi

MACE_ENABLE_OPENCL=OFF
if [[ "$RUNTIME" == "GPU" ]]; then
  MACE_ENABLE_OPENCL=ON
fi

mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
cmake -DCROSSTOOL_ROOT=${LINARO_ARM_LINUX_GNUEABIHF} \
      -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/arm-linux-gnueabihf.cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DMACE_ENABLE_NEON=ON \
      -DMACE_ENABLE_QUANTIZE=ON \
      -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
      -DMACE_ENABLE_OPT_SIZE=ON \
      -DMACE_ENABLE_OBFUSCATE=ON \
      -DMACE_ENABLE_TESTS=ON \
      -DMACE_ENABLE_BENCHMARKS=ON \
      -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
      -DCMAKE_INSTALL_PREFIX=install \
      ../../..
make -j6 VERBOSE=1 && make install
cd ../../..
#!/usr/bin/env bash
# BUGFIX: this script uses bash-only [[ ]] tests, so the shebang must be
# bash, not sh (dash and other POSIX shells fail on [[ ]]).
set -e

# Build MACE for Android arm64-v8a.
# Environment:
#   BUILD_DIR - output directory (default: build/cmake-build/arm64-v8a)
#   RUNTIME   - GPU / HEXAGON / HTA / APU to enable one extra runtime
#   RUNMODE   - set to code to compile models into the engine (code mode)
#   ANDROID_NDK_HOME - root of the Android NDK
if [[ -z "$BUILD_DIR" ]]; then
  BUILD_DIR=build/cmake-build/arm64-v8a
fi

MACE_ENABLE_OPENCL=OFF
MACE_ENABLE_HEXAGON_DSP=OFF
MACE_ENABLE_HEXAGON_HTA=OFF
MACE_ENABLE_MTK_APU=OFF
if [[ "$RUNTIME" == "GPU" ]]; then
  MACE_ENABLE_OPENCL=ON
elif [[ "$RUNTIME" == "HEXAGON" ]]; then
  MACE_ENABLE_HEXAGON_DSP=ON
elif [[ "$RUNTIME" == "HTA" ]]; then
  MACE_ENABLE_HEXAGON_HTA=ON
elif [[ "$RUNTIME" == "APU" ]]; then
  MACE_ENABLE_MTK_APU=ON
fi

MACE_ENABLE_CODE_MODE=OFF
if [[ "$RUNMODE" == "code" ]]; then
  MACE_ENABLE_CODE_MODE=ON
fi

mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
cmake -DANDROID_ABI="arm64-v8a" \
      -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \
      -DANDROID_NATIVE_API_LEVEL=28 \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_ANDROID_STL_TYPE=c++_shared \
      -DMACE_ENABLE_NEON=ON \
      -DMACE_ENABLE_QUANTIZE=ON \
      -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
      -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
      -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
      -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
      -DMACE_ENABLE_OPT_SIZE=ON \
      -DMACE_ENABLE_OBFUSCATE=ON \
      -DMACE_ENABLE_TESTS=ON \
      -DMACE_ENABLE_BENCHMARKS=ON \
      -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
      -DCMAKE_INSTALL_PREFIX=install \
      ../../..
make -j6 VERBOSE=1 && make install
cd ../../..
#!/usr/bin/env bash
set -e

# Build MACE for Android armeabi-v7a.
# Environment:
#   BUILD_DIR - output directory (default: build/cmake-build/armeabi-v7a)
#   RUNTIME   - GPU / HEXAGON / HTA / APU to enable one extra runtime
#   RUNMODE   - set to code to compile models into the engine (code mode)
#   ANDROID_NDK_HOME - root of the Android NDK
BUILD_DIR="${BUILD_DIR:-build/cmake-build/armeabi-v7a}"

# All optional runtimes default to OFF; at most one is switched on.
MACE_ENABLE_OPENCL=OFF
MACE_ENABLE_HEXAGON_DSP=OFF
MACE_ENABLE_HEXAGON_HTA=OFF
MACE_ENABLE_MTK_APU=OFF
case "$RUNTIME" in
  GPU)     MACE_ENABLE_OPENCL=ON ;;
  HEXAGON) MACE_ENABLE_HEXAGON_DSP=ON ;;
  HTA)     MACE_ENABLE_HEXAGON_HTA=ON ;;
  APU)     MACE_ENABLE_MTK_APU=ON ;;
esac

MACE_ENABLE_CODE_MODE=OFF
case "$RUNMODE" in
  code) MACE_ENABLE_CODE_MODE=ON ;;
esac

mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
cmake -DANDROID_ABI="armeabi-v7a" \
      -DANDROID_ARM_NEON=ON \
      -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \
      -DANDROID_NATIVE_API_LEVEL=28 \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_ANDROID_STL_TYPE=c++_shared \
      -DMACE_ENABLE_NEON=ON \
      -DMACE_ENABLE_QUANTIZE=ON \
      -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \
      -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \
      -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \
      -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \
      -DMACE_ENABLE_OPT_SIZE=ON \
      -DMACE_ENABLE_OBFUSCATE=ON \
      -DMACE_ENABLE_TESTS=ON \
      -DMACE_ENABLE_BENCHMARKS=ON \
      -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
      -DCMAKE_INSTALL_PREFIX=install \
      ../../..
make -j6 VERBOSE=1 && make install
cd ../../..
#!/usr/bin/env bash
# BUGFIX: this script uses bash-only [[ ]] tests, so the shebang must be
# bash, not sh (dash and other POSIX shells fail on [[ ]]).
set -e

# Build MACE for the host machine (no NEON/quantize/OpenCL).
# Environment:
#   BUILD_DIR - output directory (default: build/cmake-build/host)
#   RUNMODE   - set to code to compile models into the engine (code mode)
if [[ -z "$BUILD_DIR" ]]; then
  BUILD_DIR=build/cmake-build/host
fi

MACE_ENABLE_CODE_MODE=OFF
if [[ "$RUNMODE" == "code" ]]; then
  MACE_ENABLE_CODE_MODE=ON
fi

mkdir -p "${BUILD_DIR}" && cd "${BUILD_DIR}"
cmake -DMACE_ENABLE_NEON=OFF \
      -DMACE_ENABLE_QUANTIZE=OFF \
      -DMACE_ENABLE_OPENCL=OFF \
      -DMACE_ENABLE_TESTS=ON \
      -DMACE_ENABLE_BENCHMARKS=ON \
      -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
      -DCMAKE_INSTALL_PREFIX=install \
      ../../..
make -j6 VERBOSE=1 && make install
cd ../../..
......@@ -224,7 +224,7 @@ def get_opencl_mode(configs):
def get_quantize_mode(configs):
for model_name in configs[YAMLKeyword.models]:
quantize =\
quantize = \
configs[YAMLKeyword.models][model_name].get(
YAMLKeyword.quantize, 0)
if quantize == 1:
......@@ -297,8 +297,8 @@ def get_model_files(model_config, model_output_dir):
if sha256_checksum(model_file) != model_sha256_checksum:
error_info = model_file_path + \
" model file sha256checksum not match " + \
model_sha256_checksum
" model file sha256checksum not match " + \
model_sha256_checksum
MaceLogger.error(ModuleName.MODEL_CONVERTER, error_info)
if weight_file_path.startswith("http://") or \
......@@ -316,8 +316,8 @@ def get_model_files(model_config, model_output_dir):
if weight_file:
if sha256_checksum(weight_file) != weight_sha256_checksum:
error_info = weight_file_path + \
" weight file sha256checksum not match " + \
weight_sha256_checksum
" weight file sha256checksum not match " + \
weight_sha256_checksum
MaceLogger.error(ModuleName.MODEL_CONVERTER, error_info)
if quantize_range_file_path.startswith("http://") or \
......@@ -547,7 +547,7 @@ def format_model_config(flags):
[])
if input_data_formats:
if not isinstance(input_data_formats, list):
subgraph[YAMLKeyword.input_data_formats] =\
subgraph[YAMLKeyword.input_data_formats] = \
[input_data_formats] * input_size
else:
mace_check(len(input_data_formats)
......@@ -555,7 +555,7 @@ def format_model_config(flags):
ModuleName.YAML_CONFIG,
"input_data_formats should match"
" the size of input.")
for input_data_format in\
for input_data_format in \
subgraph[YAMLKeyword.input_data_formats]:
mace_check(input_data_format in DataFormatStrs,
ModuleName.YAML_CONFIG,
......@@ -578,14 +578,14 @@ def format_model_config(flags):
ModuleName.YAML_CONFIG,
"output_data_formats should match"
" the size of output")
for output_data_format in\
for output_data_format in \
subgraph[YAMLKeyword.output_data_formats]:
mace_check(output_data_format in DataFormatStrs,
ModuleName.YAML_CONFIG,
"'output_data_formats' must be in "
+ str(DataFormatStrs))
else:
subgraph[YAMLKeyword.output_data_formats] =\
subgraph[YAMLKeyword.output_data_formats] = \
[DataFormat.NHWC] * output_size
validation_threshold = subgraph.get(
......@@ -767,6 +767,7 @@ def print_library_summary(configs):
def convert_func(flags):
configs = config_parser.parse(flags.config)
print(configs)
library_name = configs[YAMLKeyword.library_name]
if not os.path.exists(BUILD_OUTPUT_DIR):
os.makedirs(BUILD_OUTPUT_DIR)
......@@ -817,26 +818,27 @@ def convert_func(flags):
for model_name, model_config in configs[YAMLKeyword.models].items():
model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name)
encrypt.encrypt(model_name,
"%s/%s.pb" % (model_codegen_dir, model_name),
"%s/%s.data" % (model_codegen_dir, model_name),
model_config[YAMLKeyword.runtime],
"%s/model/%s.pb" % (model_codegen_dir, model_name),
"%s/model/%s.data" % (model_codegen_dir, model_name),
config_parser.parse_device_type(
model_config[YAMLKeyword.runtime]),
model_codegen_dir,
bool(model_config.get(YAMLKeyword.obfuscate, 1)))
bool(model_config.get(YAMLKeyword.obfuscate, 1)),
model_graph_format == "code",
model_data_format == "code")
if model_graph_format == ModelFormat.file:
sh.mv("-f",
'%s/file/%s.pb' % (model_codegen_dir, model_name),
'%s/model/%s.pb' % (model_codegen_dir, model_name),
model_output_dir)
sh.mv("-f",
'%s/file/%s.data' % (model_codegen_dir, model_name),
'%s/model/%s.data' % (model_codegen_dir, model_name),
model_output_dir)
sh.rm("-rf", '%s/code' % model_codegen_dir)
else:
if not embed_model_data:
sh.mv("-f",
'%s/file/%s.data' % (model_codegen_dir, model_name),
'%s/model/%s.data' % (model_codegen_dir, model_name),
model_output_dir)
sh.rm('%s/code/tensor_data.cc' % model_codegen_dir)
sh.cp("-f", glob.glob("mace/codegen/models/*/code/*.h"),
model_header_dir)
......
# MACE Build and Test Tools
## Clear Workspace
Before you do anything, clear the workspace used by the build and test process.
```bash
tools/clear_workspace.sh
```
## Build Engine
Please make sure you have CMake installed.
```bash
RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh
```
which generates libraries in `build/cmake-build/armeabi-v7a`; you can use either the static libraries or the `libmace.so` shared library.
You can also build for other target abis.
The default build command builds an engine that runs on CPU. You can modify the CMake file to support other hardware, or you can simply set an environment variable before building.
```bash
RUNTIME: GPU/HEXAGON/HTA/APU
```
## Model Conversion
When you have prepared your model, the first thing to do is write a model config.
```yaml
models:
mobilenet_v1:
platform: tensorflow
model_file_path: https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb
model_sha256_checksum: 71b10f540ece33c49a7b51f5d4095fc9bd78ce46ebf0300487b2ee23d71294e6
subgraphs:
- input_tensors:
- input
input_shapes:
- 1,224,224,3
output_tensors:
- MobilenetV1/Predictions/Reshape_1
output_shapes:
- 1,1001
runtime: gpu
```
The following steps generate output to `build` directory which is the default build and test workspace.
Suppose you have the model config in `../mace-models/mobilenet-v1/mobilenet-v1.yml`. Then run
```bash
python tools/python/convert.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml
```
which generates 4 files in `build/mobilenet_v1/model/`
```
├── mobilenet_v1.pb (model file)
├── mobilenet_v1.data (param file)
├── mobilenet_v1_index.html (visualization page, you can open it in browser)
└── mobilenet_v1.pb_txt (model text file, which can be for debug use)
```
## Model Test and Benchmark
After model is converted, simply run
```bash
python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --validate
```
Or benchmark the model
```bash
python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --benchmark
```
It will test your model on the device configured in the model config (`runtime`).
You can also test on another device by specifying `--runtime=cpu (dsp/hta/apu)` if you have previously built the engine for that device.
The log will be shown if `--vlog_level=2` is specified.
## Encrypt Model (optional)
Model can be encrypted by obfuscation.
```bash
python tools/python/encrypt.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml
```
It will override `mobilenet_v1.pb` and `mobilenet_v1.data`.
If you want to compile the model into a library, you should use the options `--gencode_model --gencode_param` to generate model code, i.e.,
```bash
python tools/python/encrypt.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --gencode_model --gencode_param
```
It will generate model code into `mace/codegen/models` and also generate a helper function `CreateMaceEngineFromCode` in `mace/codegen/engine/mace_engine_factory.h` by which you can create an engine with models built in it.
After that you can rebuild the engine.
```bash
RUNTIME=GPU RUNMODE=code bash tools/cmake/cmake-build-armeabi-v7a.sh
```
`RUNMODE=code` means you compile and link model library with MACE engine.
When you test the model in code format, you should specify it in the script as follows.
```bash
python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --gencode_model --gencode_param
```
Of course you can generate model code only, and use parameter file.
## Precompile OpenCL (optional)
After you test model on GPU, it will generate compiled OpenCL binary file automatically in `build/mobilenet_v1/opencl` directory.
```bash
└── mobilenet_v1_compiled_opencl_kernel.MIX2S.sdm845.bin
```
It specifies your test platform model and SoC. You can use it in production to accelerate the initialization.
## Auto Tune OpenCL kernels (optional)
MACE can auto tune OpenCL kernels used by models. You can specify `--tune` option.
```bash
python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --tune
```
It will generate OpenCL tuned parameter binary file in `build/mobilenet_v1/opencl` directory.
```bash
└── mobilenet_v1_tuned_opencl_parameter.MIX2S.sdm845.bin
```
It specifies your test platform model and SoC. You can use it in production to reduce latency on GPU.
## Multi Model Support (optional)
If multiple models are configured in the config file, then after you test them, more than one tuned parameter file will be generated.
Then you need to merge them together.
```bash
python tools/python/gen_opencl.py
```
After that, it will generate one set of files into `build/opencl` directory.
```bash
├── compiled_opencl_kernel.bin
└── tuned_opencl_parameter.bin
```
You can also generate code into the engine by specifying `--gencode`, after which you should rebuild the engine.
......@@ -12,63 +12,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# python tools/python/convert.py \
# --config ../mace-models/mobilenet-v2/mobilenet-v2.yml
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import numpy as np
import shutil
import tempfile
from utils import config_parser
from utils.config_parser import DataFormat
from utils.config_parser import DeviceType
from utils.config_parser import Platform
from utils import util
from utils.util import mace_check
from utils.config_parser import normalize_model_config
from utils.config_parser import ModelKeys
from py_proto import mace_pb2
from transform import base_converter as cvt
from transform import transformer
from visualize import visualize_model
device_type_map = {'cpu': cvt.DeviceType.CPU.value,
'gpu': cvt.DeviceType.GPU.value,
'dsp': cvt.DeviceType.HEXAGON.value,
'hta': cvt.DeviceType.HTA.value,
'apu': cvt.DeviceType.APU.value,
'cpu+gpu': cvt.DeviceType.CPU.value}
data_format_map = {
'NONE': cvt.DataFormat.NONE,
'NHWC': cvt.DataFormat.NHWC,
'NCHW': cvt.DataFormat.NCHW,
'OIHW': cvt.DataFormat.OIHW,
}
data_type_map = {
'float32': mace_pb2.DT_FLOAT,
'int32': mace_pb2.DT_INT32,
}
def parse_data_type(data_type, quantize):
if quantize or data_type == 'fp32_fp32':
return mace_pb2.DT_FLOAT
else:
return mace_pb2.DT_HALF
def split_shape(shape):
if shape.strip() == "":
return []
else:
return shape.split(',')
def parse_int_array_from_str(ints_str):
return [int(i) for i in split_shape(ints_str)]
def parse_float_array_from_str(floats_str):
return [float(i) for i in floats_str.split(',')]
def transpose_shape(shape, dst_order):
t_shape = [0] * len(shape)
......@@ -77,52 +45,32 @@ def transpose_shape(shape, dst_order):
return t_shape
def to_list(x):
if isinstance(x, list):
return x
else:
return [x]
def separate_params(mace_model):
tensors = mace_model.tensors
params = mace_pb2.NetDef()
params.tensors.extend(tensors)
model = mace_model
del model.tensors[:]
return model, params
def convert(conf, output):
if not os.path.exists(output):
os.mkdir(output)
for model_name, model_conf in conf["models"].items():
model_output = output + "/" + model_name
if not os.path.exists(model_output):
os.mkdir(model_output)
subgraph = model_conf["subgraphs"][0]
del model_conf["subgraphs"]
model_conf.update(subgraph)
model_file = util.download_or_get_file(model_conf["model_file_path"],
model_conf[
"model_sha256_checksum"],
model_output)
model_conf["model_file_path"] = model_file
if "weight_file_path" in model_conf:
weight_file = util.download_or_get_file(
model_conf["weight_file_path"],
model_conf["weight_sha256_checksum"], model_output)
model_conf["weight_file_path"] = weight_file
model_output = output + "/" + model_name + "/model"
org_model_dir = output + "/" + model_name + "/org_model"
util.mkdir_p(model_output)
util.mkdir_p(org_model_dir)
model_conf = normalize_model_config(model_conf)
model_file = util.download_or_get_model(
model_conf[ModelKeys.model_file_path], # noqa
model_conf[ModelKeys.model_sha256_checksum], # noqa
output + "/" + model_name + "/org_model")
model_conf[ModelKeys.model_file_path] = model_file
if ModelKeys.weight_file_path in model_conf:
weight_file = util.download_or_get_model(
model_conf[ModelKeys.weight_file_path],
model_conf[ModelKeys.weight_sha256_checksum], "/tmp/")
model_conf[ModelKeys.weight_file_path] = weight_file
# TODO: remove the following after quantize tool is made
if "quantize_range_file" in model_conf:
range_file = util.download_or_get_file(
model_conf["quantize_range_file"],
if ModelKeys.quantize_range_file in model_conf:
range_file = util.download_or_get_model(
model_conf[ModelKeys.quantize_range_file],
"", model_output)
model_conf["quantize_range_file"] = range_file
model_conf[ModelKeys.quantize_range_file] = range_file
mace_model = convert_model(model_conf)
......@@ -132,7 +80,7 @@ def convert(conf, output):
model_output)
visualizer.save_html()
except: # noqa
print("Failed to visualize model:", sys.exc_info()[0])
print("Failed to visualize model:", sys.exc_info())
model, params = merge_params(mace_model)
......@@ -147,115 +95,64 @@ def convert(conf, output):
def convert_model(conf):
print(conf)
platform = conf["platform"]
mace_check(platform in ['tensorflow', 'caffe', 'onnx'],
"platform not supported")
runtime = conf["runtime"]
mace_check(
runtime in ['cpu', 'gpu', 'dsp', 'hta', 'apu', 'cpu+gpu'],
"runtime not supported")
option = cvt.ConverterOption()
if "graph_optimize_options" in conf:
option.transformer_option = conf["graph_optimize_options"]
option.winograd = conf.get("winograd", 0)
option.quantize = bool(conf.get("quantize", 0))
option.quantize_large_weights = bool(conf.get("quantize_large_weights", 0))
option.quantize_range_file = conf.get("quantize_range_file", "")
option.change_concat_ranges = bool(conf.get("change_concat_ranges", 0))
option.cl_mem_type = conf.get("cl_mem_type", "image")
option.device = device_type_map[conf.get("runtime", "cpu")]
option.data_type = parse_data_type(conf.get("data_type", "fp16_fp32"),
option.quantize)
input_tensors = to_list(conf["input_tensors"])
input_shapes = [parse_int_array_from_str(shape) for shape in
to_list(conf["input_shapes"])]
mace_check(len(input_tensors) == len(input_shapes),
"input node count and shape count do not match")
input_count = len(input_tensors)
input_data_types = [data_type_map[dt] for dt in
to_list(conf.get("input_data_types",
["float32"]))]
if len(input_data_types) == 1 and input_count > 1:
input_data_types = [input_data_types[0]] * input_count
mace_check(len(input_data_types) == input_count,
"the number of input_data_types should be "
"the same as input tensors")
input_data_formats = [data_format_map[df] for df in
to_list(conf.get("input_data_formats",
["NHWC"]))]
if len(input_data_formats) == 1 and input_count > 1:
input_data_formats = [input_data_formats[0]] * input_count
mace_check(len(input_data_formats) == input_count,
"the number of input_data_formats should be "
"the same as input tensors")
input_ranges = [parse_float_array_from_str(r) for r in
to_list(conf.get("input_ranges",
["-1.0,1.0"]))]
if len(input_ranges) == 1 and input_count > 1:
input_ranges = [input_ranges[0]] * input_count
mace_check(len(input_ranges) == input_count,
"the number of input_ranges should be "
"the same as input tensors")
for i in range(len(input_tensors)):
if ModelKeys.graph_optimize_options in conf:
option.transformer_option = conf[ModelKeys.graph_optimize_options]
if ModelKeys.winograd in conf:
option.winograd = conf[ModelKeys.winograd]
if ModelKeys.quantize in conf:
option.quantize = conf[ModelKeys.quantize]
if ModelKeys.quantize_large_weights in conf:
option.quantize_large_weights = conf[ModelKeys.quantize_large_weights]
if ModelKeys.quantize_range_file in conf:
option.quantize_range_file = conf[ModelKeys.quantize_range_file]
if ModelKeys.change_concat_ranges in conf:
option.change_concat_ranges = conf[ModelKeys.change_concat_ranges]
if ModelKeys.cl_mem_type in conf:
option.cl_mem_type = conf[ModelKeys.cl_mem_type]
if ModelKeys.runtime in conf:
option.device = conf[ModelKeys.runtime]
if option.device == DeviceType.CPU_GPU:
# when convert, cpu and gpu share the same model
option.device = DeviceType.CPU
# we don't need `value`, but to be consistent with legacy code
# used by `base_converter`
option.device = option.device.value
option.data_type = conf[ModelKeys.data_types]
for i in range(len(conf[ModelKeys.input_tensors])):
input_node = cvt.NodeInfo()
input_node.name = input_tensors[i]
input_node.shape = input_shapes[i]
input_node.data_type = input_data_types[i]
input_node.data_format = input_data_formats[i]
if (input_node.data_format == cvt.DataFormat.NCHW and len(
input_node.shape) == 4):
input_node.name = conf[ModelKeys.input_tensors][i]
input_node.shape = conf[ModelKeys.input_shapes][i]
input_node.data_type = conf[ModelKeys.input_data_types][i]
input_node.data_format = conf[ModelKeys.input_data_formats][i]
if (input_node.data_format == DataFormat.NCHW and len(
input_node.shape) == 4):
input_node.shape = transpose_shape(input_node.shape, [0, 2, 3, 1])
input_node.data_format = cvt.DataFormat.NHWC
input_node.range = input_ranges[i]
input_node.data_format = DataFormat.NHWC
input_node.range = conf[ModelKeys.input_ranges][i]
option.add_input_node(input_node)
output_tensors = to_list(conf["output_tensors"])
output_shapes = [parse_int_array_from_str(shape) for shape in
to_list(conf["output_shapes"])]
mace_check(len(output_tensors) == len(output_shapes),
"output node count and shape count do not match")
output_count = len(output_tensors)
output_data_types = [data_type_map[dt] for dt in
to_list(conf.get("output_data_types",
["float32"]))]
if len(output_data_types) == 1 and output_count > 1:
output_data_types = [output_data_types[0]] * output_count
mace_check(len(output_data_types) == output_count,
"the number of output_data_types should be "
"the same as output tensors")
output_data_formats = [data_format_map[df] for df in
to_list(conf.get("output_data_formats",
["NHWC"]))]
if len(output_data_formats) == 1 and output_count > 1:
output_data_formats = [output_data_formats[0]] * output_count
mace_check(len(output_data_formats) == output_count,
"the number of output_data_formats should be "
"the same as output tensors")
for i in range(len(output_tensors)):
for i in range(len(conf[ModelKeys.output_tensors])):
output_node = cvt.NodeInfo()
output_node.name = output_tensors[i]
output_node.shape = output_shapes[i]
output_node.data_type = output_data_types[i]
output_node.data_format = output_data_formats[i]
if output_node.data_format == cvt.DataFormat.NCHW and len(
output_node.name = conf[ModelKeys.output_tensors][i]
output_node.shape = conf[ModelKeys.output_shapes][i]
output_node.data_type = conf[ModelKeys.output_data_types][i]
output_node.data_format = conf[ModelKeys.output_data_formats][i]
if output_node.data_format == DataFormat.NCHW and len(
output_node.shape) == 4:
output_node.shape = transpose_shape(output_node.shape,
[0, 2, 3, 1])
output_node.data_format = cvt.DataFormat.NHWC
output_node.data_format = DataFormat.NHWC
option.add_output_node(output_node)
if "check_tensors" in conf:
check_tensors = to_list(conf["check_tensors"])
check_tensors_shapes = [parse_int_array_from_str(shape) for shape in
to_list(conf["check_shapes"])]
mace_check(len(check_tensors) == len(check_tensors_shapes),
"check tensors count and shape count do not match.")
for i in range(len(check_tensors)):
if ModelKeys.check_tensors in conf:
for i in range(len(conf[ModelKeys.check_tensors])):
check_node = cvt.NodeInfo()
check_node.name = check_tensors[i]
check_node.shape = check_tensors_shapes[i]
check_node.name = conf[ModelKeys.check_tensors][i]
check_node.shape = conf[ModelKeys.check_shapes][i]
option.add_check_node(check_node)
else:
option.check_nodes = option.output_nodes
......@@ -263,17 +160,17 @@ def convert_model(conf):
option.build()
print("Transform model to one that can better run on device")
if platform == 'tensorflow':
platform = conf[ModelKeys.platform]
if platform == Platform.TENSORFLOW:
from transform import tensorflow_converter
converter = tensorflow_converter.TensorflowConverter(
option, conf["model_file_path"])
elif platform == 'caffe':
elif platform == Platform.CAFFE:
from transform import caffe_converter
converter = caffe_converter.CaffeConverter(option,
conf["model_file_path"],
conf["weight_file_path"])
elif platform == 'onnx':
elif platform == Platform.ONNX:
from transform import onnx_converter
converter = onnx_converter.OnnxConverter(option,
conf["model_file_path"])
......@@ -285,14 +182,15 @@ def convert_model(conf):
option, output_graph_def)
output_graph_def, quantize_activation_info = mace_transformer.run()
if option.device in [cvt.DeviceType.HEXAGON.value,
cvt.DeviceType.HTA.value]:
runtime = conf[ModelKeys.runtime]
if runtime in [DeviceType.HEXAGON,
DeviceType.HTA]:
from transform import hexagon_converter
converter = hexagon_converter.HexagonConverter(
option, output_graph_def, quantize_activation_info)
output_graph_def = converter.run()
elif runtime == 'apu':
mace_check(platform == "tensorflow",
elif runtime == DeviceType.APU:
mace_check(platform == Platform.TENSORFLOW,
"apu only support model from tensorflow")
from transform import apu_converter
converter = apu_converter.ApuConverter(
......@@ -366,7 +264,7 @@ def parse_args():
parser.add_argument(
'--output',
type=str,
default=".",
default="build",
help="output dir")
flgs, _ = parser.parse_known_args()
return flgs
......
......@@ -22,10 +22,13 @@ import os
import hashlib
from jinja2 import Environment, FileSystemLoader
from py_proto import mace_pb2
from utils import device
from utils import util
from transform import base_converter as cvt
from utils.util import mace_check
from utils.util import MaceLogger
from utils import config_parser
from utils.config_parser import CPP_KEYWORDS
from utils.config_parser import ModelKeys
GENERATED_NAME = set()
......@@ -99,9 +102,8 @@ def obfuscate_name(model):
def save_model_to_code(namespace, model, params, model_checksum,
params_checksum, device, output):
if not os.path.exists(output):
os.mkdir(output)
params_checksum, device, output, gencode_params):
util.mkdir_p(output)
cwd = os.path.dirname(__file__)
j2_env = Environment(
loader=FileSystemLoader(cwd + "/template"), trim_blocks=True)
......@@ -120,24 +122,18 @@ def save_model_to_code(namespace, model, params, model_checksum,
f.write(source)
counter += 1
template_name = "tensor_data.jinja2"
source = j2_env.get_template(template_name).render(
tag=namespace,
model_data_size=len(params),
model_data=params)
with open(output + "/tensor_data.cc", "w") as f:
f.write(source)
if gencode_params:
template_name = "tensor_data.jinja2"
source = j2_env.get_template(template_name).render(
tag=namespace,
model_data_size=len(params),
model_data=params)
with open(output + "/tensor_data.cc", "w") as f:
f.write(source)
template_name = "operator.jinja2"
counter = 0
op_size = len(model.op)
try:
device = cvt.DeviceType[device.upper()]
except: # noqa
if device.upper() == "DSP":
device = cvt.DeviceType.HEXAGON
else:
device = cvt.DeviceType.CPU
for start in range(0, op_size, 10):
source = j2_env.get_template(template_name).render(
......@@ -170,8 +166,7 @@ def save_model_to_code(namespace, model, params, model_checksum,
def save_model_to_file(model_name, model, params, output):
if not os.path.exists(output):
os.mkdir(output)
util.mkdir_p(output)
with open(output + "/" + model_name + ".pb", "wb") as f:
f.write(model.SerializeToString())
with open(output + "/" + model_name + ".data", "wb") as f:
......@@ -179,7 +174,7 @@ def save_model_to_file(model_name, model, params, output):
def encrypt(model_name, model_file, params_file, device, output,
is_obfuscate=False):
is_obfuscate=False, gencode_model=False, gencode_params=False):
model_checksum = util.file_checksum(model_file)
params_checksum = util.file_checksum(params_file)
......@@ -191,9 +186,11 @@ def encrypt(model_name, model_file, params_file, device, output,
if is_obfuscate:
obfuscate_name(model)
save_model_to_file(model_name, model, params, output + "/file/")
save_model_to_code(model_name, model, params, model_checksum,
params_checksum, device, output + "/code/")
save_model_to_file(model_name, model, params, output)
if gencode_model:
save_model_to_code(model_name, model, params, model_checksum,
params_checksum, device, output + "/code/",
gencode_params)
def parse_args():
......@@ -216,22 +213,89 @@ def parse_args():
default='cpu',
help="cpu/gpu/hexagon/hta/apu")
parser.add_argument(
'--output',
'--config',
type=str,
default=".",
help="output dir")
help="model config")
parser.add_argument(
"--obfuscate",
"--no_obfuscate",
action="store_true",
help="obfuscate model names")
parser.add_argument(
"--gencode_model",
action="store_true",
help="generate model code")
parser.add_argument(
"--gencode_param",
action="store_true",
help="generate params code")
parser.add_argument(
'--output',
type=str,
default="build",
help="output dir")
flgs, _ = parser.parse_known_args()
mace_check(flags.model_name not in CPP_KEYWORDS, "model name cannot be cpp"
"keywords")
mace_check(flgs.model_name not in CPP_KEYWORDS, "model name cannot be cpp"
"keywords")
return flgs
def gen_mace_engine_factory(model_name, embed_model_data, output):
    """Render mace_engine_factory.h for the given model tags.

    Args:
        model_name: iterable of model tag strings (converted to a list
            before rendering).
        embed_model_data: truthy when model weights are embedded as code,
            which switches the template to the LoadModelData() path.
        output: directory that will receive mace_engine_factory.h
            (created if missing).
    """
    util.mkdir_p(output)
    template_dir = os.path.dirname(__file__) + "/template"
    env = Environment(
        loader=FileSystemLoader(template_dir), trim_blocks=True)
    tags = list(model_name)
    header = env.get_template('mace_engine_factory.h.jinja2').render(
        model_tags=tags,
        embed_model_data=embed_model_data,
    )
    with open(output + '/mace_engine_factory.h', "w") as out:
        out.write(header)
if __name__ == '__main__':
flags = parse_args()
encrypt(flags.model_name, flags.model_file, flags.params_file,
flags.device, flags.output, flags.obfuscate)
codegen_dir = "mace/codegen/models"
device.execute("rm -rf %s/*" % codegen_dir)
models = []
if flags.config:
conf = config_parser.parse(flags.config)
for name, model_conf in conf["models"].items():
model_conf = config_parser.normalize_model_config(model_conf)
if not flags.model_name or name == flags.model_name:
MaceLogger.info("Encrypt model %s" % name)
encrypt(name,
"build/%s/model/%s.pb" % (name, name),
"build/%s/model/%s.data" % (name, name),
model_conf[ModelKeys.runtime],
codegen_dir + "/" + name,
not flags.no_obfuscate,
flags.gencode_model,
flags.gencode_param)
models.append(name)
os.rename("%s/%s/%s.pb" % (codegen_dir, name, name),
"build/%s/model/%s.pb" % (name, name))
os.rename("%s/%s/%s.data" % (codegen_dir, name, name),
"build/%s/model/%s.data" % (name, name))
else:
device_type = config_parser.parse_device_type(flags.device)
encrypt(flags.model_name, flags.model_file, flags.params_file,
device_type, codegen_dir, not flags.no_obfuscate,
flags.gencode_model, flags.gencode_param)
models.append(flags.model_name)
os.rename(
"%s/%s/%s.pb" % (codegen_dir, flags.model_name, flags.model_name),
"build/%s/model/%s.pb" % (flags.model_name, flags.model_name))
os.rename(
"%s/%s/%s.data" % (codegen_dir, flags.model_name,
flags.model_name),
"build/%s/model/%s.data" % (flags.model_name, flags.model_name))
if flags.gencode_model:
gen_mace_engine_factory(models, flags.gencode_param,
"mace/codegen/engine")
# Copyright 2019 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import jinja2
import os
import struct
import numpy as np
from utils import util
from utils.util import MaceLogger
from utils.util import mace_check
def generate_opencl_code(binary_file_name, load_func_name, size_func_name,
                         output_path):
    """Embed an OpenCL binary file into a generated C++ source file.

    Reads `binary_file_name` as raw bytes (empty array when the file does
    not exist) and renders template/file_binary.cc.jinja2, which defines a
    loader function returning the byte array and a size function.

    Args:
        binary_file_name: path to the binary blob to embed; may not exist.
        load_func_name: name of the generated data-accessor function.
        size_func_name: name of the generated size function.
        output_path: destination .cc file; any existing file is replaced.
    """
    if os.path.exists(binary_file_name):
        data = np.fromfile(binary_file_name, dtype=np.uint8)
    else:
        data = []
    template_dir = os.path.dirname(__file__) + "/template"
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(template_dir))
    source = env.get_template('file_binary.cc.jinja2').render(
        data=data,
        data_size=len(data),
        load_func_name=load_func_name,
        size_func_name=size_func_name)
    # Remove then recreate so a stale generated file never survives.
    if os.path.exists(output_path):
        os.remove(output_path)
    with open(output_path, "w") as out:
        out.write(source)
def merge_opencl_binaries(opencl_binaries,
                          output_file):
    # Merge several precompiled OpenCL kernel binaries into one file.
    #
    # On-disk format (both input and output):
    #   [uint64 entry_count] then per entry:
    #   [int32 key_size][key bytes][int32 value_size][value bytes]
    #
    # Later files overwrite earlier values for duplicate keys, except the
    # platform-info entry, which must be identical across all inputs.
    #
    # NOTE(review): `key` comes from struct.unpack, so under Python 3 it is
    # bytes while `platform_info_key` is str — the equality below only
    # matches under Python 2. Confirm intended interpreter version.
    platform_info_key = 'mace_opencl_precompiled_platform_info_key'
    kvs = {}
    for binary in opencl_binaries:
        if not os.path.exists(binary):
            # Missing inputs are skipped with a warning, not an error.
            MaceLogger.warning("OpenCL bin %s not found" % binary)
            continue
        with open(binary, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)
        # Manual cursor-based parse of the serialized key/value records.
        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
        for _ in range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(
                str(key_size) + "s", binary_array[idx:idx + key_size])
            idx += key_size
            value_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            if key == platform_info_key and key in kvs:
                # All merged binaries must be compiled for the same
                # OpenCL platform; abort on mismatch.
                mace_check(
                    (kvs[key] == binary_array[idx:idx + value_size]).all(),
                    "There exists more than one OpenCL version for models:"
                    " %s vs %s " %
                    (kvs[key], binary_array[idx:idx + value_size]))
            else:
                kvs[key] = binary_array[idx:idx + value_size]
            idx += value_size
    # Re-serialize the merged map in the same format.
    output_byte_array = bytearray()
    data_size = len(kvs)
    output_byte_array.extend(struct.pack("Q", data_size))
    for key, value in kvs.items():
        key_size = len(key)
        output_byte_array.extend(struct.pack("i", key_size))
        output_byte_array.extend(struct.pack(str(key_size) + "s", key))
        value_size = len(value)
        output_byte_array.extend(struct.pack("i", value_size))
        output_byte_array.extend(value)
    np.array(output_byte_array).tofile(output_file)
def merge_opencl_parameters(params_files,
                            output_file):
    # Merge tuned OpenCL parameter files into one file.
    #
    # Same serialized key/value format as merge_opencl_binaries:
    #   [uint64 entry_count] then per entry:
    #   [int32 key_size][key bytes][int32 value_size][value bytes]
    # Unlike the binary merge, there is no platform-consistency check:
    # duplicate keys are simply overwritten by later files.
    kvs = {}
    for params in params_files:
        if not os.path.exists(params):
            # Missing inputs are skipped with a warning, not an error.
            MaceLogger.warning("Tune param %s not found" % params)
            continue
        with open(params, "rb") as f:
            binary_array = np.fromfile(f, dtype=np.uint8)
        # Cursor-based parse of the serialized records.
        idx = 0
        size, = struct.unpack("Q", binary_array[idx:idx + 8])
        idx += 8
        for _ in range(size):
            key_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            key, = struct.unpack(
                str(key_size) + "s", binary_array[idx:idx + key_size])
            idx += key_size
            value_size, = struct.unpack("i", binary_array[idx:idx + 4])
            idx += 4
            kvs[key] = binary_array[idx:idx + value_size]
            idx += value_size
    # Re-serialize the merged map in the same format.
    output_byte_array = bytearray()
    data_size = len(kvs)
    output_byte_array.extend(struct.pack("Q", data_size))
    for key, value in kvs.items():
        key_size = len(key)
        output_byte_array.extend(struct.pack("i", key_size))
        output_byte_array.extend(struct.pack(str(key_size) + "s", key))
        value_size = len(value)
        output_byte_array.extend(struct.pack("i", value_size))
        output_byte_array.extend(value)
    np.array(output_byte_array).tofile(output_file)
def parse_args():
    """Parse command-line flags for the OpenCL merge tool.

    Returns the parsed namespace; unrecognized arguments are ignored.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--binary_files',
                            type=str,
                            default="",
                            help="opencl binary files")
    arg_parser.add_argument('--tuning_files',
                            type=str,
                            default="",
                            help="tuning params files")
    arg_parser.add_argument('--output',
                            type=str,
                            default="build",
                            help="output dir")
    arg_parser.add_argument("--gencode",
                            action="store_true",
                            help="generate code")
    known_flags, _unused = arg_parser.parse_known_args()
    return known_flags
if __name__ == '__main__':
    # Merge per-device OpenCL compiled kernels and tuned parameters into
    # single files under <output>/opencl, optionally generating C++ code
    # that embeds them (for builds without external resource files).
    flags = parse_args()
    util.mkdir_p(flags.output)
    opencl_binary_files = []
    if flags.binary_files:
        opencl_binary_files = flags.binary_files.split(",")
    opencl_tuning_files = []
    if flags.tuning_files:
        opencl_tuning_files = flags.tuning_files.split(",")

    compiled_opencl_kernel_prefix = "compiled_opencl_kernel"
    tuned_opencl_parameter_prefix = "tuned_opencl_parameter"
    if not opencl_binary_files and not opencl_tuning_files:
        # No explicit inputs: scan the build tree for files produced by
        # previous runs, recognized by their name prefixes.
        for root, dirs, files in os.walk("build", topdown=False):
            for name in files:
                if compiled_opencl_kernel_prefix in name:
                    opencl_binary_files.append(os.path.join(root, name))
                elif tuned_opencl_parameter_prefix in name:
                    opencl_tuning_files.append(os.path.join(root, name))

    opencl_dir = flags.output + "/opencl"
    util.mkdir_p(opencl_dir)
    merged_opencl_bin_file = "%s/%s.bin" % (opencl_dir,
                                            compiled_opencl_kernel_prefix)
    merged_opencl_tuning_file = "%s/%s.bin" % (opencl_dir,
                                               tuned_opencl_parameter_prefix)
    merge_opencl_binaries(opencl_binary_files,
                          merged_opencl_bin_file)
    if flags.gencode:
        util.mkdir_p('mace/codegen/opencl')
        generate_opencl_code(merged_opencl_bin_file,
                             "LoadOpenCLBinary",
                             "OpenCLBinarySize",
                             "mace/codegen/opencl/opencl_binary.cc")
    # Fix: tuning files were previously merged with merge_opencl_binaries,
    # leaving merge_opencl_parameters dead and wrongly applying the
    # platform-info consistency check to tuning data.
    merge_opencl_parameters(opencl_tuning_files,
                            merged_opencl_tuning_file)
    if flags.gencode:
        # Fix: both function names were "LoadOpenCLParameter", which would
        # generate two C++ functions differing only in return type (a
        # compile error). Use a distinct size accessor, mirroring
        # LoadOpenCLBinary/OpenCLBinarySize above.
        generate_opencl_code(merged_opencl_tuning_file,
                             "LoadOpenCLParameter",
                             "OpenCLParameterSize",
                             "mace/codegen/opencl/opencl_parameter.cc")
......@@ -18,12 +18,17 @@ from __future__ import print_function
import os
from utils import device
from utils.util import MaceLogger
cwd = os.path.dirname(__file__)
# TODO: Remove bazel deps
device.execute("bazel build //mace/proto:mace_py")
device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd)
try:
device.execute("bazel build //mace/proto:mace_py")
device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd)
device.execute("bazel build //third_party/caffe:caffe_py")
device.execute("cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd)
device.execute("bazel build //third_party/caffe:caffe_py")
device.execute(
"cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd)
except: # noqa
MaceLogger.warning("No bazel, use cmake.")
# Copyright 2019 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import tempfile
import shutil
import numpy as np
from py_proto import mace_pb2
from utils import util
from utils import device
from utils import config_parser
from utils.config_parser import DeviceType
from utils.target import Target
from utils.config_parser import ModelKeys
from utils.util import MaceLogger
from utils.util import mace_check
import run_target
import validate
"""
Tool for mace_run:
python tools/python/run_model.py \
--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --build --validate
python tools/python/run_model.py \
--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --benchmark
python tools/python/run_model.py \
--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --runtime=cpu
"""
def join_2d_array(xs):
    """Serialize a 2-D sequence as colon-separated rows of
    comma-separated values, e.g. [[1, 2], [3]] -> "1,2:3"."""
    rows = []
    for row in xs:
        rows.append(",".join(str(item) for item in row))
    return ":".join(rows)
def build_engine(flags):
    """Build MACE for the target ABI via the per-ABI CMake shell script.

    Configuration is passed to the script through environment variables:
    BUILD_DIR (output directory), RUNTIME (device type name, when
    --runtime was given) and RUNMODE=code (when the model graph is
    compiled into the binary).
    """
    script_dir = os.path.abspath(os.path.dirname(__file__))
    cmake_shell = script_dir + "/../cmake/cmake-build-%s.sh" % flags.target_abi
    os.environ["BUILD_DIR"] = flags.build_dir + "/" + flags.target_abi
    if flags.runtime:
        runtime_type = config_parser.parse_device_type(flags.runtime)
        os.environ["RUNTIME"] = runtime_type.name
    if flags.gencode_model:
        os.environ["RUNMODE"] = "code"
    device.execute("bash " + cmake_shell)
def run_models(flags, args):
    """Run the configured models on every device matching the target
    ABI/SoC selection, optionally loading extra devices from a YAML file.
    """
    if flags.device_conf:
        conf = config_parser.parse_device_info(flags.device_conf)
        device.ArmLinuxDevice.set_devices(conf)
    selected = device.choose_devices(flags.target_abi, flags.target_socs)
    MaceLogger.info("Run on devices: %s" % selected)
    for dev_id in selected:
        # NOTE(review): `crete_device` looks like a typo for
        # `create_device`, but it is the name exported by utils.device —
        # keep in sync with that module.
        dev = device.crete_device(flags.target_abi, dev_id)
        run_models_for_device(flags, args, dev)
def run_models_for_device(flags, args, dev):
    """Run every model from the YAML config on one device; when
    --model_name is set, only that model is run."""
    conf = config_parser.parse(flags.config)
    for model_name, raw_conf in conf["models"].items():
        if flags.model_name and model_name != flags.model_name:
            continue
        MaceLogger.info("Run model %s" % model_name)
        normalized_conf = config_parser.normalize_model_config(raw_conf)
        run_model_for_device(flags, args, dev, model_name, normalized_conf)
def run_model_for_device(flags, args, dev, model_name, model_conf):
    # Deploy one model to one device, run mace_run there, and optionally
    # pull back GPU artifacts and validate outputs against the original
    # framework. `args` are extra CLI options forwarded to mace_run.
    runtime = flags.runtime
    target_abi = flags.target_abi
    install_dir = run_target.default_install_dir(target_abi) + "/" + model_name
    sysdir = install_dir + "/interior"
    dev.mkdir(sysdir)

    # Resolve runtime: CLI flag wins; otherwise take the model's
    # configured runtime, collapsing CPU_GPU to GPU.
    if not runtime:
        runtime = model_conf[ModelKeys.runtime]
        if runtime == DeviceType.CPU_GPU:
            runtime = DeviceType.GPU
    else:
        runtime = config_parser.parse_device_type(runtime)

    # install models to devices
    workdir = flags.output + "/" + model_name
    model_file = model_name + ".pb"
    model_data_file = model_name + ".data"
    model_path = workdir + "/model/" + model_file
    model_data_path = workdir + "/model/" + model_data_file
    if os.path.exists(model_path) and os.path.exists(model_data_path):
        dev.install(Target(model_path), install_dir)
        dev.install(Target(model_data_path), install_dir)
    else:
        MaceLogger.warning("No models exist in %s, use --model_file and"
                           " --model_data_file specified in args" % model_path)

    # When check_tensors are configured, validate against those tensors
    # instead of the model's normal outputs (mutates model_conf in place).
    if ModelKeys.check_tensors in model_conf:
        model_conf[ModelKeys.output_tensors] = model_conf[
            ModelKeys.check_tensors]
        model_conf[ModelKeys.output_shapes] = model_conf[
            ModelKeys.check_shapes]

    # Empty paths tell mace_run the graph/params are compiled into the
    # binary (gencode) rather than loaded from files on the device.
    model_file_path = ""
    if not flags.gencode_model:
        model_file_path = install_dir + "/" + model_file
    model_data_file_path = ""
    if not flags.gencode_param:
        model_data_file_path = install_dir + "/" + model_data_file

    # Assemble mace_run command-line options from the model config.
    model_args = {"model_name": model_name,
                  "model_file": model_file_path,
                  "model_data_file": model_data_file_path,
                  "input_node": ",".join(
                      model_conf[ModelKeys.input_tensors]),
                  "input_shape": join_2d_array(
                      model_conf[ModelKeys.input_shapes]),
                  "output_node": ",".join(
                      model_conf[ModelKeys.output_tensors]),
                  "output_shape": join_2d_array(
                      model_conf[ModelKeys.output_shapes]),
                  "input_data_format": ",".join(
                      [df.name for df in
                       model_conf[ModelKeys.input_data_formats]]),
                  "output_data_format": ",".join(
                      [df.name for df in
                       model_conf[ModelKeys.output_data_formats]]),
                  "device": runtime.name
                  }
    opts = ["--%s=%s" % (arg_key, arg_val) for arg_key, arg_val in
            model_args.items()] + args
    should_generate_data = (flags.validate
                            or flags.tune or "--benchmark" in opts)

    if should_generate_data:
        # Stage input data locally, then push it to the device.
        tmpdirname = tempfile.mkdtemp()
        input_file_prefix = tmpdirname + "/" + model_name
        if ModelKeys.validation_inputs_data in model_conf:
            # Use configured validation inputs (downloaded if remote).
            input_tensor = model_conf[ModelKeys.input_tensors]
            input_data = model_conf[ModelKeys.validation_inputs_data]
            # NOTE(review): the message below is missing a closing paren —
            # should read "len(validate_data)".
            mace_check(len(input_tensor) == len(input_data),
                       "len(input_tensor) != len(validate_data")
            for i in range(len(input_tensor)):
                util.download_or_get_file(
                    model_conf[ModelKeys.validation_inputs_data][i], "",
                    util.formatted_file_name(input_file_prefix,
                                             input_tensor[i]))
        else:
            # No configured inputs: generate random data in-range.
            generate_input_data(input_file_prefix,
                                model_conf[ModelKeys.input_tensors],
                                model_conf[ModelKeys.input_shapes],
                                model_conf[ModelKeys.input_ranges],
                                model_conf[ModelKeys.input_data_types])

        dev.install(Target(tmpdirname), install_dir + "/validate_in")
        target_input_file = "%s/validate_in/%s" % (
            install_dir, model_name)
        target_output_dir = "%s/validate_out" % install_dir
        dev.mkdir(target_output_dir)
        target_output_file = target_output_dir + "/" + model_name
        opts += ["--input_file=%s" % target_input_file,
                 "--output_file=%s" % target_output_file]

    # run
    envs = flags.envs.split(" ") + ["MACE_INTERNAL_STORAGE_PATH=%s" % sysdir]
    if flags.tune:
        # Tuning mode: one warm-up round, parameters saved on-device.
        envs += ["MACE_TUNING=1",
                 "MACE_RUN_PARAMETER_PATH=%s/interior/tune_params"
                 % install_dir]
        opts += ["--round=0"]
    if flags.vlog_level > 0:
        envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level]

    # Extra shared libraries required by DSP/APU runtimes.
    build_dir = flags.build_dir + "/" + target_abi
    libs = []
    if model_conf[ModelKeys.runtime] == DeviceType.HEXAGON:
        libs += ["third_party/nnlib/%s/libhexagon_controller.so" % target_abi]
    elif model_conf[ModelKeys.runtime] == DeviceType.APU:
        libs += ["third_party/apu/libapu-frontend.so"]
    target = Target(build_dir + "/install/bin/mace_run", libs,
                    opts=opts, envs=envs)
    run_target.run_target(target_abi, install_dir, target,
                          device_ids=flags.target_socs)

    # Pull back the compiled OpenCL program (and tuned parameters when
    # tuning) so later deployments can reuse them.
    if runtime == DeviceType.GPU:
        opencl_dir = workdir + "/opencl"
        util.mkdir_p(opencl_dir)
        dev.pull(
            Target(install_dir + "/interior/mace_cl_compiled_program.bin"),
            "%s/%s_compiled_opencl_kernel.%s.%s.bin" % (
                opencl_dir, model_name,
                dev.info()["ro.product.model"].replace(' ', ''),
                dev.info()["ro.board.platform"]))
        if flags.tune:
            dev.pull(Target(install_dir + "/interior/tune_params"),
                     "%s/%s_tuned_opencl_parameter.%s.%s.bin" % (
                         opencl_dir, model_name,
                         dev.info()["ro.product.model"].replace(' ', ''),
                         dev.info()["ro.board.platform"]))

    if flags.validate:
        # Compare on-device outputs against the original framework's
        # outputs computed from the same inputs.
        validate_model_file = util.download_or_get_model(
            model_conf[ModelKeys.model_file_path],
            model_conf[ModelKeys.model_sha256_checksum],
            tmpdirname)
        validate_weight_file = ""
        if ModelKeys.weight_file_path in model_conf:
            validate_weight_file = util.download_or_get_model(
                model_conf[ModelKeys.weight_file_path],
                model_conf[ModelKeys.weight_sha256_checksum],
                tmpdirname)
        dev.pull(Target(target_output_dir), tmpdirname + "/validate_out")
        output_file_prefix = tmpdirname + "/validate_out/" + model_name
        validate.validate(model_conf[ModelKeys.platform],
                          validate_model_file,
                          validate_weight_file,
                          input_file_prefix,
                          output_file_prefix,
                          model_conf[ModelKeys.input_shapes],
                          model_conf[ModelKeys.output_shapes],
                          model_conf[ModelKeys.input_data_formats],
                          model_conf[ModelKeys.output_data_formats],
                          model_conf[ModelKeys.input_tensors],
                          model_conf[ModelKeys.output_tensors],
                          flags.validate_threshold,
                          model_conf[ModelKeys.input_data_types],
                          flags.backend,
                          "",
                          "")
    if should_generate_data:
        # Clean up the local staging directory.
        shutil.rmtree(tmpdirname)
def generate_input_data(input_file, input_node, input_shape, input_ranges,
                        input_data_type):
    """Generate random input tensor files for each input node.

    For input node i, samples uniformly from
    [input_ranges[i][0], input_ranges[i][1]) with shape input_shape[i],
    casts to the configured dtype, and writes raw bytes to a file named
    by util.formatted_file_name(input_file, input_node[i]).

    Raises via mace_check when an input's data type is neither DT_FLOAT
    nor DT_INT32.
    """
    np.random.seed()
    for i in range(len(input_node)):
        # Uniform samples scaled into the configured [lo, hi) range.
        data = np.random.random(input_shape[i]) * (
            input_ranges[i][1] - input_ranges[i][0]) + input_ranges[i][0]
        input_file_name = util.formatted_file_name(input_file, input_node[i])
        MaceLogger.info('Generate input file: %s' % input_file_name)
        if input_data_type[i] == mace_pb2.DT_FLOAT:
            np_data_type = np.float32
        elif input_data_type[i] == mace_pb2.DT_INT32:
            np_data_type = np.int32
        else:
            # Fix: previously an unsupported dtype raised NameError on the
            # first iteration or silently reused the dtype from a previous
            # iteration; fail fast with a clear message instead.
            mace_check(False, "unsupported input data type: %s"
                       % input_data_type[i])
        data.astype(np_data_type).tofile(input_file_name)
def parse_args():
    """Parse run_model.py command-line flags.

    Returns (flags, extra_args); extra_args are unrecognized options
    forwarded verbatim to the mace_run binary.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--config", type=str, default="",
                            help="yaml conf path")
    arg_parser.add_argument("--model_name", type=str, default="",
                            help="model name in yaml conf")
    arg_parser.add_argument("--target_abi", type=str, default="armeabi-v7a",
                            help="Target ABI: host, armeabi-v7a, arm64-v8a,"
                                 " arm-linux-gnueabihf, aarch64-linux-gnu")
    arg_parser.add_argument("--target_socs", type=str, default="all",
                            help="serialno for adb connection,"
                                 " username@ip for arm linux,"
                                 " host for host"
                                 " | all | random")
    arg_parser.add_argument("--device_conf", type=str, default="",
                            help="device yaml config path")
    arg_parser.add_argument("--runtime", type=str, default="",
                            help="cpu/gpu/dsp/hta/apu")
    arg_parser.add_argument("--envs", type=str, default="",
                            help="Environment vars: "
                                 " MACE_OUT_OF_RANGE_CHECK=1, "
                                 " MACE_OPENCL_PROFILING=1,"
                                 " MACE_INTERNAL_STORAGE_PATH=/path/to,"
                                 " LD_PRELOAD=/path/to")
    arg_parser.add_argument("--validate", action="store_true",
                            help="enable validate")
    arg_parser.add_argument("--validate_threshold", type=float,
                            default="0.99",
                            help="validate threshold")
    arg_parser.add_argument("--backend", type=str, default="tensorflow",
                            help="onnx backend framework")
    arg_parser.add_argument("--tune", action="store_true",
                            help="enable tuning")
    arg_parser.add_argument("--build_dir", type=str,
                            default="build/cmake-build",
                            help="cmake build dir")
    arg_parser.add_argument("--build", action="store_true",
                            help="if build before run")
    arg_parser.add_argument("--output", type=str, default="build",
                            help="output dir")
    arg_parser.add_argument("--vlog_level", type=int, default="0",
                            help="vlog level")
    arg_parser.add_argument("--gencode_model", action="store_true",
                            help="use compiled model")
    arg_parser.add_argument("--gencode_param", action="store_true",
                            help="use compiled param")
    return arg_parser.parse_known_args()
if __name__ == "__main__":
    # Parse known flags; unrecognized args are forwarded to mace_run.
    flags, args = parse_args()
    if flags.build:
        # Build mace_run for the target ABI before deploying/running.
        build_engine(flags)
    run_models(flags, args)
......@@ -12,52 +12,33 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Internal tool for mace_cc_benchmark, mace_cc_test:
python tools/python/run_target.py \
--target_abi=armeabi-v7a --target_socs=all --target_name=mace_cc_test \
--gtest_filter=EnvTest.* --envs="MACE_CPP_MIN_VLOG_LEVEL=5"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import filelock
import random
import argparse
import os
from utils import device
from utils import target
from utils import config_parser
def device_lock(device_id, timeout=7200):
return filelock.FileLock("/tmp/device-lock-%s" % device_id,
timeout=timeout)
def is_device_locked(device_id):
try:
with device_lock(device_id, timeout=0.000001):
return False
except filelock.Timeout:
return True
from utils import util
def run_target(target_abi, install_dir, target_obj, device_ids="all"):
if not install_dir:
install_dir = default_install_dir(target_abi)
device_class = device.device_class(target_abi)
devices = device_class.list_devices()
if device_ids == "all":
run_devices = devices
elif device_ids == "random":
unlocked_devices = [dev for dev in devices if
not is_device_locked(dev)]
if unlocked_devices:
run_devices = [random.choice(unlocked_devices)]
else:
run_devices = [random.choice(devices)]
else:
device_id_list = [dev.strip() for dev in device_ids.split(",")]
run_devices = [dev for dev in device_id_list if dev in devices]
run_devices = device.choose_devices(target_abi, device_ids)
print("Run on devices: %s" % run_devices)
......@@ -72,7 +53,7 @@ def run_target(target_abi, install_dir, target_obj, device_ids="all"):
# run on device
print("Runing ...")
with device_lock(device_id):
with util.device_lock(device_id):
dev.run(device_target)
......@@ -84,15 +65,6 @@ def default_install_dir(target_abi):
return install_dir
"""
Internal tool for mace_cc_benchmark, mace_cc_test, mace_run:
python tools/experimental/run.py \
--target_abi=armeabi-v7a --target_socs=all --target_name=mace_cc_test \
--args="--gtest_filter=EnvTest.*" --envs="MACE_CPP_MIN_VLOG_LEVEL=5"
"""
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
......@@ -105,7 +77,7 @@ def parse_args():
parser.add_argument(
"--target_socs",
type=str,
default="",
default="all",
help="serialno for adb connection,"
" username@ip for arm linux,"
" host for host"
......@@ -126,7 +98,7 @@ def parse_args():
parser.add_argument(
"--build_dir",
type=str,
default="cmake-build-debug-tools",
default="build/cmake-build",
help="cmake build dir"
)
parser.add_argument(
......@@ -135,8 +107,6 @@ def parse_args():
help="if build before run"
)
parser.add_argument("--args", type=str, default="",
help="Command args: --gtest_filter=*, --filter=*")
parser.add_argument("--envs", type=str, default="",
help="Environment vars: "
" MACE_CPP_MIN_VLOG_LEVEL=2,"
......@@ -145,19 +115,18 @@ def parse_args():
" MACE_INTERNAL_STORAGE_PATH=/path/to,"
" LD_PRELOAD=/path/to")
flgs, _ = parser.parse_known_args()
return flgs
flgs, args = parser.parse_known_args()
return flgs, args
if __name__ == "__main__":
flags = parse_args()
flags, args = parse_args()
if flags.device_conf:
device_conf = config_parser.parse_device_info(flags.device_conf)
device.ArmLinuxDevice.set_devices(device_conf)
target_abi = flags.target_abi.strip()
target_name = flags.target_name.strip()
opts = flags.args.split(" ")
envs = flags.envs.split(" ")
# build
......@@ -165,11 +134,11 @@ if __name__ == "__main__":
if flags.build:
cmake_shell = os.path.abspath(
os.path.dirname(
__file__)) + "/config/build/cmake-build-%s.sh" % target_abi
__file__)) + "/../cmake/cmake-build-%s.sh" % target_abi
os.environ["BUILD_DIR"] = build_dir
device.execute(cmake_shell)
device.execute("bash " + cmake_shell)
# run
target = target.Target(build_dir + "/install/bin/" + target_name,
opts=opts, envs=envs)
opts=args, envs=envs)
run_target(target_abi, None, target, device_ids=flags.target_socs)
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This is a generated file. DO NOT EDIT!

#include <cstring>

namespace mace {

// Returns a pointer to the embedded binary blob, or nullptr when the
// template was rendered with an empty data array.
const unsigned char *{{ load_func_name }}() {
{% if data_size == 0 %}
  return nullptr;
{% else %}
  static const unsigned char kData[{{ data_size }}] = {
    {% for d in data %}{{"0x%02X, " % d }}{%endfor%}
  };

  return kData;
{% endif %}
}

// Size of the embedded blob in bytes (0 when empty).
size_t {{ size_func_name }}() {
  return {{ data_size }};
}

}  // namespace mace
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This is a generated file. DO NOT EDIT!
#ifndef MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_
#define MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "mace/public/mace.h"
namespace mace {
{% for tag in model_tags %}
namespace {{tag}} {
extern const unsigned char *LoadModelData();
extern const std::shared_ptr<NetDef> CreateNet();
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
} // namespace {{tag}}
{% endfor %}
namespace {
std::map<std::string, int> model_name_map {
{% for i in range(model_tags |length) %}
std::make_pair({{ model_tags[i]|tojson }}, {{ i }}),
{% endfor %}
};
} // namespace
/// \brief Create MaceEngine from code
///
/// Create MaceEngine object based on model graph code and model data file or
/// model data code.
///
/// \param model_name[in]: the name of model you want to use.
/// \param model_data_file[in]: the path of model data file,
/// if model_data_format is code, just pass empty string("")
/// \param input_nodes[in]: the array of input nodes' name
/// \param output_nodes[in]: the array of output nodes' name
/// \param config[in]: configurations for MaceEngine.
/// \param engine[out]: output MaceEngine object
/// \return MaceStatus::MACE_SUCCESS for success, MACE_INVALID_ARGS for wrong arguments,
/// MACE_OUT_OF_RESOURCES for resources is out of range.
__attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode(
    const std::string &model_name,
    const std::string &model_data_file,
    const std::vector<std::string> &input_nodes,
    const std::vector<std::string> &output_nodes,
    const MaceEngineConfig &config,
    std::shared_ptr<MaceEngine> *engine) {
  if (engine == nullptr) {
    return MaceStatus::MACE_INVALID_ARGS;
  }
  // BUG FIX: std::map::operator[] default-inserts value 0 for an unknown
  // model name, which silently dispatched to the first generated model.
  // Look the name up explicitly and reject unknown names instead.
  const auto model_it = model_name_map.find(model_name);
  if (model_it == model_name_map.end()) {
    return MaceStatus::MACE_INVALID_ARGS;
  }
  std::shared_ptr<NetDef> net_def;
{% if embed_model_data %}
  // Model data is compiled into the binary in this configuration, so the
  // caller-provided file path is unused.
  (void)model_data_file;
  const unsigned char *model_data;
{% endif %}
  MaceStatus status = MaceStatus::MACE_SUCCESS;
  switch (model_it->second) {
{% for i in range(model_tags |length) %}
    case {{ i }}:
      net_def = mace::{{model_tags[i]}}::CreateNet();
      engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %}
      model_data = mace::{{model_tags[i]}}::LoadModelData();
      status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
                               model_data);
{% else %}
      status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
                               model_data_file);
{% endif %}
      break;
{% endfor %}
    default:
      // Unreachable after the find() guard above; kept for safety.
      status = MaceStatus::MACE_INVALID_ARGS;
  }
  return status;
}
MACE_API MaceStatus CreateMaceEngineFromCode(
    const std::string &model_name,
    const unsigned char *model_weights_data,
    const size_t model_weights_data_size,
    const std::vector<std::string> &input_nodes,
    const std::vector<std::string> &output_nodes,
    const MaceEngineConfig &config,
    std::shared_ptr<MaceEngine> *engine) {
  if (engine == nullptr) {
    return MaceStatus::MACE_INVALID_ARGS;
  }
  // BUG FIX: std::map::operator[] default-inserts value 0 for an unknown
  // model name, which silently dispatched to the first generated model.
  // Look the name up explicitly and reject unknown names instead.
  const auto model_it = model_name_map.find(model_name);
  if (model_it == model_name_map.end()) {
    return MaceStatus::MACE_INVALID_ARGS;
  }
  std::shared_ptr<NetDef> net_def;
{% if embed_model_data %}
  // Model data is compiled into the binary; the caller-provided weights
  // buffer is unused in this configuration.
  const unsigned char *model_data;
  (void)model_weights_data;
{% endif %}
  // TODO(yejianwu) Add buffer range checking
  (void)model_weights_data_size;
  MaceStatus status = MaceStatus::MACE_SUCCESS;
  switch (model_it->second) {
{% for i in range(model_tags |length) %}
    case {{ i }}:
      net_def = mace::{{model_tags[i]}}::CreateNet();
      engine->reset(new mace::MaceEngine(config));
{% if embed_model_data %}
      model_data = mace::{{model_tags[i]}}::LoadModelData();
      status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
                               model_data);
{% else %}
      status = (*engine)->Init(net_def.get(), input_nodes, output_nodes,
                               model_weights_data);
{% endif %}
      break;
{% endfor %}
    default:
      // Unreachable after the find() guard above; kept for safety.
      status = MaceStatus::MACE_INVALID_ARGS;
  }
  return status;
}
} // namespace mace
#endif // MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_
......@@ -133,7 +133,7 @@ void CreateTensors(NetDef *net_def) {
namespace {{tag}} {
const std::shared_ptr<NetDef> CreateNet() {
MACE_API const std::shared_ptr<NetDef> CreateNet() {
MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}");
std::shared_ptr<NetDef> net_def(new NetDef());
......@@ -154,15 +154,15 @@ const std::shared_ptr<NetDef> CreateNet() {
return net_def;
}
const std::string ModelName() {
MACE_API const std::string ModelName() {
return {{ tag|tojson }};
}
const std::string ModelChecksum() {
MACE_API const std::string ModelChecksum() {
return {{ checksum|tojson }};
}
const std::string ModelBuildTime() {
MACE_API const std::string ModelBuildTime() {
return {{ build_time|tojson }};
}
......
......@@ -25,14 +25,14 @@ namespace mace {
namespace {{tag}} {
extern const unsigned char *LoadModelData();
MACE_API extern const unsigned char *LoadModelData();
extern const std::shared_ptr<NetDef> CreateNet();
MACE_API extern const std::shared_ptr<NetDef> CreateNet();
extern const std::string ModelName();
extern const std::string ModelChecksum();
extern const std::string ModelBuildTime();
extern const std::string ModelBuildOptions();
MACE_API extern const std::string ModelName();
MACE_API extern const std::string ModelChecksum();
MACE_API extern const std::string ModelBuildTime();
MACE_API extern const std::string ModelBuildOptions();
} // namespace {{ tag }}
} // namespace mace
......
......@@ -14,6 +14,7 @@
// This is a generated file. DO NOT EDIT!
#include "mace/public/mace.h"
namespace mace {
namespace {{tag}} {
......@@ -22,7 +23,7 @@ alignas(4) const unsigned char model_data[{{ model_data_size }}] = {
{% for d in model_data %}{{"0x%02X, " % d }}{%endfor%}
};
const unsigned char *LoadModelData() {
MACE_API const unsigned char *LoadModelData() {
return model_data;
}
......
......@@ -17,24 +17,8 @@ from enum import Enum
from py_proto import mace_pb2
class DeviceType(Enum):
CPU = 0
GPU = 2
HEXAGON = 3
HTA = 4
APU = 5
class DataFormat(Enum):
NONE = 0
NHWC = 1
NCHW = 2
HWIO = 100
OIHW = 101
HWOI = 102
OHWI = 103
AUTO = 1000
from utils.config_parser import DataFormat
from utils.config_parser import DeviceType
# SAME_LOWER: if the amount of paddings to be added is odd,
......@@ -402,7 +386,7 @@ class ConverterOption(object):
self._quantize_range_file = ""
self._change_concat_ranges = False
self._transformer_option = None
self._cl_mem_type = ""
self._cl_mem_type = "image"
@property
def input_nodes(self):
......
......@@ -18,8 +18,13 @@ from __future__ import print_function
import re
import os
import copy
import yaml
from enum import Enum
from utils.util import mace_check
from utils.util import MaceLogger
from py_proto import mace_pb2
CPP_KEYWORDS = [
'alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel',
......@@ -63,3 +68,227 @@ def parse(path):
def parse_device_info(path):
conf = parse(path)
return conf["devices"]
class ModelKeys(object):
    """String keys used to index model deployment configuration dicts."""
    platform = "platform"
    runtime = "runtime"
    graph_optimize_options = "graph_optimize_options"
    # Input tensor description.
    input_tensors = "input_tensors"
    input_shapes = "input_shapes"
    input_data_types = "input_data_types"
    input_data_formats = "input_data_formats"
    input_ranges = "input_ranges"
    # Output tensor description.
    output_tensors = "output_tensors"
    output_shapes = "output_shapes"
    output_data_types = "output_data_types"
    output_data_formats = "output_data_formats"
    # Validation-only tensors (optional).
    check_tensors = "check_tensors"
    check_shapes = "check_shapes"
    # Model/weight files and their checksums.
    model_file_path = "model_file_path"
    model_sha256_checksum = "model_sha256_checksum"
    weight_file_path = "weight_file_path"
    weight_sha256_checksum = "weight_sha256_checksum"
    # Quantization and conversion options.
    quantize_range_file = "quantize_range_file"
    quantize = "quantize"
    quantize_large_weights = "quantize_large_weights"
    change_concat_ranges = "change_concat_ranges"
    winograd = "winograd"
    cl_mem_type = "cl_mem_type"
    data_types = "data_types"
    subgraphs = "subgraphs"
    validation_inputs_data = "validation_inputs_data"
class DataFormat(Enum):
    """Tensor data layout identifiers used in model configs.

    NONE/NHWC/NCHW describe activation layouts; the 100-range values
    (HWIO, OIHW, HWOI, OHWI) are weight/filter orderings (see the
    converter's OIHW -> HWIO transposes). AUTO defers the choice.
    """
    NONE = 0
    NHWC = 1
    NCHW = 2
    HWIO = 100
    OIHW = 101
    HWOI = 102
    OHWI = 103
    AUTO = 1000
def parse_data_format(str):
    """Look up a DataFormat enum member by its case-insensitive name."""
    str = str.upper()
    mace_check(str in DataFormat.__members__,
               "unknown data format %s" % str)
    return DataFormat[str]
class DeviceType(Enum):
    """Runtime compute device identifiers (note: value 1 is unused)."""
    CPU = 0
    GPU = 2
    HEXAGON = 3
    HTA = 4
    APU = 5
    # Combined option: run on both CPU and GPU.
    CPU_GPU = 100


# Maps user-facing runtime names from the YAML config to DeviceType.
# "dsp" is an alias for the Hexagon DSP runtime.
DEVICE_MAP = {
    "cpu": DeviceType.CPU,
    "gpu": DeviceType.GPU,
    "hexagon": DeviceType.HEXAGON,
    "dsp": DeviceType.HEXAGON,
    "hta": DeviceType.HTA,
    "apu": DeviceType.APU,
    "cpu+gpu": DeviceType.CPU_GPU
}


def parse_data_type(str):
    pass  # placeholder removed


def parse_device_type(str):
    # Convert a runtime name (lowercase, as written in the config) to a
    # DeviceType; aborts via mace_check on unknown names.
    mace_check(str in DEVICE_MAP, "unknown device %s" % str)
    return DEVICE_MAP[str]
class Platform(Enum):
    """Source training framework of the model being converted."""
    TENSORFLOW = 0
    CAFFE = 1
    ONNX = 2


def parse_platform(str):
    # Convert a platform name (case-insensitive) to a Platform enum member;
    # aborts via mace_check on unknown names.
    str = str.upper()
    mace_check(str in [e.name for e in Platform],
               "unknown platform %s" % str)
    return Platform[str]
# Mapping from config data-type names to protobuf data types.
DATA_TYPE_MAP = {
    'float32': mace_pb2.DT_FLOAT,
    'int32': mace_pb2.DT_INT32,
}


def parse_data_type(str):
    """Convert a config data-type name to a mace_pb2 data type.

    CONSISTENCY FIX: the original duplicated the mapping in an if/elif
    chain, leaving DATA_TYPE_MAP as dead code; drive the lookup from the
    table so new types need only one edit. Aborts via mace_check on
    unsupported names (same message as before).
    """
    if str in DATA_TYPE_MAP:
        return DATA_TYPE_MAP[str]
    mace_check(False, "data type %s not supported" % str)
def parse_internal_data_type(str):
    # Map the config's internal-precision string to a proto data type.
    # Only 'fp32_fp32' selects float32; every other value silently falls
    # through to half precision (no validation of the input string).
    if str == 'fp32_fp32':
        return mace_pb2.DT_FLOAT
    else:
        return mace_pb2.DT_HALF
def to_list(x):
    """Pass lists through unchanged; wrap any other value in a list."""
    return x if isinstance(x, list) else [x]
def parse_int_array(xs):
    """Parse a comma-separated string of integers into a list of ints."""
    return list(map(int, xs.split(",")))


def parse_float_array(xs):
    """Parse a comma-separated string of numbers into a list of floats."""
    return list(map(float, xs.split(",")))
def normalize_model_config(conf):
    """Normalize a raw model config dict into canonical, parsed form.

    Flattens the first subgraph into the top level, parses platform/runtime
    enums, resolves the internal data type, and expands/validates per-tensor
    settings (shapes, data types, data formats, ranges) for inputs and
    outputs. Returns a deep-copied, normalized dict; aborts via mace_check
    on inconsistent counts.
    """
    conf = copy.deepcopy(conf)
    if ModelKeys.subgraphs in conf:
        subgraph = conf[ModelKeys.subgraphs][0]
        del conf[ModelKeys.subgraphs]
        conf.update(subgraph)
    # BUG FIX: removed leftover debug `print(conf)`; the final config is
    # already reported via MaceLogger.summary below.

    conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform])
    conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime])

    if ModelKeys.quantize in conf:
        # Quantized models are converted from float32 weights.
        conf[ModelKeys.data_types] = mace_pb2.DT_FLOAT
    else:
        if ModelKeys.data_types in conf:
            conf[ModelKeys.data_types] = parse_internal_data_type(
                conf[ModelKeys.data_types])
        else:
            conf[ModelKeys.data_types] = mace_pb2.DT_HALF

    def expand_per_tensor(values, count, key_name, io_name):
        # A single value applies to every tensor; otherwise the number of
        # values must match the number of tensors.
        if len(values) == 1 and count > 1:
            values = values * count
        mace_check(len(values) == count,
                   "the number of %s should be "
                   "the same as %s tensors" % (key_name, io_name))
        return values

    # parse input
    conf[ModelKeys.input_tensors] = to_list(conf[ModelKeys.input_tensors])
    input_count = len(conf[ModelKeys.input_tensors])
    conf[ModelKeys.input_shapes] = [parse_int_array(shape) for shape in
                                    to_list(conf[ModelKeys.input_shapes])]
    mace_check(
        len(conf[ModelKeys.input_shapes]) == input_count,
        "input node count and shape count do not match")

    conf[ModelKeys.input_data_types] = expand_per_tensor(
        [parse_data_type(dt) for dt in
         to_list(conf.get(ModelKeys.input_data_types, ["float32"]))],
        input_count, "input_data_types", "input")

    conf[ModelKeys.input_data_formats] = expand_per_tensor(
        [parse_data_format(df) for df in
         to_list(conf.get(ModelKeys.input_data_formats, ["NHWC"]))],
        input_count, "input_data_formats", "input")

    conf[ModelKeys.input_ranges] = expand_per_tensor(
        [parse_float_array(r) for r in
         to_list(conf.get(ModelKeys.input_ranges, ["-1.0,1.0"]))],
        input_count, "input_ranges", "input")

    # parse output
    conf[ModelKeys.output_tensors] = to_list(conf[ModelKeys.output_tensors])
    output_count = len(conf[ModelKeys.output_tensors])
    conf[ModelKeys.output_shapes] = [parse_int_array(shape) for shape in
                                     to_list(conf[ModelKeys.output_shapes])]
    # BUG FIX: the original compared the output tensor count with itself
    # (always true); compare against the parsed shape count instead.
    mace_check(len(conf[ModelKeys.output_shapes]) == output_count,
               "output node count and shape count do not match")

    conf[ModelKeys.output_data_types] = expand_per_tensor(
        [parse_data_type(dt) for dt in
         to_list(conf.get(ModelKeys.output_data_types, ["float32"]))],
        output_count, "output_data_types", "output")

    conf[ModelKeys.output_data_formats] = expand_per_tensor(
        [parse_data_format(df) for df in
         to_list(conf.get(ModelKeys.output_data_formats, ["NHWC"]))],
        output_count, "output_data_formats", "output")

    if ModelKeys.check_tensors in conf:
        conf[ModelKeys.check_tensors] = to_list(conf[ModelKeys.check_tensors])
        conf[ModelKeys.check_shapes] = [parse_int_array(shape) for shape in
                                        to_list(conf[ModelKeys.check_shapes])]
        mace_check(len(conf[ModelKeys.check_tensors]) == len(
            conf[ModelKeys.check_shapes]),
            "check tensors count and shape count do not match.")

    MaceLogger.summary(conf)

    return conf
......@@ -17,13 +17,15 @@ from __future__ import division
from __future__ import print_function
import os
import re
import subprocess
import random
import tempfile
from utils import util
MACE_TOOL_QUIET_ENV = "MACE_TOOL_QUIET"
def execute(cmd):
def execute(cmd, verbose=True):
print("CMD> %s" % cmd)
p = subprocess.Popen([cmd],
shell=True,
......@@ -31,20 +33,28 @@ def execute(cmd):
stderr=subprocess.STDOUT,
stdin=subprocess.PIPE,
universal_newlines=True)
returncode = p.poll()
if not verbose:
if p.wait() != 0:
raise Exception("errorcode: %s" % p.returncode)
return p.stdout.read()
buf = []
while returncode is None:
line = p.stdout.readline()
returncode = p.poll()
line = line.strip()
if MACE_TOOL_QUIET_ENV not in os.environ:
while p.poll() is None:
line = p.stdout.readline().strip()
if verbose:
print(line)
buf.append(line)
p.wait()
for l in p.stdout:
line = l.strip()
if verbose:
print(line)
buf.append(line)
if returncode != 0:
raise Exception("errorcode: %s" % returncode)
if p.returncode != 0:
raise Exception("errorcode: %s" % p.returncode)
return "\n".join(buf)
......@@ -62,6 +72,12 @@ class Device(object):
def pull(self, target, out_dir):
pass
def mkdir(self, dirname):
pass
def info(self):
pass
class HostDevice(Device):
def __init__(self, device_id):
......@@ -98,6 +114,9 @@ class HostDevice(Device):
if out_dir.strip() and out_dir != os.path.dirname(target.path):
execute("cp -r %s %s" % (target.path, out_dir))
def mkdir(self, dirname):
execute("mkdir -p %s" % dirname)
class AndroidDevice(Device):
def __init__(self, device_id):
......@@ -120,9 +139,15 @@ class AndroidDevice(Device):
sn = self._device_id
execute("adb -s %s shell mkdir -p %s" % (sn, install_dir))
execute("adb -s %s push %s %s" % (sn, target.path, install_dir))
if os.path.isdir(target.path):
execute("adb -s %s push %s/* %s" % (sn, target.path, install_dir),
False)
else:
execute("adb -s %s push %s %s" % (sn, target.path, install_dir),
False)
for lib in target.libs:
execute("adb -s %s push %s %s" % (sn, lib, install_dir))
execute("adb -s %s push %s %s" % (sn, lib, install_dir), False)
target.path = "%s/%s" % (install_dir, os.path.basename(target.path))
target.libs = ["%s/%s" % (install_dir, os.path.basename(lib))
......@@ -132,7 +157,17 @@ class AndroidDevice(Device):
return target
def run(self, target):
out = execute("adb -s %s shell %s" % (self._device_id, target))
tmpdirname = tempfile.mkdtemp()
cmd_file_path = tmpdirname + "/cmd.sh"
with open(cmd_file_path, "w") as cmd_file:
cmd_file.write(str(target))
target_dir = os.path.dirname(target.path)
execute("adb -s %s push %s %s" % (self._device_id,
cmd_file_path,
target_dir))
out = execute("adb -s %s shell sh %s" % (self._device_id,
target_dir + "/cmd.sh"))
# May have false positive using the following error word
for line in out.split("\n")[:-10]:
if ("Aborted" in line
......@@ -141,7 +176,23 @@ class AndroidDevice(Device):
def pull(self, target, out_dir):
sn = self._device_id
execute("adb -s %s pull %s %s" % (sn, target.path, out_dir))
execute("adb -s %s pull %s %s" % (sn, target.path, out_dir), False)
    def mkdir(self, dirname):
        # Create a directory (and any missing parents) on the device.
        sn = self._device_id
        execute("adb -s %s shell mkdir -p %s" % (sn, dirname))

    def info(self):
        # Return the device's system properties as a {name: value} dict,
        # parsed from `adb shell getprop` lines of the form
        # "[key]: [value]"; non-matching lines are skipped.
        sn = self._device_id
        output = execute("adb -s %s shell getprop" % sn, False)
        raw_props = output.split("\n")
        props = {}
        p = re.compile(r'\[(.+)\]: \[(.+)\]')
        for raw_prop in raw_props:
            m = p.match(raw_prop)
            if m:
                props[m.group(1)] = m.group(2)
        return props
class ArmLinuxDevice(Device):
......@@ -153,10 +204,12 @@ class ArmLinuxDevice(Device):
@staticmethod
def list_devices():
device_ids = []
for dev_name, dev_info in ArmLinuxDevice.devices:
print("!!!", ArmLinuxDevice.devices)
for dev_name, dev_info in ArmLinuxDevice.devices.items():
address = dev_info["address"]
username = dev_info["username"]
device_ids.append("%s@%s" % (username, address))
return device_ids
@staticmethod
def set_devices(devices):
......@@ -166,10 +219,10 @@ class ArmLinuxDevice(Device):
install_dir = os.path.abspath(install_dir)
ip = self._device_id
execute("ssh %s mkdir -p %s" % install_dir)
execute("scp %s %s:%s" % (target.path, ip, install_dir))
execute("ssh %s mkdir -p %s" % (ip, install_dir))
execute("scp -r %s %s:%s" % (target.path, ip, install_dir))
for lib in target.libs:
execute("scp %s:%s" % (lib, install_dir))
execute("scp -r %s:%s" % (lib, install_dir))
target.path = "%s/%s" % (install_dir, os.path.basename(target.path))
target.libs = ["%s/%s" % (install_dir, os.path.basename(lib))
......@@ -179,11 +232,15 @@ class ArmLinuxDevice(Device):
return target
def run(self, target):
execute("ssh %s shell %s" % (self._device_id, target))
execute("ssh %s %s" % (self._device_id, target))
def pull(self, target, out_dir):
sn = self._device_id
execute("scp %s:%s %s" % (sn, target.path, out_dir))
execute("scp -r %s:%s %s" % (sn, target.path, out_dir))
def mkdir(self, dirname):
sn = self._device_id
execute("ssh %s mkdir -p %s" % (sn, dirname))
def device_class(target_abi):
......@@ -204,3 +261,23 @@ def device_class(target_abi):
def crete_device(target_abi, device_id=None):
return device_class(target_abi)(device_id)
def choose_devices(target_abi, target_ids):
    """Select the device ids to run on for the given ABI.

    target_ids semantics: "all" -> every attached device; "random" -> one
    device, preferring devices that are not currently locked; anything
    else -> a comma-separated id list, intersected with attached devices.
    """
    available = device_class(target_abi).list_devices()
    if target_ids == "all":
        return available
    if target_ids == "random":
        unlocked = [dev for dev in available
                    if not util.is_device_locked(dev)]
        pool = unlocked if unlocked else available
        return [random.choice(pool)]
    requested = [dev.strip() for dev in target_ids.split(",")]
    return [dev for dev in requested if dev in available]
......@@ -18,9 +18,12 @@ from __future__ import print_function
import inspect
import hashlib
import filelock
import errno
import os
import urllib
from utils import device
import sys
import shutil
import traceback
################################
......@@ -46,33 +49,92 @@ def get_frame_info(level=2):
class MaceLogger:
@staticmethod
def header(message):
print(CMDColors.PURPLE + message + CMDColors.ENDC)
print(CMDColors.PURPLE + str(message) + CMDColors.ENDC)
@staticmethod
def summary(message):
print(CMDColors.GREEN + message + CMDColors.ENDC)
print(CMDColors.GREEN + str(message) + CMDColors.ENDC)
@staticmethod
def info(message):
print(get_frame_info() + message)
print(get_frame_info() + str(message))
@staticmethod
def warning(message):
print(CMDColors.YELLOW + 'WARNING: ' + get_frame_info() + message
print(CMDColors.YELLOW + 'WARNING: ' + get_frame_info() + str(message)
+ CMDColors.ENDC)
@staticmethod
def error(message):
print(CMDColors.RED + 'ERROR: ' + get_frame_info() + message
print(CMDColors.RED + 'ERROR: ' + get_frame_info() + str(message)
+ CMDColors.ENDC)
exit(1)
def mace_check(condition, message):
    # Assert-style guard: when `condition` is falsy, print the current
    # Python stack for context and terminate through MaceLogger.error
    # (which calls exit(1)).
    if not condition:
        for line in traceback.format_stack():
            print(line.strip())
        MaceLogger.error(message)
################################
# String Formatter
################################
class StringFormatter:
    """Render tabular data and banner blocks as plain-text for the console."""

    @staticmethod
    def table(header, data, title, align="R"):
        """Format `data` rows under `header` as an ASCII table.

        align selects per-cell alignment: "R" right, "L" left, "C" center.
        Every row must have exactly len(header) elements.
        """
        num_columns = len(header)
        # Column width = widest cell in that column, plus one padding space.
        widths = [len(str(cell)) + 1 for cell in header]
        for row in data:
            assert (len(row) == num_columns)
            for idx in range(len(row)):
                widths[idx] = max(widths[idx], len(str(row[idx])) + 1)

        total_width = sum(widths) + num_columns + 1
        dash_line = '-' * total_width + '\n'
        header_line = '=' * total_width + '\n'

        parts = [dash_line,
                 str(title).center(total_width) + '\n',
                 dash_line,
                 '|' + '|'.join(str(header[i]).center(widths[i])
                                for i in range(num_columns)) + '|\n',
                 header_line]
        for row in data:
            cells = []
            for idx in range(len(row)):
                text = str(row[idx])
                if align == "R":
                    cells.append(text.rjust(widths[idx]))
                elif align == "L":
                    cells.append(text.ljust(widths[idx]))
                elif align == "C":
                    cells.append(text.center(widths[idx]))
            parts.append('|' + '|'.join(cells) + "|\n" + dash_line)
        return ''.join(parts)

    @staticmethod
    def block(message):
        """Center `message` inside a banner of asterisks (10-char margins)."""
        width = 10 + len(str(message)) + 10
        stars = '*' * width + '\n'
        return stars + str(message).center(width) + '\n' + stars
def formatted_file_name(input_file_name, input_name):
    """Join a base file name with a sanitized tensor name.

    Non-alphanumeric characters in input_name become '_' so the result is
    safe to use as a file name.
    """
    sanitized = ''.join(c if c.isalnum() else '_' for c in input_name)
    return input_file_name + '_' + sanitized
################################
# file
################################
......@@ -86,17 +148,86 @@ def file_checksum(fname):
def download_or_get_file(file,
sha256_checksum,
output_dir):
filename = os.path.basename(file)
output_file = "%s/%s-%s.pb" % (output_dir, filename, sha256_checksum)
output_file):
if file.startswith("http://") or file.startswith("https://"):
if not os.path.exists(output_file) or file_checksum(
output_file) != sha256_checksum:
MaceLogger.info("Downloading file %s, please wait ..." % file)
urllib.urlretrieve(file, output_file)
MaceLogger.info("Downloading file %s to %s, please wait ..."
% (file, output_file))
if sys.version_info >= (3, 0):
import urllib.request
data = urllib.request.urlopen(file)
out_handle = open(output_file, "wb")
out_handle.write(data.read())
out_handle.close()
else:
import urllib
urllib.urlretrieve(file, output_file)
MaceLogger.info("Model downloaded successfully.")
else:
device.execute("cp %s %s" % (file, output_file))
shutil.copyfile(file, output_file)
if sha256_checksum:
mace_check(file_checksum(output_file) == sha256_checksum,
"checksum validate failed")
return output_file
def download_or_get_model(file,
                          sha256_checksum,
                          output_dir):
    # Resolve a model reference (URL or local path) into a local file named
    # "<basename>-<checksum>.pb" under output_dir. Fetching/copying and
    # checksum validation are delegated to download_or_get_file.
    # NOTE(review): the ".pb" suffix is always appended, even when the
    # source file already carries an extension.
    filename = os.path.basename(file)
    output_file = "%s/%s-%s.pb" % (output_dir, filename, sha256_checksum)
    download_or_get_file(file, sha256_checksum, output_file)

    return output_file
################################
# bazel commands
################################
class ABIType(object):
    """Canonical ABI name strings used by the build tools."""
    armeabi_v7a = 'armeabi-v7a'
    arm64_v8a = 'arm64-v8a'
    arm64 = 'arm64'
    aarch64 = 'aarch64'
    armhf = 'armhf'
    host = 'host'


def abi_to_internal(abi):
    """Translate a user-facing ABI name to the toolchain-internal name.

    The Android ABIs map to themselves, 'arm64' -> 'aarch64' and
    'armhf' -> 'armeabi-v7a'. Any other value (including 'host') falls
    through and yields None, matching the original behavior.
    """
    if abi == ABIType.arm64:
        return ABIType.aarch64
    if abi == ABIType.armhf:
        return ABIType.armeabi_v7a
    if abi in (ABIType.armeabi_v7a, ABIType.arm64_v8a):
        return abi
    return None
################################
# lock
################################
def device_lock(device_id, timeout=7200):
    # Inter-process file lock keyed by device id so concurrent tool runs
    # do not share a device; default acquisition wait is 2 hours.
    return filelock.FileLock("/tmp/device-lock-%s" % device_id,
                             timeout=timeout)


def is_device_locked(device_id):
    # Probe the lock non-destructively: attempt acquisition with a
    # near-zero timeout; a Timeout means another process holds the device.
    try:
        with device_lock(device_id, timeout=0.000001):
            return False
    except filelock.Timeout:
        return True
################################
# os
################################
def mkdir_p(path):
    """Create `path` with any missing parents, like `mkdir -p`.

    Silently succeeds when the directory already exists; any other OSError
    (permissions, a file in the way, ...) is re-raised. EAFP form is kept
    for Python 2 compatibility (no makedirs(exist_ok=...)).
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
# Copyright 2018 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path
import numpy as np
import six
from py_proto import mace_pb2
from utils import util
from utils.config_parser import DataFormat
from utils.config_parser import Platform
VALIDATION_MODULE = 'VALIDATION'
def load_data(file, data_type=mace_pb2.DT_FLOAT):
    # Read a flat binary tensor dump as float32 or int32. Returns an empty
    # array when the file does not exist or the data type is unrecognized;
    # callers treat an empty array as a missing output.
    if os.path.isfile(file):
        if data_type == mace_pb2.DT_FLOAT:
            return np.fromfile(file=file, dtype=np.float32)
        elif data_type == mace_pb2.DT_INT32:
            return np.fromfile(file=file, dtype=np.int32)
    return np.empty([0])
def calculate_sqnr(expected, actual):
    """Signal-to-quantization-noise ratio of `actual` vs `expected`.

    Both arguments are flat numeric arrays; the ratio is signal power over
    noise power, with a tiny epsilon so identical inputs do not divide by
    zero.
    """
    noise = expected - actual
    signal_power = sum(v * v for v in expected)
    noise_power = sum(v * v for v in noise)
    return signal_power / (noise_power + 1e-15)
def calculate_similarity(u, v, data_type=np.float64):
    """Cosine similarity of two flat vectors.

    Both inputs are converted to `data_type` before the dot product so the
    comparison runs at a consistent precision.
    """
    # BUG FIX: the original used `u.dtype is not data_type`, an identity
    # test between a numpy dtype object and a type class that is always
    # True, so it copied the array even when already float64. Use dtype
    # (in)equality, which numpy defines against type classes.
    if u.dtype != data_type:
        u = u.astype(data_type)
    if v.dtype != data_type:
        v = v.astype(data_type)
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
def calculate_pixel_accuracy(out_value, mace_out_value):
    """Fraction of rows whose argmax class agrees between the two outputs.

    Inputs are reshaped to (rows, classes) using the last axis of
    `out_value` as the class axis; arrays with fewer than two dimensions
    trivially score 1.0.
    """
    if len(out_value.shape) < 2:
        return 1.0
    expected = out_value.reshape((-1, out_value.shape[-1]))
    predicted = mace_out_value.reshape(expected.shape)
    rows = expected.shape[0]
    matched = sum(1 for r in range(rows)
                  if np.argmax(expected[r]) == np.argmax(predicted[r]))
    return 1.0 * matched / rows
def compare_output(output_name, mace_out_value,
                   out_value, validation_threshold, log_file):
    """Compare a MACE output tensor against the reference value.

    Logs cosine similarity, SQNR and pixel accuracy. When `log_file` is
    given, metrics are appended there as CSV and no pass/fail judgement is
    made; otherwise the run terminates (MaceLogger.error exits) when
    similarity is not above `validation_threshold` or the output is empty.
    """
    if mace_out_value.size != 0:
        pixel_accuracy = calculate_pixel_accuracy(out_value, mace_out_value)
        out_value = out_value.reshape(-1)
        mace_out_value = mace_out_value.reshape(-1)
        assert len(out_value) == len(mace_out_value)
        sqnr = calculate_sqnr(out_value, mace_out_value)
        similarity = calculate_similarity(out_value, mace_out_value)
        util.MaceLogger.summary(
            output_name + ' MACE VS training platform'
            + ' similarity: ' + str(similarity) + ' , sqnr: ' + str(sqnr)
            + ' , pixel_accuracy: ' + str(pixel_accuracy))
        if log_file:
            # Create the CSV with a header on first use, then append one
            # metrics row per output tensor.
            if not os.path.exists(log_file):
                with open(log_file, 'w') as f:
                    f.write('output_name,similarity,sqnr,pixel_accuracy\n')
            summary = '{output_name},{similarity},{sqnr},{pixel_accuracy}\n' \
                .format(output_name=output_name,
                        similarity=similarity,
                        sqnr=sqnr,
                        pixel_accuracy=pixel_accuracy)
            with open(log_file, "a") as f:
                f.write(summary)
        elif similarity > validation_threshold:
            util.MaceLogger.summary(
                util.StringFormatter.block("Similarity Test Passed"))
        else:
            # BUG FIX: MaceLogger.error takes a single message argument;
            # the extra leading "" argument raised a TypeError instead of
            # reporting the failure.
            util.MaceLogger.error(
                util.StringFormatter.block("Similarity Test Failed"))
    else:
        # BUG FIX: same extra-argument problem as above.
        util.MaceLogger.error(
            util.StringFormatter.block(
                "Similarity Test failed because of empty output"))
def normalize_tf_tensor_name(name):
    """Ensure a TensorFlow tensor name carries an output index.

    Bare op names get the default ':0' suffix; names that already contain
    ':' are returned unchanged.
    """
    return name if ':' in name else name + ':0'
def validate_with_file(output_names, output_shapes,
                       mace_out_file, validation_outputs_data,
                       validation_threshold, log_file):
    # Validate each MACE output against precomputed reference data files
    # (one entry per output; local paths or http(s) URLs, which are
    # downloaded next to mace_out_file first).
    for i in range(len(output_names)):
        if validation_outputs_data[i].startswith("http://") or \
                validation_outputs_data[i].startswith("https://"):
            validation_file_name = util.formatted_file_name(
                mace_out_file, output_names[i] + '_validation')
            six.moves.urllib.request.urlretrieve(validation_outputs_data[i],
                                                 validation_file_name)
        else:
            validation_file_name = validation_outputs_data[i]
        value = load_data(validation_file_name)
        out_shape = output_shapes[i]
        if len(out_shape) == 4:
            # Reference data for 4-D outputs appears to be stored
            # channel-first; reorder the shape and transpose back to NHWC
            # before comparing. NOTE(review): confirm the stored layout —
            # this mutates output_shapes[i] in place as a side effect.
            out_shape[1], out_shape[2], out_shape[3] = \
                out_shape[3], out_shape[1], out_shape[2]
            value = value.reshape(out_shape).transpose((0, 2, 3, 1))
        output_file_name = util.formatted_file_name(
            mace_out_file, output_names[i])
        mace_out_value = load_data(output_file_name)
        compare_output(output_names[i], mace_out_value,
                       value, validation_threshold, log_file)
def validate_tf_model(model_file,
                      input_file, mace_out_file,
                      input_names, input_shapes, input_data_formats,
                      output_names, output_shapes, output_data_formats,
                      validation_threshold, input_data_types, log_file):
    # Re-run the frozen TensorFlow graph on the same inputs and compare its
    # outputs against the MACE output dumps via compare_output.
    import tensorflow as tf
    if not os.path.isfile(model_file):
        # NOTE(review): MaceLogger.error takes a single message argument;
        # passing VALIDATION_MODULE as a separate first argument looks like
        # it would raise a TypeError at runtime — confirm.
        util.MaceLogger.error(
            VALIDATION_MODULE,
            "Input graph file '" + model_file + "' does not exist!")

    tf.reset_default_graph()
    input_graph_def = tf.GraphDef()
    with open(model_file, "rb") as f:
        data = f.read()
        input_graph_def.ParseFromString(data)
        tf.import_graph_def(input_graph_def, name="")

    with tf.Session() as session:
        with session.graph.as_default() as graph:
            tf.import_graph_def(input_graph_def, name="")
            input_dict = {}
            for i in range(len(input_names)):
                input_value = load_data(
                    util.formatted_file_name(input_file, input_names[i]),
                    input_data_types[i])
                input_value = input_value.reshape(input_shapes[i])
                # TF expects NHWC activations / HWIO filters; convert 4-D
                # inputs that were dumped in other layouts.
                if input_data_formats[i] == DataFormat.NCHW and \
                        len(input_shapes[i]) == 4:
                    input_value = input_value.transpose((0, 2, 3, 1))
                elif input_data_formats[i] == DataFormat.OIHW and \
                        len(input_shapes[i]) == 4:
                    # OIHW -> HWIO
                    input_value = input_value.transpose((2, 3, 1, 0))
                input_node = graph.get_tensor_by_name(
                    normalize_tf_tensor_name(input_names[i]))
                input_dict[input_node] = input_value

            output_nodes = []
            for name in output_names:
                output_nodes.extend(
                    [graph.get_tensor_by_name(
                        normalize_tf_tensor_name(name))])
            output_values = session.run(output_nodes, feed_dict=input_dict)

            for i in range(len(output_names)):
                output_file_name = util.formatted_file_name(
                    mace_out_file, output_names[i])
                mace_out_value = load_data(output_file_name)
                # MACE dumps declared as NCHW are converted to NHWC to
                # match TF's outputs before comparison.
                if output_data_formats[i] == DataFormat.NCHW and \
                        len(output_shapes[i]) == 4:
                    mace_out_value = mace_out_value. \
                        reshape(output_shapes[i]).transpose((0, 2, 3, 1))
                compare_output(output_names[i],
                               mace_out_value, output_values[i],
                               validation_threshold, log_file)
def validate_caffe_model(model_file, input_file,
                         mace_out_file, weight_file,
                         input_names, input_shapes, input_data_formats,
                         output_names, output_shapes, output_data_formats,
                         validation_threshold, log_file):
    # Re-run the Caffe model (prototxt + caffemodel) on CPU and compare its
    # outputs against the MACE output dumps via compare_output.
    os.environ['GLOG_minloglevel'] = '1'  # suppress Caffe verbose prints
    import caffe
    if not os.path.isfile(model_file):
        # NOTE(review): MaceLogger.error takes a single message argument;
        # the extra VALIDATION_MODULE argument looks like a runtime
        # TypeError — confirm.
        util.MaceLogger.error(
            VALIDATION_MODULE,
            "Input graph file '" + model_file + "' does not exist!")
    if not os.path.isfile(weight_file):
        util.MaceLogger.error(
            VALIDATION_MODULE,
            "Input weight file '" + weight_file + "' does not exist!")

    caffe.set_mode_cpu()

    net = caffe.Net(model_file, caffe.TEST, weights=weight_file)

    for i in range(len(input_names)):
        input_value = load_data(
            util.formatted_file_name(input_file, input_names[i]))
        input_value = input_value.reshape(input_shapes[i])
        # Caffe is channel-first: convert 4-D NHWC dumps to NCHW.
        if input_data_formats[i] == DataFormat.NHWC and \
                len(input_shapes[i]) == 4:
            input_value = input_value.transpose((0, 3, 1, 2))
        input_blob_name = input_names[i]
        try:
            # An input may be addressed by its layer name; use the layer's
            # first top blob in that case.
            if input_names[i] in net.top_names:
                input_blob_name = net.top_names[input_names[i]][0]
        except ValueError:
            pass
        new_shape = input_value.shape
        net.blobs[input_blob_name].reshape(*new_shape)
        for index in range(input_value.shape[0]):
            net.blobs[input_blob_name].data[index] = input_value[index]

    net.forward()

    for i in range(len(output_names)):
        value = net.blobs[output_names[i]].data
        output_file_name = util.formatted_file_name(
            mace_out_file, output_names[i])
        mace_out_value = load_data(output_file_name)
        # Convert MACE's NHWC dumps to NCHW to match Caffe's layout.
        if output_data_formats[i] == DataFormat.NHWC and \
                len(output_shapes[i]) == 4:
            mace_out_value = mace_out_value.reshape(output_shapes[i]) \
                .transpose((0, 3, 1, 2))
        compare_output(output_names[i], mace_out_value,
                       value, validation_threshold, log_file)
def validate_onnx_model(model_file,
                        input_file, mace_out_file,
                        input_names, input_shapes, input_data_formats,
                        output_names, output_shapes, output_data_formats,
                        validation_threshold, input_data_types,
                        backend, log_file):
    # Re-run the ONNX model through the selected backend (tensorflow or
    # caffe2/pytorch) and compare outputs against MACE's dumps.
    import onnx
    if backend == "tensorflow":
        from onnx_tf.backend import prepare
        # NOTE(review): typos "valivate"/"framwork" below are runtime
        # strings and intentionally left untouched here.
        print("valivate on onnx tensorflow backend.")
    elif backend == "caffe2" or backend == "pytorch":
        from caffe2.python.onnx.backend import prepare
        print("valivate on onnx caffe2 backend.")
    else:
        # NOTE(review): MaceLogger.error takes a single message argument;
        # the extra VALIDATION_MODULE argument looks like a runtime
        # TypeError — confirm.
        util.MaceLogger.error(
            VALIDATION_MODULE,
            "onnx backend framwork '" + backend + "' is invalid.")
    if not os.path.isfile(model_file):
        util.MaceLogger.error(
            VALIDATION_MODULE,
            "Input graph file '" + model_file + "' does not exist!")
    model = onnx.load(model_file)
    input_dict = {}
    for i in range(len(input_names)):
        input_value = load_data(util.formatted_file_name(input_file,
                                                         input_names[i]),
                                input_data_types[i])
        input_value = input_value.reshape(input_shapes[i])
        # ONNX is channel-first: convert 4-D NHWC dumps to NCHW.
        if input_data_formats[i] == DataFormat.NHWC and \
                len(input_shapes[i]) == 4:
            input_value = input_value.transpose((0, 3, 1, 2))
        input_dict[input_names[i]] = input_value
    onnx_outputs = []
    for i in range(len(output_names)):
        # Copy the shape so the channel reorder does not mutate the
        # caller's output_shapes.
        out_shape = output_shapes[i][:]
        if output_data_formats[i] == DataFormat.NHWC and \
                len(out_shape) == 4:
            out_shape[1], out_shape[2], out_shape[3] = \
                out_shape[3], out_shape[1], out_shape[2]
        onnx_outputs.append(
            onnx.helper.make_tensor_value_info(output_names[i],
                                               onnx.TensorProto.FLOAT,
                                               out_shape))
    # Register the requested tensors as graph outputs so the backend
    # returns them.
    model.graph.output.extend(onnx_outputs)
    rep = prepare(model)
    output_values = rep.run(input_dict)

    for i in range(len(output_names)):
        out_name = output_names[i]
        value = output_values[out_name].flatten()
        output_file_name = util.formatted_file_name(mace_out_file,
                                                    output_names[i])
        mace_out_value = load_data(output_file_name)
        # Convert MACE's NHWC dumps to NCHW to match the backend's layout.
        if output_data_formats[i] == DataFormat.NHWC and \
                len(output_shapes[i]) == 4:
            mace_out_value = mace_out_value.reshape(output_shapes[i]) \
                .transpose((0, 3, 1, 2))
        compare_output(output_names[i],
                       mace_out_value, value,
                       validation_threshold, log_file)
def validate(platform, model_file, weight_file, input_file, mace_out_file,
             input_shape, output_shape, input_data_format,
             output_data_format, input_node, output_node,
             validation_threshold, input_data_type, backend,
             validation_outputs_data, log_file):
    """Validate MACE outputs for a converted model.

    `validation_outputs_data` may be a list of reference-data sources, a
    single file path, or empty/None. When reference data is available the
    outputs are compared against it directly; otherwise the original
    framework (TensorFlow / Caffe / ONNX) is re-run to produce references.
    """
    # ROBUSTNESS FIX: guard before os.path.isfile — the original passed
    # validation_outputs_data straight in, and os.path.isfile(None) raises
    # TypeError when no reference data is configured.
    if isinstance(validation_outputs_data, list):
        validation_outputs = validation_outputs_data
    elif validation_outputs_data and os.path.isfile(validation_outputs_data):
        validation_outputs = [validation_outputs_data]
    else:
        validation_outputs = []

    if validation_outputs:
        validate_with_file(output_node, output_shape,
                           mace_out_file, validation_outputs,
                           validation_threshold, log_file)
    elif platform == Platform.TENSORFLOW:
        validate_tf_model(model_file, input_file, mace_out_file,
                          input_node, input_shape, input_data_format,
                          output_node, output_shape, output_data_format,
                          validation_threshold, input_data_type,
                          log_file)
    elif platform == Platform.CAFFE:
        validate_caffe_model(model_file,
                             input_file, mace_out_file, weight_file,
                             input_node, input_shape, input_data_format,
                             output_node, output_shape, output_data_format,
                             validation_threshold, log_file)
    elif platform == Platform.ONNX:
        validate_onnx_model(model_file,
                            input_file, mace_out_file,
                            input_node, input_shape, input_data_format,
                            output_node, output_shape, output_data_format,
                            validation_threshold,
                            input_data_type, backend, log_file)
......@@ -90,5 +90,5 @@ class ModelVisualizer(object):
def save_html(self):
html = self.render_html()
with open(self._output_file, "wb") as f:
with open(self._output_file, "w") as f:
f.write(html)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册