From 43b33191883c0ab8482e37d7fb7a1aea425cd7b5 Mon Sep 17 00:00:00 2001 From: liyin Date: Thu, 22 Aug 2019 19:06:38 +0800 Subject: [PATCH] Refactor model run: Support CMake everything --- .gitignore | 2 +- .gitlab-ci.yml | 8 +- CMakeLists.txt | 7 +- mace/codegen/CMakeLists.txt | 18 + mace/codegen/model_version_script.lds | 12 + mace/libmace/mace.cc | 4 +- mace/proto/CMakeLists.txt | 15 +- mace/tools/CMakeLists.txt | 1 + mace/tools/mace_run.cc | 16 +- third_party/caffe/caffe.cmake | 18 + third_party/third_party.cmake | 1 + tools/clear_workspace.sh | 11 + tools/cmake/cmake-build-aarch64-linux-gnu.sh | 35 ++ .../cmake/cmake-build-arm-linux-gnueabihf.sh | 35 ++ tools/cmake/cmake-build-arm64-v8a.sh | 49 +++ tools/cmake/cmake-build-armeabi-v7a.sh | 51 +++ tools/cmake/cmake-build-host.sh | 27 ++ tools/converter.py | 38 +- tools/python/CMakeLists.txt | 0 tools/python/README.md | 137 +++++++ tools/python/convert.py | 280 ++++--------- tools/python/encrypt.py | 128 ++++-- tools/python/gen_opencl.py | 207 ++++++++++ tools/python/py_proto/__init__.py | 13 +- tools/python/run_model.py | 379 ++++++++++++++++++ tools/python/{run.py => run_target.py} | 71 +--- tools/python/template/file_binary.cc.jinja2 | 38 ++ .../template/mace_engine_factory.h.jinja2 | 148 +++++++ tools/python/template/model.jinja2 | 8 +- tools/python/template/model_header.jinja2 | 12 +- tools/python/template/tensor_data.jinja2 | 3 +- tools/python/transform/base_converter.py | 22 +- tools/python/utils/config_parser.py | 229 +++++++++++ tools/python/utils/device.py | 121 +++++- tools/python/utils/util.py | 159 +++++++- tools/python/validate.py | 344 ++++++++++++++++ tools/python/visualize/visualize_model.py | 2 +- 37 files changed, 2271 insertions(+), 378 deletions(-) create mode 100644 mace/codegen/model_version_script.lds create mode 100644 third_party/caffe/caffe.cmake create mode 100755 tools/clear_workspace.sh create mode 100755 tools/cmake/cmake-build-aarch64-linux-gnu.sh create mode 100755 
tools/cmake/cmake-build-arm-linux-gnueabihf.sh create mode 100755 tools/cmake/cmake-build-arm64-v8a.sh create mode 100755 tools/cmake/cmake-build-armeabi-v7a.sh create mode 100755 tools/cmake/cmake-build-host.sh create mode 100644 tools/python/CMakeLists.txt create mode 100644 tools/python/README.md create mode 100644 tools/python/gen_opencl.py create mode 100644 tools/python/run_model.py rename tools/python/{run.py => run_target.py} (69%) create mode 100644 tools/python/template/file_binary.cc.jinja2 create mode 100644 tools/python/template/mace_engine_factory.h.jinja2 create mode 100644 tools/python/validate.py diff --git a/.gitignore b/.gitignore index fb91b9f1..d4f869a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ bazel-* -build/ +build/* cmake-build/ cmake-build-debug/ docs/_build/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e10a2c63..b685433a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,15 +42,15 @@ build_docs: cmake_build_android-armeabi-v7a: stage: build script: - - sh tools/cmake-build-android-armeabi-v7a-full.sh - - LIBMACE32_FULL_SIZE=`stat -c%s cmake-build/android-armeabi-v7a-full/install/lib/libmace.so` + - RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh + - LIBMACE32_FULL_SIZE=`stat -c%s build/cmake-build/armeabi-v7a/install/lib/libmace.so` - if (( LIBMACE32_FULL_SIZE > 2200000 )) ; then echo "The libmace.so size too large"; exit 1; fi cmake_build_android-arm64-v8: stage: build script: - - sh tools/cmake-build-android-arm64-v8a-full.sh - - LIBMACE64_FULL_SIZE=`stat -c%s cmake-build/android-arm64-v8a-full/install/lib/libmace.so` + - RUNTIME=GPU bash tools/cmake/cmake-build-arm64-v8a.sh + - LIBMACE64_FULL_SIZE=`stat -c%s build/cmake-build/arm64-v8a/install/lib/libmace.so` - if (( LIBMACE64_FULL_SIZE > 3100000 )) ; then echo "The libmace.so size too large"; exit 1; fi bazel_build: diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f60c0a7..17fa7ec3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ 
option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF) option(MACE_ENABLE_OPT_SIZE "whether to build with optimized binary size" ON) option(MACE_ENABLE_OBFUSCATE "whether to build with code obfuscation" ON) option(MACE_ENABLE_CCACHE "whether to build with ccache" ON) +option(MACE_ENABLE_CODE_MODE "whether to use code mode" OFF) message("CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") @@ -40,11 +41,15 @@ if(MACE_ENABLE_OPT_SIZE) set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -fno-rtti -fno-exceptions -DGOOGLE_PROTOBUF_NO_RTTI -DPROTOBUF_USE_EXCEPTIONS=0") endif(MACE_ENABLE_OPT_SIZE) +if(MACE_ENABLE_CODE_MODE) + set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -DMODEL_GRAPH_FORMAT_CODE") +endif(MACE_ENABLE_CODE_MODE) + # flags apply only to mace code (third_party excluded) # -Wno-error=unused-command-line-argument: official Android toolchain contains # unsupported argument and will break ccache preprocessor if(ANDROID) - set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror -Wno-error=unused-command-line-argument") + set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror -Wno-error=unused-command-line-argument -Wno-error=unevaluated-expression -Wno-error=tautological-compare") else(ANDROID) set(MACE_CODE_CC_FLAGS "${MACE_CODE_CC_FLAGS} -Wall -Werror") endif(ANDROID) diff --git a/mace/codegen/CMakeLists.txt b/mace/codegen/CMakeLists.txt index ea9a0ed9..4e96578a 100644 --- a/mace/codegen/CMakeLists.txt +++ b/mace/codegen/CMakeLists.txt @@ -31,3 +31,21 @@ add_dependencies(generated_opencl_kernel opencl_kernel_src) install(TARGETS generated_version ARCHIVE DESTINATION lib) install(TARGETS generated_opencl_kernel ARCHIVE DESTINATION lib) + +file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/null.cc "") +file(GLOB CODEGEN_MODELS ${CMAKE_CURRENT_BINARY_DIR}/null.cc models/**/code/*.cc) + +add_library(model STATIC ${CODEGEN_MODELS}) +target_link_libraries(model PRIVATE core proto utils port) +install(TARGETS model ARCHIVE DESTINATION lib) + 
+add_library(model_shared SHARED ${CODEGEN_MODELS}) +target_link_libraries(model_shared PRIVATE core proto utils port) +if(NOT APPLE) + set_target_properties(model_shared PROPERTIES LINK_FLAGS + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/model_version_script.lds") +endif(NOT APPLE) +install(TARGETS model_shared DESTINATION lib) + +file(GLOB MODEL_HEADERS engine/mace_engine_factory.h models/**/code/*.h) +install(FILES ${MODEL_HEADERS} DESTINATION include/models) diff --git a/mace/codegen/model_version_script.lds b/mace/codegen/model_version_script.lds new file mode 100644 index 00000000..d1cc9dad --- /dev/null +++ b/mace/codegen/model_version_script.lds @@ -0,0 +1,12 @@ +mace { + global: + *LoadModelData*; + *CreateNet*; + *ModelName*; + *ModelChecksum*; + *ModelBuildTime*; + *ModelBuildOptions*; + + local: + *; +}; diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 13ce1a58..aae83089 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -603,9 +603,11 @@ MaceEngine::Impl::~Impl() { #if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) if (device_type_ == HEXAGON || device_type_ == HTA) { if (VLOG_IS_ON(2)) { - hexagon_controller_->GetPerfInfo(); hexagon_controller_->PrintLog(); } + if (VLOG_IS_ON(1)) { + hexagon_controller_->GetPerfInfo(); + } MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error"); MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error"); } diff --git a/mace/proto/CMakeLists.txt b/mace/proto/CMakeLists.txt index db875de4..1fc025ee 100644 --- a/mace/proto/CMakeLists.txt +++ b/mace/proto/CMakeLists.txt @@ -1,6 +1,7 @@ set(MACE_PROTO_PROTOS mace.proto) set(MACE_PROTO_SRCS) set(MACE_PROTO_HDRS) +set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) foreach(proto_file ${MACE_PROTO_PROTOS}) get_filename_component(proto_file_abs ${proto_file} ABSOLUTE) @@ -17,10 +18,20 @@ foreach(proto_file ${MACE_PROTO_PROTOS}) DEPENDS protoc_bin VERBATIM ) + + 
set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py) + add_custom_command( + OUTPUT ${PROTO_GENERATED_PY_FILES} + COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} + COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}" + DEPENDS protoc_bin + VERBATIM + ) endforeach() -add_custom_target(mace_proto_src DEPENDS ${MACE_PROTO_SRCS} - COMMENT "Checking if re-generation is required" ) +add_custom_target(mace_proto_src DEPENDS ${PROTO_GENERATED_FILES} + COMMENT "Checking if re-generation is required") +add_custom_target(mace_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) add_library(proto ${MACE_PROTO_SRCS}) target_link_libraries(proto libprotobuf_lite) diff --git a/mace/tools/CMakeLists.txt b/mace/tools/CMakeLists.txt index a0166143..a3d2eae2 100644 --- a/mace/tools/CMakeLists.txt +++ b/mace/tools/CMakeLists.txt @@ -4,6 +4,7 @@ file(GLOB MACE_RUN_SRCS add_executable(mace_run ${MACE_RUN_SRCS}) target_link_libraries(mace_run PUBLIC mace_static + model gflags ) diff --git a/mace/tools/mace_run.cc b/mace/tools/mace_run.cc index 3529dfa8..74c02f23 100644 --- a/mace/tools/mace_run.cc +++ b/mace/tools/mace_run.cc @@ -548,10 +548,16 @@ int Main(int argc, char **argv) { LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint; LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads; LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy; - LOG(INFO) << "limit_opencl_kernel_time: " - << getenv("MACE_LIMIT_OPENCL_KERNEL_TIME"); - LOG(INFO) << "opencl_queue_window_size: " - << getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE"); + auto limit_opencl_kernel_time = getenv("MACE_LIMIT_OPENCL_KERNEL_TIME"); + if (limit_opencl_kernel_time) { + LOG(INFO) << "limit_opencl_kernel_time: " + << limit_opencl_kernel_time; + } + auto opencl_queue_window_size = getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE"); + if (opencl_queue_window_size) { + LOG(INFO) << "opencl_queue_window_size: " + << 
getenv("MACE_OPENCL_QUEUE_WINDOW_SIZE"); + } std::vector input_shapes = Split(FLAGS_input_shape, ':'); std::vector output_shapes = Split(FLAGS_output_shape, ':'); @@ -584,14 +590,12 @@ int Main(int argc, char **argv) { for (size_t i = 0; i < output_count; ++i) { output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]); } - float cpu_float32_performance = 0.0f; if (FLAGS_input_dir.empty()) { // get cpu capability Capability cpu_capability = GetCapability(DeviceType::CPU); cpu_float32_performance = cpu_capability.float32_performance.exec_time; } - bool ret = false; for (int i = 0; i < FLAGS_restart_round; ++i) { VLOG(0) << "restart round " << i; diff --git a/third_party/caffe/caffe.cmake b/third_party/caffe/caffe.cmake new file mode 100644 index 00000000..5ee08b3c --- /dev/null +++ b/third_party/caffe/caffe.cmake @@ -0,0 +1,18 @@ +set(CAFFE_PROTO_PROTOS ${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.proto) +set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) + +foreach(proto_file ${CAFFE_PROTO_PROTOS}) + get_filename_component(proto_file_abs ${proto_file} ABSOLUTE) + get_filename_component(basename ${proto_file} NAME_WE) + set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py) + + add_custom_command( + OUTPUT ${PROTO_GENERATED_PY_FILES} + COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${PROJECT_SOURCE_DIR}/third_party/caffe ${proto_file_abs} + COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}" + DEPENDS protoc_bin + VERBATIM + ) +endforeach() + +add_custom_target(caffe_proto_src ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) diff --git a/third_party/third_party.cmake b/third_party/third_party.cmake index 4317450c..f8007587 100644 --- a/third_party/third_party.cmake +++ b/third_party/third_party.cmake @@ -50,6 +50,7 @@ include(${PROJECT_SOURCE_DIR}/third_party/opencl-clhpp/opencl-clhpp.cmake) include(${PROJECT_SOURCE_DIR}/third_party/opencl-headers/opencl-headers.cmake) 
include(${PROJECT_SOURCE_DIR}/third_party/protobuf/protobuf.cmake) include(${PROJECT_SOURCE_DIR}/third_party/tflite/tflite.cmake) +include(${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.cmake) if(MACE_ENABLE_HEXAGON_DSP) include(${PROJECT_SOURCE_DIR}/third_party/nnlib/nnlib.cmake) diff --git a/tools/clear_workspace.sh b/tools/clear_workspace.sh new file mode 100755 index 00000000..338a5410 --- /dev/null +++ b/tools/clear_workspace.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +rm -rf mace/codegen/models +rm -rf mace/codegen/engine +rm -rf mace/codegen/opencl + +for d in build/*; do + if [[ "$d" != "build/cmake-build*" ]]; then + rm -rf "$d" + fi +done diff --git a/tools/cmake/cmake-build-aarch64-linux-gnu.sh b/tools/cmake/cmake-build-aarch64-linux-gnu.sh new file mode 100755 index 00000000..25f425ad --- /dev/null +++ b/tools/cmake/cmake-build-aarch64-linux-gnu.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env sh + +set -e + +# build for arm linux aarch64 +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/aarch64-linux-gnu +fi + +MACE_ENABLE_OPENCL=OFF +if [[ "$RUNTIME" == "GPU" ]]; then + MACE_ENABLE_OPENCL=ON +fi + +MACE_ENABLE_CODE_MODE=OFF +if [[ "$RUNMODE" == "code" ]]; then + MACE_ENABLE_CODE_MODE=ON +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake -DCROSSTOOL_ROOT=${LINARO_AARCH64_LINUX_GNU} \ + -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/aarch64-linux-gnu.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DMACE_ENABLE_NEON=ON \ + -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ + -DMACE_ENABLE_OPT_SIZE=ON \ + -DMACE_ENABLE_OBFUSCATE=ON \ + -DMACE_ENABLE_TESTS=ON \ + -DMACE_ENABLE_BENCHMARKS=ON \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DCMAKE_INSTALL_PREFIX=install \ + ../../.. +make -j6 VERBOSE=1 && make install +cd ../../.. 
diff --git a/tools/cmake/cmake-build-arm-linux-gnueabihf.sh b/tools/cmake/cmake-build-arm-linux-gnueabihf.sh new file mode 100755 index 00000000..5c96ed5e --- /dev/null +++ b/tools/cmake/cmake-build-arm-linux-gnueabihf.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env sh + +set -e + +# build for arm linux gnueabihf +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/arm-linux-gnueabihf +fi + +MACE_ENABLE_CODE_MODE=OFF +if [[ $RUNMODE == "code" ]]; then + MACE_ENABLE_CODE_MODE=ON +fi + +MACE_ENABLE_OPENCL=OFF +if [[ "$RUNTIME" == "GPU" ]]; then + MACE_ENABLE_OPENCL=ON +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake -DCROSSTOOL_ROOT=${LINARO_ARM_LINUX_GNUEABIHF} \ + -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchains/arm-linux-gnueabihf.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DMACE_ENABLE_NEON=ON \ + -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ + -DMACE_ENABLE_OPT_SIZE=ON \ + -DMACE_ENABLE_OBFUSCATE=ON \ + -DMACE_ENABLE_TESTS=ON \ + -DMACE_ENABLE_BENCHMARKS=ON \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DCMAKE_INSTALL_PREFIX=install \ + ../../.. +make -j6 VERBOSE=1 && make install +cd ../../.. 
diff --git a/tools/cmake/cmake-build-arm64-v8a.sh b/tools/cmake/cmake-build-arm64-v8a.sh new file mode 100755 index 00000000..c6b603e3 --- /dev/null +++ b/tools/cmake/cmake-build-arm64-v8a.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env sh + +set -e + +# build for android arm64-v8a +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/arm64-v8a +fi + +MACE_ENABLE_OPENCL=OFF +MACE_ENABLE_HEXAGON_DSP=OFF +MACE_ENABLE_HEXAGON_HTA=OFF +MACE_ENABLE_MTK_APU=OFF +if [[ "$RUNTIME" == "GPU" ]]; then + MACE_ENABLE_OPENCL=ON +elif [[ "$RUNTIME" == "HEXAGON" ]]; then + MACE_ENABLE_HEXAGON_DSP=ON +elif [[ "$RUNTIME" == "HTA" ]]; then + MACE_ENABLE_HEXAGON_HTA=ON +elif [[ "$RUNTIME" == "APU" ]]; then + MACE_ENABLE_MTK_APU=ON +fi + +MACE_ENABLE_CODE_MODE=OFF +if [[ "$RUNMODE" == "code" ]]; then + MACE_ENABLE_CODE_MODE=ON +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake -DANDROID_ABI="arm64-v8a" \ + -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ + -DANDROID_NATIVE_API_LEVEL=28 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_ANDROID_STL_TYPE=c++_shared \ + -DMACE_ENABLE_NEON=ON \ + -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ + -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \ + -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \ + -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \ + -DMACE_ENABLE_OPT_SIZE=ON \ + -DMACE_ENABLE_OBFUSCATE=ON \ + -DMACE_ENABLE_TESTS=ON \ + -DMACE_ENABLE_BENCHMARKS=ON \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DCMAKE_INSTALL_PREFIX=install \ + ../../.. +make -j6 VERBOSE=1 && make install +cd ../../.. 
diff --git a/tools/cmake/cmake-build-armeabi-v7a.sh b/tools/cmake/cmake-build-armeabi-v7a.sh new file mode 100755 index 00000000..e38afa83 --- /dev/null +++ b/tools/cmake/cmake-build-armeabi-v7a.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +set -e + +# build for android armeabi-v7a +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/armeabi-v7a +fi + +MACE_ENABLE_OPENCL=OFF +MACE_ENABLE_HEXAGON_DSP=OFF +MACE_ENABLE_HEXAGON_HTA=OFF +MACE_ENABLE_MTK_APU=OFF + +if [[ "$RUNTIME" == "GPU" ]]; then + MACE_ENABLE_OPENCL=ON +elif [[ "$RUNTIME" == "HEXAGON" ]]; then + MACE_ENABLE_HEXAGON_DSP=ON +elif [[ "$RUNTIME" == "HTA" ]]; then + MACE_ENABLE_HEXAGON_HTA=ON +elif [[ "$RUNTIME" == "APU" ]]; then + MACE_ENABLE_MTK_APU=ON +fi + +MACE_ENABLE_CODE_MODE=OFF +if [[ "$RUNMODE" == "code" ]]; then + MACE_ENABLE_CODE_MODE=ON +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake -DANDROID_ABI="armeabi-v7a" \ + -DANDROID_ARM_NEON=ON \ + -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ + -DANDROID_NATIVE_API_LEVEL=28 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_ANDROID_STL_TYPE=c++_shared \ + -DMACE_ENABLE_NEON=ON \ + -DMACE_ENABLE_QUANTIZE=ON \ + -DMACE_ENABLE_OPENCL=${MACE_ENABLE_OPENCL} \ + -DMACE_ENABLE_HEXAGON_DSP=${MACE_ENABLE_HEXAGON_DSP} \ + -DMACE_ENABLE_HEXAGON_HTA=${MACE_ENABLE_HEXAGON_HTA} \ + -DMACE_ENABLE_MTK_APU=${MACE_ENABLE_MTK_APU} \ + -DMACE_ENABLE_OPT_SIZE=ON \ + -DMACE_ENABLE_OBFUSCATE=ON \ + -DMACE_ENABLE_TESTS=ON \ + -DMACE_ENABLE_BENCHMARKS=ON \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DCMAKE_INSTALL_PREFIX=install \ + ../../.. +make -j6 VERBOSE=1 && make install +cd ../../.. 
diff --git a/tools/cmake/cmake-build-host.sh b/tools/cmake/cmake-build-host.sh new file mode 100755 index 00000000..b76f8cf3 --- /dev/null +++ b/tools/cmake/cmake-build-host.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env sh + +set -e + +# build for host +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/host +fi + + +MACE_ENABLE_CODE_MODE=OFF +if [[ "$RUNMODE" == "code" ]]; then + MACE_ENABLE_CODE_MODE=ON +fi + + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake -DMACE_ENABLE_NEON=OFF \ + -DMACE_ENABLE_QUANTIZE=OFF \ + -DMACE_ENABLE_OPENCL=OFF \ + -DMACE_ENABLE_TESTS=ON \ + -DMACE_ENABLE_BENCHMARKS=ON \ + -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DCMAKE_INSTALL_PREFIX=install \ + ../../.. +make -j6 VERBOSE=1 && make install +cd ../../.. diff --git a/tools/converter.py b/tools/converter.py index 126ef215..172708f8 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -224,7 +224,7 @@ def get_opencl_mode(configs): def get_quantize_mode(configs): for model_name in configs[YAMLKeyword.models]: - quantize =\ + quantize = \ configs[YAMLKeyword.models][model_name].get( YAMLKeyword.quantize, 0) if quantize == 1: @@ -297,8 +297,8 @@ def get_model_files(model_config, model_output_dir): if sha256_checksum(model_file) != model_sha256_checksum: error_info = model_file_path + \ - " model file sha256checksum not match " + \ - model_sha256_checksum + " model file sha256checksum not match " + \ + model_sha256_checksum MaceLogger.error(ModuleName.MODEL_CONVERTER, error_info) if weight_file_path.startswith("http://") or \ @@ -316,8 +316,8 @@ def get_model_files(model_config, model_output_dir): if weight_file: if sha256_checksum(weight_file) != weight_sha256_checksum: error_info = weight_file_path + \ - " weight file sha256checksum not match " + \ - weight_sha256_checksum + " weight file sha256checksum not match " + \ + weight_sha256_checksum MaceLogger.error(ModuleName.MODEL_CONVERTER, error_info) if quantize_range_file_path.startswith("http://") or \ @@ -547,7 
+547,7 @@ def format_model_config(flags): []) if input_data_formats: if not isinstance(input_data_formats, list): - subgraph[YAMLKeyword.input_data_formats] =\ + subgraph[YAMLKeyword.input_data_formats] = \ [input_data_formats] * input_size else: mace_check(len(input_data_formats) @@ -555,7 +555,7 @@ def format_model_config(flags): ModuleName.YAML_CONFIG, "input_data_formats should match" " the size of input.") - for input_data_format in\ + for input_data_format in \ subgraph[YAMLKeyword.input_data_formats]: mace_check(input_data_format in DataFormatStrs, ModuleName.YAML_CONFIG, @@ -578,14 +578,14 @@ def format_model_config(flags): ModuleName.YAML_CONFIG, "output_data_formats should match" " the size of output") - for output_data_format in\ + for output_data_format in \ subgraph[YAMLKeyword.output_data_formats]: mace_check(output_data_format in DataFormatStrs, ModuleName.YAML_CONFIG, "'output_data_formats' must be in " + str(DataFormatStrs)) else: - subgraph[YAMLKeyword.output_data_formats] =\ + subgraph[YAMLKeyword.output_data_formats] = \ [DataFormat.NHWC] * output_size validation_threshold = subgraph.get( @@ -767,6 +767,7 @@ def print_library_summary(configs): def convert_func(flags): configs = config_parser.parse(flags.config) + print(configs) library_name = configs[YAMLKeyword.library_name] if not os.path.exists(BUILD_OUTPUT_DIR): os.makedirs(BUILD_OUTPUT_DIR) @@ -817,26 +818,27 @@ def convert_func(flags): for model_name, model_config in configs[YAMLKeyword.models].items(): model_codegen_dir = "%s/%s" % (MODEL_CODEGEN_DIR, model_name) encrypt.encrypt(model_name, - "%s/%s.pb" % (model_codegen_dir, model_name), - "%s/%s.data" % (model_codegen_dir, model_name), - model_config[YAMLKeyword.runtime], + "%s/model/%s.pb" % (model_codegen_dir, model_name), + "%s/model/%s.data" % (model_codegen_dir, model_name), + config_parser.parse_device_type( + model_config[YAMLKeyword.runtime]), model_codegen_dir, - bool(model_config.get(YAMLKeyword.obfuscate, 1))) + 
bool(model_config.get(YAMLKeyword.obfuscate, 1)), + model_graph_format == "code", + model_data_format == "code") if model_graph_format == ModelFormat.file: sh.mv("-f", - '%s/file/%s.pb' % (model_codegen_dir, model_name), + '%s/model/%s.pb' % (model_codegen_dir, model_name), model_output_dir) sh.mv("-f", - '%s/file/%s.data' % (model_codegen_dir, model_name), + '%s/model/%s.data' % (model_codegen_dir, model_name), model_output_dir) - sh.rm("-rf", '%s/code' % model_codegen_dir) else: if not embed_model_data: sh.mv("-f", - '%s/file/%s.data' % (model_codegen_dir, model_name), + '%s/model/%s.data' % (model_codegen_dir, model_name), model_output_dir) - sh.rm('%s/code/tensor_data.cc' % model_codegen_dir) sh.cp("-f", glob.glob("mace/codegen/models/*/code/*.h"), model_header_dir) diff --git a/tools/python/CMakeLists.txt b/tools/python/CMakeLists.txt new file mode 100644 index 00000000..e69de29b diff --git a/tools/python/README.md b/tools/python/README.md new file mode 100644 index 00000000..d9d755fd --- /dev/null +++ b/tools/python/README.md @@ -0,0 +1,137 @@ +# MACE Build and Test Tools + +## Clear Workspace +Before you do anything, clear the workspace used by build and test process. +```bash +tools/clear_workspace.sh +``` + +## Build Engine +Please make sure you have CMake installed. +```bash +RUNTIME=GPU bash tools/cmake/cmake-build-armeabi-v7a.sh +``` +which generate libraries in `build/cmake-build/armeabi-v7a`, you can use either static libraries or the `libmace.so` shared library. + +You can also build for other target abis. +The default build command builds engine that runs on CPU, you can modify the cmake file to support other hardware, or you can just set environment variable before building. +```bash +RUNTIME: GPU/HEXAGON/HTA/APU +``` + +## Model Conversion +When you have prepared your model, the first thing to do is write a model config. 
+ +```yaml +models: + mobilenet_v1: + platform: tensorflow + model_file_path: https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb + model_sha256_checksum: 71b10f540ece33c49a7b51f5d4095fc9bd78ce46ebf0300487b2ee23d71294e6 + subgraphs: + - input_tensors: + - input + input_shapes: + - 1,224,224,3 + output_tensors: + - MobilenetV1/Predictions/Reshape_1 + output_shapes: + - 1,1001 + runtime: gpu + +``` + +The following steps generate output to `build` directory which is the default build and test workspace. +Suppose you have the model config in `../mace-models/mobilenet-v1/mobilenet-v1.yml`. Then run + +```bash +python tools/python/convert.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml +``` + +which generate 4 files in `build/mobilenet_v1/model/` +``` +├── mobilenet_v1.pb (model file) +├── mobilenet_v1.data (param file) +├── mobilenet_v1_index.html (visualization page, you can open it in browser) +└── mobilenet_v1.pb_txt (model text file, which can be for debug use) +``` + +## Model Test and Benchmark +After model is converted, simply run +```bash +python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --validate +``` + +Or benchmark the model +```bash +python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --benchmark +``` + + +It will test your model on the device configured in the model config (`runtime`). +You can also test on other device by specify `--runtime=cpu (dsp/hta/apu)` if you previously build engine for the device. + +The log will be shown if `--vlog_level=2` is specified. + + +## Encrypt Model (optional) +Model can be encrypted by obfuscation. +```bash +python tools/python/encrypt.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml +``` +It will override `mobilenet_v1.pb` and `mobilenet_v1.data`. 
+If you want to compiled the model into a library, you should use options `--gencode_model --gencode_param` to generate model code, i.e., + +```bash +python tools/python/encrypt.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --gencode_model --gencode_param +``` +It will generate model code into `mace/codegen/models` and also generate a helper function `CreateMaceEngineFromCode` in `mace/codegen/engine/mace_engine_factory.h` by which you can create an engine with models built in it. + +After that you can rebuild the engine. +```bash +RUNTIME=GPU RUNMODE=code bash tools/cmake/cmake-build-armeabi-v7a.sh +``` +`RUNMODE=code` means you compile and link model library with MACE engine. + +When you test the model in code format, you should specify it in the script as follows. +```bash +python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --gencode_model --gencode_param +``` +Of course you can generate model code only, and use parameter file. + +## Precompile OpenCL (optional) +After you test model on GPU, it will generate compiled OpenCL binary file automatically in `build/mobilenet_v1/opencl` directory. +```bash +└── mobilenet_v1_compiled_opencl_kernel.MIX2S.sdm845.bin +``` +It specifies your test platform model and SoC. You can use it in production to accelerate the initialization. + + +## Auto Tune OpenCL kernels (optional) +MACE can auto tune OpenCL kernels used by models. You can specify `--tune` option. +```bash +python tools/python/run_model.py --config ../mace-models/mobilenet-v1/mobilenet-v1.yml --tune +``` +It will generate OpenCL tuned parameter binary file in `build/mobilenet_v1/opencl` directory. +```bash +└── mobilenet_v1_tuned_opencl_parameter.MIX2S.sdm845.bin +``` +It specifies your test platform model and SoC. You can use it in production to reduce latency on GPU. + + +## Multi Model Support (optional) +If multiple models are configured in config file. 
After you test it, it will generate more than one tuned parameter files. +Then you need to merge them together. +```bash +python tools/python/gen_opencl.py +``` +After that, it will generate one set of files into `build/opencl` directory. + +```bash +├── compiled_opencl_kernel.bin +└── tuned_opencl_parameter.bin +``` + +You can also generate code into the engine by specify `--gencode`, after which you should rebuild the engine. + + diff --git a/tools/python/convert.py b/tools/python/convert.py index 1ef320c5..24ad303b 100644 --- a/tools/python/convert.py +++ b/tools/python/convert.py @@ -12,63 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +# python tools/python/convert.py \ +# --config ../mace-models/mobilenet-v2/mobilenet-v2.yml + from __future__ import absolute_import from __future__ import division from __future__ import print_function import argparse -import os import sys import numpy as np +import shutil +import tempfile from utils import config_parser +from utils.config_parser import DataFormat +from utils.config_parser import DeviceType +from utils.config_parser import Platform from utils import util from utils.util import mace_check +from utils.config_parser import normalize_model_config +from utils.config_parser import ModelKeys from py_proto import mace_pb2 from transform import base_converter as cvt from transform import transformer from visualize import visualize_model -device_type_map = {'cpu': cvt.DeviceType.CPU.value, - 'gpu': cvt.DeviceType.GPU.value, - 'dsp': cvt.DeviceType.HEXAGON.value, - 'hta': cvt.DeviceType.HTA.value, - 'apu': cvt.DeviceType.APU.value, - 'cpu+gpu': cvt.DeviceType.CPU.value} - -data_format_map = { - 'NONE': cvt.DataFormat.NONE, - 'NHWC': cvt.DataFormat.NHWC, - 'NCHW': cvt.DataFormat.NCHW, - 'OIHW': cvt.DataFormat.OIHW, -} - -data_type_map = { - 'float32': mace_pb2.DT_FLOAT, - 'int32': mace_pb2.DT_INT32, -} - - -def parse_data_type(data_type, quantize): - if 
quantize or data_type == 'fp32_fp32': - return mace_pb2.DT_FLOAT - else: - return mace_pb2.DT_HALF - - -def split_shape(shape): - if shape.strip() == "": - return [] - else: - return shape.split(',') - - -def parse_int_array_from_str(ints_str): - return [int(i) for i in split_shape(ints_str)] - - -def parse_float_array_from_str(floats_str): - return [float(i) for i in floats_str.split(',')] - def transpose_shape(shape, dst_order): t_shape = [0] * len(shape) @@ -77,52 +45,32 @@ def transpose_shape(shape, dst_order): return t_shape -def to_list(x): - if isinstance(x, list): - return x - else: - return [x] - - -def separate_params(mace_model): - tensors = mace_model.tensors - params = mace_pb2.NetDef() - params.tensors.extend(tensors) - - model = mace_model - del model.tensors[:] - return model, params - - def convert(conf, output): - if not os.path.exists(output): - os.mkdir(output) - for model_name, model_conf in conf["models"].items(): - model_output = output + "/" + model_name - if not os.path.exists(model_output): - os.mkdir(model_output) - - subgraph = model_conf["subgraphs"][0] - del model_conf["subgraphs"] - model_conf.update(subgraph) - - model_file = util.download_or_get_file(model_conf["model_file_path"], - model_conf[ - "model_sha256_checksum"], - model_output) - model_conf["model_file_path"] = model_file - if "weight_file_path" in model_conf: - weight_file = util.download_or_get_file( - model_conf["weight_file_path"], - model_conf["weight_sha256_checksum"], model_output) - model_conf["weight_file_path"] = weight_file + model_output = output + "/" + model_name + "/model" + org_model_dir = output + "/" + model_name + "/org_model" + util.mkdir_p(model_output) + util.mkdir_p(org_model_dir) + + model_conf = normalize_model_config(model_conf) + + model_file = util.download_or_get_model( + model_conf[ModelKeys.model_file_path], # noqa + model_conf[ModelKeys.model_sha256_checksum], # noqa + output + "/" + model_name + "/org_model") + 
model_conf[ModelKeys.model_file_path] = model_file + if ModelKeys.weight_file_path in model_conf: + weight_file = util.download_or_get_model( + model_conf[ModelKeys.weight_file_path], + model_conf[ModelKeys.weight_sha256_checksum], "/tmp/") + model_conf[ModelKeys.weight_file_path] = weight_file + # TODO: remove the following after quantize tool is made - if "quantize_range_file" in model_conf: - range_file = util.download_or_get_file( - model_conf["quantize_range_file"], + if ModelKeys.quantize_range_file in model_conf: + range_file = util.download_or_get_model( + model_conf[ModelKeys.quantize_range_file], "", model_output) - model_conf["quantize_range_file"] = range_file + model_conf[ModelKeys.quantize_range_file] = range_file mace_model = convert_model(model_conf) @@ -132,7 +80,7 @@ def convert(conf, output): model_output) visualizer.save_html() except: # noqa - print("Failed to visualize model:", sys.exc_info()[0]) + print("Failed to visualize model:", sys.exc_info()) model, params = merge_params(mace_model) @@ -147,115 +95,64 @@ def convert(conf, output): def convert_model(conf): - print(conf) - platform = conf["platform"] - mace_check(platform in ['tensorflow', 'caffe', 'onnx'], - "platform not supported") - runtime = conf["runtime"] - mace_check( - runtime in ['cpu', 'gpu', 'dsp', 'hta', 'apu', 'cpu+gpu'], - "runtime not supported") - option = cvt.ConverterOption() - if "graph_optimize_options" in conf: - option.transformer_option = conf["graph_optimize_options"] - option.winograd = conf.get("winograd", 0) - option.quantize = bool(conf.get("quantize", 0)) - option.quantize_large_weights = bool(conf.get("quantize_large_weights", 0)) - option.quantize_range_file = conf.get("quantize_range_file", "") - option.change_concat_ranges = bool(conf.get("change_concat_ranges", 0)) - option.cl_mem_type = conf.get("cl_mem_type", "image") - option.device = device_type_map[conf.get("runtime", "cpu")] - option.data_type = parse_data_type(conf.get("data_type", "fp16_fp32"), - 
option.quantize) - input_tensors = to_list(conf["input_tensors"]) - input_shapes = [parse_int_array_from_str(shape) for shape in - to_list(conf["input_shapes"])] - mace_check(len(input_tensors) == len(input_shapes), - "input node count and shape count do not match") - input_count = len(input_tensors) - input_data_types = [data_type_map[dt] for dt in - to_list(conf.get("input_data_types", - ["float32"]))] - if len(input_data_types) == 1 and input_count > 1: - input_data_types = [input_data_types[0]] * input_count - mace_check(len(input_data_types) == input_count, - "the number of input_data_types should be " - "the same as input tensors") - input_data_formats = [data_format_map[df] for df in - to_list(conf.get("input_data_formats", - ["NHWC"]))] - if len(input_data_formats) == 1 and input_count > 1: - input_data_formats = [input_data_formats[0]] * input_count - mace_check(len(input_data_formats) == input_count, - "the number of input_data_formats should be " - "the same as input tensors") - input_ranges = [parse_float_array_from_str(r) for r in - to_list(conf.get("input_ranges", - ["-1.0,1.0"]))] - if len(input_ranges) == 1 and input_count > 1: - input_ranges = [input_ranges[0]] * input_count - mace_check(len(input_ranges) == input_count, - "the number of input_ranges should be " - "the same as input tensors") - for i in range(len(input_tensors)): + + if ModelKeys.graph_optimize_options in conf: + option.transformer_option = conf[ModelKeys.graph_optimize_options] + if ModelKeys.winograd in conf: + option.winograd = conf[ModelKeys.winograd] + if ModelKeys.quantize in conf: + option.quantize = conf[ModelKeys.quantize] + if ModelKeys.quantize_large_weights in conf: + option.quantize_large_weights = conf[ModelKeys.quantize_large_weights] + if ModelKeys.quantize_range_file in conf: + option.quantize_range_file = conf[ModelKeys.quantize_range_file] + if ModelKeys.change_concat_ranges in conf: + option.change_concat_ranges = conf[ModelKeys.change_concat_ranges] + if 
ModelKeys.cl_mem_type in conf: + option.cl_mem_type = conf[ModelKeys.cl_mem_type] + if ModelKeys.runtime in conf: + option.device = conf[ModelKeys.runtime] + if option.device == DeviceType.CPU_GPU: + # when convert, cpu and gpu share the same model + option.device = DeviceType.CPU + # we don't need `value`, but to be consistent with legacy code + # used by `base_converter` + option.device = option.device.value + + option.data_type = conf[ModelKeys.data_types] + + for i in range(len(conf[ModelKeys.input_tensors])): input_node = cvt.NodeInfo() - input_node.name = input_tensors[i] - input_node.shape = input_shapes[i] - input_node.data_type = input_data_types[i] - input_node.data_format = input_data_formats[i] - if (input_node.data_format == cvt.DataFormat.NCHW and len( - input_node.shape) == 4): + input_node.name = conf[ModelKeys.input_tensors][i] + input_node.shape = conf[ModelKeys.input_shapes][i] + input_node.data_type = conf[ModelKeys.input_data_types][i] + input_node.data_format = conf[ModelKeys.input_data_formats][i] + if (input_node.data_format == DataFormat.NCHW and len( + input_node.shape) == 4): input_node.shape = transpose_shape(input_node.shape, [0, 2, 3, 1]) - input_node.data_format = cvt.DataFormat.NHWC - input_node.range = input_ranges[i] + input_node.data_format = DataFormat.NHWC + input_node.range = conf[ModelKeys.input_ranges][i] option.add_input_node(input_node) - output_tensors = to_list(conf["output_tensors"]) - output_shapes = [parse_int_array_from_str(shape) for shape in - to_list(conf["output_shapes"])] - mace_check(len(output_tensors) == len(output_shapes), - "output node count and shape count do not match") - output_count = len(output_tensors) - output_data_types = [data_type_map[dt] for dt in - to_list(conf.get("output_data_types", - ["float32"]))] - if len(output_data_types) == 1 and output_count > 1: - output_data_types = [output_data_types[0]] * output_count - mace_check(len(output_data_types) == output_count, - "the number of 
output_data_types should be " - "the same as output tensors") - output_data_formats = [data_format_map[df] for df in - to_list(conf.get("output_data_formats", - ["NHWC"]))] - if len(output_data_formats) == 1 and output_count > 1: - output_data_formats = [output_data_formats[0]] * output_count - mace_check(len(output_data_formats) == output_count, - "the number of output_data_formats should be " - "the same as output tensors") - for i in range(len(output_tensors)): + for i in range(len(conf[ModelKeys.output_tensors])): output_node = cvt.NodeInfo() - output_node.name = output_tensors[i] - output_node.shape = output_shapes[i] - output_node.data_type = output_data_types[i] - output_node.data_format = output_data_formats[i] - if output_node.data_format == cvt.DataFormat.NCHW and len( + output_node.name = conf[ModelKeys.output_tensors][i] + output_node.shape = conf[ModelKeys.output_shapes][i] + output_node.data_type = conf[ModelKeys.output_data_types][i] + output_node.data_format = conf[ModelKeys.output_data_formats][i] + if output_node.data_format == DataFormat.NCHW and len( output_node.shape) == 4: output_node.shape = transpose_shape(output_node.shape, [0, 2, 3, 1]) - output_node.data_format = cvt.DataFormat.NHWC + output_node.data_format = DataFormat.NHWC option.add_output_node(output_node) - if "check_tensors" in conf: - check_tensors = to_list(conf["check_tensors"]) - check_tensors_shapes = [parse_int_array_from_str(shape) for shape in - to_list(conf["check_shapes"])] - mace_check(len(check_tensors) == len(check_tensors_shapes), - "check tensors count and shape count do not match.") - for i in range(len(check_tensors)): + if ModelKeys.check_tensors in conf: + for i in range(len(conf[ModelKeys.check_tensors])): check_node = cvt.NodeInfo() - check_node.name = check_tensors[i] - check_node.shape = check_tensors_shapes[i] + check_node.name = conf[ModelKeys.check_tensors][i] + check_node.shape = conf[ModelKeys.check_shapes][i] option.add_check_node(check_node) else: 
option.check_nodes = option.output_nodes @@ -263,17 +160,17 @@ def convert_model(conf): option.build() print("Transform model to one that can better run on device") - - if platform == 'tensorflow': + platform = conf[ModelKeys.platform] + if platform == Platform.TENSORFLOW: from transform import tensorflow_converter converter = tensorflow_converter.TensorflowConverter( option, conf["model_file_path"]) - elif platform == 'caffe': + elif platform == Platform.CAFFE: from transform import caffe_converter converter = caffe_converter.CaffeConverter(option, conf["model_file_path"], conf["weight_file_path"]) - elif platform == 'onnx': + elif platform == Platform.ONNX: from transform import onnx_converter converter = onnx_converter.OnnxConverter(option, conf["model_file_path"]) @@ -285,14 +182,15 @@ def convert_model(conf): option, output_graph_def) output_graph_def, quantize_activation_info = mace_transformer.run() - if option.device in [cvt.DeviceType.HEXAGON.value, - cvt.DeviceType.HTA.value]: + runtime = conf[ModelKeys.runtime] + if runtime in [DeviceType.HEXAGON, + DeviceType.HTA]: from transform import hexagon_converter converter = hexagon_converter.HexagonConverter( option, output_graph_def, quantize_activation_info) output_graph_def = converter.run() - elif runtime == 'apu': - mace_check(platform == "tensorflow", + elif runtime == DeviceType.APU: + mace_check(platform == Platform.TENSORFLOW, "apu only support model from tensorflow") from transform import apu_converter converter = apu_converter.ApuConverter( @@ -366,7 +264,7 @@ def parse_args(): parser.add_argument( '--output', type=str, - default=".", + default="build", help="output dir") flgs, _ = parser.parse_known_args() return flgs diff --git a/tools/python/encrypt.py b/tools/python/encrypt.py index 79440f45..b612d654 100644 --- a/tools/python/encrypt.py +++ b/tools/python/encrypt.py @@ -22,10 +22,13 @@ import os import hashlib from jinja2 import Environment, FileSystemLoader from py_proto import mace_pb2 +from 
utils import device from utils import util -from transform import base_converter as cvt from utils.util import mace_check +from utils.util import MaceLogger +from utils import config_parser from utils.config_parser import CPP_KEYWORDS +from utils.config_parser import ModelKeys GENERATED_NAME = set() @@ -99,9 +102,8 @@ def obfuscate_name(model): def save_model_to_code(namespace, model, params, model_checksum, - params_checksum, device, output): - if not os.path.exists(output): - os.mkdir(output) + params_checksum, device, output, gencode_params): + util.mkdir_p(output) cwd = os.path.dirname(__file__) j2_env = Environment( loader=FileSystemLoader(cwd + "/template"), trim_blocks=True) @@ -120,24 +122,18 @@ def save_model_to_code(namespace, model, params, model_checksum, f.write(source) counter += 1 - template_name = "tensor_data.jinja2" - source = j2_env.get_template(template_name).render( - tag=namespace, - model_data_size=len(params), - model_data=params) - with open(output + "/tensor_data.cc", "w") as f: - f.write(source) + if gencode_params: + template_name = "tensor_data.jinja2" + source = j2_env.get_template(template_name).render( + tag=namespace, + model_data_size=len(params), + model_data=params) + with open(output + "/tensor_data.cc", "w") as f: + f.write(source) template_name = "operator.jinja2" counter = 0 op_size = len(model.op) - try: - device = cvt.DeviceType[device.upper()] - except: # noqa - if device.upper() == "DSP": - device = cvt.DeviceType.HEXAGON - else: - device = cvt.DeviceType.CPU for start in range(0, op_size, 10): source = j2_env.get_template(template_name).render( @@ -170,8 +166,7 @@ def save_model_to_code(namespace, model, params, model_checksum, def save_model_to_file(model_name, model, params, output): - if not os.path.exists(output): - os.mkdir(output) + util.mkdir_p(output) with open(output + "/" + model_name + ".pb", "wb") as f: f.write(model.SerializeToString()) with open(output + "/" + model_name + ".data", "wb") as f: @@ -179,7 
+174,7 @@ def save_model_to_file(model_name, model, params, output): def encrypt(model_name, model_file, params_file, device, output, - is_obfuscate=False): + is_obfuscate=False, gencode_model=False, gencode_params=False): model_checksum = util.file_checksum(model_file) params_checksum = util.file_checksum(params_file) @@ -191,9 +186,11 @@ def encrypt(model_name, model_file, params_file, device, output, if is_obfuscate: obfuscate_name(model) - save_model_to_file(model_name, model, params, output + "/file/") - save_model_to_code(model_name, model, params, model_checksum, - params_checksum, device, output + "/code/") + save_model_to_file(model_name, model, params, output) + if gencode_model: + save_model_to_code(model_name, model, params, model_checksum, + params_checksum, device, output + "/code/", + gencode_params) def parse_args(): @@ -216,22 +213,89 @@ def parse_args(): default='cpu', help="cpu/gpu/hexagon/hta/apu") parser.add_argument( - '--output', + '--config', type=str, - default=".", - help="output dir") + help="model config") parser.add_argument( - "--obfuscate", + "--no_obfuscate", action="store_true", help="obfuscate model names") + parser.add_argument( + "--gencode_model", + action="store_true", + help="generate model code") + parser.add_argument( + "--gencode_param", + action="store_true", + help="generate params code") + parser.add_argument( + '--output', + type=str, + default="build", + help="output dir") flgs, _ = parser.parse_known_args() - mace_check(flags.model_name not in CPP_KEYWORDS, "model name cannot be cpp" - "keywords") + mace_check(flgs.model_name not in CPP_KEYWORDS, "model name cannot be cpp" + "keywords") return flgs +def gen_mace_engine_factory(model_name, embed_model_data, output): + util.mkdir_p(output) + cwd = os.path.dirname(__file__) + j2_env = Environment( + loader=FileSystemLoader(cwd + "/template"), trim_blocks=True) + # generate mace_run BUILD file + template_name = 'mace_engine_factory.h.jinja2' + model_name = 
list(model_name) + source = j2_env.get_template(template_name).render( + model_tags=model_name, + embed_model_data=embed_model_data, + ) + with open(output + '/mace_engine_factory.h', "w") as f: + f.write(source) + + if __name__ == '__main__': flags = parse_args() - encrypt(flags.model_name, flags.model_file, flags.params_file, - flags.device, flags.output, flags.obfuscate) + codegen_dir = "mace/codegen/models" + device.execute("rm -rf %s/*" % codegen_dir) + + models = [] + if flags.config: + conf = config_parser.parse(flags.config) + + for name, model_conf in conf["models"].items(): + model_conf = config_parser.normalize_model_config(model_conf) + if not flags.model_name or name == flags.model_name: + MaceLogger.info("Encrypt model %s" % name) + encrypt(name, + "build/%s/model/%s.pb" % (name, name), + "build/%s/model/%s.data" % (name, name), + model_conf[ModelKeys.runtime], + codegen_dir + "/" + name, + not flags.no_obfuscate, + flags.gencode_model, + flags.gencode_param) + models.append(name) + os.rename("%s/%s/%s.pb" % (codegen_dir, name, name), + "build/%s/model/%s.pb" % (name, name)) + os.rename("%s/%s/%s.data" % (codegen_dir, name, name), + "build/%s/model/%s.data" % (name, name)) + else: + device_type = config_parser.parse_device_type(flags.device) + encrypt(flags.model_name, flags.model_file, flags.params_file, + device_type, codegen_dir, not flags.no_obfuscate, + flags.gencode_model, flags.gencode_param) + models.append(flags.model_name) + os.rename( + "%s/%s/%s.pb" % (codegen_dir, flags.model_name, flags.model_name), + "build/%s/model/%s.pb" % (flags.model_name, flags.model_name)) + os.rename( + "%s/%s/%s.data" % (codegen_dir, flags.model_name, + flags.model_name), + "build/%s/model/%s.data" % (flags.model_name, flags.model_name)) + + if flags.gencode_model: + gen_mace_engine_factory(models, flags.gencode_param, + "mace/codegen/engine") diff --git a/tools/python/gen_opencl.py b/tools/python/gen_opencl.py new file mode 100644 index 00000000..ff82e528 --- 
/dev/null +++ b/tools/python/gen_opencl.py @@ -0,0 +1,207 @@ +# Copyright 2019 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import jinja2 +import os +import struct +import numpy as np + +from utils import util +from utils.util import MaceLogger +from utils.util import mace_check + + +def generate_opencl_code(binary_file_name, load_func_name, size_func_name, + output_path): + binary_array = [] + if os.path.exists(binary_file_name): + with open(binary_file_name, 'rb') as f: + binary_array = np.fromfile(f, dtype=np.uint8) + + cwd = os.path.dirname(__file__) + env = jinja2.Environment( + loader=jinja2.FileSystemLoader(cwd + "/template")) + content = env.get_template('file_binary.cc.jinja2').render( + data=binary_array, + data_size=len(binary_array), + load_func_name=load_func_name, + size_func_name=size_func_name) + + if os.path.exists(output_path): + os.remove(output_path) + with open(output_path, "w") as w_file: + w_file.write(content) + + +def merge_opencl_binaries(opencl_binaries, + output_file): + platform_info_key = 'mace_opencl_precompiled_platform_info_key' + + kvs = {} + for binary in opencl_binaries: + if not os.path.exists(binary): + MaceLogger.warning("OpenCL bin %s not found" % binary) + continue + + with open(binary, "rb") as f: + binary_array = np.fromfile(f, dtype=np.uint8) + + idx = 
0 + size, = struct.unpack("Q", binary_array[idx:idx + 8]) + idx += 8 + for _ in range(size): + key_size, = struct.unpack("i", binary_array[idx:idx + 4]) + idx += 4 + key, = struct.unpack( + str(key_size) + "s", binary_array[idx:idx + key_size]) + idx += key_size + value_size, = struct.unpack("i", binary_array[idx:idx + 4]) + idx += 4 + if key == platform_info_key and key in kvs: + mace_check( + (kvs[key] == binary_array[idx:idx + value_size]).all(), + "There exists more than one OpenCL version for models:" + " %s vs %s " % + (kvs[key], binary_array[idx:idx + value_size])) + else: + kvs[key] = binary_array[idx:idx + value_size] + idx += value_size + + output_byte_array = bytearray() + data_size = len(kvs) + output_byte_array.extend(struct.pack("Q", data_size)) + for key, value in kvs.items(): + key_size = len(key) + output_byte_array.extend(struct.pack("i", key_size)) + output_byte_array.extend(struct.pack(str(key_size) + "s", key)) + value_size = len(value) + output_byte_array.extend(struct.pack("i", value_size)) + output_byte_array.extend(value) + + np.array(output_byte_array).tofile(output_file) + + +def merge_opencl_parameters(params_files, + output_file): + kvs = {} + for params in params_files: + if not os.path.exists(params): + MaceLogger.warning("Tune param %s not found" % params) + continue + + with open(params, "rb") as f: + binary_array = np.fromfile(f, dtype=np.uint8) + + idx = 0 + size, = struct.unpack("Q", binary_array[idx:idx + 8]) + idx += 8 + for _ in range(size): + key_size, = struct.unpack("i", binary_array[idx:idx + 4]) + idx += 4 + key, = struct.unpack( + str(key_size) + "s", binary_array[idx:idx + key_size]) + idx += key_size + value_size, = struct.unpack("i", binary_array[idx:idx + 4]) + idx += 4 + kvs[key] = binary_array[idx:idx + value_size] + idx += value_size + + output_byte_array = bytearray() + data_size = len(kvs) + output_byte_array.extend(struct.pack("Q", data_size)) + for key, value in kvs.items(): + key_size = len(key) + 
output_byte_array.extend(struct.pack("i", key_size)) + output_byte_array.extend(struct.pack(str(key_size) + "s", key)) + value_size = len(value) + output_byte_array.extend(struct.pack("i", value_size)) + output_byte_array.extend(value) + + np.array(output_byte_array).tofile(output_file) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--binary_files', + type=str, + default="", + help="opencl binary files") + parser.add_argument( + '--tuning_files', + type=str, + default="", + help="tuning params files") + parser.add_argument( + '--output', + type=str, + default="build", + help="output dir") + parser.add_argument( + "--gencode", + action="store_true", + help="generate code") + flgs, _ = parser.parse_known_args() + return flgs + + +if __name__ == '__main__': + flags = parse_args() + util.mkdir_p(flags.output) + opencl_binary_files = [] + if flags.binary_files: + opencl_binary_files = flags.binary_files.split(",") + opencl_tuning_files = [] + if flags.tuning_files: + opencl_tuning_files = flags.tuning_files.split(",") + + compiled_opencl_kernel_prefix = "compiled_opencl_kernel" + tuned_opencl_parameter_prefix = "tuned_opencl_parameter" + + if not opencl_binary_files and not opencl_tuning_files: + for root, dirs, files in os.walk("build", topdown=False): + for name in files: + if compiled_opencl_kernel_prefix in name: + opencl_binary_files.append(os.path.join(root, name)) + elif tuned_opencl_parameter_prefix in name: + opencl_tuning_files.append(os.path.join(root, name)) + + opencl_dir = flags.output + "/opencl" + util.mkdir_p(opencl_dir) + merged_opencl_bin_file = "%s/%s.bin" % (opencl_dir, + compiled_opencl_kernel_prefix) + merged_opencl_tuning_file = "%s/%s.bin" % (opencl_dir, + tuned_opencl_parameter_prefix) + + merge_opencl_binaries(opencl_binary_files, + merged_opencl_bin_file) + if flags.gencode: + util.mkdir_p('mace/codegen/opencl') + generate_opencl_code(merged_opencl_bin_file, + "LoadOpenCLBinary", + "OpenCLBinarySize", + 
"mace/codegen/opencl/opencl_binary.cc") + + merge_opencl_binaries(opencl_tuning_files, + merged_opencl_tuning_file) + if flags.gencode: + generate_opencl_code(merged_opencl_tuning_file, + "LoadOpenCLParameter", + "LoadOpenCLParameter", + "mace/codegen/opencl/opencl_parameter.cc") diff --git a/tools/python/py_proto/__init__.py b/tools/python/py_proto/__init__.py index c281485f..da5f48fd 100644 --- a/tools/python/py_proto/__init__.py +++ b/tools/python/py_proto/__init__.py @@ -18,12 +18,17 @@ from __future__ import print_function import os from utils import device +from utils.util import MaceLogger cwd = os.path.dirname(__file__) # TODO: Remove bazel deps -device.execute("bazel build //mace/proto:mace_py") -device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd) +try: + device.execute("bazel build //mace/proto:mace_py") + device.execute("cp -f bazel-genfiles/mace/proto/mace_pb2.py %s" % cwd) -device.execute("bazel build //third_party/caffe:caffe_py") -device.execute("cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd) + device.execute("bazel build //third_party/caffe:caffe_py") + device.execute( + "cp -f bazel-genfiles/third_party/caffe/caffe_pb2.py %s" % cwd) +except: # noqa + MaceLogger.warning("No bazel, use cmake.") diff --git a/tools/python/run_model.py b/tools/python/run_model.py new file mode 100644 index 00000000..52e081fa --- /dev/null +++ b/tools/python/run_model.py @@ -0,0 +1,379 @@ +# Copyright 2019 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import tempfile +import shutil +import numpy as np + +from py_proto import mace_pb2 +from utils import util +from utils import device +from utils import config_parser +from utils.config_parser import DeviceType +from utils.target import Target +from utils.config_parser import ModelKeys +from utils.util import MaceLogger +from utils.util import mace_check +import run_target +import validate + +""" +Tool for mace_run: + +python tools/python/run_model.py \ +--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --build --validate +python tools/python/run_model.py \ +--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --benchmark +python tools/python/run_model.py \ +--config ../mace-models/mobilenet-v1/mobilenet-v1.yml --runtime=cpu + +""" + + +def join_2d_array(xs): + return ":".join([",".join([str(y) for y in x]) for x in xs]) + + +def build_engine(flags): + cmake_shell = os.path.abspath( + os.path.dirname( + __file__)) + "/../cmake/cmake-build-%s.sh" % flags.target_abi + os.environ["BUILD_DIR"] = flags.build_dir + "/" + flags.target_abi + if flags.runtime: + os.environ["RUNTIME"] = config_parser.parse_device_type( + flags.runtime).name + if flags.gencode_model: + os.environ["RUNMODE"] = "code" + device.execute("bash " + cmake_shell) + + +def run_models(flags, args): + if flags.device_conf: + device_conf = config_parser.parse_device_info(flags.device_conf) + device.ArmLinuxDevice.set_devices(device_conf) + + run_devices = device.choose_devices(flags.target_abi, flags.target_socs) + MaceLogger.info("Run on devices: %s" % run_devices) + + for device_id in run_devices: + dev = device.crete_device(flags.target_abi, device_id) + run_models_for_device(flags, args, dev) + + +def run_models_for_device(flags, args, dev): + conf = 
config_parser.parse(flags.config) + for name, model_conf in conf["models"].items(): + if not flags.model_name or name == flags.model_name: + MaceLogger.info("Run model %s" % name) + model_conf = config_parser.normalize_model_config(model_conf) + run_model_for_device(flags, args, dev, name, model_conf) + + +def run_model_for_device(flags, args, dev, model_name, model_conf): + runtime = flags.runtime + target_abi = flags.target_abi + install_dir = run_target.default_install_dir(target_abi) + "/" + model_name + sysdir = install_dir + "/interior" + dev.mkdir(sysdir) + + if not runtime: + runtime = model_conf[ModelKeys.runtime] + if runtime == DeviceType.CPU_GPU: + runtime = DeviceType.GPU + else: + runtime = config_parser.parse_device_type(runtime) + + # install models to devices + workdir = flags.output + "/" + model_name + model_file = model_name + ".pb" + model_data_file = model_name + ".data" + model_path = workdir + "/model/" + model_file + model_data_path = workdir + "/model/" + model_data_file + if os.path.exists(model_path) and os.path.exists(model_data_path): + dev.install(Target(model_path), install_dir) + dev.install(Target(model_data_path), install_dir) + else: + MaceLogger.warning("No models exist in %s, use --model_file and" + " --model_data_file specified in args" % model_path) + + if ModelKeys.check_tensors in model_conf: + model_conf[ModelKeys.output_tensors] = model_conf[ + ModelKeys.check_tensors] + model_conf[ModelKeys.output_shapes] = model_conf[ + ModelKeys.check_shapes] + + model_file_path = "" + if not flags.gencode_model: + model_file_path = install_dir + "/" + model_file + model_data_file_path = "" + if not flags.gencode_param: + model_data_file_path = install_dir + "/" + model_data_file + model_args = {"model_name": model_name, + "model_file": model_file_path, + "model_data_file": model_data_file_path, + "input_node": ",".join( + model_conf[ModelKeys.input_tensors]), + "input_shape": join_2d_array( + model_conf[ModelKeys.input_shapes]), + 
"output_node": ",".join( + model_conf[ModelKeys.output_tensors]), + "output_shape": join_2d_array( + model_conf[ModelKeys.output_shapes]), + "input_data_format": ",".join( + [df.name for df in + model_conf[ModelKeys.input_data_formats]]), + "output_data_format": ",".join( + [df.name for df in + model_conf[ModelKeys.output_data_formats]]), + "device": runtime.name + } + + opts = ["--%s=%s" % (arg_key, arg_val) for arg_key, arg_val in + model_args.items()] + args + should_generate_data = (flags.validate + or flags.tune or "--benchmark" in opts) + + if should_generate_data: + tmpdirname = tempfile.mkdtemp() + input_file_prefix = tmpdirname + "/" + model_name + + if ModelKeys.validation_inputs_data in model_conf: + input_tensor = model_conf[ModelKeys.input_tensors] + input_data = model_conf[ModelKeys.validation_inputs_data] + mace_check(len(input_tensor) == len(input_data), + "len(input_tensor) != len(validate_data") + + for i in range(len(input_tensor)): + util.download_or_get_file( + model_conf[ModelKeys.validation_inputs_data][i], "", + util.formatted_file_name(input_file_prefix, + input_tensor[i])) + else: + generate_input_data(input_file_prefix, + model_conf[ModelKeys.input_tensors], + model_conf[ModelKeys.input_shapes], + model_conf[ModelKeys.input_ranges], + model_conf[ModelKeys.input_data_types]) + + dev.install(Target(tmpdirname), install_dir + "/validate_in") + target_input_file = "%s/validate_in/%s" % ( + install_dir, model_name) + target_output_dir = "%s/validate_out" % install_dir + dev.mkdir(target_output_dir) + target_output_file = target_output_dir + "/" + model_name + opts += ["--input_file=%s" % target_input_file, + "--output_file=%s" % target_output_file] + + # run + envs = flags.envs.split(" ") + ["MACE_INTERNAL_STORAGE_PATH=%s" % sysdir] + if flags.tune: + envs += ["MACE_TUNING=1", + "MACE_RUN_PARAMETER_PATH=%s/interior/tune_params" + % install_dir] + opts += ["--round=0"] + if flags.vlog_level > 0: + envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % 
flags.vlog_level] + + build_dir = flags.build_dir + "/" + target_abi + libs = [] + if model_conf[ModelKeys.runtime] == DeviceType.HEXAGON: + libs += ["third_party/nnlib/%s/libhexagon_controller.so" % target_abi] + elif model_conf[ModelKeys.runtime] == DeviceType.APU: + libs += ["third_party/apu/libapu-frontend.so"] + + target = Target(build_dir + "/install/bin/mace_run", libs, + opts=opts, envs=envs) + run_target.run_target(target_abi, install_dir, target, + device_ids=flags.target_socs) + + if runtime == DeviceType.GPU: + opencl_dir = workdir + "/opencl" + util.mkdir_p(opencl_dir) + dev.pull( + Target(install_dir + "/interior/mace_cl_compiled_program.bin"), + "%s/%s_compiled_opencl_kernel.%s.%s.bin" % ( + opencl_dir, model_name, + dev.info()["ro.product.model"].replace(' ', ''), + dev.info()["ro.board.platform"])) + if flags.tune: + dev.pull(Target(install_dir + "/interior/tune_params"), + "%s/%s_tuned_opencl_parameter.%s.%s.bin" % ( + opencl_dir, model_name, + dev.info()["ro.product.model"].replace(' ', ''), + dev.info()["ro.board.platform"])) + + if flags.validate: + validate_model_file = util.download_or_get_model( + model_conf[ModelKeys.model_file_path], + model_conf[ModelKeys.model_sha256_checksum], + tmpdirname) + + validate_weight_file = "" + if ModelKeys.weight_file_path in model_conf: + validate_weight_file = util.download_or_get_model( + model_conf[ModelKeys.weight_file_path], + model_conf[ModelKeys.weight_sha256_checksum], + tmpdirname) + + dev.pull(Target(target_output_dir), tmpdirname + "/validate_out") + output_file_prefix = tmpdirname + "/validate_out/" + model_name + validate.validate(model_conf[ModelKeys.platform], + validate_model_file, + validate_weight_file, + input_file_prefix, + output_file_prefix, + model_conf[ModelKeys.input_shapes], + model_conf[ModelKeys.output_shapes], + model_conf[ModelKeys.input_data_formats], + model_conf[ModelKeys.output_data_formats], + model_conf[ModelKeys.input_tensors], + model_conf[ModelKeys.output_tensors], + 
flags.validate_threshold, + model_conf[ModelKeys.input_data_types], + flags.backend, + "", + "") + if should_generate_data: + shutil.rmtree(tmpdirname) + + +def generate_input_data(input_file, input_node, input_shape, input_ranges, + input_data_type): + np.random.seed() + for i in range(len(input_node)): + data = np.random.random(input_shape[i]) * ( + input_ranges[i][1] - input_ranges[i][0]) + input_ranges[i][0] + input_file_name = util.formatted_file_name(input_file, input_node[i]) + MaceLogger.info('Generate input file: %s' % input_file_name) + if input_data_type[i] == mace_pb2.DT_FLOAT: + np_data_type = np.float32 + elif input_data_type[i] == mace_pb2.DT_INT32: + np_data_type = np.int32 + + data.astype(np_data_type).tofile(input_file_name) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--config", + type=str, + default="", + help="yaml conf path" + ) + parser.add_argument( + "--model_name", + type=str, + default="", + help="model name in yaml conf" + ) + parser.add_argument( + "--target_abi", + type=str, + default="armeabi-v7a", + help="Target ABI: host, armeabi-v7a, arm64-v8a," + " arm-linux-gnueabihf, aarch64-linux-gnu" + ) + parser.add_argument( + "--target_socs", + type=str, + default="all", + help="serialno for adb connection," + " username@ip for arm linux," + " host for host" + " | all | random" + ) + parser.add_argument( + "--device_conf", + type=str, + default="", + help="device yaml config path" + ) + parser.add_argument( + "--runtime", + type=str, + default="", + help="cpu/gpu/dsp/hta/apu" + ) + parser.add_argument("--envs", type=str, default="", + help="Environment vars: " + " MACE_OUT_OF_RANGE_CHECK=1, " + " MACE_OPENCL_PROFILING=1," + " MACE_INTERNAL_STORAGE_PATH=/path/to," + " LD_PRELOAD=/path/to") + parser.add_argument( + "--validate", + action="store_true", + help="enable validate" + ) + parser.add_argument( + "--validate_threshold", + type=float, + default="0.99", + help="validate threshold" + ) + 
parser.add_argument( + "--backend", + type=str, + default="tensorflow", + help="onnx backend framework") + parser.add_argument( + "--tune", + action="store_true", + help="enable tuning" + ) + parser.add_argument( + "--build_dir", + type=str, + default="build/cmake-build", + help="cmake build dir" + ) + parser.add_argument( + "--build", + action="store_true", + help="if build before run" + ) + parser.add_argument( + '--output', + type=str, + default="build", + help="output dir") + parser.add_argument( + '--vlog_level', + type=int, + default="0", + help="vlog level") + parser.add_argument( + "--gencode_model", + action="store_true", + help="use compiled model") + parser.add_argument( + "--gencode_param", + action="store_true", + help="use compiled param") + + return parser.parse_known_args() + + +if __name__ == "__main__": + flags, args = parse_args() + if flags.build: + build_engine(flags) + run_models(flags, args) diff --git a/tools/python/run.py b/tools/python/run_target.py similarity index 69% rename from tools/python/run.py rename to tools/python/run_target.py index 6fcd8e2d..41d89c67 100644 --- a/tools/python/run.py +++ b/tools/python/run_target.py @@ -12,52 +12,33 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ +""" +Internal tool for mace_cc_benchmark, mace_cc_test: + +python tools/python/run_target.py \ + --target_abi=armeabi-v7a --target_socs=all --target_name=mace_cc_test \ + --gtest_filter=EnvTest.* --envs="MACE_CPP_MIN_VLOG_LEVEL=5" +""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -import filelock -import random import argparse import os from utils import device from utils import target from utils import config_parser - - -def device_lock(device_id, timeout=7200): - return filelock.FileLock("/tmp/device-lock-%s" % device_id, - timeout=timeout) - - -def is_device_locked(device_id): - try: - with device_lock(device_id, timeout=0.000001): - return False - except filelock.Timeout: - return True +from utils import util def run_target(target_abi, install_dir, target_obj, device_ids="all"): if not install_dir: install_dir = default_install_dir(target_abi) - device_class = device.device_class(target_abi) - devices = device_class.list_devices() - - if device_ids == "all": - run_devices = devices - elif device_ids == "random": - unlocked_devices = [dev for dev in devices if - not is_device_locked(dev)] - if unlocked_devices: - run_devices = [random.choice(unlocked_devices)] - else: - run_devices = [random.choice(devices)] - else: - device_id_list = [dev.strip() for dev in device_ids.split(",")] - run_devices = [dev for dev in device_id_list if dev in devices] + run_devices = device.choose_devices(target_abi, device_ids) print("Run on devices: %s" % run_devices) @@ -72,7 +53,7 @@ def run_target(target_abi, install_dir, target_obj, device_ids="all"): # run on device print("Runing ...") - with device_lock(device_id): + with util.device_lock(device_id): dev.run(device_target) @@ -84,15 +65,6 @@ def default_install_dir(target_abi): return install_dir -""" -Internal tool for mace_cc_benchmark, mace_cc_test, mace_run: - -python tools/experimental/run.py \ - --target_abi=armeabi-v7a --target_socs=all 
--target_name=mace_cc_test \ - --args="--gtest_filter=EnvTest.*" --envs="MACE_CPP_MIN_VLOG_LEVEL=5" -""" - - def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( @@ -105,7 +77,7 @@ def parse_args(): parser.add_argument( "--target_socs", type=str, - default="", + default="all", help="serialno for adb connection," " username@ip for arm linux," " host for host" @@ -126,7 +98,7 @@ def parse_args(): parser.add_argument( "--build_dir", type=str, - default="cmake-build-debug-tools", + default="build/cmake-build", help="cmake build dir" ) parser.add_argument( @@ -135,8 +107,6 @@ def parse_args(): help="if build before run" ) - parser.add_argument("--args", type=str, default="", - help="Command args: --gtest_filter=*, --filter=*") parser.add_argument("--envs", type=str, default="", help="Environment vars: " " MACE_CPP_MIN_VLOG_LEVEL=2," @@ -145,19 +115,18 @@ def parse_args(): " MACE_INTERNAL_STORAGE_PATH=/path/to," " LD_PRELOAD=/path/to") - flgs, _ = parser.parse_known_args() - return flgs + flgs, args = parser.parse_known_args() + return flgs, args if __name__ == "__main__": - flags = parse_args() + flags, args = parse_args() if flags.device_conf: device_conf = config_parser.parse_device_info(flags.device_conf) device.ArmLinuxDevice.set_devices(device_conf) target_abi = flags.target_abi.strip() target_name = flags.target_name.strip() - opts = flags.args.split(" ") envs = flags.envs.split(" ") # build @@ -165,11 +134,11 @@ if __name__ == "__main__": if flags.build: cmake_shell = os.path.abspath( os.path.dirname( - __file__)) + "/config/build/cmake-build-%s.sh" % target_abi + __file__)) + "/../cmake/cmake-build-%s.sh" % target_abi os.environ["BUILD_DIR"] = build_dir - device.execute(cmake_shell) + device.execute("bash " + cmake_shell) # run target = target.Target(build_dir + "/install/bin/" + target_name, - opts=opts, envs=envs) + opts=args, envs=envs) run_target(target_abi, None, target, device_ids=flags.target_socs) diff --git 
a/tools/python/template/file_binary.cc.jinja2 b/tools/python/template/file_binary.cc.jinja2 new file mode 100644 index 00000000..d3fa6e19 --- /dev/null +++ b/tools/python/template/file_binary.cc.jinja2 @@ -0,0 +1,38 @@ +// Copyright 2019 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#include + +namespace mace { + +const unsigned char *{{ load_func_name }}() { +{% if data_size == 0 %} + return nullptr; +{% else %} + static const unsigned char kData[{{ data_size }}] = { + {% for d in data %}{{"0x%02X, " % d }}{%endfor%} + }; + + return kData; +{% endif %} +} + +size_t {{ size_func_name }}() { + return {{ data_size }}; +} + +} // namespace mace + diff --git a/tools/python/template/mace_engine_factory.h.jinja2 b/tools/python/template/mace_engine_factory.h.jinja2 new file mode 100644 index 00000000..d9557be3 --- /dev/null +++ b/tools/python/template/mace_engine_factory.h.jinja2 @@ -0,0 +1,148 @@ +// Copyright 2018 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a generated file. DO NOT EDIT! + +#ifndef MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_ +#define MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_ +#include +#include +#include +#include + +#include "mace/public/mace.h" + +namespace mace { + +{% for tag in model_tags %} +namespace {{tag}} { + +extern const unsigned char *LoadModelData(); + +extern const std::shared_ptr CreateNet(); + +extern const std::string ModelName(); +extern const std::string ModelChecksum(); +extern const std::string ModelBuildTime(); +extern const std::string ModelBuildOptions(); + +} // namespace {{tag}} +{% endfor %} + +namespace { +std::map model_name_map { +{% for i in range(model_tags |length) %} + std::make_pair({{ model_tags[i]|tojson }}, {{ i }}), +{% endfor %} +}; +} // namespace + +/// \brief Create MaceEngine from code +/// +/// Create MaceEngine object based on model graph code and model data file or +/// model data code. +/// +/// \param model_name[in]: the name of model you want to use. +/// \param model_data_file[in]: the path of model data file, +/// if model_data_format is code, just pass empty string("") +/// \param input_nodes[in]: the array of input nodes' name +/// \param output_nodes[in]: the array of output nodes' name +/// \param config[in]: configurations for MaceEngine. +/// \param engine[out]: output MaceEngine object +/// \return MaceStatus::MACE_SUCCESS for success, MACE_INVALID_ARGS for wrong arguments, +/// MACE_OUT_OF_RESOURCES for resources is out of range. 
+__attribute__((deprecated)) MaceStatus CreateMaceEngineFromCode( + const std::string &model_name, + const std::string &model_data_file, + const std::vector &input_nodes, + const std::vector &output_nodes, + const MaceEngineConfig &config, + std::shared_ptr *engine) { + // load model + if (engine == nullptr) { + return MaceStatus::MACE_INVALID_ARGS; + } + std::shared_ptr net_def; +{% if embed_model_data %} + (void)model_data_file; + const unsigned char * model_data; +{% endif %} + MaceStatus status = MaceStatus::MACE_SUCCESS; + switch (model_name_map[model_name]) { +{% for i in range(model_tags |length) %} + case {{ i }}: + net_def = mace::{{model_tags[i]}}::CreateNet(); + engine->reset(new mace::MaceEngine(config)); +{% if embed_model_data %} + model_data = mace::{{model_tags[i]}}::LoadModelData(); + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, + model_data); +{% else %} + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, + model_data_file); +{% endif %} + break; +{% endfor %} + default: + status = MaceStatus::MACE_INVALID_ARGS; + } + + return status; +} + +MACE_API MaceStatus CreateMaceEngineFromCode( + const std::string &model_name, + const unsigned char *model_weights_data, + const size_t model_weights_data_size, + const std::vector &input_nodes, + const std::vector &output_nodes, + const MaceEngineConfig &config, + std::shared_ptr *engine) { + // load model + if (engine == nullptr) { + return MaceStatus::MACE_INVALID_ARGS; + } + std::shared_ptr net_def; +{% if embed_model_data %} + const unsigned char * model_data; + (void)model_weights_data; +{% endif %} + // TODO(yejianwu) Add buffer range checking + (void)model_weights_data_size; + + MaceStatus status = MaceStatus::MACE_SUCCESS; + switch (model_name_map[model_name]) { +{% for i in range(model_tags |length) %} + case {{ i }}: + net_def = mace::{{model_tags[i]}}::CreateNet(); + engine->reset(new mace::MaceEngine(config)); +{% if embed_model_data %} + model_data = 
mace::{{model_tags[i]}}::LoadModelData(); + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, + model_data); +{% else %} + status = (*engine)->Init(net_def.get(), input_nodes, output_nodes, + model_weights_data); +{% endif %} + break; +{% endfor %} + default: + status = MaceStatus::MACE_INVALID_ARGS; + } + + return status; +} + +} // namespace mace +#endif // MACE_CODEGEN_ENGINE_MACE_ENGINE_FACTORY_H_ diff --git a/tools/python/template/model.jinja2 b/tools/python/template/model.jinja2 index f38f42e0..494b0986 100644 --- a/tools/python/template/model.jinja2 +++ b/tools/python/template/model.jinja2 @@ -133,7 +133,7 @@ void CreateTensors(NetDef *net_def) { namespace {{tag}} { -const std::shared_ptr CreateNet() { +MACE_API const std::shared_ptr CreateNet() { MACE_LATENCY_LOGGER(1, "Create net {{ net.name }}"); std::shared_ptr net_def(new NetDef()); @@ -154,15 +154,15 @@ const std::shared_ptr CreateNet() { return net_def; } -const std::string ModelName() { +MACE_API const std::string ModelName() { return {{ tag|tojson }}; } -const std::string ModelChecksum() { +MACE_API const std::string ModelChecksum() { return {{ checksum|tojson }}; } -const std::string ModelBuildTime() { +MACE_API const std::string ModelBuildTime() { return {{ build_time|tojson }}; } diff --git a/tools/python/template/model_header.jinja2 b/tools/python/template/model_header.jinja2 index 1aba6d88..ea1c5f6c 100644 --- a/tools/python/template/model_header.jinja2 +++ b/tools/python/template/model_header.jinja2 @@ -25,14 +25,14 @@ namespace mace { namespace {{tag}} { -extern const unsigned char *LoadModelData(); +MACE_API extern const unsigned char *LoadModelData(); -extern const std::shared_ptr CreateNet(); +MACE_API extern const std::shared_ptr CreateNet(); -extern const std::string ModelName(); -extern const std::string ModelChecksum(); -extern const std::string ModelBuildTime(); -extern const std::string ModelBuildOptions(); +MACE_API extern const std::string ModelName(); +MACE_API 
extern const std::string ModelChecksum(); +MACE_API extern const std::string ModelBuildTime(); +MACE_API extern const std::string ModelBuildOptions(); } // namespace {{ tag }} } // namespace mace diff --git a/tools/python/template/tensor_data.jinja2 b/tools/python/template/tensor_data.jinja2 index 64d020b4..337bf2b6 100644 --- a/tools/python/template/tensor_data.jinja2 +++ b/tools/python/template/tensor_data.jinja2 @@ -14,6 +14,7 @@ // This is a generated file. DO NOT EDIT! +#include "mace/public/mace.h" namespace mace { namespace {{tag}} { @@ -22,7 +23,7 @@ alignas(4) const unsigned char model_data[{{ model_data_size }}] = { {% for d in model_data %}{{"0x%02X, " % d }}{%endfor%} }; -const unsigned char *LoadModelData() { +MACE_API const unsigned char *LoadModelData() { return model_data; } diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py index e3146892..691ead78 100644 --- a/tools/python/transform/base_converter.py +++ b/tools/python/transform/base_converter.py @@ -17,24 +17,8 @@ from enum import Enum from py_proto import mace_pb2 - -class DeviceType(Enum): - CPU = 0 - GPU = 2 - HEXAGON = 3 - HTA = 4 - APU = 5 - - -class DataFormat(Enum): - NONE = 0 - NHWC = 1 - NCHW = 2 - HWIO = 100 - OIHW = 101 - HWOI = 102 - OHWI = 103 - AUTO = 1000 +from utils.config_parser import DataFormat +from utils.config_parser import DeviceType # SAME_LOWER: if the amount of paddings to be added is odd, @@ -402,7 +386,7 @@ class ConverterOption(object): self._quantize_range_file = "" self._change_concat_ranges = False self._transformer_option = None - self._cl_mem_type = "" + self._cl_mem_type = "image" @property def input_nodes(self): diff --git a/tools/python/utils/config_parser.py b/tools/python/utils/config_parser.py index e4e2a04c..35de4bd9 100644 --- a/tools/python/utils/config_parser.py +++ b/tools/python/utils/config_parser.py @@ -18,8 +18,13 @@ from __future__ import print_function import re import os +import copy import yaml 
+from enum import Enum +from utils.util import mace_check +from utils.util import MaceLogger +from py_proto import mace_pb2 CPP_KEYWORDS = [ 'alignas', 'alignof', 'and', 'and_eq', 'asm', 'atomic_cancel', @@ -63,3 +68,227 @@ def parse(path): def parse_device_info(path): conf = parse(path) return conf["devices"] + + +class ModelKeys(object): + platform = "platform" + runtime = "runtime" + graph_optimize_options = "graph_optimize_options" + input_tensors = "input_tensors" + input_shapes = "input_shapes" + input_data_types = "input_data_types" + input_data_formats = "input_data_formats" + input_ranges = "input_ranges" + output_tensors = "output_tensors" + output_shapes = "output_shapes" + output_data_types = "output_data_types" + output_data_formats = "output_data_formats" + check_tensors = "check_tensors" + check_shapes = "check_shapes" + model_file_path = "model_file_path" + model_sha256_checksum = "model_sha256_checksum" + weight_file_path = "weight_file_path" + weight_sha256_checksum = "weight_sha256_checksum" + quantize_range_file = "quantize_range_file" + quantize = "quantize" + quantize_large_weights = "quantize_large_weights" + change_concat_ranges = "change_concat_ranges" + winograd = "winograd" + cl_mem_type = "cl_mem_type" + data_types = "data_types" + subgraphs = "subgraphs" + validation_inputs_data = "validation_inputs_data" + + +class DataFormat(Enum): + NONE = 0 + NHWC = 1 + NCHW = 2 + HWIO = 100 + OIHW = 101 + HWOI = 102 + OHWI = 103 + AUTO = 1000 + + +def parse_data_format(str): + str = str.upper() + mace_check(str in [e.name for e in DataFormat], + "unknown data format %s" % str) + return DataFormat[str] + + +class DeviceType(Enum): + CPU = 0 + GPU = 2 + HEXAGON = 3 + HTA = 4 + APU = 5 + CPU_GPU = 100 + + +DEVICE_MAP = { + "cpu": DeviceType.CPU, + "gpu": DeviceType.GPU, + "hexagon": DeviceType.HEXAGON, + "dsp": DeviceType.HEXAGON, + "hta": DeviceType.HTA, + "apu": DeviceType.APU, + "cpu+gpu": DeviceType.CPU_GPU +} + + +def parse_device_type(str): + 
mace_check(str in DEVICE_MAP, "unknown device %s" % str) + return DEVICE_MAP[str] + + +class Platform(Enum): + TENSORFLOW = 0 + CAFFE = 1 + ONNX = 2 + + +def parse_platform(str): + str = str.upper() + mace_check(str in [e.name for e in Platform], + "unknown platform %s" % str) + return Platform[str] + + +DATA_TYPE_MAP = { + 'float32': mace_pb2.DT_FLOAT, + 'int32': mace_pb2.DT_INT32, +} + + +def parse_data_type(str): + if str == "float32": + return mace_pb2.DT_FLOAT + elif str == "int32": + return mace_pb2.DT_INT32 + else: + mace_check(False, "data type %s not supported" % str) + + +def parse_internal_data_type(str): + if str == 'fp32_fp32': + return mace_pb2.DT_FLOAT + else: + return mace_pb2.DT_HALF + + +def to_list(x): + if isinstance(x, list): + return x + else: + return [x] + + +def parse_int_array(xs): + return [int(x) for x in xs.split(",")] + + +def parse_float_array(xs): + return [float(x) for x in xs.split(",")] + + +def normalize_model_config(conf): + conf = copy.deepcopy(conf) + if ModelKeys.subgraphs in conf: + subgraph = conf[ModelKeys.subgraphs][0] + del conf[ModelKeys.subgraphs] + conf.update(subgraph) + + print(conf) + conf[ModelKeys.platform] = parse_platform(conf[ModelKeys.platform]) + conf[ModelKeys.runtime] = parse_device_type(conf[ModelKeys.runtime]) + + if ModelKeys.quantize in conf: + conf[ModelKeys.data_types] = mace_pb2.DT_FLOAT + else: + if ModelKeys.data_types in conf: + conf[ModelKeys.data_types] = parse_internal_data_type( + conf[ModelKeys.data_types]) + else: + conf[ModelKeys.data_types] = mace_pb2.DT_HALF + + # parse input + conf[ModelKeys.input_tensors] = to_list(conf[ModelKeys.input_tensors]) + input_count = len(conf[ModelKeys.input_tensors]) + conf[ModelKeys.input_shapes] = [parse_int_array(shape) for shape in + to_list(conf[ModelKeys.input_shapes])] + mace_check( + len(conf[ModelKeys.input_shapes]) == input_count, + "input node count and shape count do not match") + + input_data_types = [parse_data_type(dt) for dt in + 
to_list(conf.get(ModelKeys.input_data_types, + ["float32"]))] + + if len(input_data_types) == 1 and input_count > 1: + input_data_types = [input_data_types[0]] * input_count + mace_check(len(input_data_types) == input_count, + "the number of input_data_types should be " + "the same as input tensors") + conf[ModelKeys.input_data_types] = input_data_types + + input_data_formats = [parse_data_format(df) for df in + to_list(conf.get(ModelKeys.input_data_formats, + ["NHWC"]))] + if len(input_data_formats) == 1 and input_count > 1: + input_data_formats = [input_data_formats[0]] * input_count + mace_check(len(input_data_formats) == input_count, + "the number of input_data_formats should be " + "the same as input tensors") + conf[ModelKeys.input_data_formats] = input_data_formats + + input_ranges = [parse_float_array(r) for r in + to_list(conf.get(ModelKeys.input_ranges, + ["-1.0,1.0"]))] + if len(input_ranges) == 1 and input_count > 1: + input_ranges = [input_ranges[0]] * input_count + mace_check(len(input_ranges) == input_count, + "the number of input_ranges should be " + "the same as input tensors") + conf[ModelKeys.input_ranges] = input_ranges + + # parse output + conf[ModelKeys.output_tensors] = to_list(conf[ModelKeys.output_tensors]) + output_count = len(conf[ModelKeys.output_tensors]) + conf[ModelKeys.output_shapes] = [parse_int_array(shape) for shape in + to_list(conf[ModelKeys.output_shapes])] + mace_check(len(conf[ModelKeys.output_tensors]) == output_count, + "output node count and shape count do not match") + + output_data_types = [parse_data_type(dt) for dt in + to_list(conf.get(ModelKeys.output_data_types, + ["float32"]))] + if len(output_data_types) == 1 and output_count > 1: + output_data_types = [output_data_types[0]] * output_count + mace_check(len(output_data_types) == output_count, + "the number of output_data_types should be " + "the same as output tensors") + conf[ModelKeys.output_data_types] = output_data_types + + output_data_formats = 
[parse_data_format(df) for df in + to_list(conf.get(ModelKeys.output_data_formats, + ["NHWC"]))] + if len(output_data_formats) == 1 and output_count > 1: + output_data_formats = [output_data_formats[0]] * output_count + mace_check(len(output_data_formats) == output_count, + "the number of output_data_formats should be " + "the same as output tensors") + conf[ModelKeys.output_data_formats] = output_data_formats + + if ModelKeys.check_tensors in conf: + conf[ModelKeys.check_tensors] = to_list(conf[ModelKeys.check_tensors]) + conf[ModelKeys.check_shapes] = [parse_int_array(shape) for shape in + to_list(conf[ModelKeys.check_shapes])] + mace_check(len(conf[ModelKeys.check_tensors]) == len( + conf[ModelKeys.check_shapes]), + "check tensors count and shape count do not match.") + + MaceLogger.summary(conf) + + return conf diff --git a/tools/python/utils/device.py b/tools/python/utils/device.py index 3879e4c7..f15be425 100644 --- a/tools/python/utils/device.py +++ b/tools/python/utils/device.py @@ -17,13 +17,15 @@ from __future__ import division from __future__ import print_function import os +import re import subprocess +import random +import tempfile +from utils import util -MACE_TOOL_QUIET_ENV = "MACE_TOOL_QUIET" - -def execute(cmd): +def execute(cmd, verbose=True): print("CMD> %s" % cmd) p = subprocess.Popen([cmd], shell=True, @@ -31,20 +33,28 @@ def execute(cmd): stderr=subprocess.STDOUT, stdin=subprocess.PIPE, universal_newlines=True) - returncode = p.poll() + + if not verbose: + if p.wait() != 0: + raise Exception("errorcode: %s" % p.returncode) + return p.stdout.read() + buf = [] - while returncode is None: - line = p.stdout.readline() - returncode = p.poll() - line = line.strip() - if MACE_TOOL_QUIET_ENV not in os.environ: + + while p.poll() is None: + line = p.stdout.readline().strip() + if verbose: print(line) buf.append(line) - p.wait() + for l in p.stdout: + line = l.strip() + if verbose: + print(line) + buf.append(line) - if returncode != 0: - raise 
Exception("errorcode: %s" % returncode) + if p.returncode != 0: + raise Exception("errorcode: %s" % p.returncode) return "\n".join(buf) @@ -62,6 +72,12 @@ class Device(object): def pull(self, target, out_dir): pass + def mkdir(self, dirname): + pass + + def info(self): + pass + class HostDevice(Device): def __init__(self, device_id): @@ -98,6 +114,9 @@ class HostDevice(Device): if out_dir.strip() and out_dir != os.path.dirname(target.path): execute("cp -r %s %s" % (target.path, out_dir)) + def mkdir(self, dirname): + execute("mkdir -p %s" % dirname) + class AndroidDevice(Device): def __init__(self, device_id): @@ -120,9 +139,15 @@ class AndroidDevice(Device): sn = self._device_id execute("adb -s %s shell mkdir -p %s" % (sn, install_dir)) - execute("adb -s %s push %s %s" % (sn, target.path, install_dir)) + if os.path.isdir(target.path): + execute("adb -s %s push %s/* %s" % (sn, target.path, install_dir), + False) + else: + execute("adb -s %s push %s %s" % (sn, target.path, install_dir), + False) + for lib in target.libs: - execute("adb -s %s push %s %s" % (sn, lib, install_dir)) + execute("adb -s %s push %s %s" % (sn, lib, install_dir), False) target.path = "%s/%s" % (install_dir, os.path.basename(target.path)) target.libs = ["%s/%s" % (install_dir, os.path.basename(lib)) @@ -132,7 +157,17 @@ class AndroidDevice(Device): return target def run(self, target): - out = execute("adb -s %s shell %s" % (self._device_id, target)) + tmpdirname = tempfile.mkdtemp() + cmd_file_path = tmpdirname + "/cmd.sh" + with open(cmd_file_path, "w") as cmd_file: + cmd_file.write(str(target)) + target_dir = os.path.dirname(target.path) + execute("adb -s %s push %s %s" % (self._device_id, + cmd_file_path, + target_dir)) + + out = execute("adb -s %s shell sh %s" % (self._device_id, + target_dir + "/cmd.sh")) # May have false positive using the following error word for line in out.split("\n")[:-10]: if ("Aborted" in line @@ -141,7 +176,23 @@ class AndroidDevice(Device): def pull(self, target, 
out_dir): sn = self._device_id - execute("adb -s %s pull %s %s" % (sn, target.path, out_dir)) + execute("adb -s %s pull %s %s" % (sn, target.path, out_dir), False) + + def mkdir(self, dirname): + sn = self._device_id + execute("adb -s %s shell mkdir -p %s" % (sn, dirname)) + + def info(self): + sn = self._device_id + output = execute("adb -s %s shell getprop" % sn, False) + raw_props = output.split("\n") + props = {} + p = re.compile(r'\[(.+)\]: \[(.+)\]') + for raw_prop in raw_props: + m = p.match(raw_prop) + if m: + props[m.group(1)] = m.group(2) + return props class ArmLinuxDevice(Device): @@ -153,10 +204,12 @@ class ArmLinuxDevice(Device): @staticmethod def list_devices(): device_ids = [] - for dev_name, dev_info in ArmLinuxDevice.devices: + print("!!!", ArmLinuxDevice.devices) + for dev_name, dev_info in ArmLinuxDevice.devices.items(): address = dev_info["address"] username = dev_info["username"] device_ids.append("%s@%s" % (username, address)) + return device_ids @staticmethod def set_devices(devices): @@ -166,10 +219,10 @@ class ArmLinuxDevice(Device): install_dir = os.path.abspath(install_dir) ip = self._device_id - execute("ssh %s mkdir -p %s" % install_dir) - execute("scp %s %s:%s" % (target.path, ip, install_dir)) + execute("ssh %s mkdir -p %s" % (ip, install_dir)) + execute("scp -r %s %s:%s" % (target.path, ip, install_dir)) for lib in target.libs: - execute("scp %s:%s" % (lib, install_dir)) + execute("scp -r %s:%s" % (lib, install_dir)) target.path = "%s/%s" % (install_dir, os.path.basename(target.path)) target.libs = ["%s/%s" % (install_dir, os.path.basename(lib)) @@ -179,11 +232,15 @@ class ArmLinuxDevice(Device): return target def run(self, target): - execute("ssh %s shell %s" % (self._device_id, target)) + execute("ssh %s %s" % (self._device_id, target)) def pull(self, target, out_dir): sn = self._device_id - execute("scp %s:%s %s" % (sn, target.path, out_dir)) + execute("scp -r %s:%s %s" % (sn, target.path, out_dir)) + + def mkdir(self, dirname): 
+ sn = self._device_id + execute("ssh %s mkdir -p %s" % (sn, dirname)) def device_class(target_abi): @@ -204,3 +261,23 @@ def device_class(target_abi): def crete_device(target_abi, device_id=None): return device_class(target_abi)(device_id) + + +def choose_devices(target_abi, target_ids): + device_clazz = device_class(target_abi) + devices = device_clazz.list_devices() + + if target_ids == "all": + run_devices = devices + elif target_ids == "random": + unlocked_devices = [dev for dev in devices if + not util.is_device_locked(dev)] + if unlocked_devices: + run_devices = [random.choice(unlocked_devices)] + else: + run_devices = [random.choice(devices)] + else: + device_id_list = [dev.strip() for dev in target_ids.split(",")] + run_devices = [dev for dev in device_id_list if dev in devices] + + return run_devices diff --git a/tools/python/utils/util.py b/tools/python/utils/util.py index 423a9ef6..d2e25a92 100644 --- a/tools/python/utils/util.py +++ b/tools/python/utils/util.py @@ -18,9 +18,12 @@ from __future__ import print_function import inspect import hashlib +import filelock +import errno import os -import urllib -from utils import device +import sys +import shutil +import traceback ################################ @@ -46,33 +49,92 @@ def get_frame_info(level=2): class MaceLogger: @staticmethod def header(message): - print(CMDColors.PURPLE + message + CMDColors.ENDC) + print(CMDColors.PURPLE + str(message) + CMDColors.ENDC) @staticmethod def summary(message): - print(CMDColors.GREEN + message + CMDColors.ENDC) + print(CMDColors.GREEN + str(message) + CMDColors.ENDC) @staticmethod def info(message): - print(get_frame_info() + message) + print(get_frame_info() + str(message)) @staticmethod def warning(message): - print(CMDColors.YELLOW + 'WARNING: ' + get_frame_info() + message + print(CMDColors.YELLOW + 'WARNING: ' + get_frame_info() + str(message) + CMDColors.ENDC) @staticmethod def error(message): - print(CMDColors.RED + 'ERROR: ' + get_frame_info() + message + 
print(CMDColors.RED + 'ERROR: ' + get_frame_info() + str(message) + CMDColors.ENDC) exit(1) def mace_check(condition, message): if not condition: + for line in traceback.format_stack(): + print(line.strip()) + MaceLogger.error(message) +################################ +# String Formatter +################################ +class StringFormatter: + @staticmethod + def table(header, data, title, align="R"): + data_size = len(data) + column_size = len(header) + column_length = [len(str(ele)) + 1 for ele in header] + for row_idx in range(data_size): + data_tuple = data[row_idx] + ele_size = len(data_tuple) + assert (ele_size == column_size) + for i in range(ele_size): + column_length[i] = max(column_length[i], + len(str(data_tuple[i])) + 1) + + table_column_length = sum(column_length) + column_size + 1 + dash_line = '-' * table_column_length + '\n' + header_line = '=' * table_column_length + '\n' + output = "" + output += dash_line + output += str(title).center(table_column_length) + '\n' + output += dash_line + output += '|' + '|'.join([str(header[i]).center(column_length[i]) + for i in range(column_size)]) + '|\n' + output += header_line + + for data_tuple in data: + ele_size = len(data_tuple) + row_list = [] + for i in range(ele_size): + if align == "R": + row_list.append(str(data_tuple[i]).rjust(column_length[i])) + elif align == "L": + row_list.append(str(data_tuple[i]).ljust(column_length[i])) + elif align == "C": + row_list.append(str(data_tuple[i]) + .center(column_length[i])) + output += '|' + '|'.join(row_list) + "|\n" + dash_line + return output + + @staticmethod + def block(message): + line_length = 10 + len(str(message)) + 10 + star_line = '*' * line_length + '\n' + return star_line + str(message).center(line_length) + '\n' + star_line + + +def formatted_file_name(input_file_name, input_name): + res = input_file_name + '_' + for c in input_name: + res += c if c.isalnum() else '_' + return res + + ################################ # file 
################################ @@ -86,17 +148,86 @@ def file_checksum(fname): def download_or_get_file(file, sha256_checksum, - output_dir): - filename = os.path.basename(file) - output_file = "%s/%s-%s.pb" % (output_dir, filename, sha256_checksum) - + output_file): if file.startswith("http://") or file.startswith("https://"): if not os.path.exists(output_file) or file_checksum( output_file) != sha256_checksum: - MaceLogger.info("Downloading file %s, please wait ..." % file) - urllib.urlretrieve(file, output_file) + MaceLogger.info("Downloading file %s to %s, please wait ..." + % (file, output_file)) + if sys.version_info >= (3, 0): + import urllib.request + data = urllib.request.urlopen(file) + out_handle = open(output_file, "wb") + out_handle.write(data.read()) + out_handle.close() + else: + import urllib + urllib.urlretrieve(file, output_file) MaceLogger.info("Model downloaded successfully.") else: - device.execute("cp %s %s" % (file, output_file)) + shutil.copyfile(file, output_file) + + if sha256_checksum: + mace_check(file_checksum(output_file) == sha256_checksum, + "checksum validate failed") return output_file + + +def download_or_get_model(file, + sha256_checksum, + output_dir): + filename = os.path.basename(file) + output_file = "%s/%s-%s.pb" % (output_dir, filename, sha256_checksum) + download_or_get_file(file, sha256_checksum, output_file) + return output_file + + +################################ +# bazel commands +################################ +class ABIType(object): + armeabi_v7a = 'armeabi-v7a' + arm64_v8a = 'arm64-v8a' + arm64 = 'arm64' + aarch64 = 'aarch64' + armhf = 'armhf' + host = 'host' + + +def abi_to_internal(abi): + if abi in [ABIType.armeabi_v7a, ABIType.arm64_v8a]: + return abi + if abi == ABIType.arm64: + return ABIType.aarch64 + if abi == ABIType.armhf: + return ABIType.armeabi_v7a + + +################################ +# lock +################################ +def device_lock(device_id, timeout=7200): + return 
filelock.FileLock("/tmp/device-lock-%s" % device_id, + timeout=timeout) + + +def is_device_locked(device_id): + try: + with device_lock(device_id, timeout=0.000001): + return False + except filelock.Timeout: + return True + + +################################ +# os +################################ +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as exc: # Python >2.5 + if exc.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise diff --git a/tools/python/validate.py b/tools/python/validate.py new file mode 100644 index 00000000..7004d2a8 --- /dev/null +++ b/tools/python/validate.py @@ -0,0 +1,344 @@ +# Copyright 2018 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import os.path +import numpy as np +import six + +from py_proto import mace_pb2 +from utils import util +from utils.config_parser import DataFormat +from utils.config_parser import Platform + +VALIDATION_MODULE = 'VALIDATION' + + +def load_data(file, data_type=mace_pb2.DT_FLOAT): + if os.path.isfile(file): + if data_type == mace_pb2.DT_FLOAT: + return np.fromfile(file=file, dtype=np.float32) + elif data_type == mace_pb2.DT_INT32: + return np.fromfile(file=file, dtype=np.int32) + return np.empty([0]) + + +def calculate_sqnr(expected, actual): + noise = expected - actual + + def power_sum(xs): + return sum([x * x for x in xs]) + + signal_power_sum = power_sum(expected) + noise_power_sum = power_sum(noise) + return signal_power_sum / (noise_power_sum + 1e-15) + + +def calculate_similarity(u, v, data_type=np.float64): + if u.dtype is not data_type: + u = u.astype(data_type) + if v.dtype is not data_type: + v = v.astype(data_type) + return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)) + + +def calculate_pixel_accuracy(out_value, mace_out_value): + if len(out_value.shape) < 2: + return 1.0 + out_value = out_value.reshape((-1, out_value.shape[-1])) + batches = out_value.shape[0] + classes = out_value.shape[1] + mace_out_value = mace_out_value.reshape((batches, classes)) + correct_count = 0 + for i in range(batches): + if np.argmax(out_value[i]) == np.argmax(mace_out_value[i]): + correct_count += 1 + return 1.0 * correct_count / batches + + +def compare_output(output_name, mace_out_value, + out_value, validation_threshold, log_file): + if mace_out_value.size != 0: + pixel_accuracy = calculate_pixel_accuracy(out_value, mace_out_value) + out_value = out_value.reshape(-1) + mace_out_value = mace_out_value.reshape(-1) + assert len(out_value) == len(mace_out_value) + sqnr = calculate_sqnr(out_value, mace_out_value) + similarity = calculate_similarity(out_value, mace_out_value) + util.MaceLogger.summary( + output_name + ' MACE VS training platform' + + ' 
similarity: ' + str(similarity) + ' , sqnr: ' + str(sqnr) + + ' , pixel_accuracy: ' + str(pixel_accuracy)) + if log_file: + if not os.path.exists(log_file): + with open(log_file, 'w') as f: + f.write('output_name,similarity,sqnr,pixel_accuracy\n') + summary = '{output_name},{similarity},{sqnr},{pixel_accuracy}\n' \ + .format(output_name=output_name, + similarity=similarity, + sqnr=sqnr, + pixel_accuracy=pixel_accuracy) + with open(log_file, "a") as f: + f.write(summary) + elif similarity > validation_threshold: + util.MaceLogger.summary( + util.StringFormatter.block("Similarity Test Passed")) + else: + util.MaceLogger.error( + "", util.StringFormatter.block("Similarity Test Failed")) + else: + util.MaceLogger.error( + "", util.StringFormatter.block( + "Similarity Test failed because of empty output")) + + +def normalize_tf_tensor_name(name): + if name.find(':') == -1: + return name + ':0' + else: + return name + + +def validate_with_file(output_names, output_shapes, + mace_out_file, validation_outputs_data, + validation_threshold, log_file): + for i in range(len(output_names)): + if validation_outputs_data[i].startswith("http://") or \ + validation_outputs_data[i].startswith("https://"): + validation_file_name = util.formatted_file_name( + mace_out_file, output_names[i] + '_validation') + six.moves.urllib.request.urlretrieve(validation_outputs_data[i], + validation_file_name) + else: + validation_file_name = validation_outputs_data[i] + value = load_data(validation_file_name) + out_shape = output_shapes[i] + if len(out_shape) == 4: + out_shape[1], out_shape[2], out_shape[3] = \ + out_shape[3], out_shape[1], out_shape[2] + value = value.reshape(out_shape).transpose((0, 2, 3, 1)) + output_file_name = util.formatted_file_name( + mace_out_file, output_names[i]) + mace_out_value = load_data(output_file_name) + compare_output(output_names[i], mace_out_value, + value, validation_threshold, log_file) + + +def validate_tf_model(model_file, + input_file, mace_out_file, + 
input_names, input_shapes, input_data_formats, + output_names, output_shapes, output_data_formats, + validation_threshold, input_data_types, log_file): + import tensorflow as tf + if not os.path.isfile(model_file): + util.MaceLogger.error( + VALIDATION_MODULE, + "Input graph file '" + model_file + "' does not exist!") + + tf.reset_default_graph() + input_graph_def = tf.GraphDef() + with open(model_file, "rb") as f: + data = f.read() + input_graph_def.ParseFromString(data) + tf.import_graph_def(input_graph_def, name="") + + with tf.Session() as session: + with session.graph.as_default() as graph: + tf.import_graph_def(input_graph_def, name="") + input_dict = {} + for i in range(len(input_names)): + input_value = load_data( + util.formatted_file_name(input_file, input_names[i]), + input_data_types[i]) + input_value = input_value.reshape(input_shapes[i]) + if input_data_formats[i] == DataFormat.NCHW and \ + len(input_shapes[i]) == 4: + input_value = input_value.transpose((0, 2, 3, 1)) + elif input_data_formats[i] == DataFormat.OIHW and \ + len(input_shapes[i]) == 4: + # OIHW -> HWIO + input_value = input_value.transpose((2, 3, 1, 0)) + input_node = graph.get_tensor_by_name( + normalize_tf_tensor_name(input_names[i])) + input_dict[input_node] = input_value + + output_nodes = [] + for name in output_names: + output_nodes.extend( + [graph.get_tensor_by_name( + normalize_tf_tensor_name(name))]) + output_values = session.run(output_nodes, feed_dict=input_dict) + for i in range(len(output_names)): + output_file_name = util.formatted_file_name( + mace_out_file, output_names[i]) + mace_out_value = load_data(output_file_name) + if output_data_formats[i] == DataFormat.NCHW and \ + len(output_shapes[i]) == 4: + mace_out_value = mace_out_value. 
\ + reshape(output_shapes[i]).transpose((0, 2, 3, 1)) + compare_output(output_names[i], + mace_out_value, output_values[i], + validation_threshold, log_file) + + +def validate_caffe_model(model_file, input_file, + mace_out_file, weight_file, + input_names, input_shapes, input_data_formats, + output_names, output_shapes, output_data_formats, + validation_threshold, log_file): + os.environ['GLOG_minloglevel'] = '1' # suppress Caffe verbose prints + import caffe + if not os.path.isfile(model_file): + util.MaceLogger.error( + VALIDATION_MODULE, + "Input graph file '" + model_file + "' does not exist!") + if not os.path.isfile(weight_file): + util.MaceLogger.error( + VALIDATION_MODULE, + "Input weight file '" + weight_file + "' does not exist!") + + caffe.set_mode_cpu() + + net = caffe.Net(model_file, caffe.TEST, weights=weight_file) + + for i in range(len(input_names)): + input_value = load_data( + util.formatted_file_name(input_file, input_names[i])) + input_value = input_value.reshape(input_shapes[i]) + if input_data_formats[i] == DataFormat.NHWC and \ + len(input_shapes[i]) == 4: + input_value = input_value.transpose((0, 3, 1, 2)) + input_blob_name = input_names[i] + try: + if input_names[i] in net.top_names: + input_blob_name = net.top_names[input_names[i]][0] + except ValueError: + pass + new_shape = input_value.shape + net.blobs[input_blob_name].reshape(*new_shape) + for index in range(input_value.shape[0]): + net.blobs[input_blob_name].data[index] = input_value[index] + + net.forward() + + for i in range(len(output_names)): + value = net.blobs[output_names[i]].data + output_file_name = util.formatted_file_name( + mace_out_file, output_names[i]) + mace_out_value = load_data(output_file_name) + if output_data_formats[i] == DataFormat.NHWC and \ + len(output_shapes[i]) == 4: + mace_out_value = mace_out_value.reshape(output_shapes[i]) \ + .transpose((0, 3, 1, 2)) + compare_output(output_names[i], mace_out_value, + value, validation_threshold, log_file) + + +def
validate_onnx_model(model_file, + input_file, mace_out_file, + input_names, input_shapes, input_data_formats, + output_names, output_shapes, output_data_formats, + validation_threshold, input_data_types, + backend, log_file): + import onnx + if backend == "tensorflow": + from onnx_tf.backend import prepare + print("validate on onnx tensorflow backend.") + elif backend == "caffe2" or backend == "pytorch": + from caffe2.python.onnx.backend import prepare + print("validate on onnx caffe2 backend.") + else: + util.MaceLogger.error( + VALIDATION_MODULE, + "onnx backend framework '" + backend + "' is invalid.") + if not os.path.isfile(model_file): + util.MaceLogger.error( + VALIDATION_MODULE, + "Input graph file '" + model_file + "' does not exist!") + model = onnx.load(model_file) + input_dict = {} + for i in range(len(input_names)): + input_value = load_data(util.formatted_file_name(input_file, + input_names[i]), + input_data_types[i]) + input_value = input_value.reshape(input_shapes[i]) + if input_data_formats[i] == DataFormat.NHWC and \ + len(input_shapes[i]) == 4: + input_value = input_value.transpose((0, 3, 1, 2)) + input_dict[input_names[i]] = input_value + onnx_outputs = [] + for i in range(len(output_names)): + out_shape = output_shapes[i][:] + if output_data_formats[i] == DataFormat.NHWC and \ + len(out_shape) == 4: + out_shape[1], out_shape[2], out_shape[3] = \ + out_shape[3], out_shape[1], out_shape[2] + onnx_outputs.append( + onnx.helper.make_tensor_value_info(output_names[i], + onnx.TensorProto.FLOAT, + out_shape)) + model.graph.output.extend(onnx_outputs) + rep = prepare(model) + + output_values = rep.run(input_dict) + for i in range(len(output_names)): + out_name = output_names[i] + value = output_values[out_name].flatten() + output_file_name = util.formatted_file_name(mace_out_file, + output_names[i]) + mace_out_value = load_data(output_file_name) + if output_data_formats[i] == DataFormat.NHWC and \ + len(output_shapes[i]) == 4: + mace_out_value =
mace_out_value.reshape(output_shapes[i]) \ + .transpose((0, 3, 1, 2)) + compare_output(output_names[i], + mace_out_value, value, + validation_threshold, log_file) + + +def validate(platform, model_file, weight_file, input_file, mace_out_file, + input_shape, output_shape, input_data_format, + output_data_format, input_node, output_node, + validation_threshold, input_data_type, backend, + validation_outputs_data, log_file): + if not isinstance(validation_outputs_data, list): + if os.path.isfile(validation_outputs_data): + validation_outputs = [validation_outputs_data] + else: + validation_outputs = [] + else: + validation_outputs = validation_outputs_data + if validation_outputs: + validate_with_file(output_node, output_shape, + mace_out_file, validation_outputs, + validation_threshold, log_file) + elif platform == Platform.TENSORFLOW: + validate_tf_model(model_file, input_file, mace_out_file, + input_node, input_shape, input_data_format, + output_node, output_shape, output_data_format, + validation_threshold, input_data_type, + log_file) + elif platform == Platform.CAFFE: + validate_caffe_model(model_file, + input_file, mace_out_file, weight_file, + input_node, input_shape, input_data_format, + output_node, output_shape, output_data_format, + validation_threshold, log_file) + elif platform == Platform.ONNX: + validate_onnx_model(model_file, + input_file, mace_out_file, + input_node, input_shape, input_data_format, + output_node, output_shape, output_data_format, + validation_threshold, + input_data_type, backend, log_file) diff --git a/tools/python/visualize/visualize_model.py b/tools/python/visualize/visualize_model.py index 372c1b56..a2a48d5d 100644 --- a/tools/python/visualize/visualize_model.py +++ b/tools/python/visualize/visualize_model.py @@ -90,5 +90,5 @@ class ModelVisualizer(object): def save_html(self): html = self.render_html() - with open(self._output_file, "wb") as f: + with open(self._output_file, "w") as f: f.write(html) -- GitLab