Add MediaTek (MTK) APU support: a MACE_ENABLE_MTK_APU CMake option, the APU
runtime source and imported libapu-frontend target, Bazel/CMake build-script
switches, and converter/device tooling that enables and deploys the APU runtime.

Line structure and indentation were restored from a whitespace-collapsed copy
of this patch. If a context line does not match the target tree exactly, apply
with "git apply --ignore-whitespace".

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c29745fe94193518e0d872140adea40d658dce9..4fc705a53d1b8c266d1c6104982fda5caf33426a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(MACE_ENABLE_OPENCL "whether to enable OpenCL support" OFF)
 option(MACE_ENABLE_CUDA "whether to enable CUDA support" OFF)
 option(MACE_ENABLE_HEXAGON_DSP "whether to enable Hexagon DSP support" OFF)
 option(MACE_ENABLE_HEXAGON_HTA "whether to enable Hexagon HTA support" OFF)
+option(MACE_ENABLE_MTK_APU "whether to enable MTK APU support" OFF)
 option(MACE_ENABLE_TESTS "whether to build c++ unit tests" OFF)
 option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF)
 option(MACE_ENABLE_EXAMPLES "whether to build examples" OFF)
@@ -89,6 +90,13 @@ if(MACE_ENABLE_HEXAGON_HTA)
   add_definitions(-DMACE_ENABLE_HEXAGON_HTA)
 endif(MACE_ENABLE_HEXAGON_HTA)
 
+if(MACE_ENABLE_MTK_APU)
+  if(NOT ANDROID)
+    message(FATAL_ERROR "MTK APU is only supported on Android")
+  endif(NOT ANDROID)
+  add_definitions(-DMACE_ENABLE_MTK_APU)
+endif(MACE_ENABLE_MTK_APU)
+
 if(MACE_ENABLE_OBFUSCATE)
   add_definitions(-DMACE_OBFUSCATE_LITERALS)
 endif(MACE_ENABLE_OBFUSCATE)
diff --git a/mace/core/CMakeLists.txt b/mace/core/CMakeLists.txt
index e4b3a0e06d9ea1ebb35b062b113bec91dd1d833c..c9b18826ee5ee409db0da8c76837a2dcea76b7b7 100644
--- a/mace/core/CMakeLists.txt
+++ b/mace/core/CMakeLists.txt
@@ -40,6 +40,11 @@ if(MACE_ENABLE_HEXAGON_HTA)
   set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} hta_controller hta_hexagon_runtime npu)
 endif(MACE_ENABLE_HEXAGON_HTA)
 
+if(MACE_ENABLE_MTK_APU)
+  set(CORE_SRCS ${CORE_SRCS} runtime/apu/apu_wrapper.cc)
+  set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} apu-frontend)
+endif(MACE_ENABLE_MTK_APU)
+
 add_library(core STATIC ${CORE_SRCS})
 target_link_libraries(core PRIVATE
   proto
diff --git a/mace/python/tools/converter_tool/apu_converter.py b/mace/python/tools/converter_tool/apu_converter.py
index 5fbf2da7ce9c993a9fbcc18392a59314913c2a4f..5de7a6fc4b8530c56e3a47841ed78eec4c080787 100644
--- a/mace/python/tools/converter_tool/apu_converter.py
+++ b/mace/python/tools/converter_tool/apu_converter.py
@@ -397,12 +397,13 @@ class ApuConverter(base_converter.ConverterInterface):
             _op.input.extend([tensor.name])
 
     def use_uint8_in_out(self):
+        replace_dict = {}
         for input_info in self._model.input_info:
             if input_info.data_type == mace_pb2.DT_FLOAT:
                 for op in self._model.op:
                     if op.input[0] == input_info.name \
                             and op.type == MaceOp.Quantize.name:
-                        input_info.name = op.output[0]
+                        replace_dict[op.output[0]] = input_info.name
                         input_info.scale = op.quantize_info[0].scale
                         input_info.zero_point = op.quantize_info[0].zero_point
                         break
@@ -412,6 +413,13 @@ class ApuConverter(base_converter.ConverterInterface):
                 for op in self._model.op:
                     if op.output[0] == output_info.name \
                             and op.type == MaceOp.Dequantize.name:
-                        output_info.name = op.input[0]
+                        replace_dict[op.input[0]] = output_info.name
                         break
                 self._model.op.remove(op)
+        for op in self._model.op:
+            for i, op_input in enumerate(op.input):
+                if op_input in replace_dict:
+                    op.input[i] = replace_dict[op_input]
+            for i, op_output in enumerate(op.output):
+                if op_output in replace_dict:
+                    op.output[i] = replace_dict[op_output]
diff --git a/third_party/apu/apu.cmake b/third_party/apu/apu.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e476e7bec337e402160617c3a0e312115c384d83
--- /dev/null
+++ b/third_party/apu/apu.cmake
@@ -0,0 +1,13 @@
+set(APU_INSTALL_DIR "${PROJECT_SOURCE_DIR}/third_party/apu")
+set(APU_INCLUDE_DIR "${APU_INSTALL_DIR}")
+
+include_directories(SYSTEM "${APU_INCLUDE_DIR}")
+
+set(APU-FRONTEND
+  "${APU_INSTALL_DIR}/libapu-frontend.so"
+)
+
+add_library(apu-frontend SHARED IMPORTED GLOBAL)
+set_target_properties(apu-frontend PROPERTIES IMPORTED_LOCATION "${APU-FRONTEND}")
+
+install(FILES "${APU-FRONTEND}" DESTINATION lib)
diff --git a/third_party/third_party.cmake b/third_party/third_party.cmake
index b25a168977064ba22813d336e3dfdf9f01f30081..4317450c3a9e0e93aed438484548b1b15c6c1cb6 100644
--- a/third_party/third_party.cmake
+++ b/third_party/third_party.cmake
@@ -58,3 +58,7 @@ endif(MACE_ENABLE_HEXAGON_DSP)
 if(MACE_ENABLE_HEXAGON_HTA)
   include(${PROJECT_SOURCE_DIR}/third_party/hta/hta.cmake)
 endif(MACE_ENABLE_HEXAGON_HTA)
+
+if(MACE_ENABLE_MTK_APU)
+  include(${PROJECT_SOURCE_DIR}/third_party/apu/apu.cmake)
+endif(MACE_ENABLE_MTK_APU)
diff --git a/tools/bazel-build-standalone-lib.sh b/tools/bazel-build-standalone-lib.sh
index 2b7116e218d0a27da91cace868bbfc6fe9e1f20f..61ebd9d6f76fc3d89e93aff512e8e5d198c45780 100755
--- a/tools/bazel-build-standalone-lib.sh
+++ b/tools/bazel-build-standalone-lib.sh
@@ -19,6 +19,7 @@ mkdir -p $LIB_DIR/armeabi-v7a/cpu_gpu
 rm -rf $LIB_DIR/arm64-v8a
 mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu_dsp
 mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu
+mkdir -p $LIB_DIR/arm64-v8a/cpu_gpu_apu
 
 rm -rf $LIB_DIR/linux-x86-64
 mkdir -p $LIB_DIR/linux-x86-64
@@ -42,6 +43,11 @@ bazel build --config android --config optimization mace/libmace:libmace_dynamic
 cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
 cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
 
+echo "build shared lib for arm64-v8a + cpu_gpu_apu"
+bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=false --define opencl=true --define apu=true --define quantize=true --cpu=arm64-v8a
+cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
+cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
+
 echo "build shared lib for armeabi-v7a + cpu_gpu"
 bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define openmp=false --define opencl=true --define quantize=true --cpu=armeabi-v7a
 cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/
@@ -75,6 +81,11 @@ bazel build --config android --config optimization mace/libmace:libmace_static -
 cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
 cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
 
+echo "build static lib for arm64-v8a + cpu_gpu_apu"
+bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=false --define opencl=true --define apu=true --define quantize=true --cpu=arm64-v8a
+cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_apu/
+cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
+
 echo "build static lib for armeabi-v7a + cpu_gpu"
 bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define openmp=false --define opencl=true --define quantize=true --cpu=armeabi-v7a
 cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/
diff --git a/tools/cmake-build-android-arm64-v8a-cpu.sh b/tools/cmake-build-android-arm64-v8a-cpu.sh
index dea3fe5d1c7d9798d69277173618ce6368f8c5a3..57d2a3607c402dc8c6dbde6bb1d79124fe5cc5ae 100755
--- a/tools/cmake-build-android-arm64-v8a-cpu.sh
+++ b/tools/cmake-build-android-arm64-v8a-cpu.sh
@@ -15,6 +15,7 @@ cmake -DANDROID_ABI="arm64-v8a" \
       -DMACE_ENABLE_OPENCL=OFF \
       -DMACE_ENABLE_HEXAGON_DSP=OFF \
       -DMACE_ENABLE_HEXAGON_HTA=OFF \
+      -DMACE_ENABLE_MTK_APU=OFF \
       -DMACE_ENABLE_OPT_SIZE=ON \
       -DMACE_ENABLE_OBFUSCATE=ON \
       -DCMAKE_INSTALL_PREFIX=install \
diff --git a/tools/cmake-build-android-arm64-v8a-full.sh b/tools/cmake-build-android-arm64-v8a-full.sh
index b51ec2004ed33b179a87771ccf24a388943a16db..3db663553cb13a173c39a350bcecd8ef1bef71b1 100755
--- a/tools/cmake-build-android-arm64-v8a-full.sh
+++ b/tools/cmake-build-android-arm64-v8a-full.sh
@@ -15,6 +15,7 @@ cmake -DANDROID_ABI="arm64-v8a" \
       -DMACE_ENABLE_OPENCL=ON \
       -DMACE_ENABLE_HEXAGON_DSP=ON \
       -DMACE_ENABLE_HEXAGON_HTA=ON \
+      -DMACE_ENABLE_MTK_APU=ON \
       -DMACE_ENABLE_OPT_SIZE=ON \
       -DMACE_ENABLE_OBFUSCATE=ON \
       -DCMAKE_INSTALL_PREFIX=install \
diff --git a/tools/cmake-build-android-armeabi-v7a-cpu.sh b/tools/cmake-build-android-armeabi-v7a-cpu.sh
index bbea9d4c8eb0f918844ad62360d25f61446e2d31..4018b8d5ba9b00405e79864be4f1822b4e64dd36 100755
--- a/tools/cmake-build-android-armeabi-v7a-cpu.sh
+++ b/tools/cmake-build-android-armeabi-v7a-cpu.sh
@@ -16,6 +16,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \
       -DMACE_ENABLE_OPENCL=OFF \
       -DMACE_ENABLE_HEXAGON_DSP=OFF \
       -DMACE_ENABLE_HEXAGON_HTA=OFF \
+      -DMACE_ENABLE_MTK_APU=OFF \
       -DMACE_ENABLE_OPT_SIZE=ON \
       -DMACE_ENABLE_OBFUSCATE=ON \
       -DCMAKE_INSTALL_PREFIX=install \
diff --git a/tools/cmake-build-android-armeabi-v7a-full.sh b/tools/cmake-build-android-armeabi-v7a-full.sh
index 09e5c7f896d709b0e1e794e35f087c85be397ea8..6ac7f7f6e2db8a313fb7b1058fce8154bf31cd17 100755
--- a/tools/cmake-build-android-armeabi-v7a-full.sh
+++ b/tools/cmake-build-android-armeabi-v7a-full.sh
@@ -16,6 +16,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \
       -DMACE_ENABLE_OPENCL=ON \
       -DMACE_ENABLE_HEXAGON_DSP=ON \
       -DMACE_ENABLE_HEXAGON_HTA=ON \
+      -DMACE_ENABLE_MTK_APU=OFF \
       -DMACE_ENABLE_OPT_SIZE=ON \
       -DMACE_ENABLE_OBFUSCATE=ON \
       -DCMAKE_INSTALL_PREFIX=install \
diff --git a/tools/converter.py b/tools/converter.py
index 98fb6bc36b6b88973d16882ffb95ec4b82447f5d..aca556aedcc01c7c6bd9f78acb9c883030071918 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -189,6 +189,19 @@ def get_hta_mode(configs):
     return False
 
 
+def get_apu_mode(configs):
+    runtime_list = []
+    for model_name in configs[YAMLKeyword.models]:
+        model_runtime = \
+            configs[YAMLKeyword.models][model_name].get(
+                YAMLKeyword.runtime, "")
+        runtime_list.append(model_runtime.lower())
+
+    if RuntimeType.apu in runtime_list:
+        return True
+    return False
+
+
 def get_opencl_mode(configs):
     runtime_list = []
     for model_name in configs[YAMLKeyword.models]:
@@ -575,6 +588,7 @@ def format_model_config(flags):
             DeviceType.GPU: ValidationThreshold.gpu_threshold,
             DeviceType.HEXAGON: ValidationThreshold.quantize_threshold,
             DeviceType.HTA: ValidationThreshold.quantize_threshold,
+            DeviceType.APU: ValidationThreshold.quantize_threshold,
             DeviceType.QUANTIZE: ValidationThreshold.quantize_threshold,
         }
         for k, v in six.iteritems(validation_threshold):
@@ -816,6 +830,7 @@ def build_model_lib(configs, address_sanitizer, debug_mode):
             toolchain=toolchain,
             enable_hexagon=get_hexagon_mode(configs),
             enable_hta=get_hta_mode(configs),
+            enable_apu=get_apu_mode(configs),
             enable_opencl=get_opencl_mode(configs),
             enable_quantize=get_quantize_mode(configs),
             address_sanitizer=address_sanitizer,
@@ -884,6 +899,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
         toolchain=toolchain,
         enable_hexagon=get_hexagon_mode(configs),
         enable_hta=get_hta_mode(configs),
+        enable_apu=get_apu_mode(configs),
         enable_openmp=enable_openmp,
         enable_opencl=get_opencl_mode(configs),
         enable_quantize=get_quantize_mode(configs),
@@ -931,6 +947,7 @@ def build_example(configs, target_abi, toolchain, enable_openmp, mace_lib_type,
                             enable_quantize=get_quantize_mode(configs),
                             enable_hexagon=get_hexagon_mode(configs),
                             enable_hta=get_hta_mode(configs),
+                            enable_apu=get_apu_mode(configs),
                             address_sanitizer=flags.address_sanitizer,
                             symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type),  # noqa
                             debug_mode=debug_mode)
@@ -964,6 +981,7 @@ def build_example(configs, target_abi, toolchain, enable_openmp, mace_lib_type,
                             enable_quantize=get_quantize_mode(configs),
                             enable_hexagon=get_hexagon_mode(configs),
                             enable_hta=get_hta_mode(configs),
+                            enable_apu=get_apu_mode(configs),
                             address_sanitizer=flags.address_sanitizer,
                             debug_mode=debug_mode,
                             extra_args=build_arg)
@@ -1072,6 +1090,7 @@ def build_benchmark_model(configs,
                             enable_quantize=get_quantize_mode(configs),
                             enable_hexagon=get_hexagon_mode(configs),
                             enable_hta=get_hta_mode(configs),
+                            enable_apu=get_apu_mode(configs),
                             symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type),  # noqa
                             debug_mode=debug_mode,
                             extra_args=build_arg)
diff --git a/tools/device.py b/tools/device.py
index 22648ad9a0806ca914717778865ba9453ef91f8f..ca21c285f0019a842c39f7f47a264d0e6e9445f5 100644
--- a/tools/device.py
+++ b/tools/device.py
@@ -280,6 +280,10 @@ class DeviceWrapper:
                     "third_party/nnlib/%s/libhexagon_controller.so" % abi,
                     self.data_dir)
 
+            if device_type == common.DeviceType.APU:
+                self.push("third_party/apu/libapu-frontend.so",
+                          self.data_dir)
+
             mace_model_phone_path = ""
             if model_graph_format == ModelFormat.file:
                 mace_model_phone_path = "%s/%s.pb" % (self.data_dir,
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index 172b8d44fb9c17914e719bec9fc811ac9820b3aa..e9d051b3c718e1621e00c7160567944c29b940ae 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -264,6 +264,7 @@ def bazel_build(target,
                 toolchain='android',
                 enable_hexagon=False,
                 enable_hta=False,
+                enable_apu=False,
                 enable_openmp=False,
                 enable_neon=True,
                 enable_opencl=True,
@@ -303,8 +304,9 @@ def bazel_build(target,
             "--define",
             "hexagon=%s" % str(enable_hexagon).lower(),
             "--define",
-            "hta=%s" % str(enable_hta).lower())
-
+            "hta=%s" % str(enable_hta).lower(),
+            "--define",
+            "apu=%s" % str(enable_apu).lower())
     if address_sanitizer:
         bazel_args += ("--config", "asan")
     if debug_mode:
@@ -322,6 +324,7 @@ def bazel_build(target,
     sh.bazel(
         _fg=True,
         *bazel_args)
+    six.print_(bazel_args)
     six.print_("Build done!\n")
 
 
@@ -881,6 +884,7 @@ def build_run_throughput_test(abi,
                               cpu_model_tag,
                               gpu_model_tag,
                               dsp_model_tag,
+                              apu_model_tag,
                               phone_data_dir,
                               strip="always",
                               input_file_name="model_input"):
@@ -896,7 +900,9 @@ def build_run_throughput_test(abi,
     if dsp_model_tag:
         model_tag_build_flag += "--copt=-DMACE_DSP_MODEL_TAG=%s " % \
                                 dsp_model_tag
-
+    if apu_model_tag:
+        model_tag_build_flag += "--copt=-DMACE_APU_MODEL_TAG=%s " % \
+                                apu_model_tag
     sh.cp("-f", merged_lib_file, "mace/benchmark/libmace_merged.a")
     sh.bazel(
         "build",
@@ -948,7 +954,10 @@ def build_run_throughput_test(abi,
         adb_push("third_party/nnlib/%s/libhexagon_controller.so" % abi,
                  phone_data_dir,
                  serialno)
-
+    if apu_model_tag:
+        adb_push("third_party/apu/libapu-frontend.so",
+                 phone_data_dir,
+                 serialno)
     sh.adb(
         "-s",
         serialno,