diff --git a/WORKSPACE b/WORKSPACE index e62557bddf0cfa8a45ecd6fff10469524f4152ab..4bb0f0e25df8c0226a8699a18c36497dcd762bb2 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -123,3 +123,9 @@ android_ndk_repository( # Android 5.0 api_level = 21, ) + +new_local_repository( + name = "libmace", + path = "./dynamic_lib/", + build_file = "./dynamic_lib/libmace.BUILD" +) diff --git a/docs/getting_started/create_a_model_deployment.rst b/docs/getting_started/create_a_model_deployment.rst index 46be54ad0bb3ac53f044f07b31e83520130ec360..92e969e30ac6c7a3c496800f833c60fbcf66a23b 100644 --- a/docs/getting_started/create_a_model_deployment.rst +++ b/docs/getting_started/create_a_model_deployment.rst @@ -31,49 +31,51 @@ Configurations :align: left * - library_name - - library name + - library name. * - target_abis - - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a' + - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a'. * - target_socs - [optional] build for specified socs if you just want use the model for that socs. * - embed_model_data - - Whether embedding model weights as the code, default to 0 + - Whether embedding model weights as the code, default to 0. * - build_type - model build type, can be ['proto', 'code']. 'proto' for converting model to ProtoBuf file and 'code' for converting model to c++ code. + * - dynamic_link + - [optional] Whether to link the MACE library dynamically (as a shared library), default to 0. When set to 1, build_type must be 'proto'. * - model_name - model name. should be unique if there are multiple models. **LIMIT: if build_type is code, model_name will used in c++ code so that model_name must fulfill c++ name specification.** * - platform - - The source framework, one of [tensorflow, caffe] + - The source framework, one of [tensorflow, caffe]. * - model_file_path - - The path of the model file, can be local or remote + - The path of the model file, can be local or remote. 
* - model_sha256_checksum - - The SHA256 checksum of the model file + - The SHA256 checksum of the model file. * - weight_file_path - - [optional] The path of the model weights file, used by Caffe model + - [optional] The path of the model weights file, used by Caffe model. * - weight_sha256_checksum - - [optional] The SHA256 checksum of the weight file, used by Caffe model + - [optional] The SHA256 checksum of the weight file, used by Caffe model. * - subgraphs - subgraphs key. ** DO NOT EDIT ** * - input_tensors - - The input tensor names (tensorflow), top name of inputs' layer (caffe). one or more strings + - The input tensor names (tensorflow), top name of inputs' layer (caffe). one or more strings. * - output_tensors - - The output tensor names (tensorflow), top name of outputs' layer (caffe). one or more strings + - The output tensor names (tensorflow), top name of outputs' layer (caffe). one or more strings. * - input_shapes - - The shapes of the input tensors, in NHWC order + - The shapes of the input tensors, in NHWC order. * - output_shapes - - The shapes of the output tensors, in NHWC order + - The shapes of the output tensors, in NHWC order. * - validation_inputs_data - - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used + - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used. * - runtime - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains cpu and gpu model definition so you can run the model on both cpu and gpu. * - data_type - [optional] The data type used for specified runtime. [fp16_fp32, fp32_fp32] for gpu, default is fp16_fp32. [fp32] for cpu. [uint8] for dsp. * - limit_opencl_kernel_time - - [optional] Whether splitting the OpenCL kernel within 1 ms to keep UI responsiveness, default to 0 + - [optional] Whether splitting the OpenCL kernel within 1 ms to keep UI responsiveness, default to 0. 
* - nnlib_graph_mode - - [optional] Control the DSP precision and performance, default to 0 usually works for most cases + - [optional] Control the DSP precision and performance, default to 0 usually works for most cases. * - obfuscate - - [optional] Whether to obfuscate the model operator name, default to 0 + - [optional] Whether to obfuscate the model operator name, default to 0. * - winograd - - [optional] Whether to enable Winograd convolution, **will increase memory consumption** + - [optional] Whether to enable Winograd convolution, **will increase memory consumption**. diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index f541660ef80513bc8fca355fe01b01c8d6fee351..9f332c0c61e5df0744acc48caca793082df64d68 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -165,13 +165,15 @@ built-in tool when necessary. $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel -============================ -3. Build static library -============================ +============================== +3. Build static/shared library +============================== ----------------- 3.1 Overview ----------------- -MiAI Compute Engine only build static library. The followings are two use cases. +MiAI Compute Engine can build either a static or a shared library (which is +specified by ``dynamic_link`` in the YAML model deployment file). +The following are two use cases. * **Build well tuned library for specific SoCs** @@ -185,7 +187,7 @@ MiAI Compute Engine only build static library. The followings are two use cases. * **Build generic library for all SoCs** - When ``target_soc`` is not specified, the generated library is compatible + When ``target_socs`` is not specified, the generated library is compatible with general devices. .. note:: @@ -198,7 +200,9 @@ model conversion, compiling, test run, benchmark and correctness validation. .. note:: - ``tools/converter.py`` should be run at the root directory of this project. + 1. 
``tools/converter.py`` should be run at the root directory of this project. + 2. When ``dynamic_link`` is set to ``1``, ``build_type`` should be ``proto``. + Currently, only Android devices are supported. ------------------------------------------ @@ -211,7 +215,7 @@ model conversion, compiling, test run, benchmark and correctness validation. .. note:: - build static library and test tools. + Build the library and test tools. * *--config* (type=str, default="", required): the path of model yaml configuration file. * *--tuning* (default=false, optional): whether tuning the parameters for the GPU of specified SoC. @@ -302,7 +306,7 @@ Using ``-h`` to get detailed help. .. code:: sh - # Build the static library + # Build the library python tools/converter.py build --config=models/config.yaml # Test model run time @@ -325,9 +329,11 @@ Using ``-h`` to get detailed help. 4. Deployment ============= -``build`` command will generate the static library, model files and header files -and packaged as ``build/${library_name}/libmace_${library_name}.tar.gz``. -They are organized as follows, +The ``build`` command generates the static/shared library, model files and +header files, and packages them as +``build/${library_name}/libmace_${library_name}.tar.gz``. + +- The generated ``static`` libraries are organized as follows, .. code:: @@ -347,19 +353,52 @@ They are organized as follows, ├── model │   ├── mobilenet_v2.data │   └── mobilenet_v2.pb - └─── opencl -    └── compiled_opencl_kernel.bin + └── opencl + ├── arm64-v8a + │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + └── armeabi-v7a + └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + +- The generated ``shared`` libraries are organized as follows, + +.. 
code:: + + build + └── mobilenet-v2-gpu + ├── include + │   └── mace + │   └── public + │   ├── mace.h + │   └── mace_runtime.h + ├── lib + │   ├── arm64-v8a + │   │   ├── libgnustl_shared.so + │   │   └── libmace.so + │   └── armeabi-v7a + │   ├── libgnustl_shared.so + │   └── libmace.so + ├── model + │   ├── mobilenet_v2.data + │   └── mobilenet_v2.pb + └── opencl + ├── arm64-v8a + │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + └── armeabi-v7a + └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin .. note:: 1. DSP runtime depends on ``libhexagon_controller.so``. 2. ``${MODEL_TAG}.pb`` file will be generated only when ``build_type`` is ``proto``. - 3. ``compiled_kernel.bin`` will be generated only when ``target_soc`` and ``gpu`` runtime are specified. + 3. ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` will + be generated only when ``target_socs`` and ``gpu`` runtime are specified. + 4. The generated shared library depends on ``libgnustl_shared.so``. .. warning:: - ``compiled_kernel.bin`` depends on the OpenCL version of the device, you should maintan the - compatibility or configure compiling cache store with ``ConfigKVStorageFactory``. + ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` depends + on the OpenCL version of the device; you should maintain compatibility or + configure a compile cache store with ``ConfigKVStorageFactory``. ========================================= 5. How to use the library in your project diff --git a/docs/getting_started/how_to_build_zh.rst b/docs/getting_started/how_to_build_zh.rst index 7bbf05d93f8fa076c74aceb611c851a1e26a0e71..12df73e3dccb2ca6aa03a31eeaaa78958dd0efc1 100644 --- a/docs/getting_started/how_to_build_zh.rst +++ b/docs/getting_started/how_to_build_zh.rst @@ -163,18 +163,18 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel ================== -3. 生成模型静态库 +3. 
生成模型库 ================== --------------------------------------- 3.1 简介 --------------------------------------- -Mace目前只提供静态库,有以下两种使用场景。 +Mace目前提供静态库和动态库(可以在\ ``yaml``\ 文件中通过\ ``dynamic_link``\ 指定),有以下两种使用场景。 **特定SOC库** - 该使用场景要求在``yaml``文件中必须制定``target_socs``。主要用于为编译适用于指定手机SOC的静态库。 + 该使用场景要求在\ ``yaml``\ 文件中必须指定\ ``target_socs``\ 。主要用于编译适用于指定手机SOC的库。 如果希望使用GPU,那么编译过程会自动测试选择最佳的GPU相关参数以获得更好的性能。 .. warning:: @@ -183,7 +183,7 @@ Mace目前只提供静态库,有以下两种使用场景。 **通用库** - 如果在``yaml``文件中没有指定``target_soc``,生成的静态库适用于所有手机。 + 如果在\ ``yaml``\ 文件中没有指定\ ``target_socs``\ ,生成的库适用于所有手机。 .. warning:: @@ -194,7 +194,8 @@ Mace目前只提供静态库,有以下两种使用场景。 .. warning:: - 必须在项目的根目录下运行\ ``tools/converter.py``\ 脚本。 + 1. 必须在项目的根目录下运行\ ``tools/converter.py``\ 脚本。 + 2. 当\ ``dynamic_link``\ 被设置为1时,\ ``build_type``\ 必须设置为\ ``proto``\ 。当前动态链接的方式只支持安卓设备。 --------------------------------------- @@ -207,7 +208,7 @@ Mace目前只提供静态库,有以下两种使用场景。 .. note:: - build模型静态库以及测试工具。 + build模型库以及测试工具。 * *--config* (type=str, default="", required):模型配置yaml文件路径. * *--tuning* (default=false, optional):是否为特定SOC调制GPU参数. @@ -293,7 +294,7 @@ Mace目前只提供静态库,有以下两种使用场景。 python tools/converter.py run -h python tools/converter.py benchmark -h - # 仅编译模型和生成静态库 + # 仅编译模型和生成库 python tools/converter.py build --config=models/config.yaml # 测试模型的运行时间 @@ -316,7 +317,7 @@ Mace目前只提供静态库,有以下两种使用场景。 4. 发布 ========== -``build``命令会生成一个tar包,里面包含了发布所需要的所有文件,其位于``./build/${library_name}/libmace_${library_name}.tar.gz``. +\ ``build``\ 命令会生成一个tar包,里面包含了发布所需要的所有文件,其位于\ ``./build/${library_name}/libmace_${library_name}.tar.gz``\ . 下面解释了该包中包含了哪些文件。 **头文件** @@ -326,6 +327,12 @@ Mace目前只提供静态库,有以下两种使用场景。 * ``./build/${library_name}/library/${target_abi}/*.a`` **动态库** + * ``./build/${library_name}/lib/${target_abi}/*.so`` + + .. note:: + + 当\ ``dynamic_link``\ 设置为1时生成动态链接库。 + * ``./build/${library_name}/library/${target_abi}/libhexagon_controller.so`` .. note:: @@ -345,7 +352,7 @@ Mace目前只提供静态库,有以下两种使用场景。 .. 
note:: - 只有指定了``target_soc``并且``runtime==gpu``的情况下才会生成。 + 只有指定了\ ``target_socs``\ 并且\ ``runtime==gpu``\ 的情况下才会生成。 .. warning:: diff --git a/docs/getting_started/models/demo_app_models.yaml b/docs/getting_started/models/demo_app_models.yaml index 216deea517a3d6b3ef8e7673e90fb1f439655206..fb39af92f681dc57858aa39ff84329706a737ab9 100644 --- a/docs/getting_started/models/demo_app_models.yaml +++ b/docs/getting_started/models/demo_app_models.yaml @@ -1,13 +1,17 @@ -# 库的名字 +# The name of the library library_name: library_name -# 配置文件名会被用作生成库的名称:libmace-${library_name}.a target_abis: [armeabi-v7a, arm64-v8a] -# 具体机型的soc编号,可以使用`adb shell getprop | grep ro.board.platform | cut -d [ -f3 | cut -d ] -f1`获取 +# The SoC identifier of the specific target device. +# Get it with the command `adb shell getprop | grep ro.board.platform | cut -d [ -f3 | cut -d ] -f1`. target_socs: [msm8998] embed_model_data: 1 -build_type: code # 模型build类型。code表示将模型转为代码,proto表示将模型转为protobuf文件 -models: # 一个配置文件可以包含多个模型的配置信息,最终生成的库中包含多个模型 - model_name: # 模型的标签,在调度模型的时候,会用这个变量,必须唯一 +# The build type for the model(s). +# 'code' converts the model(s) into C++ code, 'proto' converts the model(s) into protobuf file(s). +build_type: code +dynamic_link: 0 # set to 1 to link the MACE library dynamically; this requires 'build_type: proto' +# One YAML config file can contain the configuration of multiple models. +models: + model_name: # model tag, which will be used when loading the model and must be unique. 
platform: tensorflow model_file_path: path/to/model64.pb # also support http:// and https:// model_sha256_checksum: 7f7462333406e7dea87222737590ebb7d94490194d2f21a7d72bafa87e64e9f9 diff --git a/dynamic_lib/arm64-v8a/libgnustl_shared.so b/dynamic_lib/arm64-v8a/libgnustl_shared.so new file mode 100755 index 0000000000000000000000000000000000000000..4d772d72f3a8acd63fe4521c8c6e1a21250c7920 Binary files /dev/null and b/dynamic_lib/arm64-v8a/libgnustl_shared.so differ diff --git a/dynamic_lib/armeabi-v7a/libgnustl_shared.so b/dynamic_lib/armeabi-v7a/libgnustl_shared.so new file mode 100755 index 0000000000000000000000000000000000000000..a6b58f1df6f7a68e982d31d9939775100c1b4e61 Binary files /dev/null and b/dynamic_lib/armeabi-v7a/libgnustl_shared.so differ diff --git a/dynamic_lib/libmace.BUILD b/dynamic_lib/libmace.BUILD new file mode 100644 index 0000000000000000000000000000000000000000..7cc4d96dcae202112b90ee1e9543616374c62de3 --- /dev/null +++ b/dynamic_lib/libmace.BUILD @@ -0,0 +1,5 @@ +cc_library( + name = "libmace", + srcs = ["libmace.so"], + visibility = ["//visibility:public"], +) diff --git a/mace/BUILD b/mace/BUILD index bebe0e9db98a475a98888e7f2689f326e21d9f3f..59f97c0993fcf20f3503060a9edd736715db7833 100644 --- a/mace/BUILD +++ b/mace/BUILD @@ -59,3 +59,18 @@ config_setting( }, visibility = ["//visibility:public"], ) + +cc_binary( + name = "libmace.so", + linkshared = 1, + linkstatic = 0, + linkopts = [ + "-Wl,-soname,libmace.so", + "-Wl,--version-script", + "mace_version_script.lds", + ], + deps = [ + "//mace/ops", + ":mace_version_script.lds", + ], +) diff --git a/mace/benchmark/BUILD b/mace/benchmark/BUILD index c15e74081f0adf50b533b3f59966f7f0f11c5a83..a4e04d19f3891da32ad934c214117f961df140a6 100644 --- a/mace/benchmark/BUILD +++ b/mace/benchmark/BUILD @@ -39,6 +39,26 @@ cc_binary( ], ) +cc_binary( + name = "benchmark_model_deps_so", + srcs = [ + "benchmark_model.cc", + ], + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + 
if_android(["-DMACE_ENABLE_OPENCL"]), + linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]), + linkstatic = 0, + deps = [ + ":statistics", + "//external:gflags_nothreads", + "//mace/codegen:generated_mace_engine_factory", + "@libmace//:libmace", + ], +) + cc_library( name = "libmace_merged", srcs = [ diff --git a/mace/benchmark/statistics.cc b/mace/benchmark/statistics.cc index 83e3e5a013a2667c33bbb539a30865d324fe7285..3de3f4f89bd9286ed0ef35eda30e8794ba5d6014 100644 --- a/mace/benchmark/statistics.cc +++ b/mace/benchmark/statistics.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/benchmark/statistics.h" - #include +#include "mace/benchmark/statistics.h" #include "mace/utils/logging.h" #include "mace/utils/string_util.h" diff --git a/mace/core/BUILD b/mace/core/BUILD index 93fbbd72dab969ff69c37e27f462f31be4e77d8f..c819d80033a66e18ed20b0522bbc62ff94658ef4 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -71,6 +71,7 @@ cc_library( ]) + if_hexagon_enabled([ "//third_party/nnlib:libhexagon", ]), + alwayslink = 1, ) cc_library( diff --git a/mace/mace_version_script.lds b/mace/mace_version_script.lds new file mode 100644 index 0000000000000000000000000000000000000000..270952dc798dc3fe89357f4a98c22597c60f86a4 --- /dev/null +++ b/mace/mace_version_script.lds @@ -0,0 +1,14 @@ +mace { + global: + *MaceTensor*; + *MaceEngine*; + *MaceVersion*; + *SetOpenMPThreadPolicy*; + *SetGPUHints*; + *SetOpenCLBinaryPaths*; + *FileStorageFactory*; + *SetKVStorageFactory*; + *CreateMaceEngineFromProto*; + local: + *; +}; diff --git a/mace/tools/validation/BUILD b/mace/tools/validation/BUILD index f53b9ab70141b4035598c16b186da278a7bb6afc..d4ccd447f4eaa3b7d0a62c7b397cc82456dcd8f7 100644 --- a/mace/tools/validation/BUILD +++ b/mace/tools/validation/BUILD @@ -15,3 +15,19 @@ cc_binary( "//mace/codegen:generated_models", ], ) + +cc_binary( + name = "mace_run_deps_so", + srcs = 
["mace_run.cc"], + copts = if_android([ + "-DMACE_ENABLE_OPENCL", + ]), + linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]), + linkstatic = 0, + deps = [ + "//external:gflags_nothreads", + "//mace/codegen:generated_mace_engine_factory", + "//mace/utils:utils", + "@libmace//:libmace", + ], +) diff --git a/tools/bazel.rc b/tools/bazel.rc index 9348ee1e33be256e4868f6d96398d4bdb7c33ee1..0629fd70fd4d788b8a0c9b44048960e2cf6e9d16 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -18,7 +18,7 @@ build:android --config=cross_compile # Usage example: bazel build --config optimization build:optimization -c opt build:optimization --copt=-O3 -build:optimization --strip=always +build:optimization --linkopt=-Wl,--strip-all # Address sanitizer build:asan --strip=never diff --git a/tools/converter.py b/tools/converter.py index 0d2b19c3509eedff23308d15f39c27cdcd1eb313..564425c9828de775c56a64d2ac73ca32d5f2396a 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -44,13 +44,15 @@ MODEL_OUTPUT_DIR_NAME = 'model' MODEL_HEADER_DIR_PATH = 'include/mace/public' BUILD_TMP_DIR_NAME = '_tmp' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' -OUTPUT_LIBRARY_DIR_NAME = 'library' +OUTPUT_LIBRARY_DIR_NAME = 'lib' OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl' OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel' CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" CODEGEN_BASE_DIR = 'mace/codegen' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' +LIBMACE_SO_TARGET = "//mace:libmace.so" MACE_RUN_TARGET = "//mace/tools/validation:mace_run" +MACE_RUN_TARGET_DEPS_SO = "//mace/tools/validation:mace_run_deps_so" ALL_SOC_TAG = 'all' ABITypeStrs = [ @@ -124,6 +126,7 @@ class YAMLKeyword(object): target_socs = 'target_socs' build_type = 'build_type' embed_model_data = 'embed_model_data' + dynamic_link = 'dynamic_link' models = 'models' platform = 'platform' model_file_path = 'model_file_path' @@ -279,6 +282,21 @@ def format_model_config(config_file_path): if build_type == 
BuildType.proto: configs[YAMLKeyword.embed_model_data] = 0 + dynamic_link = configs.get(YAMLKeyword.dynamic_link, "") + if dynamic_link == "": + configs[YAMLKeyword.dynamic_link] = 0 + dynamic_link = 0 + if not isinstance(dynamic_link, int) or dynamic_link < 0 or \ + dynamic_link > 1: + MaceLogger.error(ModuleName.YAML_CONFIG, + "dynamic_link must be 0 or 1. " + "default is 0, for link mace lib statically, " + "1 for dynamic link.") + if build_type == BuildType.code and dynamic_link == 1: + MaceLogger.error(ModuleName.YAML_CONFIG, + "'dynamic_link == 1' only support when " + "'build_type == proto'") + model_names = configs.get(YAMLKeyword.models, []) mace_check(len(model_names) > 0, ModuleName.YAML_CONFIG, "no model found in config file") @@ -451,6 +469,13 @@ def get_opencl_binary_output_path(library_name, target_abi, target_soc) +def get_shared_library_dir(library_name, abi): + return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR, + library_name, + OUTPUT_LIBRARY_DIR_NAME, + abi) + + ################################ # build ################################ @@ -475,6 +500,8 @@ def print_configuration(flags, configs): configs[YAMLKeyword.build_type]]) data.append([YAMLKeyword.embed_model_data, configs[YAMLKeyword.embed_model_data]]) + data.append([YAMLKeyword.dynamic_link, + configs[YAMLKeyword.dynamic_link]]) data.append(["Tuning", flags.tuning]) MaceLogger.summary(StringFormatter.table(header, data, title)) @@ -624,6 +651,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] + dynamic_link = configs[YAMLKeyword.dynamic_link] hexagon_mode = get_hexagon_mode(configs) model_output_dirs = [] @@ -634,14 +662,31 @@ def build_specific_lib(target_abi, target_soc, serial_num, os.makedirs(build_tmp_binary_dir) sh_commands.gen_tuning_param_code(model_output_dirs) + if dynamic_link == 0: + mace_run_target = MACE_RUN_TARGET + 
else: + mace_run_target = MACE_RUN_TARGET_DEPS_SO + sh_commands.bazel_build( + LIBMACE_SO_TARGET, + abi=target_abi, + hexagon_mode=hexagon_mode, + enable_openmp=enable_openmp, + address_sanitizer=address_sanitizer + ) + sh_commands.update_libmace_shared_library(serial_num, + target_abi, + library_name, + BUILD_OUTPUT_DIR, + OUTPUT_LIBRARY_DIR_NAME) + sh_commands.bazel_build( - MACE_RUN_TARGET, + mace_run_target, abi=target_abi, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp, address_sanitizer=address_sanitizer ) - sh_commands.update_mace_run_lib(build_tmp_binary_dir) + sh_commands.update_mace_run_lib(build_tmp_binary_dir, dynamic_link) binary_changed = False for model_name in configs[YAMLKeyword.models]: @@ -696,6 +741,8 @@ def build_specific_lib(target_abi, target_soc, serial_num, phone_data_dir=PHONE_DATA_DIR, build_type=build_type, opencl_binary_file="", + shared_library_dir=get_shared_library_dir(library_name, target_abi), # noqa + dynamic_link=dynamic_link, ) pull_opencl_binary_and_tuning_param(target_abi, serial_num, @@ -712,13 +759,13 @@ def build_specific_lib(target_abi, target_soc, serial_num, opencl_output_bin_path) sh_commands.gen_tuning_param_code(model_output_dirs) sh_commands.bazel_build( - MACE_RUN_TARGET, + mace_run_target, abi=target_abi, hexagon_mode=hexagon_mode, enable_openmp=enable_openmp, address_sanitizer=address_sanitizer ) - sh_commands.update_mace_run_lib(build_tmp_binary_dir) + sh_commands.update_mace_run_lib(build_tmp_binary_dir, dynamic_link) if target_abi == ABIType.host: sh_commands.build_host_libraries(build_type, target_abi) @@ -726,17 +773,19 @@ def build_specific_lib(target_abi, target_soc, serial_num, # build benchmark_model binary sh_commands.build_benchmark_model(target_abi, build_tmp_binary_dir, - hexagon_mode) + hexagon_mode, + dynamic_link) # generate library - sh_commands.merge_libs(target_soc, - serial_num, - target_abi, - library_name, - BUILD_OUTPUT_DIR, - OUTPUT_LIBRARY_DIR_NAME, - build_type, - hexagon_mode) + 
if dynamic_link == 0: + sh_commands.merge_libs(target_soc, + serial_num, + target_abi, + library_name, + BUILD_OUTPUT_DIR, + OUTPUT_LIBRARY_DIR_NAME, + build_type, + hexagon_mode) def generate_library(configs, tuning, enable_openmp, address_sanitizer): @@ -864,6 +913,7 @@ def run_specific_target(flags, configs, target_abi, build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] opencl_output_bin_path = "" + dynamic_link = configs[YAMLKeyword.dynamic_link] if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) @@ -951,6 +1001,8 @@ def run_specific_target(flags, configs, target_abi, runtime_failure_ratio=flags.runtime_failure_ratio, address_sanitizer=flags.address_sanitizer, opencl_binary_file=opencl_output_bin_path, + shared_library_dir=get_shared_library_dir(library_name, target_abi), # noqa + dynamic_link=dynamic_link, ) if flags.validate: model_file_path, weight_file_path = get_model_files_path( @@ -1010,6 +1062,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] opencl_output_bin_path = "" + dynamic_link = configs[YAMLKeyword.dynamic_link] if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) @@ -1087,7 +1140,9 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): cpu_affinity_policy=flags.cpu_affinity_policy, gpu_perf_hint=flags.gpu_perf_hint, gpu_priority_hint=flags.gpu_priority_hint, - opencl_binary_file=opencl_output_bin_path) + opencl_binary_file=opencl_output_bin_path, + shared_library_dir=get_shared_library_dir(library_name, target_abi), # noqa + dynamic_link=dynamic_link) def benchmark_model(flags): diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 
289f76dcd13f2e4e6957c52e1afcf591a649c1cf..3dca39714f6fa9d6a0dadcd5064dc7a7a0f3c308 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -561,12 +561,16 @@ def gen_random_input(model_output_dir, sh.cp("-f", input_file_list[i], dst_input_file) -def update_mace_run_lib(build_tmp_binary_dir): +def update_mace_run_lib(build_tmp_binary_dir, dynamic_link=0): mace_run_filepath = build_tmp_binary_dir + "/mace_run" if os.path.exists(mace_run_filepath): sh.rm("-rf", mace_run_filepath) - sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run", - build_tmp_binary_dir) + if dynamic_link == 0: + sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run", + build_tmp_binary_dir) + else: + sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_deps_so", + "%s/mace_run" % build_tmp_binary_dir) def touch_tuned_file_flag(build_tmp_binary_dir): @@ -583,6 +587,30 @@ def create_internal_storage_dir(serialno, phone_data_dir): return internal_storage_dir +def update_libmace_shared_library(serial_num, + abi, + project_name, + build_output_dir, + library_output_dir): + libmace_name = "libmace.so" + mace_library_dir = "./dynamic_lib/" + library_dir = "%s/%s/%s/%s" % ( + build_output_dir, project_name, library_output_dir, abi) + libmace_file = "%s/%s" % (library_dir, libmace_name) + + if os.path.exists(libmace_file): + sh.rm("-rf", library_dir) + sh.mkdir("-p", library_dir) + sh.cp("-f", "bazel-bin/mace/libmace.so", library_dir) + sh.cp("-f", "%s/%s/libgnustl_shared.so" % (mace_library_dir, abi), + library_dir) + + libmace_load_path = "%s/%s" % (mace_library_dir, libmace_name) + if os.path.exists(libmace_load_path): + sh.rm("-f", libmace_load_path) + sh.cp("-f", "bazel-bin/mace/libmace.so", mace_library_dir) + + def tuning_run(abi, serialno, mace_run_dir, @@ -604,6 +632,7 @@ def tuning_run(abi, phone_data_dir, build_type, opencl_binary_file, + shared_library_dir, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, @@ -611,7 +640,8 @@ def tuning_run(abi, 
input_file_name="model_input", output_file_name="model_out", runtime_failure_ratio=0.0, - address_sanitizer=False): + address_sanitizer=False, + dynamic_link=0): print("* Run '%s' with round=%s, restart_round=%s, tuning=%s, " "out_of_range_check=%s, omp_num_threads=%s, cpu_affinity_policy=%s, " "gpu_perf_hint=%s, gpu_priority_hint=%s" % @@ -683,6 +713,13 @@ def tuning_run(abi, mace_model_phone_path, serialno) + if dynamic_link == 1: + adb_push("%s/libmace.so" % shared_library_dir, phone_data_dir, + serialno) + adb_push("%s/libgnustl_shared.so" % shared_library_dir, + phone_data_dir, + serialno) + adb_push("%s/mace_run" % mace_run_dir, phone_data_dir, serialno) @@ -907,7 +944,7 @@ def merge_libs(target_soc, "bazel-bin/mace/codegen/libgenerated_version.pic.a\n") mri_stream += ( "addlib " - "bazel-bin/mace/core/libcore.pic.a\n") + "bazel-bin/mace/core/libcore.pic.lo\n") mri_stream += ( "addlib " "bazel-bin/mace/kernels/libkernels.pic.a\n") @@ -951,7 +988,7 @@ def merge_libs(target_soc, "bazel-bin/mace/codegen/libgenerated_version.a\n") mri_stream += ( "addlib " - "bazel-bin/mace/core/libcore.a\n") + "bazel-bin/mace/core/libcore.lo\n") mri_stream += ( "addlib " "bazel-bin/mace/kernels/libkernels.a\n") @@ -1002,18 +1039,25 @@ def packaging_lib(libmace_output_dir, project_name): def build_benchmark_model(abi, model_output_dir, - hexagon_mode): + hexagon_mode, + dynamic_link=False): benchmark_binary_file = "%s/benchmark_model" % model_output_dir if os.path.exists(benchmark_binary_file): sh.rm("-rf", benchmark_binary_file) - benchmark_target = "//mace/benchmark:benchmark_model" + if dynamic_link == 0: + benchmark_target = "//mace/benchmark:benchmark_model" + else: + benchmark_target = "//mace/benchmark:benchmark_model_deps_so" bazel_build(benchmark_target, abi=abi, hexagon_mode=hexagon_mode) target_bin = "/".join(bazel_target_to_bin(benchmark_target)) - sh.cp("-f", target_bin, model_output_dir) + if dynamic_link == 0: + sh.cp("-f", target_bin, model_output_dir) + else: + 
sh.cp("-f", target_bin, "%s/benchmark_model" % model_output_dir) def benchmark_model(abi, @@ -1032,11 +1076,13 @@ def benchmark_model(abi, phone_data_dir, build_type, opencl_binary_file, + shared_library_dir, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, - input_file_name="model_input"): + input_file_name="model_input", + dynamic_link=0): print("* Benchmark for %s" % model_tag) mace_model_path = "" @@ -1085,6 +1131,13 @@ def benchmark_model(abi, adb_push(mace_model_path, mace_model_phone_path, serialno) + + if dynamic_link == 1: + adb_push("%s/libmace.so" % shared_library_dir, phone_data_dir, + serialno) + adb_push("%s/libgnustl_shared.so" % shared_library_dir, + phone_data_dir, + serialno) adb_push("%s/benchmark_model" % benchmark_binary_dir, phone_data_dir, serialno)