Commit cf6c7d7b authored by yejianwu

add dynamic link

Parent dad3d11a
...@@ -123,3 +123,9 @@ android_ndk_repository(
    # Android 5.0
    api_level = 21,
)
new_local_repository(
    name = "libmace",
    path = "./dynamic_lib/",
    build_file = "./dynamic_lib/libmace.BUILD"
)
...@@ -31,49 +31,51 @@ Configurations
   :align: left

   * - library_name
     - Library name.
   * - target_abis
     - The target ABI(s) to build; one or more of 'host', 'armeabi-v7a' or 'arm64-v8a'.
   * - target_socs
     - [optional] Build for the specified SoCs if you only want to use the model on those SoCs.
   * - embed_model_data
     - Whether to embed the model weights in the code, defaults to 0.
   * - build_type
     - Model build type, one of ['proto', 'code']. 'proto' converts the model to a ProtoBuf file and 'code' converts the model to C++ code.
   * - dynamic_link
     - [optional] Link the MACE library dynamically if set to 1, defaults to 0.
   * - model_name
     - Model name; should be unique if there are multiple models.
       **LIMIT: if build_type is code, model_name will be used in the C++ code, so model_name must be a valid C++ identifier.**
   * - platform
     - The source framework, one of [tensorflow, caffe].
   * - model_file_path
     - The path of the model file; can be local or remote.
   * - model_sha256_checksum
     - The SHA256 checksum of the model file.
   * - weight_file_path
     - [optional] The path of the model weights file, used by Caffe models.
   * - weight_sha256_checksum
     - [optional] The SHA256 checksum of the weights file, used by Caffe models.
   * - subgraphs
     - subgraphs key. ** DO NOT EDIT **
   * - input_tensors
     - The input tensor names (TensorFlow) or the top names of the input layers (Caffe); one or more strings.
   * - output_tensors
     - The output tensor names (TensorFlow) or the top names of the output layers (Caffe); one or more strings.
   * - input_shapes
     - The shapes of the input tensors, in NHWC order.
   * - output_shapes
     - The shapes of the output tensors, in NHWC order.
   * - validation_inputs_data
     - [optional] Specify NumPy validation inputs. When not provided, random values in [-1, 1] will be used.
   * - runtime
     - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains both CPU and GPU model definitions so you can run the model on either.
   * - data_type
     - [optional] The data type used for the specified runtime. [fp16_fp32, fp32_fp32] for gpu, default is fp16_fp32; [fp32] for cpu; [uint8] for dsp.
   * - limit_opencl_kernel_time
     - [optional] Whether to split OpenCL kernels so each runs within 1 ms to keep the UI responsive, defaults to 0.
   * - nnlib_graph_mode
     - [optional] Control the DSP precision and performance; the default of 0 usually works for most cases.
   * - obfuscate
     - [optional] Whether to obfuscate the model operator names, defaults to 0.
   * - winograd
     - [optional] Whether to enable Winograd convolution, **will increase memory consumption**.
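The constraints in the table above (in particular that ``dynamic_link`` only makes sense together with ``build_type: proto``) can be sketched as a small standalone check. The helper name ``validate_deployment`` is hypothetical; the real validation lives in ``tools/converter.py``.

```python
# Hypothetical standalone sketch of the deployment-file rules described above;
# not part of the MACE tools themselves.
def validate_deployment(config):
    dynamic_link = config.get("dynamic_link", 0)  # defaults to 0 (static link)
    if dynamic_link not in (0, 1):
        raise ValueError("dynamic_link must be 0 or 1")
    if dynamic_link == 1 and config.get("build_type") != "proto":
        # shared-library builds only support ProtoBuf models
        raise ValueError("dynamic_link == 1 requires build_type == 'proto'")
    return True
```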
...@@ -165,13 +165,15 @@ built-in tool when necessary.

    $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel

==============================
3. Build static/shared library
==============================

-----------------
3.1 Overview
-----------------

MiAI Compute Engine can build either a static or a shared library (specified
by ``dynamic_link`` in the YAML model deployment file).
The following are two use cases.
* **Build well tuned library for specific SoCs**

...@@ -185,7 +187,7 @@ MiAI Compute Engine only build static library. The followings are two use cases.

* **Build generic library for all SoCs**

    When ``target_socs`` is not specified, the generated library is compatible
    with general devices.

.. note::
...@@ -198,7 +200,9 @@ model conversion, compiling, test run, benchmark and correctness validation.

.. note::

    1. ``tools/converter.py`` should be run at the root directory of this project.
    2. When ``dynamic_link`` is set to ``1``, ``build_type`` should be ``proto``,
       and currently only Android devices are supported.
------------------------------------------

...@@ -211,7 +215,7 @@ model conversion, compiling, test run, benchmark and correctness validation.

.. note::

    Build the library and test tools.

* *--config* (type=str, default="", required): the path of the model yaml configuration file.
* *--tuning* (default=false, optional): whether to tune the parameters for the GPU of the specified SoC.
...@@ -302,7 +306,7 @@ Using ``-h`` to get detailed help.

.. code:: sh

    # Build library
    python tools/converter.py build --config=models/config.yaml

    # Test model run time
...@@ -325,9 +329,11 @@ Using ``-h`` to get detailed help.
4. Deployment
=============

The ``build`` command generates the static/shared library, model files and
header files, packaged as
``build/${library_name}/libmace_${library_name}.tar.gz``.

- The generated ``static`` libraries are organized as follows,
.. code::

...@@ -347,19 +353,52 @@ They are organized as follows,

    ├── model
    │   ├── mobilenet_v2.data
    │   └── mobilenet_v2.pb
    └── opencl
        ├── arm64-v8a
        │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin
        └── armeabi-v7a
            └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin
- The generated ``shared`` libraries are organized as follows,

.. code::

    build
    └── mobilenet-v2-gpu
        ├── include
        │   └── mace
        │       └── public
        │           ├── mace.h
        │           └── mace_runtime.h
        ├── lib
        │   ├── arm64-v8a
        │   │   ├── libgnustl_shared.so
        │   │   └── libmace.so
        │   └── armeabi-v7a
        │       ├── libgnustl_shared.so
        │       └── libmace.so
        ├── model
        │   ├── mobilenet_v2.data
        │   └── mobilenet_v2.pb
        └── opencl
            ├── arm64-v8a
            │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin
            └── armeabi-v7a
                └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin
.. note::

    1. DSP runtime depends on ``libhexagon_controller.so``.
    2. ``${MODEL_TAG}.pb`` file will be generated only when ``build_type`` is ``proto``.
    3. ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` will
       be generated only when ``target_socs`` and ``gpu`` runtime are specified.
    4. The generated shared library depends on ``libgnustl_shared.so``.
.. warning::

    ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` depends
    on the OpenCL version of the device; you should maintain compatibility or
    configure a compiled-kernel cache store with ``ConfigKVStorageFactory``.
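The compiled OpenCL kernel file name follows the ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` pattern mentioned above. A small sketch of how that name is assembled (the values below are illustrative, matching the MI6/msm8998 example in the directory trees):

```python
# Sketch of the compiled OpenCL kernel file name pattern described above.
def opencl_binary_name(library_name, device_name, soc):
    # one binary per (library, device, soc) combination
    return "%s_compiled_opencl_kernel.%s.%s.bin" % (library_name, device_name, soc)
```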
=========================================
5. How to use the library in your project
......
...@@ -163,18 +163,18 @@ Only the latest Caffe version is supported; please upgrade older models with Caffe's tools

    $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel

==========================
3. Build the model library
==========================

---------------------------------------
3.1 Overview
---------------------------------------

Mace now provides both static and shared libraries (selected with ``dynamic_link`` in the ``yaml`` file). There are two use cases.

**Library for a specific SoC**

    This use case requires ``target_socs`` to be specified in the ``yaml`` file. It is mainly used to build a library tailored to the SoC of a specific phone.
    If GPU is used, the build process will automatically tune and select the best GPU-related parameters for better performance.
.. warning::

...@@ -183,7 +183,7 @@ Mace currently only provides a static library; there are two use cases.

**Generic library**

    If ``target_socs`` is not specified in the ``yaml`` file, the generated library works on all phones.

.. warning::

...@@ -194,7 +194,8 @@ Mace currently only provides a static library; there are two use cases.

.. warning::

    1. The ``tools/converter.py`` script must be run from the root directory of the project.
    2. When ``dynamic_link`` is set to 1, ``build_type`` must be set to ``proto``. Dynamic linking currently only supports Android devices.
---------------------------------------

...@@ -207,7 +208,7 @@ Mace currently only provides a static library; there are two use cases.

.. note::

    Build the model library and test tools.

* *--config* (type=str, default="", required): path of the model configuration yaml file.
* *--tuning* (default=false, optional): whether to tune GPU parameters for the specific SoC.

...@@ -293,7 +294,7 @@ Mace currently only provides a static library; there are two use cases.

    python tools/converter.py run -h
    python tools/converter.py benchmark -h

    # Only build the model and generate the library
    python tools/converter.py build --config=models/config.yaml

    # Test model run time
...@@ -316,7 +317,7 @@ Mace currently only provides a static library; there are two use cases.

4. Deployment
=============

The ``build`` command generates a tar package containing all the files needed for deployment, located at ``./build/${library_name}/libmace_${library_name}.tar.gz``.
The files contained in the package are explained below.

**Header files**

...@@ -326,6 +327,12 @@ Mace currently only provides a static library; there are two use cases.

* ``./build/${library_name}/library/${target_abi}/*.a``

**Shared libraries**

* ``./build/${library_name}/library/${target_abi}/*.so``

.. note::

    The shared library is generated when ``dynamic_link`` is set to 1.
* ``./build/${library_name}/library/${target_abi}/libhexagon_controller.so``

.. note::

...@@ -345,7 +352,7 @@ Mace currently only provides a static library; there are two use cases.

.. note::

    Generated only when ``target_socs`` is specified and ``runtime==gpu``.
.. warning::
......
# The name of the library
library_name: library_name
target_abis: [armeabi-v7a, arm64-v8a]
# The SoC of the specific device.
# Get it with `adb shell getprop | grep ro.board.platform | cut -d [ -f3 | cut -d ] -f1`.
target_socs: [msm8998]
embed_model_data: 1
# The build mode for model(s).
# 'code' stands for converting model(s) into C++ code, 'proto' for model(s) in ProtoBuf file(s).
build_type: code
dynamic_link: 1
# One yaml config file can contain multiple models' configuration.
models:
  model_name: # model tag, which will be used in model loading and must be unique
    platform: tensorflow
    model_file_path: path/to/model64.pb # also support http:// and https://
    model_sha256_checksum: 7f7462333406e7dea87222737590ebb7d94490194d2f21a7d72bafa87e64e9f9
......
cc_library(
    name = "libmace",
    srcs = ["libmace.so"],
    visibility = ["//visibility:public"],
)
...@@ -59,3 +59,18 @@ config_setting(
    },
    visibility = ["//visibility:public"],
)

cc_binary(
    name = "libmace.so",
    linkshared = 1,
    linkstatic = 0,
    linkopts = [
        "-Wl,-soname,libmace.so",
        "-Wl,--version-script",
        "mace_version_script.lds",
    ],
    deps = [
        "//mace/ops",
        ":mace_version_script.lds",
    ],
)
...@@ -39,6 +39,26 @@ cc_binary(
    ],
)

cc_binary(
    name = "benchmark_model_deps_so",
    srcs = [
        "benchmark_model.cc",
    ],
    copts = [
        "-Werror",
        "-Wextra",
        "-Wno-missing-field-initializers",
    ] + if_android(["-DMACE_ENABLE_OPENCL"]),
    linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]),
    linkstatic = 0,
    deps = [
        ":statistics",
        "//external:gflags_nothreads",
        "//mace/codegen:generated_mace_engine_factory",
        "@libmace//:libmace",
    ],
)
cc_library(
    name = "libmace_merged",
    srcs = [
......
...@@ -12,10 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <set>

#include "mace/benchmark/statistics.h"
#include "mace/utils/logging.h"
#include "mace/utils/string_util.h"
......
...@@ -71,6 +71,7 @@ cc_library(
    ]) + if_hexagon_enabled([
        "//third_party/nnlib:libhexagon",
    ]),
    alwayslink = 1,
)

cc_library(
......
mace {
  global:
    *MaceTensor*;
    *MaceEngine*;
    *MaceVersion*;
    *SetOpenMPThreadPolicy*;
    *SetGPUHints*;
    *SetOpenCLBinaryPaths*;
    *FileStorageFactory*;
    *SetKVStorageFactory*;
    *CreateMaceEngineFromProto*;

  local:
    *;
};
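The version script above tells the linker to export only symbols matching the ``global`` glob patterns from ``libmace.so`` and keep everything else local. The effect of that matching can be approximated with ``fnmatch`` (a sketch; the real matching is done by the linker on mangled symbol names):

```python
import fnmatch

# The "global" patterns from the mace version script above.
GLOBAL_PATTERNS = [
    "*MaceTensor*", "*MaceEngine*", "*MaceVersion*",
    "*SetOpenMPThreadPolicy*", "*SetGPUHints*", "*SetOpenCLBinaryPaths*",
    "*FileStorageFactory*", "*SetKVStorageFactory*",
    "*CreateMaceEngineFromProto*",
]

def is_exported(symbol):
    # A symbol is exported if it matches any global pattern,
    # otherwise the "local: *;" clause hides it.
    return any(fnmatch.fnmatchcase(symbol, p) for p in GLOBAL_PATTERNS)
```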
...@@ -15,3 +15,19 @@ cc_binary(
        "//mace/codegen:generated_models",
    ],
)

cc_binary(
    name = "mace_run_deps_so",
    srcs = ["mace_run.cc"],
    copts = if_android([
        "-DMACE_ENABLE_OPENCL",
    ]),
    linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]),
    linkstatic = 0,
    deps = [
        "//external:gflags_nothreads",
        "//mace/codegen:generated_mace_engine_factory",
        "//mace/utils:utils",
        "@libmace//:libmace",
    ],
)
...@@ -18,7 +18,7 @@ build:android --config=cross_compile

# Usage example: bazel build --config optimization
build:optimization -c opt
build:optimization --copt=-O3
build:optimization --linkopt=-Wl,--strip-all

# Address sanitizer
build:asan --strip=never
......
...@@ -44,13 +44,15 @@ MODEL_OUTPUT_DIR_NAME = 'model'
MODEL_HEADER_DIR_PATH = 'include/mace/public'
BUILD_TMP_DIR_NAME = '_tmp'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
OUTPUT_LIBRARY_DIR_NAME = 'lib'
OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
CODEGEN_BASE_DIR = 'mace/codegen'
MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
LIBMACE_SO_TARGET = "//mace:libmace.so"
MACE_RUN_TARGET = "//mace/tools/validation:mace_run"
MACE_RUN_TARGET_DEPS_SO = "//mace/tools/validation:mace_run_deps_so"
ALL_SOC_TAG = 'all'

ABITypeStrs = [
...@@ -124,6 +126,7 @@ class YAMLKeyword(object):
    target_socs = 'target_socs'
    build_type = 'build_type'
    embed_model_data = 'embed_model_data'
    dynamic_link = 'dynamic_link'
    models = 'models'
    platform = 'platform'
    model_file_path = 'model_file_path'
...@@ -279,6 +282,21 @@ def format_model_config(config_file_path):
    if build_type == BuildType.proto:
        configs[YAMLKeyword.embed_model_data] = 0

    dynamic_link = configs.get(YAMLKeyword.dynamic_link, "")
    if dynamic_link == "":
        configs[YAMLKeyword.dynamic_link] = 0
        dynamic_link = 0
    if not isinstance(dynamic_link, int) or dynamic_link < 0 or \
            dynamic_link > 1:
        MaceLogger.error(ModuleName.YAML_CONFIG,
                         "dynamic_link must be 0 or 1; "
                         "the default 0 links the mace lib statically, "
                         "1 links it dynamically.")
    if build_type == BuildType.code and dynamic_link == 1:
        MaceLogger.error(ModuleName.YAML_CONFIG,
                         "'dynamic_link == 1' is only supported when "
                         "'build_type == proto'")

    model_names = configs.get(YAMLKeyword.models, [])
    mace_check(len(model_names) > 0, ModuleName.YAML_CONFIG,
               "no model found in config file")
...@@ -451,6 +469,13 @@ def get_opencl_binary_output_path(library_name, target_abi,
                                  target_soc)


def get_shared_library_dir(library_name, abi):
    return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
                            library_name,
                            OUTPUT_LIBRARY_DIR_NAME,
                            abi)
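Assuming ``BUILD_OUTPUT_DIR`` is ``'build'`` (consistent with the directory trees in the docs above) and ``OUTPUT_LIBRARY_DIR_NAME`` is ``'lib'`` as defined earlier in this file, the helper yields paths like the following self-contained sketch shows:

```python
# Self-contained sketch mirroring get_shared_library_dir with the constants
# defined earlier in this file (values assumed from the docs above).
BUILD_OUTPUT_DIR = 'build'
OUTPUT_LIBRARY_DIR_NAME = 'lib'

def get_shared_library_dir(library_name, abi):
    # build/<library_name>/lib/<abi>
    return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name,
                            OUTPUT_LIBRARY_DIR_NAME, abi)
```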
################################
# build
################################
...@@ -475,6 +500,8 @@ def print_configuration(flags, configs):
                 configs[YAMLKeyword.build_type]])
    data.append([YAMLKeyword.embed_model_data,
                 configs[YAMLKeyword.embed_model_data]])
    data.append([YAMLKeyword.dynamic_link,
                 configs[YAMLKeyword.dynamic_link]])
    data.append(["Tuning", flags.tuning])
    MaceLogger.summary(StringFormatter.table(header, data, title))
...@@ -624,6 +651,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
    library_name = configs[YAMLKeyword.library_name]
    build_type = configs[YAMLKeyword.build_type]
    embed_model_data = configs[YAMLKeyword.embed_model_data]
    dynamic_link = configs[YAMLKeyword.dynamic_link]
    hexagon_mode = get_hexagon_mode(configs)
    model_output_dirs = []
...@@ -634,14 +662,31 @@ def build_specific_lib(target_abi, target_soc, serial_num,
        os.makedirs(build_tmp_binary_dir)

    sh_commands.gen_tuning_param_code(model_output_dirs)
    if dynamic_link == 0:
        mace_run_target = MACE_RUN_TARGET
    else:
        mace_run_target = MACE_RUN_TARGET_DEPS_SO
        sh_commands.bazel_build(
            LIBMACE_SO_TARGET,
            abi=target_abi,
            hexagon_mode=hexagon_mode,
            enable_openmp=enable_openmp,
            address_sanitizer=address_sanitizer
        )
        sh_commands.update_libmace_shared_library(serial_num,
                                                  target_abi,
                                                  library_name,
                                                  BUILD_OUTPUT_DIR,
                                                  OUTPUT_LIBRARY_DIR_NAME)

    sh_commands.bazel_build(
        mace_run_target,
        abi=target_abi,
        hexagon_mode=hexagon_mode,
        enable_openmp=enable_openmp,
        address_sanitizer=address_sanitizer
    )
    sh_commands.update_mace_run_lib(build_tmp_binary_dir, dynamic_link)
    binary_changed = False
    for model_name in configs[YAMLKeyword.models]:
...@@ -696,6 +741,8 @@ def build_specific_lib(target_abi, target_soc, serial_num,
                phone_data_dir=PHONE_DATA_DIR,
                build_type=build_type,
                opencl_binary_file="",
                shared_library_dir=get_shared_library_dir(library_name, target_abi),  # noqa
                dynamic_link=dynamic_link,
            )

            pull_opencl_binary_and_tuning_param(target_abi, serial_num,
...@@ -712,13 +759,13 @@ def build_specific_lib(target_abi, target_soc, serial_num,
                                   opencl_output_bin_path)
        sh_commands.gen_tuning_param_code(model_output_dirs)
        sh_commands.bazel_build(
            mace_run_target,
            abi=target_abi,
            hexagon_mode=hexagon_mode,
            enable_openmp=enable_openmp,
            address_sanitizer=address_sanitizer
        )
        sh_commands.update_mace_run_lib(build_tmp_binary_dir, dynamic_link)
    if target_abi == ABIType.host:
        sh_commands.build_host_libraries(build_type, target_abi)

...@@ -726,17 +773,19 @@ def build_specific_lib(target_abi, target_soc, serial_num,
    # build benchmark_model binary
    sh_commands.build_benchmark_model(target_abi,
                                      build_tmp_binary_dir,
                                      hexagon_mode,
                                      dynamic_link)

    # generate library
    if dynamic_link == 0:
        sh_commands.merge_libs(target_soc,
                               serial_num,
                               target_abi,
                               library_name,
                               BUILD_OUTPUT_DIR,
                               OUTPUT_LIBRARY_DIR_NAME,
                               build_type,
                               hexagon_mode)
def generate_library(configs, tuning, enable_openmp, address_sanitizer):
...@@ -864,6 +913,7 @@ def run_specific_target(flags, configs, target_abi,
    build_type = configs[YAMLKeyword.build_type]
    embed_model_data = configs[YAMLKeyword.embed_model_data]
    opencl_output_bin_path = ""
    dynamic_link = configs[YAMLKeyword.dynamic_link]
    if not configs[YAMLKeyword.target_socs]:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    None, None)
...@@ -951,6 +1001,8 @@ def run_specific_target(flags, configs, target_abi,
                runtime_failure_ratio=flags.runtime_failure_ratio,
                address_sanitizer=flags.address_sanitizer,
                opencl_binary_file=opencl_output_bin_path,
                shared_library_dir=get_shared_library_dir(library_name, target_abi),  # noqa
                dynamic_link=dynamic_link,
            )
            if flags.validate:
                model_file_path, weight_file_path = get_model_files_path(
...@@ -1010,6 +1062,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
    build_type = configs[YAMLKeyword.build_type]
    embed_model_data = configs[YAMLKeyword.embed_model_data]
    opencl_output_bin_path = ""
    dynamic_link = configs[YAMLKeyword.dynamic_link]
    if not configs[YAMLKeyword.target_socs]:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    None, None)
...@@ -1087,7 +1140,9 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
                opencl_binary_file=opencl_output_bin_path,
                shared_library_dir=get_shared_library_dir(library_name, target_abi),  # noqa
                dynamic_link=dynamic_link)


def benchmark_model(flags):
......
...@@ -561,12 +561,16 @@ def gen_random_input(model_output_dir,
            sh.cp("-f", input_file_list[i], dst_input_file)


def update_mace_run_lib(build_tmp_binary_dir, dynamic_link=0):
    mace_run_filepath = build_tmp_binary_dir + "/mace_run"
    if os.path.exists(mace_run_filepath):
        sh.rm("-rf", mace_run_filepath)
    if dynamic_link == 0:
        sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run",
              build_tmp_binary_dir)
    else:
        sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_deps_so",
              "%s/mace_run" % build_tmp_binary_dir)
def touch_tuned_file_flag(build_tmp_binary_dir):
...@@ -583,6 +587,30 @@ def create_internal_storage_dir(serialno, phone_data_dir):
    return internal_storage_dir


def update_libmace_shared_library(serial_num,
                                  abi,
                                  project_name,
                                  build_output_dir,
                                  library_output_dir):
    libmace_name = "libmace.so"
    mace_library_dir = "./dynamic_lib/"

    library_dir = "%s/%s/%s/%s" % (
        build_output_dir, project_name, library_output_dir, abi)
    libmace_file = "%s/%s" % (library_dir, libmace_name)
    if os.path.exists(libmace_file):
        sh.rm("-rf", library_dir)
    sh.mkdir("-p", library_dir)
    sh.cp("-f", "bazel-bin/mace/libmace.so", library_dir)
    sh.cp("-f", "%s/%s/libgnustl_shared.so" % (mace_library_dir, abi),
          library_dir)

    libmace_load_path = "%s/%s" % (mace_library_dir, libmace_name)
    if os.path.exists(libmace_load_path):
        sh.rm("-f", libmace_load_path)
    sh.cp("-f", "bazel-bin/mace/libmace.so", mace_library_dir)
def tuning_run(abi,
               serialno,
               mace_run_dir,
...@@ -604,6 +632,7 @@ def tuning_run(abi,
               phone_data_dir,
               build_type,
               opencl_binary_file,
               shared_library_dir,
               omp_num_threads=-1,
               cpu_affinity_policy=1,
               gpu_perf_hint=3,
...@@ -611,7 +640,8 @@ def tuning_run(abi,
               input_file_name="model_input",
               output_file_name="model_out",
               runtime_failure_ratio=0.0,
               address_sanitizer=False,
               dynamic_link=0):
    print("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
          "out_of_range_check=%s, omp_num_threads=%s, cpu_affinity_policy=%s, "
          "gpu_perf_hint=%s, gpu_priority_hint=%s" %
...@@ -683,6 +713,13 @@ def tuning_run(abi, ...@@ -683,6 +713,13 @@ def tuning_run(abi,
mace_model_phone_path, mace_model_phone_path,
serialno) serialno)
if dynamic_link == 1:
adb_push("%s/libmace.so" % shared_library_dir, phone_data_dir,
serialno)
adb_push("%s/libgnustl_shared.so" % shared_library_dir,
phone_data_dir,
serialno)
adb_push("%s/mace_run" % mace_run_dir, phone_data_dir, adb_push("%s/mace_run" % mace_run_dir, phone_data_dir,
serialno) serialno)
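When `dynamic_link == 1`, two extra files are pushed to the device's data directory before `mace_run` itself: the shared `libmace.so` and the NDK's `libgnustl_shared.so` runtime it depends on. A standalone sketch of the resulting push list (the helper name `push_list` is ours; the real script calls `adb_push` directly):

```python
def push_list(mace_run_dir, shared_library_dir, phone_data_dir,
              dynamic_link=0):
    # Returns (local_path, remote_dir) pairs in the order the script
    # pushes them: shared libraries first (if any), then the binary.
    pushes = []
    if dynamic_link == 1:
        pushes.append(("%s/libmace.so" % shared_library_dir,
                       phone_data_dir))
        pushes.append(("%s/libgnustl_shared.so" % shared_library_dir,
                       phone_data_dir))
    pushes.append(("%s/mace_run" % mace_run_dir, phone_data_dir))
    return pushes
```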
@@ -907,7 +944,7 @@ def merge_libs(target_soc,
            "bazel-bin/mace/codegen/libgenerated_version.pic.a\n")
        mri_stream += (
            "addlib "
            "bazel-bin/mace/core/libcore.pic.lo\n")
        mri_stream += (
            "addlib "
            "bazel-bin/mace/kernels/libkernels.pic.a\n")
@@ -951,7 +988,7 @@ def merge_libs(target_soc,
            "bazel-bin/mace/codegen/libgenerated_version.a\n")
        mri_stream += (
            "addlib "
            "bazel-bin/mace/core/libcore.lo\n")
        mri_stream += (
            "addlib "
            "bazel-bin/mace/kernels/libkernels.a\n")
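The `mri_stream` being built here is an `ar -M` (MRI) script that merges the per-package archives into one static library; the change swaps `libcore.pic.a`/`libcore.a` for `libcore.pic.lo`/`libcore.lo`, the suffix Bazel uses for alwayslink archives. A sketch of how such a script is assembled, assuming hypothetical archive paths (`build_mri_script` is our name, not the script's):

```python
def build_mri_script(output_lib, archives):
    # ar MRI script: create the target archive, merge each input
    # archive into it, then save and end.
    lines = ["create %s" % output_lib]
    lines.extend("addlib %s" % a for a in archives)
    lines.extend(["save", "end"])
    return "\n".join(lines) + "\n"
```

The finished script is typically fed to `ar -M` on stdin.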
@@ -1002,18 +1039,25 @@ def packaging_lib(libmace_output_dir, project_name):
def build_benchmark_model(abi,
                          model_output_dir,
                          hexagon_mode,
                          dynamic_link=False):
    benchmark_binary_file = "%s/benchmark_model" % model_output_dir
    if os.path.exists(benchmark_binary_file):
        sh.rm("-rf", benchmark_binary_file)

    if dynamic_link == 0:
        benchmark_target = "//mace/benchmark:benchmark_model"
    else:
        benchmark_target = "//mace/benchmark:benchmark_model_deps_so"
    bazel_build(benchmark_target,
                abi=abi,
                hexagon_mode=hexagon_mode)

    target_bin = "/".join(bazel_target_to_bin(benchmark_target))
    if dynamic_link == 0:
        sh.cp("-f", target_bin, model_output_dir)
    else:
        sh.cp("-f", target_bin, "%s/benchmark_model" % model_output_dir)
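In the dynamic case the built binary is named `benchmark_model_deps_so`, so the copy renames it to `benchmark_model` in the output directory; in the static case a plain copy suffices. A sketch of just the copy arguments (the helper name `benchmark_copy_args` is hypothetical):

```python
def benchmark_copy_args(target_bin, model_output_dir, dynamic_link=0):
    # Returns (source, destination) for the final sh.cp: the
    # *_deps_so binary is renamed to "benchmark_model" on copy.
    if dynamic_link == 0:
        return (target_bin, model_output_dir)
    return (target_bin, "%s/benchmark_model" % model_output_dir)
```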
def benchmark_model(abi,
@@ -1032,11 +1076,13 @@ def benchmark_model(abi,
                    phone_data_dir,
                    build_type,
                    opencl_binary_file,
                    shared_library_dir,
                    omp_num_threads=-1,
                    cpu_affinity_policy=1,
                    gpu_perf_hint=3,
                    gpu_priority_hint=3,
                    input_file_name="model_input",
                    dynamic_link=0):
    print("* Benchmark for %s" % model_tag)

    mace_model_path = ""
@@ -1085,6 +1131,13 @@ def benchmark_model(abi,
        adb_push(mace_model_path,
                 mace_model_phone_path,
                 serialno)

    if dynamic_link == 1:
        adb_push("%s/libmace.so" % shared_library_dir, phone_data_dir,
                 serialno)
        adb_push("%s/libgnustl_shared.so" % shared_library_dir,
                 phone_data_dir,
                 serialno)
    adb_push("%s/benchmark_model" % benchmark_binary_dir, phone_data_dir,
             serialno)
...