diff --git a/.gitignore b/.gitignore
index 54dacd4552610ddd4fa36d1517ce6da23cc9c4ef..adce04e177caa7dad2a7665449f1d9f922ade717 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,5 +11,6 @@ mace/codegen/version/
 mace/codegen/engine/
 build/
 docs/_build/
+*.a
 
 \.project/
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 72820f2b14e09efd74226d35c23d72ae27f7cdec..fe0fd433b61fdcb48acd4aa2e733f137d7919fe2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -101,5 +101,5 @@ python_tools_tests:
     - rm -rf mace-models
     - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/mace-models.git
     - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml
-    - sh -c "python tools/converter.py build --config=${CONF_FILE} --disable_tuning && python tools/converter.py run --config=${CONF_FILE} --round=1 --validate" || exit 1
+    - sh -c "python tools/converter.py build --config=${CONF_FILE} --disable_tuning && python tools/converter.py run --config=${CONF_FILE} --round=1 --validate && python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate" || exit 1
     - rm -rf mace-models
diff --git a/mace/BUILD b/mace/BUILD
index a216f9f15e84ae3707fbaf13e1578dc510038bcb..d012231aa1ff62e66aed22b8d13223b0c5a76751 100644
--- a/mace/BUILD
+++ b/mace/BUILD
@@ -80,3 +80,9 @@ cc_library(
   srcs = ["libmace.so"],
   visibility = ["//visibility:public"],
 )
+
+cc_library(
+  name = "libmace_static",
+  srcs = ["libmace.a"],  # resolved as mace/libmace.a relative to this package
+  visibility = ["//visibility:public"],
+)
diff --git a/mace/examples/BUILD b/mace/examples/BUILD
deleted file mode 100644
index 3f2fc38da2437948acd29b1702691a780945e52a..0000000000000000000000000000000000000000
--- a/mace/examples/BUILD
+++ /dev/null
@@ -1,15 +0,0 @@
-# Examples
-load("//mace:mace.bzl", "if_openmp_enabled")
-
-cc_binary(
-    name = "example",
-    srcs = ["example.cc"],
-    linkopts = if_openmp_enabled(["-fopenmp"]),
-    linkstatic = 1,
-    copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
-    deps = [
-        "//external:gflags_nothreads",
-        "//mace/codegen:generated_models",
-        "//mace/codegen:generated_mace_engine_factory",
-    ],
-)
diff --git a/mace/examples/cli/BUILD b/mace/examples/cli/BUILD
new file mode 100644
index 0000000000000000000000000000000000000000..4fe3e88a9c92f7732383c2f7fb3552c2eefc0deb
--- /dev/null
+++ b/mace/examples/cli/BUILD
@@ -0,0 +1,49 @@
+# Examples
+load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
+
+# Example CLI built with linkstatic = 1 against the static archive target
+# //mace:libmace_static.
+cc_binary(
+    name = "example_static",
+    srcs = ["example.cc"],
+    copts = [
+        "-Werror",
+        "-Wextra",
+    ] + if_android([
+        "-DMACE_ENABLE_OPENCL",
+    ]),
+    linkopts = ["-lm"] + if_openmp_enabled([
+        "-fopenmp",
+    ]) + if_android([
+        "-ldl",
+        "-pie",
+        "-llog",
+    ]),
+    linkstatic = 1,
+    deps = [
+        "//external:gflags_nothreads",
+        "//mace/codegen:generated_mace_engine_factory",
+        "//mace:libmace_static",
+    ],
+)
+
+# Example CLI built with linkstatic = 0 against //mace:libmace.
+cc_binary(
+    name = "example_shared",
+    srcs = ["example.cc"],
+    copts = [
+        "-Werror",
+        "-Wextra",
+        "-Wno-missing-field-initializers",
+    ] + if_android(["-DMACE_ENABLE_OPENCL"]),
+    linkopts = ["-lm", "-pie", "-fPIE"] +
+               if_openmp_enabled(["-fopenmp"]),
+    linkstatic = 0,
+    deps = [
+        "//external:gflags_nothreads",
+        "//mace/codegen:generated_mace_engine_factory",
+        "//mace/utils:utils",
+        "//mace:libmace",
+    ],
+)
+
diff --git a/mace/examples/example.cc b/mace/examples/cli/example.cc
similarity index 72%
rename from mace/examples/example.cc
rename to mace/examples/cli/example.cc
index caa80dcdc1da69176393af076a2ae7bf998e9f7e..dbccc36f3d7c1646771d833e7e918b77083dbba8 100644
--- a/mace/examples/example.cc
+++ b/mace/examples/cli/example.cc
@@ -12,18 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-/**
- * Usage:
- * mace_run --model=mobi_mace.pb \
- *          --input=input_node  \
- *          --output=output_node  \
- *          --input_shape=1,224,224,3   \
- *          --output_shape=1,224,224,2   \
- *          --input_file=input_data \
- *          --output_file=mace.out  \
- *          --model_data_file=model_data.data \
- *          --device=GPU
- */
 #include <malloc.h>
 #include <stdint.h>
 #include <cstdlib>
@@ -35,9 +23,9 @@
 #include "mace/public/mace.h"
 #include "mace/public/mace_runtime.h"
 // if convert model to code.
+#ifdef CODE_TYPE
 #include "mace/codegen/engine/mace_engine_factory.h"
-#include "mace/utils/env_time.h"
-#include "mace/utils/logging.h"
+#endif
 
 namespace mace {
 namespace examples {
@@ -98,7 +86,7 @@ DeviceType ParseDeviceType(const std::string &device_str) {
 
 DEFINE_string(model_name,
               "",
-              "model name in yaml file");
+              "model name in model deployment file");
 DEFINE_string(input_node,
               "input_node0,input_node1",
               "input nodes, separated by comma");
@@ -117,15 +105,15 @@ DEFINE_string(input_file,
 DEFINE_string(output_file,
               "",
               "output file name | output file prefix for multiple outputs");
+DEFINE_string(opencl_binary_file,
+              "",
+              "compiled opencl binary file path");
 DEFINE_string(model_data_file,
               "",
               "model data file name, used when EMBED_MODEL_DATA set to 0");
 DEFINE_string(model_file,
               "",
               "model file name, used when load mace model in pb");
-DEFINE_string(opencl_binary_file,
-              "",
-              "compiled opencl binary file path");
 DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
 DEFINE_int32(round, 1, "round");
 DEFINE_int32(restart_round, 1, "restart round");
@@ -136,6 +124,29 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
 DEFINE_int32(cpu_affinity_policy, 1,
              "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
 
+namespace {
+// Reads all bytes of `filename` into `*data`; returns false on any failure.
+bool ReadBinaryFile(std::vector<unsigned char> *data,
+                    const std::string &filename) {
+  std::ifstream ifs(filename, std::ios::in | std::ios::binary);
+  if (!ifs.is_open()) {
+    return false;
+  }
+  ifs.seekg(0, ifs.end);
+  const std::streamoff length = ifs.tellg();
+  if (length < 0) {  // tellg() returns -1 on failure; do not reserve that.
+    return false;
+  }
+  ifs.seekg(0, ifs.beg);
+
+  data->reserve(static_cast<size_t>(length));
+  data->insert(data->begin(), std::istreambuf_iterator<char>(ifs),
+               std::istreambuf_iterator<char>());
+  // No explicit close(): the ifstream destructor releases the file.
+  return !ifs.fail();
+}
+}  // namespace
+
 bool RunModel(const std::vector<std::string> &input_names,
               const std::vector<std::vector<int64_t>> &input_shapes,
               const std::vector<std::string> &output_names,
@@ -155,6 +166,12 @@ bool RunModel(const std::vector<std::string> &input_names,
 #endif  // MACE_ENABLE_OPENCL
 
   if (device_type == DeviceType::GPU) {
+    // Call this only once; it is not thread-safe.
+    // Set the paths of the generated OpenCL compiled kernel binary files
+    // if you built the GPU library for a specific SoC.
+    // Using the OpenCL binary speeds up initialization.
+    // The OpenCL binary is tied to the OpenCL driver version,
+    // so regenerate it whenever the OpenCL driver changes.
     std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
     mace::SetOpenCLBinaryPaths(opencl_binary_paths);
   }
@@ -172,34 +189,33 @@ bool RunModel(const std::vector<std::string> &input_names,
   // Create Engine
   std::shared_ptr<mace::MaceEngine> engine;
   MaceStatus create_engine_status;
-  // Create Engine
-  MaceStatus create_engine_status;
-  // Create Engine
-  int64_t t0 = NowMicros();
-  if (FLAGS_model_file != "") {
-    std::vector<unsigned char> model_pb_data;
-    if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
-      LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
-    }
-    create_engine_status =
-        CreateMaceEngineFromProto(model_pb_data,
-                                  FLAGS_model_data_file,
-                                  input_names,
-                                  output_names,
-                                  device_type,
-                                  &engine);
-  } else {
-    create_engine_status =
-        CreateMaceEngineFromCode(FLAGS_model_name,
-                                 FLAGS_model_data_file,
-                                 input_names,
-                                 output_names,
-                                 device_type,
-                                 &engine);
+  // Pick the one engine source matching `build_type` in the deployment .yml.
+#ifdef CODE_TYPE
+  create_engine_status =
+      CreateMaceEngineFromCode(FLAGS_model_name,
+                               FLAGS_model_data_file,
+                               input_names,
+                               output_names,
+                               device_type,
+                               &engine);
+#else
+  std::vector<unsigned char> model_pb_data;
+  if (!ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
+    std::cerr << "Failed to read file: " << FLAGS_model_file << std::endl;
+    exit(1);  // Fail fast instead of creating an engine from empty data.
   }
+  create_engine_status =
+      CreateMaceEngineFromProto(model_pb_data,
+                                FLAGS_model_data_file,
+                                input_names,
+                                output_names,
+                                device_type,
+                                &engine);
+#endif
 
   if (create_engine_status != MaceStatus::MACE_SUCCESS) {
-    LOG(FATAL) << "Create engine error, please check the arguments";
+    std::cerr << "Create engine error, please check the arguments" << std::endl;
+    exit(1);
   }
 
   const size_t input_count = input_names.size();
@@ -222,7 +238,7 @@ bool RunModel(const std::vector<std::string> &input_names,
                    input_size * sizeof(float));
       in_file.close();
     } else {
-      LOG(INFO) << "Open input file failed";
-      return -1;
+      std::cout << "Open input file failed" << std::endl;
+      return false;
     }
     inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in);
@@ -237,16 +253,17 @@ bool RunModel(const std::vector<std::string> &input_names,
     outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
   }
 
-  LOG(INFO) << "Warm up run";
+  std::cout << "Warm up run" << std::endl;
   engine->Run(inputs, &outputs);
 
   if (FLAGS_round > 0) {
-    LOG(INFO) << "Run model";
+    std::cout << "Run model" << std::endl;
     for (int i = 0; i < FLAGS_round; ++i) {
       engine->Run(inputs, &outputs);
     }
   }
 
+  std::cout << "Write output" << std::endl;
   for (size_t i = 0; i < output_count; ++i) {
     std::string output_name =
         FLAGS_output_file + "_" + FormatName(output_names[i]);
@@ -260,6 +277,7 @@ bool RunModel(const std::vector<std::string> &input_names,
     out_file.flush();
     out_file.close();
   }
+  std::cout << "Finished" << std::endl;
 
   return true;
 }
@@ -270,22 +288,24 @@ int Main(int argc, char **argv) {
   gflags::SetUsageMessage(usage);
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
-  LOG(INFO) << "mace version: " << MaceVersion();
-  LOG(INFO) << "input node: " << FLAGS_input_node;
-  LOG(INFO) << "input shape: " << FLAGS_input_shape;
-  LOG(INFO) << "output node: " << FLAGS_output_node;
-  LOG(INFO) << "output shape: " << FLAGS_output_shape;
-  LOG(INFO) << "input_file: " << FLAGS_input_file;
-  LOG(INFO) << "output_file: " << FLAGS_output_file;
-  LOG(INFO) << "model_data_file: " << FLAGS_model_data_file;
-  LOG(INFO) << "model_file: " << FLAGS_model_file;
-  LOG(INFO) << "device: " << FLAGS_device;
-  LOG(INFO) << "round: " << FLAGS_round;
-  LOG(INFO) << "restart_round: " << FLAGS_restart_round;
-  LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
-  LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
-  LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
-  LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy;
+  std::cout << "mace version: " << MaceVersion() << std::endl;
+  std::cout << "input node: " << FLAGS_input_node << std::endl;
+  std::cout << "input shape: " << FLAGS_input_shape << std::endl;
+  std::cout << "output node: " << FLAGS_output_node << std::endl;
+  std::cout << "output shape: " << FLAGS_output_shape << std::endl;
+  std::cout << "input_file: " << FLAGS_input_file << std::endl;
+  std::cout << "output_file: " << FLAGS_output_file << std::endl;
+  std::cout << "model_data_file: " << FLAGS_model_data_file << std::endl;
+  std::cout << "model_file: " << FLAGS_model_file << std::endl;
+  std::cout << "device: " << FLAGS_device << std::endl;
+  std::cout << "round: " << FLAGS_round << std::endl;
+  std::cout << "restart_round: " << FLAGS_restart_round << std::endl;
+  std::cout << "gpu_perf_hint: " << FLAGS_gpu_perf_hint << std::endl;
+  std::cout << "gpu_priority_hint: " << FLAGS_gpu_priority_hint << std::endl;
+  std::cout << "omp_num_threads: " << FLAGS_omp_num_threads << std::endl;
+  std::cout << "cpu_affinity_policy: "
+            << FLAGS_cpu_affinity_policy
+            << std::endl;
 
   std::vector<std::string> input_names = str_util::Split(FLAGS_input_node, ',');
   std::vector<std::string> output_names =
@@ -306,10 +326,9 @@ int Main(int argc, char **argv) {
     ParseShape(output_shapes[i], &output_shape_vec[i]);
   }
 
-  bool ret;
-#pragma omp parallel for
+  bool ret = false;
   for (int i = 0; i < FLAGS_restart_round; ++i) {
-    VLOG(0) << "restart round " << i;
+    std::cout << "restart round " << i << std::endl;
     ret =
         RunModel(input_names, input_shape_vec, output_names, output_shape_vec);
   }
diff --git a/mace/public/mace_runtime.h b/mace/public/mace_runtime.h
index f353da75cc3bde108a549a535be968769d907ca0..38f8a62a9c6bb9726387a0dd1cca73a457acac51 100644
--- a/mace/public/mace_runtime.h
+++ b/mace/public/mace_runtime.h
@@ -80,7 +80,8 @@ class FileStorageFactory : public KVStorageFactory {
 void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory);
 
 // Just call once. (Not thread-safe)
-// Set paths of OpenCL Compiled Binary file if you use gpu of specific soc.
+// Set paths of Generated OpenCL Compiled Kernel Binary file (not libOpenCL.so)
+// if you use gpu of specific soc.
 // Using OpenCL binary will speed up the initialization.
 // OpenCL binary is corresponding to the OpenCL Driver version,
 // you should update the binary when OpenCL Driver changed.
diff --git a/tools/converter.py b/tools/converter.py
index 8e36e9f62397b08b39aff5b6029937c362791b75..c244214d5e8ab629e52237b00e57ff6107af0a8e 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -51,8 +51,12 @@ CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
 CODEGEN_BASE_DIR = 'mace/codegen'
 MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
 LIBMACE_SO_TARGET = "//mace:libmace.so"
-MACE_RUN_STATIC_TARGET = "//mace/tools/validation:mace_run_static"
-MACE_RUN_SHARED_TARGET = "//mace/tools/validation:mace_run_shared"
+MACE_RUN_STATIC_NAME = "mace_run_static"
+MACE_RUN_SHARED_NAME = "mace_run_shared"
+EXAMPLE_STATIC_NAME = "example_static"
+EXAMPLE_SHARED_NAME = "example_shared"
+MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
+MACE_RUN_SHARED_TARGET = "//mace/tools/validation:" + MACE_RUN_SHARED_NAME
 ALL_SOC_TAG = 'all'
 
 ABITypeStrs = [
@@ -696,8 +700,10 @@ def build_specific_lib(target_abi, target_soc, serial_num,
 
     sh_commands.gen_tuning_param_code(model_output_dirs)
     if linkshared == 0:
+        mace_run_name = MACE_RUN_STATIC_NAME
         mace_run_target = MACE_RUN_STATIC_TARGET
     else:
+        mace_run_name = MACE_RUN_SHARED_NAME
         mace_run_target = MACE_RUN_SHARED_TARGET
         sh_commands.bazel_build(
             LIBMACE_SO_TARGET,
@@ -756,7 +762,8 @@ def build_specific_lib(target_abi, target_soc, serial_num,
             sh_commands.tuning_run(
                 abi=target_abi,
                 serialno=serial_num,
-                mace_run_dir=build_tmp_binary_dir,
+                target_dir=build_tmp_binary_dir,
+                target_name=mace_run_name,
                 vlog_level=0,
                 embed_model_data=embed_model_data,
                 model_output_dir=model_output_dir,
@@ -822,6 +829,13 @@ def build_specific_lib(target_abi, target_soc, serial_num,
                                build_type,
                                hexagon_mode)
 
+    # build example binary
+    sh_commands.build_example(target_soc, serial_num, target_abi,
+                              library_name, BUILD_OUTPUT_DIR,
+                              OUTPUT_LIBRARY_DIR_NAME,
+                              build_tmp_binary_dir, build_type,
+                              hexagon_mode, enable_openmp, linkshared)
+
 
 def generate_library(configs, tuning, enable_openmp, address_sanitizer):
     MaceLogger.header(StringFormatter.block("Building library"))
@@ -959,6 +973,17 @@ def run_specific_target(flags, configs, target_abi,
                ModuleName.RUN,
                'You should build before run.')
 
+    if flags.example:
+        if linkshared == 0:
+            target_name = EXAMPLE_STATIC_NAME
+        else:
+            target_name = EXAMPLE_SHARED_NAME
+    else:
+        if linkshared == 0:
+            target_name = MACE_RUN_STATIC_NAME
+        else:
+            target_name = MACE_RUN_SHARED_NAME
+
     for model_name in configs[YAMLKeyword.models]:
         if target_abi == ABIType.host:
             device_name = ABIType.host
@@ -1009,7 +1034,8 @@ def run_specific_target(flags, configs, target_abi,
             run_output = sh_commands.tuning_run(
                 abi=target_abi,
                 serialno=serial_num,
-                mace_run_dir=build_tmp_binary_dir,
+                target_dir=build_tmp_binary_dir,
+                target_name=target_name,
                 vlog_level=flags.vlog_level,
                 embed_model_data=embed_model_data,
                 model_output_dir=model_output_dir,
@@ -1334,6 +1360,10 @@ def parse_args():
         type=float,
         default=0.0,
         help="[mock runtime failure ratio].")
+    run.add_argument(
+        "--example",
+        action="store_true",
+        help="whether to run example.")
     benchmark = subparsers.add_parser(
         'benchmark',
         parents=[all_type_parent_parser, run_bm_parent_parser,
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index 92752c5790612791c17595b0fdb83a9fa486aee0..19e8f4da562c4fb35d92323fe0735a81ac8cbb95 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -292,7 +292,8 @@ def bazel_build(target,
                 hexagon_mode=False,
                 enable_openmp=True,
                 enable_neon=True,
-                address_sanitizer=False):
+                address_sanitizer=False,
+                extra_args=""):
     print("* Build %s with ABI %s" % (target, abi))
     if abi == "host":
         bazel_args = (
@@ -318,6 +319,8 @@ def bazel_build(target,
         bazel_args += ("--config", "asan")
     else:
         bazel_args += ("--config", "optimization")
+    if extra_args:
+        bazel_args += (extra_args, )
     sh.bazel(
         _fg=True,
         *bazel_args)
@@ -620,7 +623,8 @@ def update_libmace_shared_library(serial_num,
 
 def tuning_run(abi,
                serialno,
-               mace_run_dir,
+               target_dir,
+               target_name,
                vlog_level,
                embed_model_data,
                model_output_dir,
@@ -658,17 +662,13 @@ def tuning_run(abi,
     mace_model_path = ""
     if build_type == BuildType.proto:
         mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
-    if linkshared == 0:
-        mace_run_target = "mace_run_static"
-    else:
-        mace_run_target = "mace_run_shared"
     if abi == "host":
         p = subprocess.Popen(
             [
                 "env",
                 "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
                 "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio,
-                "%s/%s" % (mace_run_dir, mace_run_target),
+                "%s/%s" % (target_dir, target_name),
                 "--model_name=%s" % model_tag,
                 "--input_node=%s" % ",".join(input_nodes),
                 "--output_node=%s" % ",".join(output_nodes),
@@ -731,7 +731,7 @@ def tuning_run(abi,
                      phone_data_dir,
                      serialno)
 
-        adb_push("%s/%s" % (mace_run_dir, mace_run_target), phone_data_dir,
+        adb_push("%s/%s" % (target_dir, target_name), phone_data_dir,
                  serialno)
 
         stdout_buff = []
@@ -752,7 +752,7 @@ def tuning_run(abi,
                                       asan_rt_library_names(abi))
             ])
         adb_cmd.extend([
-            "%s/%s" % (phone_data_dir, mace_run_target),
+            "%s/%s" % (phone_data_dir, target_name),
             "--model_name=%s" % model_tag,
             "--input_node=%s" % ",".join(input_nodes),
             "--output_node=%s" % ",".join(output_nodes),
@@ -926,6 +926,29 @@ def build_host_libraries(model_build_type, abi):
             abi=abi)
 
 
+################################
+# library
+################################
+def get_lib_path(target_soc, serial_num, abi, project_name, build_output_dir,
+                 library_output_dir):
+    """Return the path of the merged static library for one ABI.
+
+    Host builds and builds with no target SoC use a generic archive name;
+    otherwise the name embeds the device name for `serial_num` and the SoC.
+    """
+    # <build_output_dir>/<project>/<library_output_dir>/<abi>/ -- the trailing
+    # slash is kept so the returned string stays exactly as before.
+    model_bin_dir = "%s/%s/%s/%s/" % (
+        build_output_dir, project_name, library_output_dir, abi)
+
+    if abi == "host" or not target_soc:
+        return "%s/libmace_%s.a" % (model_bin_dir, project_name)
+
+    device_name = adb_get_device_name_by_serialno(serial_num)
+    return "%s/libmace_%s.%s.%s.a" % (
+        model_bin_dir, project_name, device_name, target_soc)
+
+
 def merge_libs(target_soc,
                serial_num,
                abi,
@@ -945,11 +968,12 @@ def merge_libs(target_soc,
     if hexagon_mode:
         sh.cp("-f", hexagon_lib_file, library_dir)
 
+    lib_path = get_lib_path(target_soc, serial_num, abi,
+                            project_name, build_output_dir, library_output_dir)
     # make static library
     mri_stream = ""
     if abi == "host":
-        mri_stream += "create %s/libmace_%s.a\n" % \
-                      (model_bin_dir, project_name)
+        mri_stream += "create %s\n" % lib_path
         mri_stream += (
             "addlib "
             "bazel-bin/mace/codegen/libgenerated_opencl.pic.a\n")
@@ -982,14 +1006,7 @@ def merge_libs(target_soc,
                 "addlib "
                 "bazel-bin/mace/codegen/libgenerated_models.pic.a\n")
     else:
-        if not target_soc:
-            mri_stream += "create %s/libmace_%s.a\n" % \
-                          (model_bin_dir, project_name)
-        else:
-            device_name = adb_get_device_name_by_serialno(serial_num)
-            mri_stream += "create %s/libmace_%s.%s.%s.a\n" % \
-                          (model_bin_dir, project_name,
-                           device_name, target_soc)
+        mri_stream += "create %s\n" % lib_path
         if model_build_type == BuildType.code:
             mri_stream += (
                 "addlib "
@@ -1054,6 +1071,53 @@ def packaging_lib(libmace_output_dir, project_name):
     print("Packaging Done!\n")
 
 
+################################
+# example
+################################
+def build_example(target_soc,
+                  serial_num,
+                  abi,
+                  project_name,
+                  build_output_dir,
+                  library_output_dir,
+                  model_output_dir,
+                  build_type,
+                  hexagon_mode,
+                  enable_openmp,
+                  linkshared=False):
+    """Build the CLI example binary and copy it into `model_output_dir`."""
+    static_lib_name = "mace/libmace.a"
+    if not linkshared:
+        target_name = "example_static"
+        lib_path = get_lib_path(target_soc, serial_num, abi, project_name,
+                                build_output_dir, library_output_dir)
+        # //mace:libmace_static links mace/libmace.a, so stage the merged lib.
+        sh.cp("-f", lib_path, static_lib_name)
+    else:
+        target_name = "example_shared"
+    example_target = "//mace/examples/cli:%s" % target_name
+
+    # CODE_TYPE makes example.cc create the engine from generated code.
+    build_arg = ""
+    if build_type == BuildType.code:
+        build_arg = "--per_file_copt=//mace/examples/cli:example.cc@-DCODE_TYPE"  # noqa
+
+    try:
+        bazel_build(example_target, abi=abi, enable_openmp=enable_openmp,
+                    hexagon_mode=hexagon_mode, extra_args=build_arg)
+        example_binary_file = "%s/%s" % (model_output_dir, target_name)
+        if os.path.exists(example_binary_file):
+            sh.rm("-rf", example_binary_file)
+        target_bin = "/".join(bazel_target_to_bin(example_target))
+        sh.cp("-f", target_bin, model_output_dir)
+    finally:
+        # Always drop the staged copy, even when the build or copy fails.
+        sh.rm("-rf", static_lib_name)
+
+
+################################
+# benchmark
+################################
 def build_benchmark_model(abi,
                           model_output_dir,
                           hexagon_mode,