Merge branch 'opencl-binary-name' into 'master'

Refactor logic of opencl platform info write logic and docs. See merge request !570

Merge branch 'opencl-binary-name' into 'master'
Refactor logic of opencl platform info write logic and docs. See merge request !570
4aed901d · 叶剑武 · df29340c · 174c999d · 4aed901d · 4aed901d
8 changed file
--- a/docs/getting_started/create_a_model_deployment.rst
+++ b/docs/getting_started/create_a_model_deployment.rst
@@ -35,7 +35,7 @@ Configurations
    * - target_abis
      - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a'
    * - target_socs
-      - build for specified socs if you just want use the model for that socs.
+      - [optional] Build only for the specified SoCs, if you just want to use the model on those SoCs.
    * - embed_model_data
      - Whether embedding model weights as the code, default to 0
    * - build_type
@@ -50,9 +50,9 @@ Configurations
    * - model_sha256_checksum
      - The SHA256 checksum of the model file
    * - weight_file_path
-      - The path of the model weights file, used by Caffe model
+      - [optional] The path of the model weights file, used by Caffe model
    * - weight_sha256_checksum
-      - The SHA256 checksum of the weight file, used by Caffe model
+      - [optional] The SHA256 checksum of the weight file, used by Caffe model
    * - subgraphs
      - subgraphs key. ** DO NOT EDIT **
    * - input_tensors
@@ -63,6 +63,8 @@ Configurations
      - The shapes of the input tensors, in NHWC order
    * - output_shapes
      - The shapes of the output tensors, in NHWC order
+    * - validation_inputs_data
+      - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used
    * - runtime
      - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains cpu and gpu model definition so you can run the model on both cpu and gpu.
    * - data_type
@@ -75,5 +77,3 @@ Configurations
      - [optional] Whether to obfuscate the model operator name, default to 0
    * - winograd
      - [optional] Whether to enable Winograd convolution, **will increase memory consumption**
-    * - input_files
-      - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used
--- a/docs/getting_started/how_to_build.rst
+++ b/docs/getting_started/how_to_build.rst
@@ -366,7 +366,7 @@ The followings list the details.
        ``.pb`` file will be generated only when build_type is ``proto``.

 **OpenCL compiled kernel binary file**
-    * ``opencl/compiled_kernel.bin``
+    * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin``

    .. note::

@@ -376,6 +376,13 @@ The followings list the details.

        This file rely on the OpenCL driver on the phone, you should update the file when OpenCL driver changed.

+**tar package**
+    * ``./build/${library_name}/libmace_${library_name}.tar.gz``
+
+    .. note::
+
+        This file packages all of the above files, which are used for deployment.
+
 =============
 5. how to use
 =============

--- a/docs/getting_started/how_to_build_zh.rst
+++ b/docs/getting_started/how_to_build_zh.rst
@@ -364,6 +364,16 @@ Mace目前只提供静态库，有以下两种使用场景。

        pb文件紧当模型build_type设置为proto时才会产生。

+**OpenCL预编译文件**
+    * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin``
+
+    .. note::
+
+        只有在指定了 ``target_soc`` 并且 ``runtime==gpu`` 的情况下才会生成。
+
+    .. warning::
+
+        该文件依赖于手机上的OpenCL驱动，如果OpenCL驱动版本变化，请更新该文件。

 **库文件tar包**
    * ``./build/${library_name}/libmace_${library_name}.tar.gz``

--- a/docs/getting_started/models/demo_app_models.yaml
+++ b/docs/getting_started/models/demo_app_models.yaml
@@ -43,10 +43,10 @@ models: # 一个配置文件可以包含多个模型的配置信息，最终生
        output_shapes:
          - 1,256,256,2
          - 1,1,1,2
+        validation_inputs_data:
+          - path/to/input_files # support http://
    runtime: cpu
    limit_opencl_kernel_time: 1
    nnlib_graph_mode: 0
    obfuscate: 1
    winograd: 0
-    input_files:
-      - path/to/input_files # support http://
--- a/mace/core/file_storage.cc
+++ b/mace/core/file_storage.cc
@@ -150,8 +150,10 @@ int FileStorage::Load() {
 bool FileStorage::Insert(const std::string &key,
                         const std::vector<unsigned char> &value) {
  utils::WriteLock lock(&data_mutex_);
-  data_.emplace(key, value);
-  data_changed_ = true;
+  auto res = data_.emplace(key, value);
+  if (res.second) {
+    data_changed_ = true;
+  }
  return true;
 }


--- a/mace/core/runtime/opencl/opencl_runtime.cc
+++ b/mace/core/runtime/opencl/opencl_runtime.cc
@@ -604,6 +604,11 @@ void OpenCLRuntime::BuildProgramFromSource(

    if (this->cache_storage_ != nullptr) {
      this->cache_storage_->Insert(built_program_key, content);
+      // update platform info
+      this->cache_storage_->Insert(
+          kOpenCLPlatformInfoKey,
+          std::vector<unsigned char>(platform_info_.begin(),
+                                     platform_info_.end()));
    }

    VLOG(3) << "Program from source: " << built_program_key;
@@ -656,10 +661,6 @@ cl::Kernel OpenCLRuntime::BuildKernel(

 void OpenCLRuntime::SaveBuiltCLProgram() {
  if (cache_storage_ != nullptr) {
-    // update platform info
-    cache_storage_->Insert(kOpenCLPlatformInfoKey,
-                           std::vector<unsigned char>(platform_info_.begin(),
-                                                      platform_info_.end()));
    if (cache_storage_->Flush() != 0) {
      LOG(FATAL) << "Store OPENCL compiled kernel to file failed. "
                 << "Please make sure the storage directory exist "

--- a/tools/converter.py
+++ b/tools/converter.py
@@ -14,6 +14,7 @@

 import argparse
 import filelock
+import glob
 import hashlib
 import os
 import re
@@ -40,11 +41,12 @@ from common import StringFormatter
 BUILD_OUTPUT_DIR = 'build'
 PHONE_DATA_DIR = "/data/local/tmp/mace_run"
 MODEL_OUTPUT_DIR_NAME = 'model'
+MODEL_HEADER_DIR_PATH = 'include/mace/public'
 BUILD_TMP_DIR_NAME = '_tmp'
 BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
 OUTPUT_LIBRARY_DIR_NAME = 'library'
 OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl'
-OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel.bin'
+OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel'
 CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
 CODEGEN_BASE_DIR = 'mace/codegen'
 MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models'
@@ -434,11 +436,19 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc,
    return model_output_base_dir, model_output_dir, mace_model_dir


-def get_opencl_binary_output_path(library_name):
-    return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR,
-                            library_name,
-                            OUTPUT_OPENCL_BINARY_DIR_NAME,
-                            OUTPUT_OPENCL_BINARY_FILE_NAME)
+def get_opencl_binary_output_path(library_name, target_abi,
+                                  target_soc, serial_num):
+    device_name = \
+        sh_commands.adb_get_device_name_by_serialno(serial_num)
+    return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \
+           (BUILD_OUTPUT_DIR,
+            library_name,
+            OUTPUT_OPENCL_BINARY_DIR_NAME,
+            target_abi,
+            library_name,
+            OUTPUT_OPENCL_BINARY_FILE_NAME,
+            device_name,
+            target_soc)


 ################################
@@ -512,9 +522,16 @@ def convert_model(configs):

    model_output_dir = \
        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME)
+    model_header_dir = \
+        '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_HEADER_DIR_PATH)
    if os.path.exists(model_output_dir):
        sh.rm("-rf", model_output_dir)
    os.makedirs(model_output_dir)
+    if os.path.exists(model_header_dir):
+        sh.rm("-rf", model_header_dir)
+    os.makedirs(model_header_dir)
+    # copy header files
+    sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir)

    embed_model_data = configs[YAMLKeyword.embed_model_data]

@@ -582,14 +599,20 @@ def convert_model(configs):
            configs[YAMLKeyword.build_type],
            data_type)

-        # mv pb and data file to build/model_name/model
        if not embed_model_data:
-            sh_commands.mv_model_file_to_output_dir(
-                model_build_type=configs[YAMLKeyword.build_type],
-                model_codegen_dir=model_codegen_dir,
-                model_name=model_name,
-                output_dir=model_output_dir
-            )
+            # mv pb and data file to build/model_name/model
+            sh.mv("-f",
+                  '%s/%s.data' % (model_codegen_dir, model_name),
+                  model_output_dir)
+            if configs[YAMLKeyword.build_type] == BuildType.proto:
+                sh.mv("-f",
+                      '%s/%s.pb' % (model_codegen_dir, model_name),
+                      model_output_dir)
+            else:
+                sh.cp("-f", glob.glob("mace/codegen/engine/*.h"),
+                      model_header_dir)
+                sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"),
+                      model_header_dir)

        MaceLogger.summary(
            StringFormatter.block("Model %s converted" % model_name))
@@ -681,9 +704,12 @@ def build_specific_lib(target_abi, target_soc, serial_num,
            binary_changed = True

    if binary_changed:
+        opencl_output_bin_path = get_opencl_binary_output_path(
+            library_name, target_abi, target_soc, serial_num
+        )
        sh_commands.merge_opencl_binaries(
            model_output_dirs, CL_COMPILED_BINARY_FILE_NAME,
-            get_opencl_binary_output_path(library_name))
+            opencl_output_bin_path)
        sh_commands.gen_tuning_param_code(model_output_dirs)
        sh_commands.bazel_build(
            MACE_RUN_TARGET,
@@ -837,12 +863,16 @@ def run_specific_target(flags, configs, target_abi,
    library_name = configs[YAMLKeyword.library_name]
    build_type = configs[YAMLKeyword.build_type]
    embed_model_data = configs[YAMLKeyword.embed_model_data]
+    opencl_output_bin_path = ""
    if not configs[YAMLKeyword.target_socs]:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    None, None)
    else:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    target_soc, serial_num)
+        opencl_output_bin_path = get_opencl_binary_output_path(
+            library_name, target_abi, target_soc, serial_num
+        )
    mace_check(os.path.exists(build_tmp_binary_dir),
               ModuleName.RUN,
               'You should build before run.')
@@ -892,6 +922,7 @@ def run_specific_target(flags, configs, target_abi,
            runtime_list.extend([model_runtime])
        for runtime in runtime_list:
            device_type = parse_device_type(runtime)
+
            run_output = sh_commands.tuning_run(
                abi=target_abi,
                serialno=serial_num,
@@ -919,7 +950,7 @@ def run_specific_target(flags, configs, target_abi,
                gpu_priority_hint=flags.gpu_priority_hint,
                runtime_failure_ratio=flags.runtime_failure_ratio,
                address_sanitizer=flags.address_sanitizer,
-                opencl_binary_file=get_opencl_binary_output_path(library_name),
+                opencl_binary_file=opencl_output_bin_path,
            )
            if flags.validate:
                model_file_path, weight_file_path = get_model_files_path(
@@ -978,12 +1009,16 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
    library_name = configs[YAMLKeyword.library_name]
    build_type = configs[YAMLKeyword.build_type]
    embed_model_data = configs[YAMLKeyword.embed_model_data]
+    opencl_output_bin_path = ""
    if not configs[YAMLKeyword.target_socs]:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    None, None)
    else:
        build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi,
                                                    target_soc, serial_num)
+        opencl_output_bin_path = get_opencl_binary_output_path(
+            library_name, target_abi, target_soc, serial_num
+        )
    mace_check(os.path.exists(build_tmp_binary_dir),
               ModuleName.BENCHMARK,
               'You should build before benchmark.')
@@ -1052,7 +1087,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num):
                cpu_affinity_policy=flags.cpu_affinity_policy,
                gpu_perf_hint=flags.gpu_perf_hint,
                gpu_priority_hint=flags.gpu_priority_hint,
-                opencl_binary_file=get_opencl_binary_output_path(library_name))
+                opencl_binary_file=opencl_output_bin_path)


 def benchmark_model(flags):

--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -400,9 +400,8 @@ def merge_opencl_binaries(binaries_dirs,
        cl_bin_dirs.append(os.path.join(d, "opencl_bin"))
    # create opencl binary output dir
    opencl_binary_dir = os.path.dirname(output_file_path)
-    if os.path.exists(opencl_binary_dir):
-        sh.rm("-rf", opencl_binary_dir)
-    sh.mkdir("-p", opencl_binary_dir)
+    if not os.path.exists(opencl_binary_dir):
+        sh.mkdir("-p", opencl_binary_dir)
    kvs = {}
    for binary_dir in cl_bin_dirs:
        binary_path = os.path.join(binary_dir, cl_compiled_program_file_name)
@@ -578,20 +577,6 @@ def is_binary_tuned(build_tmp_binary_dir):
    return os.path.exists(build_tmp_binary_dir + '/tuned')


-def mv_model_file_to_output_dir(
-        model_build_type,
-        model_codegen_dir,
-        model_name,
-        output_dir):
-    if model_build_type == BuildType.proto:
-        sh.mv("-f",
-              '%s/%s.pb' % (model_codegen_dir, model_name),
-              output_dir)
-    sh.mv("-f",
-          '%s/%s.data' % (model_codegen_dir, model_name),
-          output_dir)
-
-
 def create_internal_storage_dir(serialno, phone_data_dir):
    internal_storage_dir = "%s/interior/" % phone_data_dir
    sh.adb("-s", serialno, "shell", "mkdir", "-p", internal_storage_dir)
@@ -897,26 +882,15 @@ def merge_libs(target_soc,
               hexagon_mode):
    print("* Merge mace lib")
    project_output_dir = "%s/%s" % (build_output_dir, project_name)
-    model_header_dir = "%s/include/mace/public" % project_output_dir
    hexagon_lib_file = "third_party/nnlib/libhexagon_controller.so"
    library_dir = "%s/%s" % (project_output_dir, library_output_dir)
    model_bin_dir = "%s/%s/" % (library_dir, abi)

-    if os.path.exists(model_bin_dir):
-        sh.rm("-rf", model_bin_dir)
-    sh.mkdir("-p", model_bin_dir)
-    if os.path.exists(model_header_dir):
-        sh.rm("-rf", model_header_dir)
-    sh.mkdir("-p", model_header_dir)
-    # copy header files
-    sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir)
+    if not os.path.exists(model_bin_dir):
+        sh.mkdir("-p", model_bin_dir)
    if hexagon_mode:
        sh.cp("-f", hexagon_lib_file, library_dir)

-    if model_build_type == BuildType.code:
-        sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir)
-        sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), model_header_dir)
-
    # make static library
    mri_stream = ""
    if abi == "host":