diff --git a/docs/getting_started/create_a_model_deployment.rst b/docs/getting_started/create_a_model_deployment.rst index 918c84a2f33692dd923ccfdffa649f8f83bcda76..46be54ad0bb3ac53f044f07b31e83520130ec360 100644 --- a/docs/getting_started/create_a_model_deployment.rst +++ b/docs/getting_started/create_a_model_deployment.rst @@ -35,7 +35,7 @@ Configurations * - target_abis - The target ABI to build, can be one or more of 'host', 'armeabi-v7a' or 'arm64-v8a' * - target_socs - - build for specified socs if you just want use the model for that socs. + - [optional] build for specified socs if you just want to use the model for those socs. * - embed_model_data - Whether embedding model weights as the code, default to 0 * - build_type @@ -50,9 +50,9 @@ Configurations * - model_sha256_checksum - The SHA256 checksum of the model file * - weight_file_path - - The path of the model weights file, used by Caffe model + - [optional] The path of the model weights file, used by Caffe model * - weight_sha256_checksum - - The SHA256 checksum of the weight file, used by Caffe model + - [optional] The SHA256 checksum of the weight file, used by Caffe model * - subgraphs - subgraphs key. ** DO NOT EDIT ** * - input_tensors @@ -63,6 +63,8 @@ Configurations - The shapes of the input tensors, in NHWC order * - output_shapes - The shapes of the output tensors, in NHWC order + * - validation_inputs_data + - [optional] Specify Numpy validation inputs. When not provided, [-1, 1] random values will be used * - runtime - The running device, one of [cpu, gpu, dsp, cpu_gpu]. cpu_gpu contains cpu and gpu model definition so you can run the model on both cpu and gpu. * - data_type @@ -75,5 +77,3 @@ Configurations - [optional] Whether to obfuscate the model operator name, default to 0 * - winograd - [optional] Whether to enable Winograd convolution, **will increase memory consumption** - * - input_files - - [optional] Specify Numpy validation inputs. 
When not provided, [-1, 1] random values will be used diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index d07379bf2e0a5ca852ec1baeb2ba231379766941..51c4b0ac8aa0e0db163afc154bb28160d0e836ab 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -366,7 +366,7 @@ The followings list the details. ``.pb`` file will be generated only when build_type is ``proto``. **OpenCL compiled kernel binary file** - * ``opencl/compiled_kernel.bin`` + * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin`` .. note:: @@ -376,6 +376,13 @@ The followings list the details. This file rely on the OpenCL driver on the phone, you should update the file when OpenCL driver changed. +**tar package** + * ``./build/${library_name}/libmace_${library_name}.tar.gz`` + + .. note:: + + This file packages all the above files, which are used for deployment. + ============= 5. how to use ============= diff --git a/docs/getting_started/how_to_build_zh.rst b/docs/getting_started/how_to_build_zh.rst index 7945d7a1c6a6df8b95944abee4d2afe717500b55..0695442ea09cc3a88cc563c0fffb495f04edbaf3 100644 --- a/docs/getting_started/how_to_build_zh.rst +++ b/docs/getting_started/how_to_build_zh.rst @@ -364,6 +364,16 @@ Mace目前只提供静态库,有以下两种使用场景。 pb文件紧当模型build_type设置为proto时才会产生。 +**OpenCL预编译文件** + * ``opencl/${target_abi}/${library_name}_compiled_opencl_kernel.${device_name}.${target_soc}.bin`` + + .. note:: + + 只有指定了``target_soc``并且``runtime==gpu``的情况下才会生成。 + + .. 
warning:: + + 该文件依赖于手机上opencl驱动,如果OpenCL版本变化,请更新该文件。 **库文件tar包** * ``./build/${library_name}/libmace_${library_name}.tar.gz`` diff --git a/docs/getting_started/models/demo_app_models.yaml b/docs/getting_started/models/demo_app_models.yaml index f78dc40a2f383a19eecd373e4ac7cb5bbdea3338..216deea517a3d6b3ef8e7673e90fb1f439655206 100644 --- a/docs/getting_started/models/demo_app_models.yaml +++ b/docs/getting_started/models/demo_app_models.yaml @@ -43,10 +43,10 @@ models: # 一个配置文件可以包含多个模型的配置信息,最终生 output_shapes: - 1,256,256,2 - 1,1,1,2 + validation_inputs_data: + - path/to/input_files # support http:// runtime: cpu limit_opencl_kernel_time: 1 nnlib_graph_mode: 0 obfuscate: 1 winograd: 0 - input_files: - - path/to/input_files # support http:// diff --git a/mace/core/file_storage.cc b/mace/core/file_storage.cc index 4d93da5106d74f13e1668e71fd6ef4bae4c5fc73..37c2ece13841e5eef49d61cc0103f3217c091ff3 100644 --- a/mace/core/file_storage.cc +++ b/mace/core/file_storage.cc @@ -150,8 +150,10 @@ int FileStorage::Load() { bool FileStorage::Insert(const std::string &key, const std::vector &value) { utils::WriteLock lock(&data_mutex_); - data_.emplace(key, value); - data_changed_ = true; + auto res = data_.emplace(key, value); + if (res.second) { + data_changed_ = true; + } return true; } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 0fad713155671c3df9506e6aea8cf395034dd06c..5235479db1455f1b7830445b6e8d3de1d56da9db 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -604,6 +604,11 @@ void OpenCLRuntime::BuildProgramFromSource( if (this->cache_storage_ != nullptr) { this->cache_storage_->Insert(built_program_key, content); + // update platform info + this->cache_storage_->Insert( + kOpenCLPlatformInfoKey, + std::vector(platform_info_.begin(), + platform_info_.end())); } VLOG(3) << "Program from source: " << built_program_key; @@ -656,10 +661,6 @@ cl::Kernel 
OpenCLRuntime::BuildKernel( void OpenCLRuntime::SaveBuiltCLProgram() { if (cache_storage_ != nullptr) { - // update platform info - cache_storage_->Insert(kOpenCLPlatformInfoKey, - std::vector(platform_info_.begin(), - platform_info_.end())); if (cache_storage_->Flush() != 0) { LOG(FATAL) << "Store OPENCL compiled kernel to file failed. " << "Please make sure the storage directory exist " diff --git a/tools/converter.py b/tools/converter.py index 6d9df68dfba3ff6157ce262b51001799728d2221..0230f144c30e8315558a4d0e239672e2eccc6f24 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -14,6 +14,7 @@ import argparse import filelock +import glob import hashlib import os import re @@ -40,11 +41,12 @@ from common import StringFormatter BUILD_OUTPUT_DIR = 'build' PHONE_DATA_DIR = "/data/local/tmp/mace_run" MODEL_OUTPUT_DIR_NAME = 'model' +MODEL_HEADER_DIR_PATH = 'include/mace/public' BUILD_TMP_DIR_NAME = '_tmp' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' OUTPUT_LIBRARY_DIR_NAME = 'library' OUTPUT_OPENCL_BINARY_DIR_NAME = 'opencl' -OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel.bin' +OUTPUT_OPENCL_BINARY_FILE_NAME = 'compiled_opencl_kernel' CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" CODEGEN_BASE_DIR = 'mace/codegen' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' @@ -434,11 +436,19 @@ def get_build_model_dirs(library_name, model_name, target_abi, target_soc, return model_output_base_dir, model_output_dir, mace_model_dir -def get_opencl_binary_output_path(library_name): - return '%s/%s/%s/%s' % (BUILD_OUTPUT_DIR, - library_name, - OUTPUT_OPENCL_BINARY_DIR_NAME, - OUTPUT_OPENCL_BINARY_FILE_NAME) +def get_opencl_binary_output_path(library_name, target_abi, + target_soc, serial_num): + device_name = \ + sh_commands.adb_get_device_name_by_serialno(serial_num) + return '%s/%s/%s/%s/%s_%s.%s.%s.bin' % \ + (BUILD_OUTPUT_DIR, + library_name, + OUTPUT_OPENCL_BINARY_DIR_NAME, + target_abi, + library_name, + OUTPUT_OPENCL_BINARY_FILE_NAME, + 
device_name, + target_soc) ################################ @@ -512,9 +522,16 @@ def convert_model(configs): model_output_dir = \ '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_OUTPUT_DIR_NAME) + model_header_dir = \ + '%s/%s/%s' % (BUILD_OUTPUT_DIR, library_name, MODEL_HEADER_DIR_PATH) if os.path.exists(model_output_dir): sh.rm("-rf", model_output_dir) os.makedirs(model_output_dir) + if os.path.exists(model_header_dir): + sh.rm("-rf", model_header_dir) + os.makedirs(model_header_dir) + # copy header files + sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) embed_model_data = configs[YAMLKeyword.embed_model_data] @@ -582,14 +599,20 @@ def convert_model(configs): configs[YAMLKeyword.build_type], data_type) - # mv pb and data file to build/model_name/model if not embed_model_data: - sh_commands.mv_model_file_to_output_dir( - model_build_type=configs[YAMLKeyword.build_type], - model_codegen_dir=model_codegen_dir, - model_name=model_name, - output_dir=model_output_dir - ) + # mv pb and data file to build/model_name/model + sh.mv("-f", + '%s/%s.data' % (model_codegen_dir, model_name), + model_output_dir) + if configs[YAMLKeyword.build_type] == BuildType.proto: + sh.mv("-f", + '%s/%s.pb' % (model_codegen_dir, model_name), + model_output_dir) + else: + sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), + model_header_dir) + sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), + model_header_dir) MaceLogger.summary( StringFormatter.block("Model %s converted" % model_name)) @@ -681,9 +704,12 @@ def build_specific_lib(target_abi, target_soc, serial_num, binary_changed = True if binary_changed: + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) sh_commands.merge_opencl_binaries( model_output_dirs, CL_COMPILED_BINARY_FILE_NAME, - get_opencl_binary_output_path(library_name)) + opencl_output_bin_path) sh_commands.gen_tuning_param_code(model_output_dirs) sh_commands.bazel_build( MACE_RUN_TARGET, @@ -837,12 
+863,16 @@ def run_specific_target(flags, configs, target_abi, library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] + opencl_output_bin_path = "" if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) mace_check(os.path.exists(build_tmp_binary_dir), ModuleName.RUN, 'You should build before run.') @@ -892,6 +922,7 @@ def run_specific_target(flags, configs, target_abi, runtime_list.extend([model_runtime]) for runtime in runtime_list: device_type = parse_device_type(runtime) + run_output = sh_commands.tuning_run( abi=target_abi, serialno=serial_num, @@ -919,7 +950,7 @@ def run_specific_target(flags, configs, target_abi, gpu_priority_hint=flags.gpu_priority_hint, runtime_failure_ratio=flags.runtime_failure_ratio, address_sanitizer=flags.address_sanitizer, - opencl_binary_file=get_opencl_binary_output_path(library_name), + opencl_binary_file=opencl_output_bin_path, ) if flags.validate: model_file_path, weight_file_path = get_model_files_path( @@ -978,12 +1009,16 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): library_name = configs[YAMLKeyword.library_name] build_type = configs[YAMLKeyword.build_type] embed_model_data = configs[YAMLKeyword.embed_model_data] + opencl_output_bin_path = "" if not configs[YAMLKeyword.target_socs]: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, None, None) else: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi, target_soc, serial_num) + opencl_output_bin_path = get_opencl_binary_output_path( + library_name, target_abi, target_soc, serial_num + ) mace_check(os.path.exists(build_tmp_binary_dir), 
ModuleName.BENCHMARK, 'You should build before benchmark.') @@ -1052,7 +1087,7 @@ def bm_specific_target(flags, configs, target_abi, target_soc, serial_num): cpu_affinity_policy=flags.cpu_affinity_policy, gpu_perf_hint=flags.gpu_perf_hint, gpu_priority_hint=flags.gpu_priority_hint, - opencl_binary_file=get_opencl_binary_output_path(library_name)) + opencl_binary_file=opencl_output_bin_path) def benchmark_model(flags): diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 8282f8840138e3c1bc3f501386d9fe02aff4d094..289f76dcd13f2e4e6957c52e1afcf591a649c1cf 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -400,9 +400,8 @@ def merge_opencl_binaries(binaries_dirs, cl_bin_dirs.append(os.path.join(d, "opencl_bin")) # create opencl binary output dir opencl_binary_dir = os.path.dirname(output_file_path) - if os.path.exists(opencl_binary_dir): - sh.rm("-rf", opencl_binary_dir) - sh.mkdir("-p", opencl_binary_dir) + if not os.path.exists(opencl_binary_dir): + sh.mkdir("-p", opencl_binary_dir) kvs = {} for binary_dir in cl_bin_dirs: binary_path = os.path.join(binary_dir, cl_compiled_program_file_name) @@ -578,20 +577,6 @@ def is_binary_tuned(build_tmp_binary_dir): return os.path.exists(build_tmp_binary_dir + '/tuned') -def mv_model_file_to_output_dir( - model_build_type, - model_codegen_dir, - model_name, - output_dir): - if model_build_type == BuildType.proto: - sh.mv("-f", - '%s/%s.pb' % (model_codegen_dir, model_name), - output_dir) - sh.mv("-f", - '%s/%s.data' % (model_codegen_dir, model_name), - output_dir) - - def create_internal_storage_dir(serialno, phone_data_dir): internal_storage_dir = "%s/interior/" % phone_data_dir sh.adb("-s", serialno, "shell", "mkdir", "-p", internal_storage_dir) @@ -897,26 +882,15 @@ def merge_libs(target_soc, hexagon_mode): print("* Merge mace lib") project_output_dir = "%s/%s" % (build_output_dir, project_name) - model_header_dir = "%s/include/mace/public" % project_output_dir hexagon_lib_file = 
"third_party/nnlib/libhexagon_controller.so" library_dir = "%s/%s" % (project_output_dir, library_output_dir) model_bin_dir = "%s/%s/" % (library_dir, abi) - if os.path.exists(model_bin_dir): - sh.rm("-rf", model_bin_dir) - sh.mkdir("-p", model_bin_dir) - if os.path.exists(model_header_dir): - sh.rm("-rf", model_header_dir) - sh.mkdir("-p", model_header_dir) - # copy header files - sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir) + if not os.path.exists(model_bin_dir): + sh.mkdir("-p", model_bin_dir) if hexagon_mode: sh.cp("-f", hexagon_lib_file, library_dir) - if model_build_type == BuildType.code: - sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir) - sh.cp("-f", glob.glob("mace/codegen/models/*/*.h"), model_header_dir) - # make static library mri_stream = "" if abi == "host":