diff --git a/mace/codegen/BUILD b/mace/codegen/BUILD index 433a42976c76298b955920a35aa4176e8e1b0b6e..04b0f8d6f5c5f71091a7605c04d4d288f4e0e499 100644 --- a/mace/codegen/BUILD +++ b/mace/codegen/BUILD @@ -15,7 +15,10 @@ cc_library( deps = [ "//mace/core", "//mace/ops", - ] + if_embed_binary_program(['//mace/codegen:generated_opencl_lib']), + ] + if_embed_binary_program([ + '//mace/codegen:generated_opencl_lib', + '//mace/codegen:generated_tuning_lib', + ]), ) cc_library( @@ -24,3 +27,10 @@ cc_library( copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], linkstatic = 1, ) + +cc_library( + name = "generated_tuning_lib", + srcs = glob(["tuning/*.cc"]), + copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], + linkstatic = 1, +) diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index d40faf9163f767e6fdaf90e9bac9e36c2f3a6f32..e03e63e2eabefb4d66ab498f77a3531fc112d522 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_android_arm64") +load("//mace:mace.bzl", "if_android", "if_android_arm64", "if_embed_binary_program") cc_library( name = "kernels", @@ -21,7 +21,8 @@ cc_library( "-std=c++11", "-fopenmp", "-D_GLIBCXX_USE_C99_MATH_TR1", - ], + ] + + if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]), linkopts = if_android(["-lm"]), deps = [ "//mace/core", diff --git a/mace/python/tools/opencl_compiled_program.cc.tmpl b/mace/python/tools/binary.cc.tmpl similarity index 50% rename from mace/python/tools/opencl_compiled_program.cc.tmpl rename to mace/python/tools/binary.cc.tmpl index 84dd3f5c6fc6d8e655db4b3414a43c11595d3c60..354af0aa38e18eebb344cb6493de2d17e0946a92 100644 --- a/mace/python/tools/opencl_compiled_program.cc.tmpl +++ b/mace/python/tools/binary.cc.tmpl @@ -10,21 +10,17 @@ namespace mace { -{% for map in binary_maps %} -// {{map.name}} -{% endfor %} - -extern const std::map> kCompiledProgramMap = +extern const std::map> {{variable_name}}= { - {% for map in binary_maps %} + {% for key, value in maps.iteritems() %} { - "{{map.name}}", + "{{key}}", { - {%- for ele in map.content -%} + {%- for ele in value -%} {{ele}}, {%- endfor -%} } - }, // {{map.name}} + }, // {{key}} {% endfor %} }; diff --git a/mace/python/tools/binary_codegen.py b/mace/python/tools/binary_codegen.py new file mode 100644 index 0000000000000000000000000000000000000000..fbb2c8230f5087c4668276165ebf00370c9d4238 --- /dev/null +++ b/mace/python/tools/binary_codegen.py @@ -0,0 +1,79 @@ +import argparse +import os +import sys +import struct + +import jinja2 + +import numpy as np + +# python mace/python/tools/binary_codegen.py \ +# --binary_file=${BIN_FILE} --output_path=${CODE_GEN_PATH} --variable_name=kTuningParamsData + +FLAGS = None + + +def generate_cpp_source(): + with open(FLAGS.binary_file, "rb") as binary_file: + binary_array = np.fromfile(binary_file, dtype=np.uint8) + + data_map = {} + + idx = 0 + size, = struct.unpack("Q", binary_array[idx:idx+8]) + print size + idx += 8 + for _ in xrange(size): + key_size, = struct.unpack("i", binary_array[idx:idx+4]) + idx += 4 + key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx+key_size]) + idx += key_size + params_size, = struct.unpack("i", binary_array[idx:idx+4]) + idx += 4 + data_map[key] = [] + count = params_size / 4 + params = struct.unpack(str(count) + "i", binary_array[idx:idx+params_size]) + for i in params: + data_map[key].append(i) + idx += params_size + + env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0])) + return env.get_template('binary.cc.tmpl').render( + maps = data_map, + data_type = 'int', + variable_name = FLAGS.variable_name + ) + +def main(unused_args): + + cpp_binary_source = generate_cpp_source() + if os.path.isfile(FLAGS.output_path): + os.remove(FLAGS.output_path) + w_file = open(FLAGS.output_path, "w") + w_file.write(cpp_binary_source) + w_file.close() + +def parse_args(): + """Parses command line arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "--binary_file", + type=str, + default="", + help="The binaries file path.") + parser.add_argument( + "--output_path", + type=str, + default="", + help="The path of generated C++ source file which contains the binary.") + parser.add_argument( + "--variable_name", + type=str, + default="kTuningParamsData", + help="global variable name.") + return parser.parse_known_args() + + +if __name__ == '__main__': + FLAGS, unparsed = parse_args() + main(unused_args=[sys.argv[0]] + unparsed) diff --git a/mace/python/tools/opencl_codegen.py b/mace/python/tools/opencl_codegen.py index 7c6d74892f7b535714dd00f3719e4987df3c20d0..b85412df3684a874b965cc0eead1c34cb6e0afb6 100644 --- a/mace/python/tools/opencl_codegen.py +++ b/mace/python/tools/opencl_codegen.py @@ -14,7 +14,7 @@ FLAGS = None def generate_cpp_source(): - maps = {"binary_maps": []} + maps = {} for file_name in os.listdir(FLAGS.cl_binary_dir): file_path = os.path.join(FLAGS.cl_binary_dir, file_name) if file_path[-4:] == ".bin": @@ -23,13 +23,16 @@ def generate_cpp_source(): binary_array = np.fromfile(f, dtype=np.uint8) f.close() - binary_dict = {"name": file_name[:-4], "content": []} + maps[file_name[:-4]] = [] for ele in binary_array: - binary_dict["content"].append(hex(ele)) - maps["binary_maps"].append(binary_dict) + maps[file_name[:-4]].append(hex(ele)) env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0])) - return env.get_template('opencl_compiled_program.cc.tmpl').render(maps) + return env.get_template('binary.cc.tmpl').render( + maps = maps, + data_type = 'unsigned char', + variable_name = 'kCompiledProgramMap' + ) def main(unused_args): diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index 7168980b27ed680bb3a13b90b26c5da271a0df95..b784bcc7b845ef3ad024f5ed1a3170d8a55a4d19 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -11,6 +11,7 @@ #include #include #include +#include #include "mace/utils/logging.h" #include "mace/utils/timer.h" @@ -72,7 +73,7 @@ class Tuner { if (path_ != nullptr) { std::ofstream ofs(path_, std::ios::binary | std::ios::out); if (ofs.is_open()) { - size_t num_pramas = param_table_.size(); + int64_t num_pramas = param_table_.size(); ofs.write(reinterpret_cast(&num_pramas), sizeof(num_pramas)); for (auto &kp : param_table_) { int32_t key_size = kp.first.size(); @@ -97,13 +98,20 @@ class Tuner { } inline void ReadRunParamters() { +#ifdef MACE_EMBED_BINARY_PROGRAM + extern const std::map> kTuningParamsData; + VLOG(1) << "Read tuning parameters from source"; + for (auto it = kTuningParamsData.begin(); it != kTuningParamsData.end(); ++it) { + param_table_.emplace(it->first, std::vector(it->second.begin(), it->second.end())); + } +#else if (path_ != nullptr) { std::ifstream ifs(path_, std::ios::binary | std::ios::in); if (ifs.is_open()) { int32_t key_size = 0; int32_t params_size = 0; int32_t params_count = 0; - size_t num_pramas = 0; + int64_t num_pramas = 0; ifs.read(reinterpret_cast(&num_pramas), sizeof(num_pramas)); while (num_pramas--) { ifs.read(reinterpret_cast(&key_size), sizeof(key_size)); @@ -123,6 +131,7 @@ class Tuner { LOG(WARNING) << "Read run parameter file failed."; } } +#endif } template diff --git a/tools/validate_gcn.sh b/tools/validate_gcn.sh index 46952446b77d09dea7e72f20bce2e4964b3e514b..393e8b1146cb7cd85f174a9136f643c3cf0dcf52 100644 --- a/tools/validate_gcn.sh +++ b/tools/validate_gcn.sh @@ -2,10 +2,10 @@ # Must run at root dir of mace project. set +x Usage() { - echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size' + echo 'Usage: bash tools/validate_gcn.sh tf_model_path image_size [tuning]' } -if [ $# != 2 ];then +if [ $# -lt 2 ];then Usage exit -1 fi @@ -25,6 +25,8 @@ CODEGEN_DIR=${MACE_SOURCE_DIR}/mace/codegen MODEL_CODEGEN_DIR=${CODEGEN_DIR}/models/gcn-$IMAGE_SIZE CL_CODEGEN_DIR=${CODEGEN_DIR}/opencl CL_BIN_DIR=${CODEGEN_DIR}/opencl_bin +TUNING_CODEGEN_DIR=${CODEGEN_DIR}/tuning +TUNING_OR_NOT=${3:-0} build_and_run() { @@ -40,18 +42,24 @@ build_and_run() $EMBED_OPENCL_BINARY_BUILD_FLAGS \ --copt=-DMACE_MODEL_FUNCTION=Create${MODEL_TAG} - adb shell "rm -rf ${PHONE_DATA_DIR}" adb shell "mkdir -p ${PHONE_DATA_DIR}" if [ "$EMBED_OPENCL_BINARY" = false ]; then + adb shell "rm -rf ${KERNEL_DIR}" adb shell "mkdir -p ${KERNEL_DIR}" - adb push mace/kernels/opencl/cl/ ${KERNEL_DIR} + adb push mace/kernels/opencl/cl/. ${KERNEL_DIR} fi adb push ${MODEL_DIR}/${INPUT_FILE_NAME} ${PHONE_DATA_DIR} adb push bazel-bin/mace/examples/mace_run ${PHONE_DATA_DIR} num_threads=${1:-4} + if [[ "${TUNING_OR_NOT}" != "0" && "$EMBED_OPENCL_BINARY" != true ]];then + tuning_flag=1 + else + tuning_flag=0 + fi - adb