diff --git a/.gitignore b/.gitignore index 54dacd4552610ddd4fa36d1517ce6da23cc9c4ef..adce04e177caa7dad2a7665449f1d9f922ade717 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,6 @@ mace/codegen/version/ mace/codegen/engine/ build/ docs/_build/ +*.a \.project/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 72820f2b14e09efd74226d35c23d72ae27f7cdec..fe0fd433b61fdcb48acd4aa2e733f137d7919fe2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -101,5 +101,5 @@ python_tools_tests: - rm -rf mace-models - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@v9.git.n.xiaomi.com:deep-computing/mace-models.git - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2.yml - - sh -c "python tools/converter.py build --config=${CONF_FILE} --disable_tuning && python tools/converter.py run --config=${CONF_FILE} --round=1 --validate" || exit 1 + - sh -c "python tools/converter.py build --config=${CONF_FILE} --disable_tuning && python tools/converter.py run --config=${CONF_FILE} --round=1 --validate && python tools/converter.py run --config=${CONF_FILE} --example --round=1 --validate" || exit 1 - rm -rf mace-models diff --git a/mace/BUILD b/mace/BUILD index a216f9f15e84ae3707fbaf13e1578dc510038bcb..d012231aa1ff62e66aed22b8d13223b0c5a76751 100644 --- a/mace/BUILD +++ b/mace/BUILD @@ -80,3 +80,9 @@ cc_library( srcs = ["libmace.so"], visibility = ["//visibility:public"], ) + +cc_library( + name = "libmace_static", + srcs = ["libmace.a"], + visibility = ["//visibility:public"], +) diff --git a/mace/examples/BUILD b/mace/examples/BUILD deleted file mode 100644 index 3f2fc38da2437948acd29b1702691a780945e52a..0000000000000000000000000000000000000000 --- a/mace/examples/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -# Examples -load("//mace:mace.bzl", "if_openmp_enabled") - -cc_binary( - name = "example", - srcs = ["example.cc"], - linkopts = if_openmp_enabled(["-fopenmp"]), - linkstatic = 1, - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], - deps = [ - "//external:gflags_nothreads", - "//mace/codegen:generated_models", - "//mace/codegen:generated_mace_engine_factory", - ], -) diff --git a/mace/examples/cli/BUILD b/mace/examples/cli/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..4fe3e88a9c92f7732383c2f7fb3552c2eefc0deb --- /dev/null +++ b/mace/examples/cli/BUILD @@ -0,0 +1,49 @@ +# Examples +load("//mace:mace.bzl", "if_openmp_enabled", "if_android") + +cc_binary( + name = "example_static", + srcs = ["example.cc"], + copts = [ + "-Werror", + "-Wextra", + ] + if_android([ + "-DMACE_ENABLE_OPENCL", + ]), + linkopts = [ + "-lm", + ] + if_openmp_enabled([ + "-fopenmp" + ]) + if_android([ + "-ldl", + "-pie", + "-llog", + ]), + linkstatic = 1, + deps = [ + "//external:gflags_nothreads", + "//mace/codegen:generated_mace_engine_factory", + "//mace:libmace_static", + ], +) + +cc_binary( + name = "example_shared", + srcs = ["example.cc"], + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ] + if_android([ + "-DMACE_ENABLE_OPENCL", + ]), + linkopts = ["-lm", "-pie", "-fPIE"] + if_openmp_enabled(["-fopenmp"]), + linkstatic = 0, + deps = [ + "//external:gflags_nothreads", + "//mace/codegen:generated_mace_engine_factory", + "//mace/utils:utils", + "//mace:libmace", + ], +) + diff --git a/mace/examples/example.cc b/mace/examples/cli/example.cc similarity index 72% rename from mace/examples/example.cc rename to mace/examples/cli/example.cc index caa80dcdc1da69176393af076a2ae7bf998e9f7e..dbccc36f3d7c1646771d833e7e918b77083dbba8 100644 --- a/mace/examples/example.cc +++ b/mace/examples/cli/example.cc @@ -12,18 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -/** - * Usage: - * mace_run --model=mobi_mace.pb \ - * --input=input_node \ - * --output=output_node \ - * --input_shape=1,224,224,3 \ - * --output_shape=1,224,224,2 \ - * --input_file=input_data \ - * --output_file=mace.out \ - * --model_data_file=model_data.data \ - * --device=GPU - */ #include #include #include @@ -35,9 +23,9 @@ #include "mace/public/mace.h" #include "mace/public/mace_runtime.h" // if convert model to code. +#ifdef CODE_TYPE #include "mace/codegen/engine/mace_engine_factory.h" -#include "mace/utils/env_time.h" -#include "mace/utils/logging.h" +#endif namespace mace { namespace examples { @@ -98,7 +86,7 @@ DeviceType ParseDeviceType(const std::string &device_str) { DEFINE_string(model_name, "", - "model name in yaml file"); + "model name in model deployment file"); DEFINE_string(input_node, "input_node0,input_node1", "input nodes, separated by comma"); @@ -117,15 +105,15 @@ DEFINE_string(input_file, DEFINE_string(output_file, "", "output file name | output file prefix for multiple outputs"); +DEFINE_string(opencl_binary_file, + "", + "compiled opencl binary file path"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); DEFINE_string(model_file, "", "model file name, used when load mace model in pb"); -DEFINE_string(opencl_binary_file, - "", - "compiled opencl binary file path"); DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); @@ -136,6 +124,29 @@ DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); +namespace { +bool ReadBinaryFile(std::vector *data, + const std::string &filename) { + std::ifstream ifs(filename, std::ios::in | std::ios::binary); + if (!ifs.is_open()) { + return false; + } + ifs.seekg(0, ifs.end); + size_t length = ifs.tellg(); + ifs.seekg(0, ifs.beg); + + data->reserve(length); + data->insert(data->begin(), std::istreambuf_iterator(ifs), + std::istreambuf_iterator()); + if (ifs.fail()) { + return false; + } + ifs.close(); + + return true; +} +} // namespace + bool RunModel(const std::vector &input_names, const std::vector> &input_shapes, const std::vector &output_names, @@ -155,6 +166,12 @@ bool RunModel(const std::vector &input_names, #endif // MACE_ENABLE_OPENCL if (device_type == DeviceType::GPU) { + // Just call once. (Not thread-safe) + // Set paths of Generated OpenCL Compiled Kernel Binary file + // if you build gpu library of specific soc. + // Using OpenCL binary will speed up the initialization. + // OpenCL binary is corresponding to the OpenCL Driver version, + // you should update the binary when OpenCL Driver changed. std::vector opencl_binary_paths = {FLAGS_opencl_binary_file}; mace::SetOpenCLBinaryPaths(opencl_binary_paths); } @@ -172,34 +189,33 @@ bool RunModel(const std::vector &input_names, // Create Engine std::shared_ptr engine; MaceStatus create_engine_status; - // Create Engine - MaceStatus create_engine_status; - // Create Engine - int64_t t0 = NowMicros(); - if (FLAGS_model_file != "") { - std::vector model_pb_data; - if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) { - LOG(FATAL) << "Failed to read file: " << FLAGS_model_file; - } - create_engine_status = - CreateMaceEngineFromProto(model_pb_data, - FLAGS_model_data_file, - input_names, - output_names, - device_type, - &engine); - } else { - create_engine_status = - CreateMaceEngineFromCode(FLAGS_model_name, - FLAGS_model_data_file, - input_names, - output_names, - device_type, - &engine); + // Only choose one of the two type based on the `build_type` + // in model deployment file(.yml). +#ifdef CODE_TYPE + create_engine_status = + CreateMaceEngineFromCode(FLAGS_model_name, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); +#else + std::vector model_pb_data; + if (!ReadBinaryFile(&model_pb_data, FLAGS_model_file)) { + std::cerr << "Failed to read file: " << FLAGS_model_file << std::endl; } + create_engine_status = + CreateMaceEngineFromProto(model_pb_data, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); +#endif if (create_engine_status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Create engine error, please check the arguments"; + std::cerr << "Create engine error, please check the arguments" << std::endl; + exit(1); } const size_t input_count = input_names.size(); @@ -222,7 +238,7 @@ bool RunModel(const std::vector &input_names, input_size * sizeof(float)); in_file.close(); } else { - LOG(INFO) << "Open input file failed"; + std::cout << "Open input file failed" << std::endl; return -1; } inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in); @@ -237,16 +253,17 @@ bool RunModel(const std::vector &input_names, outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out); } - LOG(INFO) << "Warm up run"; + std::cout << "Warm up run" << std::endl; engine->Run(inputs, &outputs); if (FLAGS_round > 0) { - LOG(INFO) << "Run model"; + std::cout << "Run model" << std::endl; for (int i = 0; i < FLAGS_round; ++i) { engine->Run(inputs, &outputs); } } + std::cout << "Write output" << std::endl; for (size_t i = 0; i < output_count; ++i) { std::string output_name = FLAGS_output_file + "_" + FormatName(output_names[i]); @@ -260,6 +277,7 @@ bool RunModel(const std::vector &input_names, out_file.flush(); out_file.close(); } + std::cout << "Finished" << std::endl; return true; } @@ -270,22 +288,24 @@ int Main(int argc, char **argv) { gflags::SetUsageMessage(usage); gflags::ParseCommandLineFlags(&argc, &argv, true); - LOG(INFO) << "mace version: " << MaceVersion(); - LOG(INFO) << "input node: " << FLAGS_input_node; - LOG(INFO) << "input shape: " << FLAGS_input_shape; - LOG(INFO) << "output node: " << FLAGS_output_node; - LOG(INFO) << "output shape: " << FLAGS_output_shape; - LOG(INFO) << "input_file: " << FLAGS_input_file; - LOG(INFO) << "output_file: " << FLAGS_output_file; - LOG(INFO) << "model_data_file: " << FLAGS_model_data_file; - LOG(INFO) << "model_file: " << FLAGS_model_file; - LOG(INFO) << "device: " << FLAGS_device; - LOG(INFO) << "round: " << FLAGS_round; - LOG(INFO) << "restart_round: " << FLAGS_restart_round; - LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint; - LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint; - LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads; - LOG(INFO) << "cpu_affinity_policy: " << FLAGS_cpu_affinity_policy; + std::cout << "mace version: " << MaceVersion() << std::endl; + std::cout << "input node: " << FLAGS_input_node << std::endl; + std::cout << "input shape: " << FLAGS_input_shape << std::endl; + std::cout << "output node: " << FLAGS_output_node << std::endl; + std::cout << "output shape: " << FLAGS_output_shape << std::endl; + std::cout << "input_file: " << FLAGS_input_file << std::endl; + std::cout << "output_file: " << FLAGS_output_file << std::endl; + std::cout << "model_data_file: " << FLAGS_model_data_file << std::endl; + std::cout << "model_file: " << FLAGS_model_file << std::endl; + std::cout << "device: " << FLAGS_device << std::endl; + std::cout << "round: " << FLAGS_round << std::endl; + std::cout << "restart_round: " << FLAGS_restart_round << std::endl; + std::cout << "gpu_perf_hint: " << FLAGS_gpu_perf_hint << std::endl; + std::cout << "gpu_priority_hint: " << FLAGS_gpu_priority_hint << std::endl; + std::cout << "omp_num_threads: " << FLAGS_omp_num_threads << std::endl; + std::cout << "cpu_affinity_policy: " + << FLAGS_cpu_affinity_policy + << std::endl; std::vector input_names = str_util::Split(FLAGS_input_node, ','); std::vector output_names = @@ -306,10 +326,9 @@ int Main(int argc, char **argv) { ParseShape(output_shapes[i], &output_shape_vec[i]); } - bool ret; -#pragma omp parallel for + bool ret = false; for (int i = 0; i < FLAGS_restart_round; ++i) { - VLOG(0) << "restart round " << i; + std::cout << "restart round " << i << std::endl; ret = RunModel(input_names, input_shape_vec, output_names, output_shape_vec); } diff --git a/mace/public/mace_runtime.h b/mace/public/mace_runtime.h index f353da75cc3bde108a549a535be968769d907ca0..38f8a62a9c6bb9726387a0dd1cca73a457acac51 100644 --- a/mace/public/mace_runtime.h +++ b/mace/public/mace_runtime.h @@ -80,7 +80,8 @@ class FileStorageFactory : public KVStorageFactory { void SetKVStorageFactory(std::shared_ptr storage_factory); // Just call once. (Not thread-safe) -// Set paths of OpenCL Compiled Binary file if you use gpu of specific soc. +// Set paths of Generated OpenCL Compiled Kernel Binary file (not libOpenCL.so) +// if you use gpu of specific soc. // Using OpenCL binary will speed up the initialization. // OpenCL binary is corresponding to the OpenCL Driver version, // you should update the binary when OpenCL Driver changed. diff --git a/tools/converter.py b/tools/converter.py index 8e36e9f62397b08b39aff5b6029937c362791b75..c244214d5e8ab629e52237b00e57ff6107af0a8e 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -51,8 +51,12 @@ CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" CODEGEN_BASE_DIR = 'mace/codegen' MODEL_CODEGEN_DIR = CODEGEN_BASE_DIR + '/models' LIBMACE_SO_TARGET = "//mace:libmace.so" -MACE_RUN_STATIC_TARGET = "//mace/tools/validation:mace_run_static" -MACE_RUN_SHARED_TARGET = "//mace/tools/validation:mace_run_shared" +MACE_RUN_STATIC_NAME = "mace_run_static" +MACE_RUN_SHARED_NAME = "mace_run_shared" +EXAMPLE_STATIC_NAME = "example_static" +EXAMPLE_SHARED_NAME = "example_shared" +MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME +MACE_RUN_SHARED_TARGET = "//mace/tools/validation:" + MACE_RUN_SHARED_NAME ALL_SOC_TAG = 'all' ABITypeStrs = [ @@ -696,8 +700,10 @@ def build_specific_lib(target_abi, target_soc, serial_num, sh_commands.gen_tuning_param_code(model_output_dirs) if linkshared == 0: + mace_run_name = MACE_RUN_STATIC_NAME mace_run_target = MACE_RUN_STATIC_TARGET else: + mace_run_name = MACE_RUN_SHARED_NAME mace_run_target = MACE_RUN_SHARED_TARGET sh_commands.bazel_build( LIBMACE_SO_TARGET, @@ -756,7 +762,8 @@ def build_specific_lib(target_abi, target_soc, serial_num, sh_commands.tuning_run( abi=target_abi, serialno=serial_num, - mace_run_dir=build_tmp_binary_dir, + target_dir=build_tmp_binary_dir, + target_name=mace_run_name, vlog_level=0, embed_model_data=embed_model_data, model_output_dir=model_output_dir, @@ -822,6 +829,13 @@ def build_specific_lib(target_abi, target_soc, serial_num, build_type, hexagon_mode) + # build example binary + sh_commands.build_example(target_soc, serial_num, target_abi, + library_name, BUILD_OUTPUT_DIR, + OUTPUT_LIBRARY_DIR_NAME, + build_tmp_binary_dir, build_type, + hexagon_mode, enable_openmp, linkshared) + def generate_library(configs, tuning, enable_openmp, address_sanitizer): MaceLogger.header(StringFormatter.block("Building library")) @@ -959,6 +973,17 @@ def run_specific_target(flags, configs, target_abi, ModuleName.RUN, 'You should build before run.') + if flags.example: + if linkshared == 0: + target_name = EXAMPLE_STATIC_NAME + else: + target_name = EXAMPLE_SHARED_NAME + else: + if linkshared == 0: + target_name = MACE_RUN_STATIC_NAME + else: + target_name = MACE_RUN_SHARED_NAME + for model_name in configs[YAMLKeyword.models]: if target_abi == ABIType.host: device_name = ABIType.host @@ -1009,7 +1034,8 @@ def run_specific_target(flags, configs, target_abi, run_output = sh_commands.tuning_run( abi=target_abi, serialno=serial_num, - mace_run_dir=build_tmp_binary_dir, + target_dir=build_tmp_binary_dir, + target_name=target_name, vlog_level=flags.vlog_level, embed_model_data=embed_model_data, model_output_dir=model_output_dir, @@ -1334,6 +1360,10 @@ def parse_args(): type=float, default=0.0, help="[mock runtime failure ratio].") + run.add_argument( + "--example", + action="store_true", + help="whether to run example.") benchmark = subparsers.add_parser( 'benchmark', parents=[all_type_parent_parser, run_bm_parent_parser, diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 92752c5790612791c17595b0fdb83a9fa486aee0..19e8f4da562c4fb35d92323fe0735a81ac8cbb95 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -292,7 +292,8 @@ def bazel_build(target, hexagon_mode=False, enable_openmp=True, enable_neon=True, - address_sanitizer=False): + address_sanitizer=False, + extra_args=""): print("* Build %s with ABI %s" % (target, abi)) if abi == "host": bazel_args = ( @@ -318,6 +319,8 @@ def bazel_build(target, bazel_args += ("--config", "asan") else: bazel_args += ("--config", "optimization") + if extra_args: + bazel_args += (extra_args, ) sh.bazel( _fg=True, *bazel_args) @@ -620,7 +623,8 @@ def update_libmace_shared_library(serial_num, def tuning_run(abi, serialno, - mace_run_dir, + target_dir, + target_name, vlog_level, embed_model_data, model_output_dir, @@ -658,17 +662,13 @@ def tuning_run(abi, mace_model_path = "" if build_type == BuildType.proto: mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag) - if linkshared == 0: - mace_run_target = "mace_run_static" - else: - mace_run_target = "mace_run_shared" if abi == "host": p = subprocess.Popen( [ "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, - "%s/%s" % (mace_run_dir, mace_run_target), + "%s/%s" % (target_dir, target_name), "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), @@ -731,7 +731,7 @@ def tuning_run(abi, phone_data_dir, serialno) - adb_push("%s/%s" % (mace_run_dir, mace_run_target), phone_data_dir, + adb_push("%s/%s" % (target_dir, target_name), phone_data_dir, serialno) stdout_buff = [] @@ -752,7 +752,7 @@ def tuning_run(abi, asan_rt_library_names(abi)) ]) adb_cmd.extend([ - "%s/%s" % (phone_data_dir, mace_run_target), + "%s/%s" % (phone_data_dir, target_name), "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), "--output_node=%s" % ",".join(output_nodes), @@ -926,6 +926,29 @@ def build_host_libraries(model_build_type, abi): abi=abi) +################################ +# library +################################ +def get_lib_path(target_soc, serial_num, abi, project_name, build_output_dir, + library_output_dir): + project_output_dir = "%s/%s" % (build_output_dir, project_name) + library_dir = "%s/%s" % (project_output_dir, library_output_dir) + model_bin_dir = "%s/%s/" % (library_dir, abi) + if abi == "host": + lib_path = "%s/libmace_%s.a" % \ + (model_bin_dir, project_name) + else: + if not target_soc: + lib_path = "%s/libmace_%s.a" % \ + (model_bin_dir, project_name) + else: + device_name = adb_get_device_name_by_serialno(serial_num) + lib_path = "%s/libmace_%s.%s.%s.a" % \ + (model_bin_dir, project_name, + device_name, target_soc) + return lib_path + + def merge_libs(target_soc, serial_num, abi, @@ -945,11 +968,12 @@ def merge_libs(target_soc, if hexagon_mode: sh.cp("-f", hexagon_lib_file, library_dir) + lib_path = get_lib_path(target_soc, serial_num, abi, + project_name, build_output_dir, library_output_dir) # make static library mri_stream = "" if abi == "host": - mri_stream += "create %s/libmace_%s.a\n" % \ - (model_bin_dir, project_name) + mri_stream += "create %s\n" % lib_path mri_stream += ( "addlib " "bazel-bin/mace/codegen/libgenerated_opencl.pic.a\n") @@ -982,14 +1006,7 @@ def merge_libs(target_soc, "addlib " "bazel-bin/mace/codegen/libgenerated_models.pic.a\n") else: - if not target_soc: - mri_stream += "create %s/libmace_%s.a\n" % \ - (model_bin_dir, project_name) - else: - device_name = adb_get_device_name_by_serialno(serial_num) - mri_stream += "create %s/libmace_%s.%s.%s.a\n" % \ - (model_bin_dir, project_name, - device_name, target_soc) + mri_stream += "create %s\n" % lib_path if model_build_type == BuildType.code: mri_stream += ( "addlib " @@ -1054,6 +1071,53 @@ def packaging_lib(libmace_output_dir, project_name): print("Packaging Done!\n") +################################ +# example +################################ +def build_example(target_soc, + serial_num, + abi, + project_name, + build_output_dir, + library_output_dir, + model_output_dir, + build_type, + hexagon_mode, + enable_openmp, + linkshared=False): + static_lib_name = "mace/libmace.a" + if not linkshared: + target_name = "example_static" + lib_path = get_lib_path(target_soc, serial_num, abi, project_name, + build_output_dir, library_output_dir) + sh.cp("-f", lib_path, static_lib_name) + else: + target_name = "example_shared" + example_target = "//mace/examples/cli:%s" % target_name + + if build_type == BuildType.code: + build_arg = "--per_file_copt=//mace/examples/cli:example.cc@-DCODE_TYPE" # noqa + else: + build_arg = "" + + bazel_build(example_target, + abi=abi, + enable_openmp=enable_openmp, + hexagon_mode=hexagon_mode, + extra_args=build_arg) + + example_binary_file = "%s/%s" % (model_output_dir, target_name) + if os.path.exists(example_binary_file): + sh.rm("-rf", example_binary_file) + + target_bin = "/".join(bazel_target_to_bin(example_target)) + sh.cp("-f", target_bin, model_output_dir) + sh.rm("-rf", static_lib_name) + + +################################ +# benchmark +################################ def build_benchmark_model(abi, model_output_dir, hexagon_mode,