diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9cad515c2bef7e44769ce0d452a96ed5bad7e3f0..a6f6b42fc5f3f172213bc076753dfe30f8814c8e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -112,16 +112,13 @@ model_tests: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file - - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file - CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=file --model_data_format=file --address_sanitizer - - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=file --model_data_format=file - - python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file + - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file --benchmark - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file - - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=code --model_data_format=file - - python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file + - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark - rm -rf mace-models quantization_tests: @@ -141,7 +138,6 @@ quantization_tests: do python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file || exit 1; python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1; - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --layers=0 --model_graph_format=file --model_data_format=file || exit 1; done - rm -rf mace-models only: @@ -162,7 +158,6 @@ dynamic_linking_test: fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer - - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --mace_lib_type=dynamic --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file - rm -rf mace-models only: - triggers diff --git a/CMakeLists.txt b/CMakeLists.txt index 08d70f98bf7d68c56b8e066f0357b7751e16df6e..2f60c0a7a9c9b20a15a66de07d0acf9b0e9af0ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,6 @@ option(MACE_ENABLE_HEXAGON_HTA "whether to enable Hexagon HTA support" OFF) option(MACE_ENABLE_MTK_APU "whether to enable MTK APU support" OFF) option(MACE_ENABLE_TESTS "whether to build c++ unit tests" OFF) option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF) -option(MACE_ENABLE_EXAMPLES "whether to build examples" OFF) option(MACE_ENABLE_OPT_SIZE "whether to build with optimized binary size" ON) option(MACE_ENABLE_OBFUSCATE "whether to build with code obfuscation" ON) option(MACE_ENABLE_CCACHE "whether to build with ccache" ON) @@ -131,10 +130,6 @@ include_directories("${PROJECT_BINARY_DIR}") # proto add_subdirectory(include) add_subdirectory(mace) -if(MACE_ENABLE_EXAMPLES) - add_subdirectory(examples) -endif(MACE_ENABLE_EXAMPLES) - if(MACE_ENABLE_TESTS OR MACE_ENABLE_BENCHMARKS) add_subdirectory(test) endif(MACE_ENABLE_TESTS OR MACE_ENABLE_BENCHMARKS) diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index 14c77cae38e0b6d29e2b9052ed86867be58b2f48..b86b37ef09215712bf2c5a5a20405658034b55c2 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -236,7 +236,7 @@ Convert model(s) to C++ code * **3. Deployment** * Link `libmace.a` and `${library_name}.a` to your target. - * Refer to \ ``mace/examples/example.cc``\ for full usage. The following list the key steps. + * Refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following list the key steps. .. code:: cpp @@ -404,7 +404,7 @@ the detailed information is in :doc:`benchmark`. .. code:: sh # Benchmark model, get detailed statistics of each Op. - python tools/converter.py benchmark --config=/path/to/model_deployment_file.yml + python tools/converter.py run --config=/path/to/model_deployment_file.yml --benchmark .. warning:: @@ -424,17 +424,17 @@ the detailed information is in :doc:`benchmark`. * - --omp_num_threads - int - -1 - - ``run``/``benchmark`` + - ``run`` - number of threads * - --cpu_affinity_policy - int - 1 - - ``run``/``benchmark`` + - ``run`` - 0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY * - --gpu_perf_hint - int - 3 - - ``run``/``benchmark`` + - ``run`` - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH * - --gpu_priority_hint - int @@ -449,7 +449,6 @@ Use ``-h`` to get detailed help. python tools/converter.py -h python tools/converter.py build -h python tools/converter.py run -h - python tools/converter.py benchmark -h Reduce Library Size ------------------- diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index 3ec5886ef8836671c39600d20e227f78ac2903b6..759ca760dd55be1fe6dd13ae8930a91b97f42d34 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -75,8 +75,8 @@ Here we use the mobilenet-v2 model as an example. .. code:: sh - # Run example - python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --example + # Run + python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml # Test model run time python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --round=100 @@ -233,7 +233,7 @@ to run and validate your model. .. code:: sh # Benchmark model, get detailed statistics of each Op. - python tools/converter.py benchmark --config=/path/to/your/model_deployment_file.yml + python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --benchmark ======================================= @@ -308,7 +308,7 @@ header files. └── mace_run_static -Please refer to \ ``mace/examples/example.cc``\ for full usage. The following list the key steps. +Please refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following list the key steps. .. code:: cpp diff --git a/docs/user_guide/benchmark.rst b/docs/user_guide/benchmark.rst index 7992c70f06375c9fc89ca64622a7ecf686d7ef67..f6a058952a098e41dcc3d7e49238bc4d4486aeaf 100644 --- a/docs/user_guide/benchmark.rst +++ b/docs/user_guide/benchmark.rst @@ -68,7 +68,7 @@ Usage .. code:: bash - python tools/converter.py benchmark --config=/path/to/your/model_deployment.yml + python tools/converter.py run --config=/path/to/your/model_deployment.yml --benchmark ====== Output @@ -76,29 +76,6 @@ Output .. code:: bash - I benchmark_model.cc:158 --------------------------------------------------------------------- - I benchmark_model.cc:158 Warm Up - I benchmark_model.cc:158 ---------------------------------------------------------------------- - I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std | - I benchmark_model.cc:158 ---------------------------------------------------------------------- - I benchmark_model.cc:158 | 1 | 51.481 | 51.481 | 51.481 | 51.481 | 51.481 | 0.000 | - I benchmark_model.cc:158 ---------------------------------------------------------------------- - I benchmark_model.cc:158 - I benchmark_model.cc:158 ------------------------------------------------------------------------ - I benchmark_model.cc:158 Run without statistics - I benchmark_model.cc:158 ------------------------------------------------------------------------- - I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std | - I benchmark_model.cc:158 ------------------------------------------------------------------------- - I benchmark_model.cc:158 | 100 | 30.272 | 31.390 | 29.938 | 45.966 | 30.913 | 1850.983 | - I benchmark_model.cc:158 ------------------------------------------------------------------------- - I benchmark_model.cc:158 - I benchmark_model.cc:158 ----------------------------------------------------------------------- - I benchmark_model.cc:158 Run with statistics - I benchmark_model.cc:158 ------------------------------------------------------------------------ - I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std | - I benchmark_model.cc:158 ------------------------------------------------------------------------ - I benchmark_model.cc:158 | 100 | 32.358 | 33.327 | 32.293 | 33.607 | 33.002 | 310.435 | - I benchmark_model.cc:158 ------------------------------------------------------------------------ I statistics.cc:343 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- I statistics.cc:343 Sort by Run Order I statistics.cc:343 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/docs/user_guide/quantization_usage.rst b/docs/user_guide/quantization_usage.rst index 2caecf6ef16fad87cad15ced132369ea52e8e9b3..320f16f3947ac08698ce92295e9f8f9b2d142752 100644 --- a/docs/user_guide/quantization_usage.rst +++ b/docs/user_guide/quantization_usage.rst @@ -52,7 +52,7 @@ MACE provides tools to do statistics with following steps: rename 's/^/input/' * # Run with input tensors - python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml --example + python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml --quantize_stat --input_dir /path/to/directory/of/input/tensors > range_log diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 24b69cd3ea02f2b2a63bec9e98c0797957569cbf..0000000000000000000000000000000000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -file(GLOB MACE_EXAMPLE_SRCS - cli/example.cc -) -add_executable(mace_example ${MACE_EXAMPLE_SRCS}) -target_link_libraries(mace_example PUBLIC - mace_static - gflags -) - -install(TARGETS mace_example RUNTIME DESTINATION bin) diff --git a/examples/cli/BUILD.bazel b/examples/cli/BUILD.bazel deleted file mode 100644 index ce3c1ea79b3eaca2a39b31caca7bd2d7bc058407..0000000000000000000000000000000000000000 --- a/examples/cli/BUILD.bazel +++ /dev/null @@ -1,80 +0,0 @@ -# Examples -load( - "//mace:mace.bzl", - "if_android", - "if_darwin", - "if_hexagon_enabled", - "if_hta_enabled", - "if_linux", - "if_opencl_enabled", - "if_openmp_enabled", -) - -cc_binary( - name = "example_static", - srcs = ["example.cc"], - copts = [ - "-Werror", - "-Wextra", - ] + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]), - linkopts = [ - "-lm", - "-ldl", - ] + if_linux(["-lpthread"]) + if_darwin( - ["-lpthread"], - default_value = ["-fuse-ld=gold"], - ) + if_openmp_enabled([ - "-fopenmp", - ]) + if_android([ - "-pie", - "-llog", - ]), - linkstatic = 1, - deps = [ - "//external:gflags_nothreads", - "//mace/codegen:generated_mace_engine_factory", - "//mace/codegen:generated_models", - "//mace/libmace", - ] + if_opencl_enabled([ - "//mace/codegen:generated_opencl_binary", - "//mace/codegen:generated_opencl_parameter", - ]) + if_hexagon_enabled([ - "//third_party/nnlib:libhexagon", - ]) + if_hta_enabled([ - "//third_party/hta", - ]), -) - -cc_binary( - name = "example_dynamic", - srcs = ["example.cc"], - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]), - linkopts = [ - "-lm", - "-ldl", - ] + if_linux(["-lpthread"]) + if_darwin( - ["-lpthread"], - default_value = ["-fuse-ld=gold"], - ) + if_android([ - "-pie", - "-llog", - ]), - linkstatic = 0, - deps = [ - "//external:gflags_nothreads", - "//mace/codegen:generated_mace_engine_factory", - "//mace/codegen:generated_models", - "//mace/libmace:libmace_dynamic", - ] + if_opencl_enabled([ - "//mace/codegen:generated_opencl_binary", - "//mace/codegen:generated_opencl_parameter", - ]), -) diff --git a/examples/cli/README.md b/examples/cli/README.md deleted file mode 100644 index 50e64f950e80afa1cb72199df3f68e0c0e7b518b..0000000000000000000000000000000000000000 --- a/examples/cli/README.md +++ /dev/null @@ -1,23 +0,0 @@ -Examples -======= - -* Convert model - -``` -python tools/converter.py convert --config=/path/to/your/model_deployment_file -``` - -* Run example -``` -python tools/converter.py run --config=/path/to/your/model_deployment_file --example -``` - -* Validate result -``` -python tools/converter.py run --config=/path/to/your/model_deployment_file --example --validate -``` - -* Check the logs -``` -adb logcat -``` diff --git a/examples/cli/example.cc b/examples/cli/example.cc deleted file mode 100644 index 103138447cea0ed7a54f8374e88c4cdb0e69ca92..0000000000000000000000000000000000000000 --- a/examples/cli/example.cc +++ /dev/null @@ -1,465 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gflags/gflags.h" -#include "mace/port/env.h" -#include "mace/port/file_system.h" -#include "mace/public/mace.h" -#include "mace/utils/logging.h" -#include "mace/utils/memory.h" -#include "mace/utils/string_util.h" -// if convert model to code. -#ifdef MODEL_GRAPH_FORMAT_CODE -#include "mace/codegen/engine/mace_engine_factory.h" -#endif - -#ifdef MACE_ENABLE_OPENCL -namespace mace { -const unsigned char *LoadOpenCLBinary(); -size_t OpenCLBinarySize(); -const unsigned char *LoadOpenCLParameter(); -size_t OpenCLParameterSize(); -} // namespace mace -#endif - - -namespace mace { -namespace examples { - -void ParseShape(const std::string &str, std::vector *shape) { - std::string tmp = str; - while (!tmp.empty()) { - int dim = atoi(tmp.data()); - shape->push_back(dim); - size_t next_offset = tmp.find(","); - if (next_offset == std::string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -std::string FormatName(const std::string input) { - std::string res = input; - for (size_t i = 0; i < input.size(); ++i) { - if (!isalnum(res[i])) res[i] = '_'; - } - return res; -} - -DeviceType ParseDeviceType(const std::string &device_str) { - if (device_str.compare("CPU") == 0) { - return DeviceType::CPU; - } else if (device_str.compare("GPU") == 0) { - return DeviceType::GPU; - } else if (device_str.compare("HEXAGON") == 0) { - return DeviceType::HEXAGON; - } else if (device_str.compare("HTA") == 0) { - return DeviceType::HTA; - } else { - return DeviceType::CPU; - } -} - -DataFormat ParseDataFormat(const std::string &data_format_str) { - if (data_format_str == "NHWC") { - return DataFormat::NHWC; - } else if (data_format_str == "NCHW") { - return DataFormat::NCHW; - } else if (data_format_str == "OIHW") { - return DataFormat::OIHW; - } else { - return DataFormat::NONE; - } -} - -DEFINE_string(model_name, - "", - "model name in model deployment file"); -DEFINE_string(input_node, - "", - "input nodes, separated by comma," - "example: input_node0,input_node1"); -DEFINE_string(input_shape, - "", - "input shapes, separated by colon and comma, " - "example: 1,224,224,3:1,1,1,10"); -DEFINE_string(output_node, - "output_node0,output_node1", - "output nodes, separated by comma"); -DEFINE_string(output_shape, - "", - "output shapes, separated by colon and comma, " - "example: 1,224,224,2:1,1,1,10"); -DEFINE_string(input_data_format, - "NHWC", - "input data formats, NONE|NHWC|NCHW"); -DEFINE_string(output_data_format, - "NHWC", - "output data formats, NONE|NHWC|NCHW"); -DEFINE_string(input_file, - "", - "input file name | input file prefix for multiple inputs."); -DEFINE_string(output_file, - "", - "output file name | output file prefix for multiple outputs"); -DEFINE_string(input_dir, - "", - "input directory name"); -DEFINE_string(output_dir, - "", - "output directory name"); -DEFINE_string(opencl_binary_file, - "", - "compiled opencl binary file path"); -DEFINE_string(opencl_parameter_file, - "", - "tuned OpenCL parameter file path"); -DEFINE_string(model_data_file, - "", - "model data file name, used when model_data_format == file"); -DEFINE_string(model_file, - "", - "model file name, used when load mace model in pb"); -DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); -DEFINE_int32(round, 1, "round"); -DEFINE_int32(restart_round, 1, "restart round"); -DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); -DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); -DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); - -bool RunModel(const std::vector &input_names, - const std::vector> &input_shapes, - const std::vector &input_data_formats, - const std::vector &output_names, - const std::vector> &output_shapes, - const std::vector &output_data_formats) { - // load model - DeviceType device_type = ParseDeviceType(FLAGS_device); - // configuration - // Detailed information please see mace.h - MaceStatus status; - MaceEngineConfig config(device_type); - status = config.SetCPUThreadPolicy( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_affinity_policy)); - if (status != MaceStatus::MACE_SUCCESS) { - std::cerr << "Set openmp or cpu affinity failed." << std::endl; - } -#ifdef MACE_ENABLE_OPENCL - std::shared_ptr gpu_context; - if (device_type == DeviceType::GPU) { - // DO NOT USE tmp directory. - // Please use APP's own directory and make sure the directory exists. - const char *storage_path_ptr = getenv("MACE_INTERNAL_STORAGE_PATH"); - const std::string storage_path = - std::string(storage_path_ptr == nullptr ? - "/data/local/tmp/mace_run/interior" : storage_path_ptr); - std::vector opencl_binary_paths = {FLAGS_opencl_binary_file}; - - gpu_context = GPUContextBuilder() - .SetStoragePath(storage_path) - .SetOpenCLBinaryPaths(opencl_binary_paths) - .SetOpenCLBinary(LoadOpenCLBinary(), OpenCLBinarySize()) - .SetOpenCLParameterPath(FLAGS_opencl_parameter_file) - .SetOpenCLParameter(LoadOpenCLParameter(), OpenCLParameterSize()) - .Finalize(); - - config.SetGPUContext(gpu_context); - config.SetGPUHints( - static_cast(FLAGS_gpu_perf_hint), - static_cast(FLAGS_gpu_priority_hint)); - } -#endif // MACE_ENABLE_OPENCL - - // Create Engine - std::shared_ptr engine; - MaceStatus create_engine_status; - - std::unique_ptr model_graph_data = - make_unique(); - if (FLAGS_model_file != "") { - auto fs = GetFileSystem(); - auto status = fs->NewReadOnlyMemoryRegionFromFile(FLAGS_model_file.c_str(), - &model_graph_data); - if (status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Failed to read file: " << FLAGS_model_file; - } - } - - std::unique_ptr model_weights_data = - make_unique(); - if (FLAGS_model_data_file != "") { - auto fs = GetFileSystem(); - auto status = fs->NewReadOnlyMemoryRegionFromFile( - FLAGS_model_data_file.c_str(), - &model_weights_data); - if (status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Failed to read file: " << FLAGS_model_data_file; - } - MACE_CHECK(model_weights_data->length() > 0); - } - - // Only choose one of the two type based on the `model_graph_format` - // in model deployment file(.yml). -#ifdef MODEL_GRAPH_FORMAT_CODE - // if model_data_format == code, just pass an empty string("") - // to model_data_file parameter. - create_engine_status = CreateMaceEngineFromCode( - FLAGS_model_name, - reinterpret_cast(model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); -#else - create_engine_status = CreateMaceEngineFromProto( - reinterpret_cast(model_graph_data->data()), - model_graph_data->length(), - reinterpret_cast(model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); -#endif - - if (create_engine_status != MaceStatus::MACE_SUCCESS) { - std::cerr << "Create engine error, please check the arguments first, " - << "if correct, the device may not run the model, " - << "please fall back to other strategy." - << std::endl; - exit(1); - } - - const size_t input_count = input_names.size(); - const size_t output_count = output_names.size(); - - std::map inputs; - std::map outputs; - std::map inputs_size; - for (size_t i = 0; i < input_count; ++i) { - int64_t input_size = - std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1, - std::multiplies()); - inputs_size[input_names[i]] = input_size; - // Only support float and int32 data type - auto buffer_in = std::shared_ptr(new float[input_size], - std::default_delete()); - inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in, - input_data_formats[i]); - } - - for (size_t i = 0; i < output_count; ++i) { - int64_t output_size = - std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, - std::multiplies()); - // Only support float and int32 data type - auto buffer_out = std::shared_ptr(new float[output_size], - std::default_delete()); - outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out, - output_data_formats[i]); - } - - if (!FLAGS_input_dir.empty()) { - DIR *dir_parent; - struct dirent *entry; - dir_parent = opendir(FLAGS_input_dir.c_str()); - if (dir_parent) { - while ((entry = readdir(dir_parent))) { - std::string file_name = std::string(entry->d_name); - std::string prefix = FormatName(input_names[0]); - if (file_name.find(prefix) == 0) { - std::string suffix = file_name.substr(prefix.size()); - - for (size_t i = 0; i < input_count; ++i) { - file_name = FLAGS_input_dir + "/" + FormatName(input_names[i]) - + suffix; - std::ifstream in_file(file_name, std::ios::in | std::ios::binary); - std::cout << "Read " << file_name << std::endl; - if (in_file.is_open()) { - in_file.read(reinterpret_cast( - inputs[input_names[i]].data().get()), - inputs_size[input_names[i]] * sizeof(float)); - in_file.close(); - } else { - std::cerr << "Open input file failed" << std::endl; - return -1; - } - } - engine->Run(inputs, &outputs); - - if (!FLAGS_output_dir.empty()) { - for (size_t i = 0; i < output_count; ++i) { - std::string output_name = - FLAGS_output_dir + "/" + FormatName(output_names[i]) + suffix; - std::ofstream out_file(output_name, std::ios::binary); - if (out_file.is_open()) { - int64_t output_size = - std::accumulate(output_shapes[i].begin(), - output_shapes[i].end(), - 1, - std::multiplies()); - out_file.write( - reinterpret_cast( - outputs[output_names[i]].data().get()), - output_size * sizeof(float)); - out_file.flush(); - out_file.close(); - } else { - std::cerr << "Open output file failed" << std::endl; - return -1; - } - } - } - } - } - - closedir(dir_parent); - } else { - std::cerr << "Directory " << FLAGS_input_dir << " does not exist." - << std::endl; - } - } else { - for (size_t i = 0; i < input_count; ++i) { - std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), - std::ios::in | std::ios::binary); - if (in_file.is_open()) { - in_file.read(reinterpret_cast( - inputs[input_names[i]].data().get()), - inputs_size[input_names[i]] * sizeof(float)); - in_file.close(); - } else { - std::cerr << "Open input file failed" << std::endl; - return -1; - } - } - engine->Run(inputs, &outputs); - for (size_t i = 0; i < output_count; ++i) { - std::string output_name = - FLAGS_output_file + "_" + FormatName(output_names[i]); - std::ofstream out_file(output_name, std::ios::binary); - int64_t output_size = - std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, - std::multiplies()); - if (out_file.is_open()) { - out_file.write( - reinterpret_cast(outputs[output_names[i]].data().get()), - output_size * sizeof(float)); - out_file.flush(); - out_file.close(); - } else { - std::cerr << "Open output file failed" << std::endl; - return -1; - } - } - } - - std::cout << "Finished" << std::endl; - - return true; -} - -int Main(int argc, char **argv) { - std::string usage = "example run\nusage: " + std::string(argv[0]) - + " [flags]"; - gflags::SetUsageMessage(usage); - gflags::ParseCommandLineFlags(&argc, &argv, true); - - std::cout << "mace version: " << MaceVersion() << std::endl; - std::cout << "input node: " << FLAGS_input_node << std::endl; - std::cout << "input shape: " << FLAGS_input_shape << std::endl; - std::cout << "output node: " << FLAGS_output_node << std::endl; - std::cout << "output shape: " << FLAGS_output_shape << std::endl; - std::cout << "input_file: " << FLAGS_input_file << std::endl; - std::cout << "output_file: " << FLAGS_output_file << std::endl; - std::cout << "input_dir: " << FLAGS_input_dir << std::endl; - std::cout << "output dir: " << FLAGS_output_dir << std::endl; - std::cout << "model_data_file: " << FLAGS_model_data_file << std::endl; - std::cout << "model_file: " << FLAGS_model_file << std::endl; - std::cout << "device: " << FLAGS_device << std::endl; - std::cout << "round: " << FLAGS_round << std::endl; - std::cout << "restart_round: " << FLAGS_restart_round << std::endl; - std::cout << "gpu_perf_hint: " << FLAGS_gpu_perf_hint << std::endl; - std::cout << "gpu_priority_hint: " << FLAGS_gpu_priority_hint << std::endl; - std::cout << "omp_num_threads: " << FLAGS_omp_num_threads << std::endl; - std::cout << "cpu_affinity_policy: " - << FLAGS_cpu_affinity_policy - << std::endl; - - std::vector input_names = Split(FLAGS_input_node, ','); - std::vector output_names = Split(FLAGS_output_node, ','); - std::vector input_shapes = Split(FLAGS_input_shape, ':'); - std::vector output_shapes = Split(FLAGS_output_shape, ':'); - - const size_t input_count = input_shapes.size(); - const size_t output_count = output_shapes.size(); - std::vector> input_shape_vec(input_count); - std::vector> output_shape_vec(output_count); - for (size_t i = 0; i < input_count; ++i) { - ParseShape(input_shapes[i], &input_shape_vec[i]); - } - for (size_t i = 0; i < output_count; ++i) { - ParseShape(output_shapes[i], &output_shape_vec[i]); - } - - std::vector raw_input_data_formats = - Split(FLAGS_input_data_format, ','); - std::vector raw_output_data_formats = - Split(FLAGS_output_data_format, ','); - std::vector input_data_formats(input_count); - std::vector output_data_formats(output_count); - for (size_t i = 0; i < input_count; ++i) { - input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]); - } - for (size_t i = 0; i < output_count; ++i) { - output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]); - } - - bool ret = false; - for (int i = 0; i < FLAGS_restart_round; ++i) { - std::cout << "restart round " << i << std::endl; - ret = - RunModel(input_names, input_shape_vec, input_data_formats, - output_names, output_shape_vec, output_data_formats); - } - if (ret) { - return 0; - } else { - return -1; - } -} - -} // namespace examples -} // namespace mace - -int main(int argc, char **argv) { mace::examples::Main(argc, argv); } diff --git a/mace/core/net.cc b/mace/core/net.cc index 8c301dc728f0af53137023f4d019e9a89cf3e6ce..78d40dd7f57440055eea4c48c375071db2e6bf13 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -124,6 +124,11 @@ MaceStatus SerialNet::Init() { } MaceStatus SerialNet::Run(RunMetadata *run_metadata) { + const char *profiling = getenv("MACE_OPENCL_PROFILING"); + bool + enable_opencl_profiling = + profiling != nullptr && strlen(profiling) == 1 && profiling[0] == '1'; + MACE_MEMORY_LOGGING_GUARD(); MACE_LATENCY_LOGGER(1, "Running net"); OpContext context(ws_, cpu_device_.get()); @@ -146,7 +151,8 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { if (run_metadata == nullptr) { MACE_RETURN_IF_ERROR(op->Run(&context)); } else { - if (device_type == DeviceType::CPU) { + if (device_type == DeviceType::CPU + || (device_type == DeviceType::GPU && !enable_opencl_profiling)) { call_stats.start_micros = NowMicros(); MACE_RETURN_IF_ERROR(op->Run(&context)); call_stats.end_micros = NowMicros(); diff --git a/mace/tools/validation/BUILD.bazel b/mace/tools/BUILD.bazel similarity index 59% rename from mace/tools/validation/BUILD.bazel rename to mace/tools/BUILD.bazel index 476fc15a66ec0792d657b0ad2250730ea0ff05fe..95d1f38662415e68f1a2a5898ac1081a175339db 100644 --- a/mace/tools/validation/BUILD.bazel +++ b/mace/tools/BUILD.bazel @@ -1,53 +1,55 @@ +# Benchmark # Examples load( "//mace:mace.bzl", "if_android", - "if_darwin", + "if_hexagon_enabled", "if_opencl_enabled", "if_openmp_enabled", ) +licenses(["notice"]) # Apache 2.0 + cc_binary( name = "mace_run_static", - srcs = ["mace_run.cc"], + srcs = [ + "mace_run.cc", + ], copts = [ "-Werror", "-Wextra", - ] + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]), - linkopts = if_darwin( - [], - default_value = ["-fuse-ld=gold"], - ) + if_openmp_enabled([ - "-fopenmp", - ]), + "-Wno-missing-field-initializers", + ] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]), linkstatic = 1, deps = [ "//external:gflags_nothreads", "//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_models", "//mace/libmace", + "//mace/utils", ], ) cc_binary( name = "mace_run_dynamic", - srcs = ["mace_run.cc"], + srcs = [ + "mace_run.cc", + ], copts = [ "-Werror", "-Wextra", - ] + if_opencl_enabled([ - "-DMACE_ENABLE_OPENCL", - ]), - linkopts = if_darwin( - [], - default_value = ["-fuse-ld=gold"], - ) + if_openmp_enabled([ - "-fopenmp", + "-Wno-missing-field-initializers", + ] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]), + linkopts = [ + "-lm", + ] + if_android([ + "-ldl", + "-pie", + "-llog", ]), linkstatic = 0, deps = [ + ":statistics", "//external:gflags_nothreads", "//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_models", diff --git a/mace/tools/CMakeLists.txt b/mace/tools/CMakeLists.txt index a23460711dc66033a21c2b84ef6caf1caec7c9cb..a01661430d57c54e85f266f29ff01baac4f65ad1 100644 --- a/mace/tools/CMakeLists.txt +++ b/mace/tools/CMakeLists.txt @@ -1,5 +1,5 @@ file(GLOB MACE_RUN_SRCS - validation/mace_run.cc + mace_run.cc ) add_executable(mace_run ${MACE_RUN_SRCS}) target_link_libraries(mace_run PUBLIC @@ -7,25 +7,4 @@ target_link_libraries(mace_run PUBLIC gflags ) - -file(GLOB MACE_BENCHMARK_MODEL_SRCS - benchmark/benchmark_model.cc -) -add_executable(benchmark_model ${MACE_BENCHMARK_MODEL_SRCS}) -target_link_libraries(benchmark_model PUBLIC - mace_static - gflags -) - -file(GLOB MACE_BENCHMARK_MODEL_THROUGHPUT_SRCS - benchmark/benchmark_model_throughput.cc -) -add_executable(benchmark_model_throughput ${MACE_BENCHMARK_MODEL_THROUGHPUT_SRCS}) -target_link_libraries(benchmark_model_throughput PUBLIC - mace_static - gflags -) - install(TARGETS mace_run RUNTIME DESTINATION bin) -install(TARGETS benchmark_model RUNTIME DESTINATION bin) -install(TARGETS benchmark_model_throughput RUNTIME DESTINATION bin) diff --git a/mace/tools/benchmark/BUILD.bazel b/mace/tools/benchmark/BUILD.bazel deleted file mode 100644 index b1528e62b2589a5b282646040b56feeea4d8fe0c..0000000000000000000000000000000000000000 --- a/mace/tools/benchmark/BUILD.bazel +++ /dev/null @@ -1,86 +0,0 @@ -# Benchmark -# Examples -load( - "//mace:mace.bzl", - "if_hexagon_enabled", - "if_openmp_enabled", - "if_android", - "if_opencl_enabled", -) - -licenses(["notice"]) # Apache 2.0 - -cc_binary( - name = "benchmark_model_static", - srcs = [ - "benchmark_model.cc", - ], - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]), - linkopts = if_openmp_enabled(["-fopenmp"]), - linkstatic = 1, - deps = [ - "//external:gflags_nothreads", - "//mace/codegen:generated_mace_engine_factory", - "//mace/codegen:generated_models", - "//mace/libmace", - "//mace/utils", - ], -) - -cc_binary( - name = "benchmark_model_dynamic", - srcs = [ - "benchmark_model.cc", - ], - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ] + if_android(["-DMACE_ENABLE_OPENCL"]), - linkopts = [ - "-lm", - ] + if_openmp_enabled([ - "-fopenmp", - ]) + if_android([ - "-ldl", - "-pie", - "-llog", - ]), - linkstatic = 0, - deps = [ - ":statistics", - "//external:gflags_nothreads", - "//mace/codegen:generated_mace_engine_factory", - "//mace/codegen:generated_models", - "//mace/libmace:libmace_dynamic", - ], -) - -cc_library( - name = "libmace_merged", - srcs = [ - "libmace_merged.a", - ], - visibility = ["//visibility:private"], -) - -cc_binary( - name = "model_throughput_test", - srcs = ["model_throughput_test.cc"], - copts = [ - "-Werror", - "-Wextra", - "-Wno-missing-field-initializers", - ], - linkopts = if_openmp_enabled(["-fopenmp"]), - linkstatic = 1, - deps = [ - ":libmace_merged", - "//external:gflags_nothreads", - "//mace/core", - ], -) diff --git a/mace/tools/benchmark/benchmark_model.cc b/mace/tools/benchmark/benchmark_model.cc deleted file mode 100644 index a81c74720e92f95eacb5f4b0fe6f60084c54dbb1..0000000000000000000000000000000000000000 --- a/mace/tools/benchmark/benchmark_model.cc +++ /dev/null @@ -1,401 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include // NOLINT(build/c++11) - -#include "gflags/gflags.h" -#include "mace/port/env.h" -#include "mace/port/file_system.h" -#include "mace/public/mace.h" -#include "mace/utils/logging.h" -#include "mace/utils/memory.h" -#include "mace/utils/math.h" -#include "mace/utils/statistics.h" -#ifdef MODEL_GRAPH_FORMAT_CODE -#include "mace/codegen/engine/mace_engine_factory.h" -#endif - -namespace mace { -namespace benchmark { - -void ParseShape(const std::string &str, std::vector *shape) { - std::string tmp = str; - while (!tmp.empty()) { - int dim = atoi(tmp.data()); - shape->push_back(dim); - size_t next_offset = tmp.find(","); - if (next_offset == std::string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -std::string FormatName(const std::string input) { - std::string res = input; - for (size_t i = 0; i < input.size(); ++i) { - if (!::isalnum(res[i])) res[i] = '_'; - } - return res; -} - -DeviceType ParseDeviceType(const std::string &device_str) { - if (device_str.compare("CPU") == 0) { - return DeviceType::CPU; - } else if (device_str.compare("GPU") == 0) { - return DeviceType::GPU; - } else if (device_str.compare("HEXAGON") == 0) { - return DeviceType::HEXAGON; - } else { - return DeviceType::CPU; - } -} - -DataFormat ParseDataFormat(const std::string &data_format_str) { - if (data_format_str == "NHWC") { - return DataFormat::NHWC; - } else if (data_format_str == "NCHW") { - return DataFormat::NCHW; - } else if (data_format_str == "OIHW") { - return DataFormat::OIHW; - } else { - return DataFormat::NONE; - } -} - -bool RunInference(MaceEngine *engine, - const std::map &input_infos, - std::map *output_infos, - int64_t *inference_time_us, - OpStat *statistician) { - MACE_CHECK_NOTNULL(output_infos); - RunMetadata run_metadata; - RunMetadata *run_metadata_ptr = nullptr; - if (statistician) { - run_metadata_ptr = &run_metadata; - } - - const int64_t start_time = NowMicros(); - mace::MaceStatus s = engine->Run(input_infos, output_infos, run_metadata_ptr); - const int64_t end_time = NowMicros(); - - if (s != mace::MaceStatus::MACE_SUCCESS) { - LOG(ERROR) << "Error during inference."; - return false; - } - *inference_time_us = end_time - start_time; - - if (statistician != nullptr) { - statistician->StatMetadata(run_metadata); - } - - return true; -} - -bool Run(const std::string &title, - MaceEngine *engine, - const std::map &input_infos, - std::map *output_infos, - int num_runs, - double max_time_sec, - int64_t *total_time_us, - int64_t *actual_num_runs, - OpStat *statistician) { - MACE_CHECK_NOTNULL(output_infos); - *total_time_us = 0; - - TimeInfo time_info; - - bool util_max_time = (num_runs <= 0); - for (int i = 0; util_max_time || i < num_runs; ++i) { - int64_t inference_time_us = 0; - bool s = RunInference(engine, input_infos, output_infos, - &inference_time_us, statistician); - time_info.UpdateTime(inference_time_us); - (*total_time_us) += inference_time_us; - ++(*actual_num_runs); - - if (max_time_sec > 0 && (*total_time_us / 1000000.0) > max_time_sec) { - break; - } - - if (!s) { - LOG(INFO) << "Failed on run " << i; - return s; - } - } - - std::stringstream stream(time_info.ToString(title)); - stream << std::endl; - for (std::string line; std::getline(stream, line);) { - LOG(INFO) << line; - } - return true; -} - -DEFINE_string(model_name, "", "model name in yaml"); -DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]"); -DEFINE_string(input_node, "input_node0,input_node1", - "input nodes, separated by comma"); -DEFINE_string(output_node, "output_node0,output_node1", - "output nodes, separated by comma"); -DEFINE_string(input_shape, "", "input shape, separated by colon and comma"); -DEFINE_string(output_shape, "", "output shape, separated by colon and comma"); -DEFINE_string(input_data_format, - "NHWC", - "input data formats, NONE|NHWC|NCHW"); -DEFINE_string(output_data_format, - "NHWC", - "output data formats, NONE|NHWC|NCHW"); -DEFINE_string(input_file, "", "input file name"); -DEFINE_int32(max_num_runs, 100, "max number of runs"); -DEFINE_double(max_seconds, 10.0, "max number of seconds to run"); -DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); -DEFINE_string(opencl_binary_file, - "", - "compiled opencl binary file path"); -DEFINE_string(opencl_parameter_file, - "", - "tuned OpenCL parameter file path"); -DEFINE_string(model_data_file, "", - "model data file name, used when EMBED_MODEL_DATA set to 0"); -DEFINE_string(model_file, "", - "model file name, used when load mace model in pb"); -DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); -DEFINE_int32(cpu_affinity_policy, 1, - "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); - -int Main(int argc, char **argv) { - MACE_CHECK(FLAGS_device != "HEXAGON", - "Model benchmark tool do not support DSP."); - std::string usage = "benchmark model\nusage: " + std::string(argv[0]) - + " [flags]"; - gflags::SetUsageMessage(usage); - gflags::ParseCommandLineFlags(&argc, &argv, true); - - LOG(INFO) << "Model name: [" << FLAGS_model_name << "]"; - LOG(INFO) << "Model_file: " << FLAGS_model_file; - LOG(INFO) << "Device: [" << FLAGS_device << "]"; - LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; - LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; - LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]"; - LOG(INFO) << "cpu_affinity_policy: [" << FLAGS_cpu_affinity_policy << "]"; - LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]"; - LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]"; - LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]"; - LOG(INFO) << "output shapes: [" << FLAGS_output_shape << "]"; - LOG(INFO) << "Warmup runs: [" << FLAGS_warmup_runs << "]"; - LOG(INFO) << "Num runs: [" << FLAGS_max_num_runs << "]"; - LOG(INFO) << "Max run seconds: [" << FLAGS_max_seconds << "]"; - - std::unique_ptr statistician(new OpStat()); - - std::vector input_names = Split(FLAGS_input_node, ','); - std::vector output_names = Split(FLAGS_output_node, ','); - std::vector input_shapes = Split(FLAGS_input_shape, ':'); - std::vector output_shapes = Split(FLAGS_output_shape, ':'); - - const size_t input_count = input_shapes.size(); - const size_t output_count = output_shapes.size(); - std::vector> input_shape_vec(input_count); - std::vector> output_shape_vec(output_count); - for (size_t i = 0; i < input_count; ++i) { - ParseShape(input_shapes[i], &input_shape_vec[i]); - } - for (size_t i = 0; i < output_count; ++i) { - ParseShape(output_shapes[i], &output_shape_vec[i]); - } - - std::vector raw_input_data_formats = - Split(FLAGS_input_data_format, ','); - std::vector raw_output_data_formats = - Split(FLAGS_output_data_format, ','); - std::vector input_data_formats(input_count); - std::vector output_data_formats(output_count); - for (size_t i = 0; i < input_count; ++i) { - input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]); - } - for (size_t i = 0; i < output_count; ++i) { - output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]); - } - - mace::DeviceType device_type = ParseDeviceType(FLAGS_device); - - // configuration - MaceStatus mace_status; - MaceEngineConfig config(device_type); - mace_status = config.SetCPUThreadPolicy( - FLAGS_omp_num_threads, - static_cast(FLAGS_cpu_affinity_policy)); - if (mace_status != MaceStatus::MACE_SUCCESS) { - LOG(INFO) << "Set openmp or cpu affinity failed."; - } -#ifdef MACE_ENABLE_OPENCL - std::shared_ptr gpu_context; - if (device_type == DeviceType::GPU) { - // DO NOT USE tmp directory. - // Please use APP's own directory and make sure the directory exists. - const char *storage_path_ptr = getenv("MACE_INTERNAL_STORAGE_PATH"); - const std::string storage_path = - std::string(storage_path_ptr == nullptr ? - "/data/local/tmp/mace_run/interior" : storage_path_ptr); - std::vector opencl_binary_paths = {FLAGS_opencl_binary_file}; - - gpu_context = GPUContextBuilder() - .SetStoragePath(storage_path) - .SetOpenCLBinaryPaths(opencl_binary_paths) - .SetOpenCLParameterPath(FLAGS_opencl_parameter_file) - .Finalize(); - - config.SetGPUContext(gpu_context); - config.SetGPUHints( - static_cast(FLAGS_gpu_perf_hint), - static_cast(FLAGS_gpu_priority_hint)); - } -#endif // MACE_ENABLE_OPENCL - - // Create Engine - std::shared_ptr engine; - MaceStatus create_engine_status; - // Create Engine - std::unique_ptr model_graph_data = - make_unique(); - if (FLAGS_model_file != "") { - auto fs = GetFileSystem(); - auto status = fs->NewReadOnlyMemoryRegionFromFile(FLAGS_model_file.c_str(), - &model_graph_data); - if (status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Failed to read file: " << FLAGS_model_file; - } - } - - std::unique_ptr model_weights_data = - make_unique(); - if (FLAGS_model_data_file != "") { - auto fs = GetFileSystem(); - auto status = fs->NewReadOnlyMemoryRegionFromFile( - FLAGS_model_data_file.c_str(), - &model_weights_data); - if (status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Failed to read file: " << FLAGS_model_data_file; - } - MACE_CHECK(model_weights_data->length() > 0); - } - -#ifdef MODEL_GRAPH_FORMAT_CODE - create_engine_status = CreateMaceEngineFromCode(FLAGS_model_name, - reinterpret_cast(model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); -#else - create_engine_status = CreateMaceEngineFromProto( - reinterpret_cast(model_graph_data->data()), - model_graph_data->length(), - reinterpret_cast(model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); -#endif - if (create_engine_status != MaceStatus::MACE_SUCCESS) { - LOG(FATAL) << "Create engine error, please check the arguments"; - } - - std::map inputs; - std::map outputs; - for (size_t i = 0; i < input_count; ++i) { - // only support float and int32, use char for generalization - int64_t input_size = - std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 4, - std::multiplies()); - auto buffer_in = std::shared_ptr(new char[input_size], - std::default_delete()); - // load input - std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), - std::ios::in | std::ios::binary); - if (in_file.is_open()) { - in_file.read(buffer_in.get(), input_size); - in_file.close(); - } else { - LOG(INFO) << "Open input file failed"; - return -1; - } - inputs[input_names[i]] = mace::MaceTensor(input_shape_vec[i], buffer_in, - input_data_formats[i]); - } - - for (size_t i = 0; i < output_count; ++i) { - // only support float and int32, use char for generalization - int64_t output_size = - std::accumulate(output_shape_vec[i].begin(), - output_shape_vec[i].end(), 4, - std::multiplies()); - auto buffer_out = std::shared_ptr(new char[output_size], - std::default_delete()); - outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], - buffer_out, - output_data_formats[i]); - } - - int64_t warmup_time_us = 0; - int64_t num_warmup_runs = 0; - if (FLAGS_warmup_runs > 0) { - bool status = - Run("Warm Up", engine.get(), inputs, &outputs, - FLAGS_warmup_runs, -1.0, - &warmup_time_us, &num_warmup_runs, nullptr); - if (!status) { - LOG(ERROR) << "Failed at warm up run"; - } - } - - int64_t no_stat_time_us = 0; - int64_t no_stat_runs = 0; - bool status = - Run("Run without statistics", engine.get(), inputs, &outputs, - FLAGS_max_num_runs, FLAGS_max_seconds, - &no_stat_time_us, &no_stat_runs, nullptr); - if (!status) { - LOG(ERROR) << "Failed at normal no-stat run"; - } - - int64_t stat_time_us = 0; - int64_t stat_runs = 0; - status = Run("Run with statistics", engine.get(), inputs, &outputs, - FLAGS_max_num_runs, FLAGS_max_seconds, - &stat_time_us, &stat_runs, statistician.get()); - if (!status) { - LOG(ERROR) << "Failed at normal stat run"; - } - - statistician->PrintStat(); - - return 0; -} - -} // namespace benchmark -} // namespace mace - -int main(int argc, char **argv) { mace::benchmark::Main(argc, argv); } diff --git a/mace/tools/benchmark/benchmark_model_throughput.cc b/mace/tools/benchmark/benchmark_model_throughput.cc deleted file mode 100644 index a8fcf7596da4600e88d41c6ce9c5d54777a8e91b..0000000000000000000000000000000000000000 --- a/mace/tools/benchmark/benchmark_model_throughput.cc +++ /dev/null @@ -1,391 +0,0 @@ -// Copyright 2018 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** - * Usage: - * throughput_test \ - * --input_shape=1,224,224,3 \ - * --output_shape=1,224,224,2 \ - * --input_file=input_data \ - * --cpu_model_data_file=cpu_model_data.data \ - * --gpu_model_data_file=gpu_model_data.data \ - * --dsp_model_data_file=dsp_model_data.data \ - * --run_seconds=10 - */ -#include -#include -#include -#include -#include -#include // NOLINT(build/c++11) - -#include "gflags/gflags.h" -#include "mace/public/mace.h" -#include "mace/port/env.h" -#include "mace/utils/logging.h" -#include "mace/core/types.h" - -namespace mace { - -#ifdef MACE_CPU_MODEL_TAG -namespace MACE_CPU_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelChecksum(); - -} // namespace MACE_CPU_MODEL_TAG -#endif - -#ifdef MACE_GPU_MODEL_TAG -namespace MACE_GPU_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelChecksum(); - -} // namespace MACE_GPU_MODEL_TAG -#endif - -#ifdef MACE_DSP_MODEL_TAG -namespace MACE_DSP_MODEL_TAG { - -extern const unsigned char *LoadModelData(const char *model_data_file); - -extern void UnloadModelData(const unsigned char *model_data); - -extern NetDef CreateNet(const unsigned char *model_data); - -extern const std::string ModelChecksum(); - -} // namespace MACE_DSP_MODEL_TAG -#endif - -namespace benchmark { - -void Split(const std::string &str, - char delims, - std::vector *result) { - MACE_CHECK_NOTNULL(result); - std::string tmp = str; - while (!tmp.empty()) { - size_t next_offset = tmp.find(delims); - result->push_back(tmp.substr(0, next_offset)); - if (next_offset == std::string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -void SplitAndParseToInts(const std::string &str, - char delims, - std::vector *result) { - MACE_CHECK_NOTNULL(result); - std::string tmp = str; - while (!tmp.empty()) { - index_t dim = atoi(tmp.data()); - result->push_back(dim); - size_t next_offset = tmp.find(delims); - if (next_offset == std::string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -void ParseShape(const std::string &str, std::vector *shape) { - std::string tmp = str; - while (!tmp.empty()) { - index_t dim = atoi(tmp.data()); - shape->push_back(dim); - size_t next_offset = tmp.find(","); - if (next_offset == std::string::npos) { - break; - } else { - tmp = tmp.substr(next_offset + 1); - } - } -} - -std::string FormatName(const std::string input) { - std::string res = input; - for (size_t i = 0; i < input.size(); ++i) { - if (!::isalnum(res[i])) res[i] = '_'; - } - return res; -} - -DeviceType ParseDeviceType(const std::string &device_str) { - if (device_str.compare("CPU") == 0) { - return DeviceType::CPU; - } else if (device_str.compare("GPU") == 0) { - return DeviceType::GPU; - } else if (device_str.compare("HEXAGON") == 0) { - return DeviceType::HEXAGON; - } else { - return DeviceType::CPU; - } -} - -DEFINE_string(input_node, "input_node0,input_node1", - "input nodes, separated by comma"); -DEFINE_string(output_node, "output_node0,output_node1", - "output nodes, separated by comma"); -DEFINE_string(input_shape, "1,224,224,3", "input shape, separated by comma"); -DEFINE_string(output_shape, "1,224,224,2", "output shape, separated by comma"); -DEFINE_string(input_file, "", "input file name"); -DEFINE_string(cpu_model_data_file, "", "cpu model data file name"); -DEFINE_string(gpu_model_data_file, "", "gpu model data file name"); -DEFINE_string(dsp_model_data_file, "", "dsp model data file name"); -DEFINE_int32(run_seconds, 10, "run seconds"); - -int Main(int argc, char **argv) { - std::string usage = "model throughput test\nusage: " + std::string(argv[0]) - + " [flags]"; - gflags::SetUsageMessage(usage); - gflags::ParseCommandLineFlags(&argc, &argv, true); - - LOG(INFO) << "mace version: " << MaceVersion(); -#ifdef MACE_CPU_MODEL_TAG - LOG(INFO) << "cpu model checksum: " - << mace::MACE_CPU_MODEL_TAG::ModelChecksum(); -#endif -#ifdef MACE_GPU_MODEL_TAG - LOG(INFO) << "gpu model checksum: " - << mace::MACE_GPU_MODEL_TAG::ModelChecksum(); -#endif -#ifdef MACE_DSP_MODEL_TAG - LOG(INFO) << "dsp model checksum: " - << mace::MACE_DSP_MODEL_TAG::ModelChecksum(); -#endif - LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]"; - LOG(INFO) << "input_shape: " << FLAGS_input_shape; - LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]"; - LOG(INFO) << "output_shape: " << FLAGS_output_shape; - LOG(INFO) << "input_file: " << FLAGS_input_file; - LOG(INFO) << "cpu_model_data_file: " << FLAGS_cpu_model_data_file; - LOG(INFO) << "gpu_model_data_file: " << FLAGS_gpu_model_data_file; - LOG(INFO) << "dsp_model_data_file: " << FLAGS_dsp_model_data_file; - LOG(INFO) << "run_seconds: " << FLAGS_run_seconds; - - std::vector input_names; - std::vector output_names; - std::vector input_shapes; - std::vector output_shapes; - Split(FLAGS_input_node, ',', &input_names); - Split(FLAGS_output_node, ',', &output_names); - Split(FLAGS_input_shape, ':', &input_shapes); - Split(FLAGS_output_shape, ':', &output_shapes); - - const size_t input_count = input_shapes.size(); - const size_t output_count = output_shapes.size(); - std::vector> input_shape_vec(input_count); - std::vector> output_shape_vec(output_count); - for (size_t i = 0; i < input_count; ++i) { - ParseShape(input_shapes[i], &input_shape_vec[i]); - } - for (size_t i = 0; i < output_count; ++i) { - ParseShape(output_shapes[i], &output_shape_vec[i]); - } - - std::map inputs; - std::map cpu_outputs; - std::map gpu_outputs; - std::map dsp_outputs; - for (size_t i = 0; i < input_count; ++i) { - // Allocate input and output - int64_t input_size = - std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 1, - std::multiplies()); - auto buffer_in = std::shared_ptr(new float[input_size], - std::default_delete()); - // load input - std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), - std::ios::in | std::ios::binary); - if (in_file.is_open()) { - in_file.read(reinterpret_cast(buffer_in.get()), - input_size * sizeof(float)); - in_file.close(); - } else { - LOG(FATAL) << "Open input file failed"; - } - inputs[input_names[i]] = mace::MaceTensor(input_shape_vec[i], buffer_in); - } - - for (size_t i = 0; i < output_count; ++i) { - int64_t output_size = - std::accumulate(output_shape_vec[i].begin(), - output_shape_vec[i].end(), 1, - std::multiplies()); - auto buffer_out = std::shared_ptr(new float[output_size], - std::default_delete()); - cpu_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], - buffer_out); - gpu_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], - buffer_out); - dsp_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], - buffer_out); - } - -#if defined(MACE_CPU_MODEL_TAG) || \ - defined(MACE_GPU_MODEL_TAG) || \ - defined(MACE_DSP_MODEL_TAG) - int64_t t0, t1, init_micros; -#endif - -#ifdef MACE_CPU_MODEL_TAG - /* --------------------- CPU init ----------------------- */ - LOG(INFO) << "Load & init cpu model and warm up"; - const unsigned char *cpu_model_data = - mace::MACE_CPU_MODEL_TAG::LoadModelData( - FLAGS_cpu_model_data_file.c_str()); - NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(cpu_model_data); - - mace::MaceEngine cpu_engine(&cpu_net_def, DeviceType::CPU, input_names, - output_names); - - LOG(INFO) << "CPU Warm up run"; - t0 = NowMicros(); - cpu_engine.Run(inputs, &cpu_outputs); - t1 = NowMicros(); - LOG(INFO) << "CPU 1st warm up run latency: " << t1 - t0 << " us"; -#endif - -#ifdef MACE_GPU_MODEL_TAG - /* --------------------- GPU init ----------------------- */ - LOG(INFO) << "Load & init gpu model and warm up"; - const unsigned char *gpu_model_data = - mace::MACE_GPU_MODEL_TAG::LoadModelData( - FLAGS_gpu_model_data_file.c_str()); - NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data); - - mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::GPU, input_names, - output_names); - mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data); - - LOG(INFO) << "GPU Warm up run"; - t0 = NowMicros(); - gpu_engine.Run(inputs, &gpu_outputs); - t1 = NowMicros(); - LOG(INFO) << "GPU 1st warm up run latency: " << t1 - t0 << " us"; -#endif - -#ifdef MACE_DSP_MODEL_TAG - /* --------------------- DSP init ----------------------- */ - LOG(INFO) << "Load & init dsp model and warm up"; - const unsigned char *dsp_model_data = - mace::MACE_DSP_MODEL_TAG::LoadModelData( - FLAGS_dsp_model_data_file.c_str()); - NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet(dsp_model_data); - - mace::MaceEngine dsp_engine(&dsp_net_def, DeviceType::HEXAGON, input_names, - output_names); - mace::MACE_DSP_MODEL_TAG::UnloadModelData(dsp_model_data); - - LOG(INFO) << "DSP Warm up run"; - t0 = NowMicros(); - dsp_engine.Run(inputs, &dsp_outputs); - t1 = NowMicros(); - LOG(INFO) << "DSP 1st warm up run latency: " << t1 - t0 << " us"; -#endif - -#if defined(MACE_CPU_MODEL_TAG) || \ - defined(MACE_GPU_MODEL_TAG) || \ - defined(MACE_DSP_MODEL_TAG) - double cpu_throughput = 0; - double gpu_throughput = 0; - double dsp_throughput = 0; - int64_t run_micros = FLAGS_run_seconds * 1000000; -#endif - -#ifdef MACE_CPU_MODEL_TAG - std::thread cpu_thread([&]() { - int64_t frames = 0; - int64_t micros = 0; - int64_t start = NowMicros(); - for (; micros < run_micros; ++frames) { - cpu_engine.Run(inputs, &cpu_outputs); - int64_t end = NowMicros(); - micros = end - start; - } - cpu_throughput = frames * 1000000.0 / micros; - }); -#endif - -#ifdef MACE_GPU_MODEL_TAG - std::thread gpu_thread([&]() { - int64_t frames = 0; - int64_t micros = 0; - int64_t start = NowMicros(); - for (; micros < run_micros; ++frames) { - gpu_engine.Run(inputs, &gpu_outputs); - int64_t end = NowMicros(); - micros = end - start; - } - gpu_throughput = frames * 1000000.0 / micros; - }); -#endif - -#ifdef MACE_DSP_MODEL_TAG - std::thread dsp_thread([&]() { - int64_t frames = 0; - int64_t micros = 0; - int64_t start = NowMicros(); - for (; micros < run_micros; ++frames) { - dsp_engine.Run(inputs, &dsp_outputs); - int64_t end = NowMicros(); - micros = end - start; - } - dsp_throughput = frames * 1000000.0 / micros; - }); -#endif - - double total_throughput = 0; - -#ifdef MACE_CPU_MODEL_TAG - cpu_thread.join(); - LOG(INFO) << "CPU throughput: " << cpu_throughput << " f/s"; - total_throughput += cpu_throughput; -#endif -#ifdef MACE_GPU_MODEL_TAG - gpu_thread.join(); - LOG(INFO) << "GPU throughput: " << gpu_throughput << " f/s"; - total_throughput += gpu_throughput; -#endif -#ifdef MACE_DSP_MODEL_TAG - dsp_thread.join(); - LOG(INFO) << "DSP throughput: " << dsp_throughput << " f/s"; - total_throughput += dsp_throughput; -#endif - - LOG(INFO) << "Total throughput: " << total_throughput << " f/s"; - - return 0; -} - -} // namespace benchmark -} // namespace mace - -int main(int argc, char **argv) { mace::benchmark::Main(argc, argv); } diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/mace_run.cc similarity index 96% rename from mace/tools/validation/mace_run.cc rename to mace/tools/mace_run.cc index 01ee3fb1d6c1aef548981ac52adc9ab406a1964f..f43e38d8c2657fafaba62cb5fe4a991bb69eaf0f 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/mace_run.cc @@ -38,6 +38,7 @@ #include "mace/utils/logging.h" #include "mace/utils/memory.h" #include "mace/utils/string_util.h" +#include "mace/utils/statistics.h" #ifdef MODEL_GRAPH_FORMAT_CODE #include "mace/codegen/engine/mace_engine_factory.h" @@ -45,7 +46,6 @@ namespace mace { namespace tools { -namespace validation { void ParseShape(const std::string &str, std::vector *shape) { std::string tmp = str; @@ -124,7 +124,6 @@ DEFINE_string(input_file, DEFINE_string(output_file, "", "output file name | output file prefix for multiple outputs"); -// TODO(liyin): support batch validation DEFINE_string(input_dir, "", "input directory name"); @@ -152,6 +151,7 @@ DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, "0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); +DEFINE_bool(benchmark, false, "enable benchmark op"); bool RunModel(const std::string &model_name, const std::vector &input_names, @@ -352,6 +352,7 @@ bool RunModel(const std::string &model_name, } double model_run_millis = -1; + benchmark::OpStat op_stat; if (FLAGS_round > 0) { LOG(INFO) << "Run model"; int64_t total_run_duration = 0; @@ -364,9 +365,15 @@ bool RunModel(const std::string &model_name, info_log.get(), MakeString(i)); } MaceStatus run_status; + RunMetadata metadata; + RunMetadata *metadata_ptr = nullptr; + if (FLAGS_benchmark) { + metadata_ptr = &metadata; + } + while (true) { int64_t t0 = NowMicros(); - run_status = engine->Run(inputs, &outputs); + run_status = engine->Run(inputs, &outputs, metadata_ptr); if (run_status != MaceStatus::MACE_SUCCESS) { LOG(ERROR) << "Mace run model runtime error, retry ... errcode: " << run_status.information(); @@ -399,6 +406,9 @@ bool RunModel(const std::string &model_name, } else { int64_t t1 = NowMicros(); total_run_duration += (t1 - t0); + if (FLAGS_benchmark) { + op_stat.StatMetadata(metadata); + } break; } } @@ -407,14 +417,6 @@ bool RunModel(const std::string &model_name, LOG(INFO) << "Average latency: " << model_run_millis << " ms"; } - // Metrics reporting tools depends on the format, keep in consistent - printf("========================================================\n"); - printf(" capability(CPU) init warmup run_avg\n"); - printf("========================================================\n"); - printf("time %15.3f %11.3f %11.3f %11.3f\n", - cpu_capability, init_millis, warmup_millis, model_run_millis); - - for (size_t i = 0; i < output_count; ++i) { std::string output_name = FLAGS_output_file + "_" + FormatName(output_names[i]); @@ -431,6 +433,16 @@ bool RunModel(const std::string &model_name, << output_size << " done."; } + // Metrics reporting tools depends on the format, keep in consistent + printf("========================================================\n"); + printf(" capability(CPU) init warmup run_avg\n"); + printf("========================================================\n"); + printf("time %15.3f %11.3f %11.3f %11.3f\n", + cpu_capability, init_millis, warmup_millis, model_run_millis); + if (FLAGS_benchmark) { + op_stat.PrintStat(); + } + return true; } @@ -448,6 +460,10 @@ int Main(int argc, char **argv) { return 0; } + if (FLAGS_benchmark) { + setenv("MACE_OPENCL_PROFILING", "1", 1); + } + LOG(INFO) << "model name: " << FLAGS_model_name; LOG(INFO) << "mace version: " << MaceVersion(); LOG(INFO) << "input node: " << FLAGS_input_node; @@ -517,8 +533,9 @@ int Main(int argc, char **argv) { return -1; } -} // namespace validation } // namespace tools } // namespace mace -int main(int argc, char **argv) { mace::tools::validation::Main(argc, argv); } +int main(int argc, char **argv) { + mace::tools::Main(argc, argv); +} diff --git a/tools/common.py b/tools/common.py index a7a3cfdb882c662f25aa6006295b585ed655424c..a45bf37a645f4c78a90b16df54d2bc7304044b64 100644 --- a/tools/common.py +++ b/tools/common.py @@ -447,14 +447,10 @@ BUILD_TMP_DIR_NAME = '_tmp' BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads' BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general' MODEL_OUTPUT_DIR_NAME = 'model' -EXAMPLE_STATIC_NAME = "example_static" -EXAMPLE_DYNAMIC_NAME = "example_dynamic" -EXAMPLE_STATIC_TARGET = "//examples/cli:" + EXAMPLE_STATIC_NAME -EXAMPLE_DYNAMIC_TARGET = "//examples/cli:" + EXAMPLE_DYNAMIC_NAME MACE_RUN_STATIC_NAME = "mace_run_static" MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic" -MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME -MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME +MACE_RUN_STATIC_TARGET = "//mace/tools:" + MACE_RUN_STATIC_NAME +MACE_RUN_DYNAMIC_TARGET = "//mace/tools:" + MACE_RUN_DYNAMIC_NAME CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin" BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin' LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so" @@ -474,11 +470,6 @@ LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static" LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a" MODEL_LIB_TARGET = "//mace/codegen:generated_models" MODEL_LIB_PATH = "bazel-bin/mace/codegen/libgenerated_models.a" -QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat" -BM_MODEL_STATIC_NAME = "benchmark_model_static" -BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic" -BM_MODEL_STATIC_TARGET = "//mace/tools/benchmark:" + BM_MODEL_STATIC_NAME -BM_MODEL_DYNAMIC_TARGET = "//mace/tools/benchmark:" + BM_MODEL_DYNAMIC_NAME ################################ @@ -508,7 +499,6 @@ class ModuleName(object): YAML_CONFIG = 'YAML CONFIG' MODEL_CONVERTER = 'Model Converter' RUN = 'RUN' - BENCHMARK = 'Benchmark' ################################# diff --git a/tools/converter.py b/tools/converter.py index aca556aedcc01c7c6bd9f78acb9c883030071918..4a0aed7b4649caac632e19983223653fa1fff6f3 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -891,7 +891,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, mace_check(os.path.exists(ENGINE_CODEGEN_DIR), ModuleName.RUN, "You should convert model first.") - build_arg = "--per_file_copt=mace/tools/validation/mace_run.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa + build_arg = "--per_file_copt=mace/tools/mace_run.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa sh_commands.bazel_build( mace_run_target, @@ -912,86 +912,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp, mace_lib_type == MACELibType.dynamic) -def build_example(configs, target_abi, toolchain, enable_openmp, mace_lib_type, - cl_binary_to_code, device, debug_mode): - library_name = configs[YAMLKeyword.library_name] - - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - if os.path.exists(build_tmp_binary_dir): - sh.rm("-rf", build_tmp_binary_dir) - os.makedirs(build_tmp_binary_dir) - - if cl_binary_to_code: - sh_commands.gen_opencl_binary_cpps( - get_opencl_binary_output_path( - library_name, target_abi, device), - get_opencl_parameter_output_path( - library_name, target_abi, device), - OPENCL_CODEGEN_DIR + '/opencl_binary.cc', - OPENCL_CODEGEN_DIR + '/opencl_parameter.cc') - else: - sh_commands.gen_opencl_binary_cpps( - "", "", - OPENCL_CODEGEN_DIR + '/opencl_binary.cc', - OPENCL_CODEGEN_DIR + '/opencl_parameter.cc') - - libmace_target = LIBMACE_STATIC_TARGET - if mace_lib_type == MACELibType.dynamic: - libmace_target = LIBMACE_SO_TARGET - - sh_commands.bazel_build(libmace_target, - abi=target_abi, - toolchain=toolchain, - enable_openmp=enable_openmp, - enable_opencl=get_opencl_mode(configs), - enable_quantize=get_quantize_mode(configs), - enable_hexagon=get_hexagon_mode(configs), - enable_hta=get_hta_mode(configs), - enable_apu=get_apu_mode(configs), - address_sanitizer=flags.address_sanitizer, - symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type), # noqa - debug_mode=debug_mode) - - if os.path.exists(LIB_CODEGEN_DIR): - sh.rm("-rf", LIB_CODEGEN_DIR) - sh.mkdir("-p", LIB_CODEGEN_DIR) - - build_arg = "" - if configs[YAMLKeyword.model_graph_format] == ModelFormat.code: - mace_check(os.path.exists(ENGINE_CODEGEN_DIR), - ModuleName.RUN, - "You should convert model first.") - model_lib_path = get_model_lib_output_path(library_name, - target_abi) - sh.cp("-f", model_lib_path, LIB_CODEGEN_DIR) - build_arg = "--per_file_copt=examples/cli/example.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa - - if mace_lib_type == MACELibType.dynamic: - example_target = EXAMPLE_DYNAMIC_TARGET - sh.cp("-f", LIBMACE_DYNAMIC_PATH, LIB_CODEGEN_DIR) - else: - example_target = EXAMPLE_STATIC_TARGET - sh.cp("-f", LIBMACE_STATIC_PATH, LIB_CODEGEN_DIR) - - sh_commands.bazel_build(example_target, - abi=target_abi, - toolchain=toolchain, - enable_openmp=enable_openmp, - enable_opencl=get_opencl_mode(configs), - enable_quantize=get_quantize_mode(configs), - enable_hexagon=get_hexagon_mode(configs), - enable_hta=get_hta_mode(configs), - enable_apu=get_apu_mode(configs), - address_sanitizer=flags.address_sanitizer, - debug_mode=debug_mode, - extra_args=build_arg) - - target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target)) - sh.cp("-f", target_bin, build_tmp_binary_dir) - if os.path.exists(LIB_CODEGEN_DIR): - sh.rm("-rf", LIB_CODEGEN_DIR) - - def print_package_summary(package_path): title = "Library" header = ["key", "value"] @@ -1024,23 +944,13 @@ def run_mace(flags): # get toolchain toolchain = infer_toolchain(target_abi) device = DeviceWrapper(dev) - if flags.example: - build_example(configs, - target_abi, - toolchain, - flags.enable_openmp, - flags.mace_lib_type, - flags.cl_binary_to_code, - device, - flags.debug_mode) - else: - build_mace_run(configs, - target_abi, - toolchain, - flags.enable_openmp, - flags.address_sanitizer, - flags.mace_lib_type, - flags.debug_mode) + build_mace_run(configs, + target_abi, + toolchain, + flags.enable_openmp, + flags.address_sanitizer, + flags.mace_lib_type, + flags.debug_mode) # run start_time = time.time() with device.lock(): @@ -1058,90 +968,6 @@ def run_mace(flags): print_package_summary(package_path) -################################ -# benchmark model -################################ -def build_benchmark_model(configs, - target_abi, - toolchain, - enable_openmp, - mace_lib_type, - debug_mode): - library_name = configs[YAMLKeyword.library_name] - - link_dynamic = mace_lib_type == MACELibType.dynamic - if link_dynamic: - benchmark_target = BM_MODEL_DYNAMIC_TARGET - else: - benchmark_target = BM_MODEL_STATIC_TARGET - - build_arg = "" - if configs[YAMLKeyword.model_graph_format] == ModelFormat.code: - mace_check(os.path.exists(ENGINE_CODEGEN_DIR), - ModuleName.BENCHMARK, - "You should convert model first.") - build_arg = "--per_file_copt=mace/tools/benchmark/benchmark_model.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa - - sh_commands.bazel_build(benchmark_target, - abi=target_abi, - toolchain=toolchain, - enable_openmp=enable_openmp, - enable_opencl=get_opencl_mode(configs), - enable_quantize=get_quantize_mode(configs), - enable_hexagon=get_hexagon_mode(configs), - enable_hta=get_hta_mode(configs), - enable_apu=get_apu_mode(configs), - symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type), # noqa - debug_mode=debug_mode, - extra_args=build_arg) - # clear tmp binary dir - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - if os.path.exists(build_tmp_binary_dir): - sh.rm("-rf", build_tmp_binary_dir) - os.makedirs(build_tmp_binary_dir) - - target_bin = "/".join(sh_commands.bazel_target_to_bin(benchmark_target)) - sh.cp("-f", target_bin, build_tmp_binary_dir) - - -def benchmark_model(flags): - configs = format_model_config(flags) - - clear_build_dirs(configs[YAMLKeyword.library_name]) - - target_socs = configs[YAMLKeyword.target_socs] - device_list = DeviceManager.list_devices(flags.device_yml) - if target_socs and TargetSOCTag.all not in target_socs: - device_list = [dev for dev in device_list - if dev[YAMLKeyword.target_socs].lower() in target_socs] - for target_abi in configs[YAMLKeyword.target_abis]: - if flags.target_socs == TargetSOCTag.random: - target_devices = sh_commands.choose_a_random_device( - device_list, target_abi) - else: - target_devices = device_list - # build benchmark_model binary - for dev in target_devices: - if target_abi in dev[YAMLKeyword.target_abis]: - toolchain = infer_toolchain(target_abi) - build_benchmark_model(configs, - target_abi, - toolchain, - flags.enable_openmp, - flags.mace_lib_type, - flags.debug_mode) - device = DeviceWrapper(dev) - start_time = time.time() - with device.lock(): - device.bm_specific_target(flags, configs, target_abi) - elapse_minutes = (time.time() - start_time) / 60 - print("Elapse time: %f minutes." % elapse_minutes) - else: - six.print_('There is no abi %s with soc %s' % - (target_abi, dev[YAMLKeyword.target_socs]), - file=sys.stderr) - - ################################ # parsing arguments ################################ @@ -1210,60 +1036,61 @@ def parse_args(): '--address_sanitizer', action="store_true", help="Whether to use address sanitizer to check memory error") - run_bm_parent_parser = argparse.ArgumentParser(add_help=False) - run_bm_parent_parser.add_argument( + + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers() + convert = subparsers.add_parser( + 'convert', + parents=[all_type_parent_parser, convert_run_parent_parser], + help='convert to mace model (file or code)') + convert.add_argument( + "--cl_mem_type", + type=str, + default=None, + help="Which type of OpenCL memory type to use [image | buffer].") + convert.set_defaults(func=convert_func) + + run = subparsers.add_parser( + 'run', + parents=[all_type_parent_parser, + convert_run_parent_parser], + help='run model in command line') + run.set_defaults(func=run_mace) + run.add_argument( "--mace_lib_type", type=str_to_mace_lib_type, default=DefaultValues.mace_lib_type, help="[static | dynamic], Which type MACE library to use.") - run_bm_parent_parser.add_argument( + run.add_argument( "--enable_openmp", action="store_true", help="Enable openmp for multiple thread.") - run_bm_parent_parser.add_argument( + run.add_argument( "--omp_num_threads", type=int, default=DefaultValues.omp_num_threads, help="num of openmp threads") - run_bm_parent_parser.add_argument( + run.add_argument( "--cpu_affinity_policy", type=int, default=DefaultValues.cpu_affinity_policy, help="0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY") - run_bm_parent_parser.add_argument( + run.add_argument( "--gpu_perf_hint", type=int, default=DefaultValues.gpu_perf_hint, help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") - run_bm_parent_parser.add_argument( + run.add_argument( "--gpu_priority_hint", type=int, default=DefaultValues.gpu_priority_hint, help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH") - run_bm_parent_parser.add_argument( + run.add_argument( "--device_yml", type=str, default='', help='embedded linux device config yml file' ) - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers() - convert = subparsers.add_parser( - 'convert', - parents=[all_type_parent_parser, convert_run_parent_parser], - help='convert to mace model (file or code)') - convert.add_argument( - "--cl_mem_type", - type=str, - default=None, - help="Which type of OpenCL memory type to use [image | buffer].") - convert.set_defaults(func=convert_func) - run = subparsers.add_parser( - 'run', - parents=[all_type_parent_parser, run_bm_parent_parser, - convert_run_parent_parser], - help='run model in command line') - run.set_defaults(func=run_mace) run.add_argument( "--disable_tuning", action="store_true", @@ -1318,10 +1145,6 @@ def parse_args(): type=float, default=0.0, help="[mock runtime failure ratio].") - run.add_argument( - "--example", - action="store_true", - help="whether to run example.") run.add_argument( "--quantize_stat", action="store_true", @@ -1340,21 +1163,10 @@ def parse_args(): "--cl_binary_to_code", action="store_true", help="convert OpenCL binaries to cpp.") - benchmark = subparsers.add_parser( - 'benchmark', - parents=[all_type_parent_parser, run_bm_parent_parser], - help='benchmark model for detail information') - benchmark.set_defaults(func=benchmark_model) - benchmark.add_argument( - "--max_num_runs", - type=int, - default=100, - help="max number of runs.") - benchmark.add_argument( - "--max_seconds", - type=float, - default=10.0, - help="max number of seconds to run.") + run.add_argument( + "--benchmark", + action="store_true", + help="enable op benchmark.") return parser.parse_known_args() diff --git a/tools/device.py b/tools/device.py index 5706196990826dfc04a47896ac65a9105ce4c699..66d8d1e6adc7dcf288f81d03be920f7cf40e5213 100644 --- a/tools/device.py +++ b/tools/device.py @@ -186,6 +186,7 @@ class DeviceWrapper: link_dynamic=False, quantize_stat=False, layers_validate_file="", + benchmark=False, ): six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, " "out_of_range_check=%s, omp_num_threads=%s, " @@ -343,6 +344,9 @@ class DeviceWrapper: "--opencl_parameter_file=%s/%s" % (self.data_dir, os.path.basename(opencl_parameter_file)), ]) + if benchmark: + cmd.append("--benchmark=%s" % benchmark) + cmd = ' '.join(cmd) cmd_file_name = "%s-%s-%s" % ('cmd_file', model_tag, @@ -473,16 +477,10 @@ class DeviceWrapper: build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) # get target name for run mace_lib_type = flags.mace_lib_type - if flags.example: - if mace_lib_type == MACELibType.static: - target_name = EXAMPLE_STATIC_NAME - else: - target_name = EXAMPLE_DYNAMIC_NAME + if mace_lib_type == MACELibType.static: + target_name = MACE_RUN_STATIC_NAME else: - if mace_lib_type == MACELibType.static: - target_name = MACE_RUN_STATIC_NAME - else: - target_name = MACE_RUN_DYNAMIC_NAME + target_name = MACE_RUN_DYNAMIC_NAME link_dynamic = mace_lib_type == MACELibType.dynamic if target_abi != ABIType.host: @@ -557,7 +555,8 @@ class DeviceWrapper: input_dir=flags.input_dir, output_dir=flags.output_dir, layers_validate_file=output_config[ - YAMLKeyword.model_file_path] + YAMLKeyword.model_file_path], + benchmark=flags.benchmark, ) def get_output_map(self, @@ -621,7 +620,6 @@ class DeviceWrapper: tuning = False if not flags.address_sanitizer \ - and not flags.example \ and target_abi != ABIType.host \ and (configs[YAMLKeyword.target_socs] or flags.target_socs) \ @@ -859,254 +857,6 @@ class DeviceWrapper: with open(report_filename, 'a') as f: f.write(data_str) - def benchmark_model(self, - abi, - benchmark_binary_dir, - benchmark_binary_name, - vlog_level, - embed_model_data, - model_output_dir, - mace_model_dir, - input_nodes, - output_nodes, - input_shapes, - output_shapes, - input_data_formats, - output_data_formats, - max_num_runs, - max_seconds, - model_tag, - device_type, - model_graph_format, - opencl_binary_file, - opencl_parameter_file, - libmace_dynamic_library_path, - omp_num_threads=-1, - cpu_affinity_policy=1, - gpu_perf_hint=3, - gpu_priority_hint=3, - input_file_name='model_input', - link_dynamic=False): - six.print_('* Benchmark for %s' % model_tag) - mace_model_path = '' - if model_graph_format == ModelFormat.file: - mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag) - - model_data_file = "" - if not embed_model_data: - if self.system == SystemType.host: - model_data_file = "%s/%s.data" % (mace_model_dir, model_tag) - else: - model_data_file = "%s/%s.data" % (self.data_dir, model_tag) - - if abi == ABIType.host: - libmace_dynamic_lib_dir_path = \ - os.path.dirname(libmace_dynamic_library_path) - p = subprocess.Popen( - [ - 'env', - 'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path, - 'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level, - '%s/%s' % (benchmark_binary_dir, benchmark_binary_name), - '--model_name=%s' % model_tag, - '--input_node=%s' % ','.join(input_nodes), - '--output_node=%s' % ','.join(output_nodes), - '--input_shape=%s' % ':'.join(input_shapes), - '--output_shape=%s' % ':'.join(output_shapes), - "--input_data_format=%s" % ",".join(input_data_formats), - "--output_data_format=%s" % ",".join(output_data_formats), - '--input_file=%s/%s' % (model_output_dir, input_file_name), - "--model_data_file=%s" % model_data_file, - '--max_num_runs=%d' % max_num_runs, - '--max_seconds=%f' % max_seconds, - '--device=%s' % device_type, - '--omp_num_threads=%s' % omp_num_threads, - '--cpu_affinity_policy=%s' % cpu_affinity_policy, - '--gpu_perf_hint=%s' % gpu_perf_hint, - '--gpu_priority_hint=%s' % gpu_priority_hint, - '--model_file=%s' % mace_model_path - ]) - p.wait() - elif self.system in [SystemType.android, SystemType.arm_linux]: - self.exec_command('mkdir -p %s' % self.data_dir) - internal_storage_dir = self.create_internal_storage_dir() - for input_name in input_nodes: - formatted_name = formatted_file_name(input_file_name, - input_name) - self.push('%s/%s' % (model_output_dir, formatted_name), - self.data_dir) - if not embed_model_data: - self.push('%s/%s.data' % (mace_model_dir, model_tag), - self.data_dir) - if device_type == common.DeviceType.GPU: - if os.path.exists(opencl_binary_file): - self.push(opencl_binary_file, self.data_dir) - if os.path.exists(opencl_parameter_file): - self.push(opencl_parameter_file, self.data_dir) - mace_model_device_path = '' - if model_graph_format == ModelFormat.file: - mace_model_device_path = '%s/%s.pb' % \ - (self.data_dir, model_tag) - self.push(mace_model_path, mace_model_device_path) - if link_dynamic: - self.push(libmace_dynamic_library_path, self.data_dir) - if self.system == SystemType.android: - sh_commands.push_depended_so_libs( - libmace_dynamic_library_path, abi, self.data_dir, - self.address) - self.rm('%s/%s' % (self.data_dir, benchmark_binary_name)) - self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name), - self.data_dir) - - cmd = [ - 'LD_LIBRARY_PATH=%s' % self.data_dir, - 'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level, - 'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir, - 'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir, - 'MACE_OPENCL_PROFILING=1', - '%s/%s' % (self.data_dir, benchmark_binary_name), - '--model_name=%s' % model_tag, - '--input_node=%s' % ','.join(input_nodes), - '--output_node=%s' % ','.join(output_nodes), - '--input_shape=%s' % ':'.join(input_shapes), - '--output_shape=%s' % ':'.join(output_shapes), - "--input_data_format=%s" % ",".join(input_data_formats), - "--output_data_format=%s" % ",".join(output_data_formats), - '--input_file=%s/%s' % (self.data_dir, input_file_name), - "--model_data_file=%s" % model_data_file, - '--max_num_runs=%d' % max_num_runs, - '--max_seconds=%f' % max_seconds, - '--device=%s' % device_type, - '--omp_num_threads=%s' % omp_num_threads, - '--cpu_affinity_policy=%s' % cpu_affinity_policy, - '--gpu_perf_hint=%s' % gpu_perf_hint, - '--gpu_priority_hint=%s' % gpu_priority_hint, - '--model_file=%s' % mace_model_device_path, - '--opencl_binary_file=%s/%s' % - (self.data_dir, os.path.basename(opencl_binary_file)), - '--opencl_parameter_file=%s/%s' % - (self.data_dir, os.path.basename(opencl_parameter_file)) - ] - - cmd = ' '.join(cmd) - cmd_file_name = '%s-%s-%s' % \ - ('cmd_file', model_tag, str(time.time())) - - cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name) - tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name) - with open(tmp_cmd_file, 'w') as f: - f.write(cmd) - self.push(tmp_cmd_file, cmd_file_path) - os.remove(tmp_cmd_file) - - if self.system == SystemType.android: - sh.adb('-s', self.address, 'shell', 'sh', cmd_file_path, - _fg=True) - elif self.system == SystemType.arm_linux: - sh.ssh('%s@%s' % (self.username, self.address), - 'sh', cmd_file_path, _fg=True) - self.rm(cmd_file_path) - six.print_('Benchmark done! \n') - - def bm_specific_target(self, flags, configs, target_abi): - library_name = configs[YAMLKeyword.library_name] - embed_model_data = \ - configs[YAMLKeyword.model_data_format] == ModelFormat.code - opencl_output_bin_path = '' - opencl_parameter_path = '' - link_dynamic = flags.mace_lib_type == MACELibType.dynamic - - if link_dynamic: - bm_model_binary_name = BM_MODEL_DYNAMIC_NAME - else: - bm_model_binary_name = BM_MODEL_STATIC_NAME - build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) - if (configs[YAMLKeyword.target_socs] or flags.target_socs)\ - and target_abi != ABIType.host: - opencl_output_bin_path = get_opencl_binary_output_path( - library_name, target_abi, self - ) - opencl_parameter_path = get_opencl_parameter_output_path( - library_name, target_abi, self - ) - - for model_name in configs[YAMLKeyword.models]: - check_model_converted(library_name, - model_name, - configs[YAMLKeyword.model_graph_format], - configs[YAMLKeyword.model_data_format], - target_abi) - MaceLogger.header( - StringFormatter.block( - 'Benchmark model %s on %s' % (model_name, - self.device_name))) - model_config = configs[YAMLKeyword.models][model_name] - model_runtime = model_config[YAMLKeyword.runtime] - subgraphs = model_config[YAMLKeyword.subgraphs] - - model_output_base_dir, model_output_dir, mace_model_dir = \ - get_build_model_dirs(library_name, model_name, - target_abi, self, - model_config[YAMLKeyword.model_file_path]) - if os.path.exists(model_output_dir): - sh.rm('-rf', model_output_dir) - os.makedirs(model_output_dir) - - if target_abi != ABIType.host: - self.clear_data_dir() - sh_commands.gen_input( - model_output_dir, - subgraphs[0][YAMLKeyword.input_tensors], - subgraphs[0][YAMLKeyword.input_shapes], - subgraphs[0][YAMLKeyword.validation_inputs_data], - input_ranges=subgraphs[0][YAMLKeyword.input_ranges], - input_data_types=subgraphs[0][YAMLKeyword.input_data_types] - ) - runtime_list = [] - if target_abi == ABIType.host: - runtime_list.append(RuntimeType.cpu) - elif model_runtime == RuntimeType.cpu_gpu: - runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu]) - else: - runtime_list.append(model_runtime) - for runtime in runtime_list: - device_type = parse_device_type(runtime) - if not subgraphs[0][YAMLKeyword.check_tensors]: - output_nodes = subgraphs[0][YAMLKeyword.output_tensors] - output_shapes = subgraphs[0][YAMLKeyword.output_shapes] - else: - output_nodes = subgraphs[0][YAMLKeyword.check_tensors] - output_shapes = subgraphs[0][YAMLKeyword.check_shapes] - self.benchmark_model( - abi=target_abi, - benchmark_binary_dir=build_tmp_binary_dir, - benchmark_binary_name=bm_model_binary_name, - vlog_level=0, - embed_model_data=embed_model_data, - model_output_dir=model_output_dir, - input_nodes=subgraphs[0][YAMLKeyword.input_tensors], - output_nodes=output_nodes, - input_shapes=subgraphs[0][YAMLKeyword.input_shapes], - output_shapes=output_shapes, - input_data_formats=subgraphs[0][ - YAMLKeyword.input_data_formats], - output_data_formats=subgraphs[0][ - YAMLKeyword.output_data_formats], - max_num_runs=flags.max_num_runs, - max_seconds=flags.max_seconds, - mace_model_dir=mace_model_dir, - model_tag=model_name, - device_type=device_type, - model_graph_format=configs[YAMLKeyword.model_graph_format], - omp_num_threads=flags.omp_num_threads, - cpu_affinity_policy=flags.cpu_affinity_policy, - gpu_perf_hint=flags.gpu_perf_hint, - gpu_priority_hint=flags.gpu_priority_hint, - opencl_binary_file=opencl_output_bin_path, - opencl_parameter_file=opencl_parameter_path, - libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH, - link_dynamic=link_dynamic) - def run(self, abi, host_bin_path, diff --git a/tools/sh_commands.py b/tools/sh_commands.py index e9d051b3c718e1621e00c7160567944c29b940ae..1b69feb9bb8e190ea7a082e58313534cc2902403 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -661,10 +661,10 @@ def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False): if os.path.exists(mace_run_filepath): sh.rm("-rf", mace_run_filepath) if link_dynamic: - sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_dynamic", + sh.cp("-f", "bazel-bin/mace/tools/mace_run_dynamic", build_tmp_binary_dir) else: - sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_static", + sh.cp("-f", "bazel-bin/mace/tools/mace_run_static", build_tmp_binary_dir) @@ -865,120 +865,3 @@ def packaging_lib(libmace_output_dir, project_name): _fg=True) six.print_("Packaging Done!\n") return tar_package_path - - -################################ -# benchmark -################################ -def build_run_throughput_test(abi, - serialno, - vlog_level, - run_seconds, - merged_lib_file, - model_input_dir, - embed_model_data, - input_nodes, - output_nodes, - input_shapes, - output_shapes, - cpu_model_tag, - gpu_model_tag, - dsp_model_tag, - apu_model_tag, - phone_data_dir, - strip="always", - input_file_name="model_input"): - six.print_("* Build and run throughput_test") - - model_tag_build_flag = "" - if cpu_model_tag: - model_tag_build_flag += "--copt=-DMACE_CPU_MODEL_TAG=%s " % \ - cpu_model_tag - if gpu_model_tag: - model_tag_build_flag += "--copt=-DMACE_GPU_MODEL_TAG=%s " % \ - gpu_model_tag - if dsp_model_tag: - model_tag_build_flag += "--copt=-DMACE_DSP_MODEL_TAG=%s " % \ - dsp_model_tag - if apu_model_tag: - model_tag_build_flag += "--copt=-DMACE_APU_MODEL_TAG=%s " % \ - apu_model_tag - sh.cp("-f", merged_lib_file, "mace/benchmark/libmace_merged.a") - sh.bazel( - "build", - "-c", - "opt", - "--strip", - strip, - "--verbose_failures", - "//mace/benchmark:model_throughput_test", - "--crosstool_top=//external:android/crosstool", - "--host_crosstool_top=@bazel_tools//tools/cpp:toolchain", - "--cpu=%s" % abi, - "--copt=-std=c++11", - "--copt=-D_GLIBCXX_USE_C99_MATH_TR1", - "--copt=-Werror=return-type", - "--copt=-O3", - "--define", - "neon=true", - "--define", - "openmp=true", - model_tag_build_flag, - _fg=True) - - sh.rm("mace/benchmark/libmace_merged.a") - sh.adb("-s", - serialno, - "shell", - "mkdir", - "-p", - phone_data_dir) - adb_push("%s/%s_%s" % (model_input_dir, input_file_name, - ",".join(input_nodes)), - phone_data_dir, - serialno) - adb_push("bazel-bin/mace/benchmark/model_throughput_test", - phone_data_dir, - serialno) - if not embed_model_data: - adb_push("codegen/models/%s/%s.data" % cpu_model_tag, - phone_data_dir, - serialno) - adb_push("codegen/models/%s/%s.data" % gpu_model_tag, - phone_data_dir, - serialno) - adb_push("codegen/models/%s/%s.data" % dsp_model_tag, - phone_data_dir, - serialno) - - adb_push("third_party/nnlib/%s/libhexagon_controller.so" % abi, - phone_data_dir, - serialno) - if apu_model_tag: - adb_push("third_party/apu/libapu-frontend.so", - phone_data_dir, - serialno) - sh.adb( - "-s", - serialno, - "shell", - "LD_LIBRARY_PATH=%s" % phone_data_dir, - "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, - "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % - phone_data_dir, - "%s/model_throughput_test" % phone_data_dir, - "--input_node=%s" % ",".join(input_nodes), - "--output_node=%s" % ",".join(output_nodes), - "--input_shape=%s" % ":".join(input_shapes), - "--output_shape=%s" % ":".join(output_shapes), - "--input_file=%s/%s" % (phone_data_dir, input_file_name), - "--cpu_model_data_file=%s/%s.data" % (phone_data_dir, - cpu_model_tag), - "--gpu_model_data_file=%s/%s.data" % (phone_data_dir, - gpu_model_tag), - "--dsp_model_data_file=%s/%s.data" % (phone_data_dir, - dsp_model_tag), - "--run_seconds=%s" % run_seconds, - _fg=True) - - six.print_("throughput_test done!\n")