Commit c3f3df53 authored by liyin

Merge benchmark and run.

Parent b159cb1f
......@@ -112,16 +112,13 @@ model_tests:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,arm64 --validate --model_graph_format=file --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file
- CONF_FILE=mace-models/mobilenet-v2/mobilenet-v2-host.yml
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=file --model_data_format=file --address_sanitizer
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=file --model_data_format=file
- python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=file --model_data_format=file --benchmark
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --example --round=1 --validate --model_graph_format=code --model_data_format=file
- python tools/converter.py benchmark --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark
- rm -rf mace-models
quantization_tests:
......@@ -141,7 +138,6 @@ quantization_tests:
do
python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --layers=0 --model_graph_format=file --model_data_format=file || exit 1;
done
- rm -rf mace-models
only:
......@@ -162,7 +158,6 @@ dynamic_linking_test:
fi
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=file --model_data_format=file --cl_mem_type=buffer
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --device_yml=${DEVICE_CONF_FILE} --example --mace_lib_type=dynamic --target_abis=armeabi-v7a,arm64 --round=1 --validate --model_graph_format=file --model_data_format=file
- rm -rf mace-models
only:
- triggers
......@@ -12,7 +12,6 @@ option(MACE_ENABLE_HEXAGON_HTA "whether to enable Hexagon HTA support" OFF)
option(MACE_ENABLE_MTK_APU "whether to enable MTK APU support" OFF)
option(MACE_ENABLE_TESTS "whether to build c++ unit tests" OFF)
option(MACE_ENABLE_BENCHMARKS "whether to build c++ micro benchmarks" OFF)
option(MACE_ENABLE_EXAMPLES "whether to build examples" OFF)
option(MACE_ENABLE_OPT_SIZE "whether to build with optimized binary size" ON)
option(MACE_ENABLE_OBFUSCATE "whether to build with code obfuscation" ON)
option(MACE_ENABLE_CCACHE "whether to build with ccache" ON)
......@@ -131,10 +130,6 @@ include_directories("${PROJECT_BINARY_DIR}") # proto
add_subdirectory(include)
add_subdirectory(mace)
if(MACE_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif(MACE_ENABLE_EXAMPLES)
if(MACE_ENABLE_TESTS OR MACE_ENABLE_BENCHMARKS)
add_subdirectory(test)
endif(MACE_ENABLE_TESTS OR MACE_ENABLE_BENCHMARKS)
......@@ -236,7 +236,7 @@ Convert model(s) to C++ code
* **3. Deployment**
* Link `libmace.a` and `${library_name}.a` to your target.
* Refer to \ ``mace/examples/example.cc``\ for full usage. The following lists the key steps.
* Refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following lists the key steps.
.. code:: cpp
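// (The body of this block is elided by the hunk. A minimal sketch of the
// key steps, assuming the engine API from mace/public/mace.h as used by
// mace_run.cc elsewhere in this commit; the model buffer variables are
// placeholders.)
// 1. Configure the engine for a device.
mace::MaceEngineConfig config(mace::DeviceType::CPU);
// 2. Create the engine from a .pb graph plus weights (or from generated
//    code via CreateMaceEngineFromCode).
std::shared_ptr<mace::MaceEngine> engine;
mace::MaceStatus status = mace::CreateMaceEngineFromProto(
    model_graph_data, model_graph_size,      // placeholder buffers
    model_weights_data, model_weights_size,  // placeholder buffers
    input_names, output_names, config, &engine);
// 3. Wrap input/output buffers in MaceTensor objects and run.
std::map<std::string, mace::MaceTensor> inputs, outputs;
engine->Run(inputs, &outputs);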
......@@ -404,7 +404,7 @@ the detailed information is in :doc:`benchmark`.
.. code:: sh
# Benchmark model, get detailed statistics of each Op.
python tools/converter.py benchmark --config=/path/to/model_deployment_file.yml
python tools/converter.py run --config=/path/to/model_deployment_file.yml --benchmark
.. warning::
......@@ -424,17 +424,17 @@ the detailed information is in :doc:`benchmark`.
* - --omp_num_threads
- int
- -1
- ``run``/``benchmark``
- ``run``
- number of threads
* - --cpu_affinity_policy
- int
- 1
- ``run``/``benchmark``
- ``run``
- 0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY
* - --gpu_perf_hint
- int
- 3
- ``run``/``benchmark``
- ``run``
- 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
* - --gpu_priority_hint
- int
......@@ -449,7 +449,6 @@ Use ``-h`` to get detailed help.
python tools/converter.py -h
python tools/converter.py build -h
python tools/converter.py run -h
python tools/converter.py benchmark -h
Reduce Library Size
-------------------
......
......@@ -75,8 +75,8 @@ Here we use the mobilenet-v2 model as an example.
.. code:: sh
# Run example
python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --example
# Run
python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml
# Test model run time
python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --round=100
......@@ -233,7 +233,7 @@ to run and validate your model.
.. code:: sh
# Benchmark model, get detailed statistics of each Op.
python tools/converter.py benchmark --config=/path/to/your/model_deployment_file.yml
python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --benchmark
=======================================
......@@ -308,7 +308,7 @@ header files.
└── mace_run_static
Please refer to \ ``mace/examples/example.cc``\ for full usage. The following lists the key steps.
Please refer to \ ``mace/tools/mace_run.cc``\ for full usage. The following lists the key steps.
.. code:: cpp
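// (The body of this block is elided by the hunk. A sketch of feeding one
// input and reading one output with MaceTensor, following the usage in
// example.cc and mace_run.cc shown in this commit; the tensor names and
// shapes are placeholders.)
int64_t input_size =
    std::accumulate(input_shape.begin(), input_shape.end(), int64_t{1},
                    std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size],
                                        std::default_delete<float[]>());
inputs[input_name] = mace::MaceTensor(input_shape, buffer_in);
engine->Run(inputs, &outputs);
const float *result = outputs[output_name].data().get();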
......
......@@ -68,7 +68,7 @@ Usage
.. code:: bash
python tools/converter.py benchmark --config=/path/to/your/model_deployment.yml
python tools/converter.py run --config=/path/to/your/model_deployment.yml --benchmark
======
Output
......@@ -76,29 +76,6 @@ Output
.. code:: bash
I benchmark_model.cc:158 ---------------------------------------------------------------------
I benchmark_model.cc:158 Warm Up
I benchmark_model.cc:158 ----------------------------------------------------------------------
I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std |
I benchmark_model.cc:158 ----------------------------------------------------------------------
I benchmark_model.cc:158 | 1 | 51.481 | 51.481 | 51.481 | 51.481 | 51.481 | 0.000 |
I benchmark_model.cc:158 ----------------------------------------------------------------------
I benchmark_model.cc:158
I benchmark_model.cc:158 ------------------------------------------------------------------------
I benchmark_model.cc:158 Run without statistics
I benchmark_model.cc:158 -------------------------------------------------------------------------
I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std |
I benchmark_model.cc:158 -------------------------------------------------------------------------
I benchmark_model.cc:158 | 100 | 30.272 | 31.390 | 29.938 | 45.966 | 30.913 | 1850.983 |
I benchmark_model.cc:158 -------------------------------------------------------------------------
I benchmark_model.cc:158
I benchmark_model.cc:158 -----------------------------------------------------------------------
I benchmark_model.cc:158 Run with statistics
I benchmark_model.cc:158 ------------------------------------------------------------------------
I benchmark_model.cc:158 | round | first(ms) | curr(ms) | min(ms) | max(ms) | avg(ms) | std |
I benchmark_model.cc:158 ------------------------------------------------------------------------
I benchmark_model.cc:158 | 100 | 32.358 | 33.327 | 32.293 | 33.607 | 33.002 | 310.435 |
I benchmark_model.cc:158 ------------------------------------------------------------------------
I statistics.cc:343 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
I statistics.cc:343 Sort by Run Order
I statistics.cc:343 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
......
......@@ -52,7 +52,7 @@ MACE provides tools to do statistics with following steps:
rename 's/^/input/' *
# Run with input tensors
python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml --example
python tools/converter.py run --config ../mace-models/inception-v3/inception-v3.yml
--quantize_stat --input_dir /path/to/directory/of/input/tensors > range_log
......
file(GLOB MACE_EXAMPLE_SRCS
cli/example.cc
)
add_executable(mace_example ${MACE_EXAMPLE_SRCS})
target_link_libraries(mace_example PUBLIC
mace_static
gflags
)
install(TARGETS mace_example RUNTIME DESTINATION bin)
# Examples
load(
"//mace:mace.bzl",
"if_android",
"if_darwin",
"if_hexagon_enabled",
"if_hta_enabled",
"if_linux",
"if_opencl_enabled",
"if_openmp_enabled",
)
cc_binary(
name = "example_static",
srcs = ["example.cc"],
copts = [
"-Werror",
"-Wextra",
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-lm",
"-ldl",
] + if_linux(["-lpthread"]) + if_darwin(
["-lpthread"],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
]) + if_android([
"-pie",
"-llog",
]),
linkstatic = 1,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/libmace",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
]) + if_hexagon_enabled([
"//third_party/nnlib:libhexagon",
]) + if_hta_enabled([
"//third_party/hta",
]),
)
cc_binary(
name = "example_dynamic",
srcs = ["example.cc"],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-lm",
"-ldl",
] + if_linux(["-lpthread"]) + if_darwin(
["-lpthread"],
default_value = ["-fuse-ld=gold"],
) + if_android([
"-pie",
"-llog",
]),
linkstatic = 0,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/libmace:libmace_dynamic",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
]),
)
Examples
=======
* Convert model
```
python tools/converter.py convert --config=/path/to/your/model_deployment_file
```
* Run example
```
python tools/converter.py run --config=/path/to/your/model_deployment_file --example
```
* Validate result
```
python tools/converter.py run --config=/path/to/your/model_deployment_file --example --validate
```
* Check the logs
```
adb logcat
```
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <numeric>
#include "gflags/gflags.h"
#include "mace/port/env.h"
#include "mace/port/file_system.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include "mace/utils/memory.h"
#include "mace/utils/string_util.h"
// Used when the model graph is converted to code.
#ifdef MODEL_GRAPH_FORMAT_CODE
#include "mace/codegen/engine/mace_engine_factory.h"
#endif
#ifdef MACE_ENABLE_OPENCL
namespace mace {
const unsigned char *LoadOpenCLBinary();
size_t OpenCLBinarySize();
const unsigned char *LoadOpenCLParameter();
size_t OpenCLParameterSize();
} // namespace mace
#endif
namespace mace {
namespace examples {
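// Parse a comma-separated shape string into dimensions,
// e.g. "1,224,224,3" -> {1, 224, 224, 3}.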
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
std::string tmp = str;
while (!tmp.empty()) {
int dim = atoi(tmp.data());
shape->push_back(dim);
size_t next_offset = tmp.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
std::string FormatName(const std::string input) {
std::string res = input;
for (size_t i = 0; i < input.size(); ++i) {
if (!isalnum(res[i])) res[i] = '_';
}
return res;
}
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else if (device_str.compare("HTA") == 0) {
return DeviceType::HTA;
} else {
return DeviceType::CPU;
}
}
DataFormat ParseDataFormat(const std::string &data_format_str) {
if (data_format_str == "NHWC") {
return DataFormat::NHWC;
} else if (data_format_str == "NCHW") {
return DataFormat::NCHW;
} else if (data_format_str == "OIHW") {
return DataFormat::OIHW;
} else {
return DataFormat::NONE;
}
}
DEFINE_string(model_name,
"",
"model name in model deployment file");
DEFINE_string(input_node,
"",
"input nodes, separated by comma,"
"example: input_node0,input_node1");
DEFINE_string(input_shape,
"",
"input shapes, separated by colon and comma, "
"example: 1,224,224,3:1,1,1,10");
DEFINE_string(output_node,
"output_node0,output_node1",
"output nodes, separated by comma");
DEFINE_string(output_shape,
"",
"output shapes, separated by colon and comma, "
"example: 1,224,224,2:1,1,1,10");
DEFINE_string(input_data_format,
"NHWC",
"input data formats, NONE|NHWC|NCHW");
DEFINE_string(output_data_format,
"NHWC",
"output data formats, NONE|NHWC|NCHW");
DEFINE_string(input_file,
"",
"input file name | input file prefix for multiple inputs.");
DEFINE_string(output_file,
"",
"output file name | output file prefix for multiple outputs");
DEFINE_string(input_dir,
"",
"input directory name");
DEFINE_string(output_dir,
"",
"output directory name");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(opencl_parameter_file,
"",
"tuned OpenCL parameter file path");
DEFINE_string(model_data_file,
"",
"model data file name, used when model_data_format == file");
DEFINE_string(model_file,
"",
"model file name, used when load mace model in pb");
DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
bool RunModel(const std::vector<std::string> &input_names,
const std::vector<std::vector<int64_t>> &input_shapes,
const std::vector<DataFormat> &input_data_formats,
const std::vector<std::string> &output_names,
const std::vector<std::vector<int64_t>> &output_shapes,
const std::vector<DataFormat> &output_data_formats) {
// load model
DeviceType device_type = ParseDeviceType(FLAGS_device);
// configuration
// Detailed information please see mace.h
MaceStatus status;
MaceEngineConfig config(device_type);
status = config.SetCPUThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy>(FLAGS_cpu_affinity_policy));
if (status != MaceStatus::MACE_SUCCESS) {
std::cerr << "Set openmp or cpu affinity failed." << std::endl;
}
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context;
if (device_type == DeviceType::GPU) {
// DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists.
const char *storage_path_ptr = getenv("MACE_INTERNAL_STORAGE_PATH");
const std::string storage_path =
std::string(storage_path_ptr == nullptr ?
"/data/local/tmp/mace_run/interior" : storage_path_ptr);
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(opencl_binary_paths)
.SetOpenCLBinary(LoadOpenCLBinary(), OpenCLBinarySize())
.SetOpenCLParameterPath(FLAGS_opencl_parameter_file)
.SetOpenCLParameter(LoadOpenCLParameter(), OpenCLParameterSize())
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
// Create Engine
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
std::unique_ptr<mace::port::ReadOnlyMemoryRegion> model_graph_data =
make_unique<mace::port::ReadOnlyBufferMemoryRegion>();
if (FLAGS_model_file != "") {
auto fs = GetFileSystem();
auto status = fs->NewReadOnlyMemoryRegionFromFile(FLAGS_model_file.c_str(),
&model_graph_data);
if (status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
}
}
std::unique_ptr<mace::port::ReadOnlyMemoryRegion> model_weights_data =
make_unique<mace::port::ReadOnlyBufferMemoryRegion>();
if (FLAGS_model_data_file != "") {
auto fs = GetFileSystem();
auto status = fs->NewReadOnlyMemoryRegionFromFile(
FLAGS_model_data_file.c_str(),
&model_weights_data);
if (status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_data_file;
}
MACE_CHECK(model_weights_data->length() > 0);
}
// Choose only one of the two, depending on `model_graph_format`
// in the model deployment file (.yml).
#ifdef MODEL_GRAPH_FORMAT_CODE
// if model_data_format == code, just pass an empty string("")
// to model_data_file parameter.
create_engine_status = CreateMaceEngineFromCode(
FLAGS_model_name,
reinterpret_cast<const unsigned char *>(model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#else
create_engine_status = CreateMaceEngineFromProto(
reinterpret_cast<const unsigned char *>(model_graph_data->data()),
model_graph_data->length(),
reinterpret_cast<const unsigned char *>(model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#endif
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
std::cerr << "Create engine error, please check the arguments first, "
<< "if correct, the device may not run the model, "
<< "please fall back to other strategy."
<< std::endl;
exit(1);
}
const size_t input_count = input_names.size();
const size_t output_count = output_names.size();
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
std::map<std::string, int64_t> inputs_size;
for (size_t i = 0; i < input_count; ++i) {
int64_t input_size =
std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
std::multiplies<int64_t>());
inputs_size[input_names[i]] = input_size;
// Only support float and int32 data type
auto buffer_in = std::shared_ptr<float>(new float[input_size],
std::default_delete<float[]>());
inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in,
input_data_formats[i]);
}
for (size_t i = 0; i < output_count; ++i) {
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
std::multiplies<int64_t>());
// Only support float and int32 data type
auto buffer_out = std::shared_ptr<float>(new float[output_size],
std::default_delete<float[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out,
output_data_formats[i]);
}
if (!FLAGS_input_dir.empty()) {
DIR *dir_parent;
struct dirent *entry;
dir_parent = opendir(FLAGS_input_dir.c_str());
if (dir_parent) {
while ((entry = readdir(dir_parent))) {
std::string file_name = std::string(entry->d_name);
std::string prefix = FormatName(input_names[0]);
if (file_name.find(prefix) == 0) {
std::string suffix = file_name.substr(prefix.size());
for (size_t i = 0; i < input_count; ++i) {
file_name = FLAGS_input_dir + "/" + FormatName(input_names[i])
+ suffix;
std::ifstream in_file(file_name, std::ios::in | std::ios::binary);
std::cout << "Read " << file_name << std::endl;
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(
inputs[input_names[i]].data().get()),
inputs_size[input_names[i]] * sizeof(float));
in_file.close();
} else {
std::cerr << "Open input file failed" << std::endl;
return false;
}
}
engine->Run(inputs, &outputs);
if (!FLAGS_output_dir.empty()) {
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_dir + "/" + FormatName(output_names[i]) + suffix;
std::ofstream out_file(output_name, std::ios::binary);
if (out_file.is_open()) {
int64_t output_size =
std::accumulate(output_shapes[i].begin(),
output_shapes[i].end(),
1,
std::multiplies<int64_t>());
out_file.write(
reinterpret_cast<char *>(
outputs[output_names[i]].data().get()),
output_size * sizeof(float));
out_file.flush();
out_file.close();
} else {
std::cerr << "Open output file failed" << std::endl;
return false;
}
}
}
}
}
closedir(dir_parent);
} else {
std::cerr << "Directory " << FLAGS_input_dir << " does not exist."
<< std::endl;
}
} else {
for (size_t i = 0; i < input_count; ++i) {
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary);
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(
inputs[input_names[i]].data().get()),
inputs_size[input_names[i]] * sizeof(float));
in_file.close();
} else {
std::cerr << "Open input file failed" << std::endl;
return false;
}
}
engine->Run(inputs, &outputs);
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]);
std::ofstream out_file(output_name, std::ios::binary);
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
std::multiplies<int64_t>());
if (out_file.is_open()) {
out_file.write(
reinterpret_cast<char *>(outputs[output_names[i]].data().get()),
output_size * sizeof(float));
out_file.flush();
out_file.close();
} else {
std::cerr << "Open output file failed" << std::endl;
return false;
}
}
}
std::cout << "Finished" << std::endl;
return true;
}
int Main(int argc, char **argv) {
std::string usage = "example run\nusage: " + std::string(argv[0])
+ " [flags]";
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "mace version: " << MaceVersion() << std::endl;
std::cout << "input node: " << FLAGS_input_node << std::endl;
std::cout << "input shape: " << FLAGS_input_shape << std::endl;
std::cout << "output node: " << FLAGS_output_node << std::endl;
std::cout << "output shape: " << FLAGS_output_shape << std::endl;
std::cout << "input_file: " << FLAGS_input_file << std::endl;
std::cout << "output_file: " << FLAGS_output_file << std::endl;
std::cout << "input_dir: " << FLAGS_input_dir << std::endl;
std::cout << "output dir: " << FLAGS_output_dir << std::endl;
std::cout << "model_data_file: " << FLAGS_model_data_file << std::endl;
std::cout << "model_file: " << FLAGS_model_file << std::endl;
std::cout << "device: " << FLAGS_device << std::endl;
std::cout << "round: " << FLAGS_round << std::endl;
std::cout << "restart_round: " << FLAGS_restart_round << std::endl;
std::cout << "gpu_perf_hint: " << FLAGS_gpu_perf_hint << std::endl;
std::cout << "gpu_priority_hint: " << FLAGS_gpu_priority_hint << std::endl;
std::cout << "omp_num_threads: " << FLAGS_omp_num_threads << std::endl;
std::cout << "cpu_affinity_policy: "
<< FLAGS_cpu_affinity_policy
<< std::endl;
std::vector<std::string> input_names = Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = Split(FLAGS_output_node, ',');
std::vector<std::string> input_shapes = Split(FLAGS_input_shape, ':');
std::vector<std::string> output_shapes = Split(FLAGS_output_shape, ':');
const size_t input_count = input_shapes.size();
const size_t output_count = output_shapes.size();
std::vector<std::vector<int64_t>> input_shape_vec(input_count);
std::vector<std::vector<int64_t>> output_shape_vec(output_count);
for (size_t i = 0; i < input_count; ++i) {
ParseShape(input_shapes[i], &input_shape_vec[i]);
}
for (size_t i = 0; i < output_count; ++i) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
std::vector<std::string> raw_input_data_formats =
Split(FLAGS_input_data_format, ',');
std::vector<std::string> raw_output_data_formats =
Split(FLAGS_output_data_format, ',');
std::vector<DataFormat> input_data_formats(input_count);
std::vector<DataFormat> output_data_formats(output_count);
for (size_t i = 0; i < input_count; ++i) {
input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]);
}
for (size_t i = 0; i < output_count; ++i) {
output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
}
bool ret = false;
for (int i = 0; i < FLAGS_restart_round; ++i) {
std::cout << "restart round " << i << std::endl;
ret =
RunModel(input_names, input_shape_vec, input_data_formats,
output_names, output_shape_vec, output_data_formats);
}
if (ret) {
return 0;
} else {
return -1;
}
}
} // namespace examples
} // namespace mace
int main(int argc, char **argv) { return mace::examples::Main(argc, argv); }
......@@ -124,6 +124,11 @@ MaceStatus SerialNet::Init() {
}
MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
const char *profiling = getenv("MACE_OPENCL_PROFILING");
bool enable_opencl_profiling =
    profiling != nullptr && strlen(profiling) == 1 && profiling[0] == '1';
MACE_MEMORY_LOGGING_GUARD();
MACE_LATENCY_LOGGER(1, "Running net");
OpContext context(ws_, cpu_device_.get());
......@@ -146,7 +151,8 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) {
if (run_metadata == nullptr) {
MACE_RETURN_IF_ERROR(op->Run(&context));
} else {
if (device_type == DeviceType::CPU) {
if (device_type == DeviceType::CPU
|| (device_type == DeviceType::GPU && !enable_opencl_profiling)) {
call_stats.start_micros = NowMicros();
MACE_RETURN_IF_ERROR(op->Run(&context));
call_stats.end_micros = NowMicros();
......
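With this change, a GPU op is timed with host timestamps, just like a CPU op, unless OpenCL profiling is enabled through the environment. A caller-side sketch of enabling per-op GPU timing (mirroring what the merged mace_run.cc does for --benchmark later in this commit):

```cpp
#include <cstdlib>

// Must be set before the engine runs so SerialNet::Run sees it;
// the third argument (1) overwrites any existing value.
setenv("MACE_OPENCL_PROFILING", "1", 1);
```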
# Benchmark
# Examples
load(
"//mace:mace.bzl",
"if_android",
"if_darwin",
"if_hexagon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
)
licenses(["notice"]) # Apache 2.0
cc_binary(
name = "mace_run_static",
srcs = ["mace_run.cc"],
srcs = [
"mace_run.cc",
],
copts = [
"-Werror",
"-Wextra",
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
]),
"-Wno-missing-field-initializers",
] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]),
linkstatic = 1,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/libmace",
"//mace/utils",
],
)
cc_binary(
name = "mace_run_dynamic",
srcs = ["mace_run.cc"],
srcs = [
"mace_run.cc",
],
copts = [
"-Werror",
"-Wextra",
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
"-Wno-missing-field-initializers",
] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]),
linkopts = [
"-lm",
] + if_android([
"-ldl",
"-pie",
"-llog",
]),
linkstatic = 0,
deps = [
":statistics",
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
......
file(GLOB MACE_RUN_SRCS
validation/mace_run.cc
mace_run.cc
)
add_executable(mace_run ${MACE_RUN_SRCS})
target_link_libraries(mace_run PUBLIC
......@@ -7,25 +7,4 @@ target_link_libraries(mace_run PUBLIC
gflags
)
file(GLOB MACE_BENCHMARK_MODEL_SRCS
benchmark/benchmark_model.cc
)
add_executable(benchmark_model ${MACE_BENCHMARK_MODEL_SRCS})
target_link_libraries(benchmark_model PUBLIC
mace_static
gflags
)
file(GLOB MACE_BENCHMARK_MODEL_THROUGHPUT_SRCS
benchmark/benchmark_model_throughput.cc
)
add_executable(benchmark_model_throughput ${MACE_BENCHMARK_MODEL_THROUGHPUT_SRCS})
target_link_libraries(benchmark_model_throughput PUBLIC
mace_static
gflags
)
install(TARGETS mace_run RUNTIME DESTINATION bin)
install(TARGETS benchmark_model RUNTIME DESTINATION bin)
install(TARGETS benchmark_model_throughput RUNTIME DESTINATION bin)
# Benchmark
# Examples
load(
"//mace:mace.bzl",
"if_hexagon_enabled",
"if_openmp_enabled",
"if_android",
"if_opencl_enabled",
)
licenses(["notice"]) # Apache 2.0
cc_binary(
name = "benchmark_model_static",
srcs = [
"benchmark_model.cc",
],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_opencl_enabled(["-DMACE_ENABLE_OPENCL"]),
linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1,
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/libmace",
"//mace/utils",
],
)
cc_binary(
name = "benchmark_model_dynamic",
srcs = [
"benchmark_model.cc",
],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
] + if_android(["-DMACE_ENABLE_OPENCL"]),
linkopts = [
"-lm",
] + if_openmp_enabled([
"-fopenmp",
]) + if_android([
"-ldl",
"-pie",
"-llog",
]),
linkstatic = 0,
deps = [
":statistics",
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_models",
"//mace/libmace:libmace_dynamic",
],
)
cc_library(
name = "libmace_merged",
srcs = [
"libmace_merged.a",
],
visibility = ["//visibility:private"],
)
cc_binary(
name = "model_throughput_test",
srcs = ["model_throughput_test.cc"],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1,
deps = [
":libmace_merged",
"//external:gflags_nothreads",
"//mace/core",
],
)
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdlib>
#include <fstream>
#include <memory>
#include <numeric>
#include <thread> // NOLINT(build/c++11)
#include "gflags/gflags.h"
#include "mace/port/env.h"
#include "mace/port/file_system.h"
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
#include "mace/utils/statistics.h"
#ifdef MODEL_GRAPH_FORMAT_CODE
#include "mace/codegen/engine/mace_engine_factory.h"
#endif
namespace mace {
namespace benchmark {
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
std::string tmp = str;
while (!tmp.empty()) {
int dim = atoi(tmp.data());
shape->push_back(dim);
size_t next_offset = tmp.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
std::string FormatName(const std::string input) {
std::string res = input;
for (size_t i = 0; i < input.size(); ++i) {
if (!::isalnum(res[i])) res[i] = '_';
}
return res;
}
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
return DeviceType::CPU;
}
}
DataFormat ParseDataFormat(const std::string &data_format_str) {
if (data_format_str == "NHWC") {
return DataFormat::NHWC;
} else if (data_format_str == "NCHW") {
return DataFormat::NCHW;
} else if (data_format_str == "OIHW") {
return DataFormat::OIHW;
} else {
return DataFormat::NONE;
}
}
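// Run a single inference, timed with host timestamps; when a statistician
// is supplied, per-op RunMetadata is collected and fed to it.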
bool RunInference(MaceEngine *engine,
const std::map<std::string, mace::MaceTensor> &input_infos,
std::map<std::string, mace::MaceTensor> *output_infos,
int64_t *inference_time_us,
OpStat *statistician) {
MACE_CHECK_NOTNULL(output_infos);
RunMetadata run_metadata;
RunMetadata *run_metadata_ptr = nullptr;
if (statistician) {
run_metadata_ptr = &run_metadata;
}
const int64_t start_time = NowMicros();
mace::MaceStatus s = engine->Run(input_infos, output_infos, run_metadata_ptr);
const int64_t end_time = NowMicros();
if (s != mace::MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Error during inference.";
return false;
}
*inference_time_us = end_time - start_time;
if (statistician != nullptr) {
statistician->StatMetadata(run_metadata);
}
return true;
}
bool Run(const std::string &title,
MaceEngine *engine,
const std::map<std::string, mace::MaceTensor> &input_infos,
std::map<std::string, mace::MaceTensor> *output_infos,
int num_runs,
double max_time_sec,
int64_t *total_time_us,
int64_t *actual_num_runs,
OpStat *statistician) {
MACE_CHECK_NOTNULL(output_infos);
*total_time_us = 0;
TimeInfo<int64_t> time_info;
bool until_max_time = (num_runs <= 0);
for (int i = 0; until_max_time || i < num_runs; ++i) {
int64_t inference_time_us = 0;
bool s = RunInference(engine, input_infos, output_infos,
&inference_time_us, statistician);
time_info.UpdateTime(inference_time_us);
(*total_time_us) += inference_time_us;
++(*actual_num_runs);
if (max_time_sec > 0 && (*total_time_us / 1000000.0) > max_time_sec) {
break;
}
if (!s) {
LOG(INFO) << "Failed on run " << i;
return s;
}
}
std::stringstream stream(time_info.ToString(title));
stream << std::endl;
for (std::string line; std::getline(stream, line);) {
LOG(INFO) << line;
}
return true;
}
DEFINE_string(model_name, "", "model name in yaml");
DEFINE_string(device, "CPU", "Device [CPU|GPU|DSP]");
DEFINE_string(input_node, "input_node0,input_node1",
"input nodes, separated by comma");
DEFINE_string(output_node, "output_node0,output_node1",
"output nodes, separated by comma");
DEFINE_string(input_shape, "", "input shape, separated by colon and comma");
DEFINE_string(output_shape, "", "output shape, separated by colon and comma");
DEFINE_string(input_data_format,
"NHWC",
"input data formats, NONE|NHWC|NCHW");
DEFINE_string(output_data_format,
"NHWC",
"output data formats, NONE|NHWC|NCHW");
DEFINE_string(input_file, "", "input file name");
DEFINE_int32(max_num_runs, 100, "max number of runs");
DEFINE_double(max_seconds, 10.0, "max number of seconds to run");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(opencl_binary_file,
"",
"compiled opencl binary file path");
DEFINE_string(opencl_parameter_file,
"",
"tuned OpenCL parameter file path");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(model_file, "",
"model file name, used when load mace model in pb");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
int Main(int argc, char **argv) {
MACE_CHECK(FLAGS_device != "HEXAGON",
"The model benchmark tool does not support DSP.");
std::string usage = "benchmark model\nusage: " + std::string(argv[0])
+ " [flags]";
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "Model name: [" << FLAGS_model_name << "]";
LOG(INFO) << "Model_file: " << FLAGS_model_file;
LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]";
LOG(INFO) << "cpu_affinity_policy: [" << FLAGS_cpu_affinity_policy << "]";
LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]";
LOG(INFO) << "Input shapes: [" << FLAGS_input_shape << "]";
LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]";
LOG(INFO) << "output shapes: [" << FLAGS_output_shape << "]";
LOG(INFO) << "Warmup runs: [" << FLAGS_warmup_runs << "]";
LOG(INFO) << "Num runs: [" << FLAGS_max_num_runs << "]";
LOG(INFO) << "Max run seconds: [" << FLAGS_max_seconds << "]";
std::unique_ptr<OpStat> statistician(new OpStat());
std::vector<std::string> input_names = Split(FLAGS_input_node, ',');
std::vector<std::string> output_names = Split(FLAGS_output_node, ',');
std::vector<std::string> input_shapes = Split(FLAGS_input_shape, ':');
std::vector<std::string> output_shapes = Split(FLAGS_output_shape, ':');
const size_t input_count = input_shapes.size();
const size_t output_count = output_shapes.size();
std::vector<std::vector<int64_t>> input_shape_vec(input_count);
std::vector<std::vector<int64_t>> output_shape_vec(output_count);
for (size_t i = 0; i < input_count; ++i) {
ParseShape(input_shapes[i], &input_shape_vec[i]);
}
for (size_t i = 0; i < output_count; ++i) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
std::vector<std::string> raw_input_data_formats =
Split(FLAGS_input_data_format, ',');
std::vector<std::string> raw_output_data_formats =
Split(FLAGS_output_data_format, ',');
std::vector<DataFormat> input_data_formats(input_count);
std::vector<DataFormat> output_data_formats(output_count);
for (size_t i = 0; i < input_count; ++i) {
input_data_formats[i] = ParseDataFormat(raw_input_data_formats[i]);
}
for (size_t i = 0; i < output_count; ++i) {
output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
}
mace::DeviceType device_type = ParseDeviceType(FLAGS_device);
// configuration
MaceStatus mace_status;
MaceEngineConfig config(device_type);
mace_status = config.SetCPUThreadPolicy(
FLAGS_omp_num_threads,
static_cast<CPUAffinityPolicy>(FLAGS_cpu_affinity_policy));
if (mace_status != MaceStatus::MACE_SUCCESS) {
LOG(INFO) << "Set openmp or cpu affinity failed.";
}
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context;
if (device_type == DeviceType::GPU) {
// DO NOT USE tmp directory.
// Please use APP's own directory and make sure the directory exists.
const char *storage_path_ptr = getenv("MACE_INTERNAL_STORAGE_PATH");
const std::string storage_path =
std::string(storage_path_ptr == nullptr ?
"/data/local/tmp/mace_run/interior" : storage_path_ptr);
std::vector<std::string> opencl_binary_paths = {FLAGS_opencl_binary_file};
gpu_context = GPUContextBuilder()
.SetStoragePath(storage_path)
.SetOpenCLBinaryPaths(opencl_binary_paths)
.SetOpenCLParameterPath(FLAGS_opencl_parameter_file)
.Finalize();
config.SetGPUContext(gpu_context);
config.SetGPUHints(
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
}
#endif // MACE_ENABLE_OPENCL
// Create Engine
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
std::unique_ptr<mace::port::ReadOnlyMemoryRegion> model_graph_data =
make_unique<mace::port::ReadOnlyBufferMemoryRegion>();
if (FLAGS_model_file != "") {
auto fs = GetFileSystem();
auto status = fs->NewReadOnlyMemoryRegionFromFile(FLAGS_model_file.c_str(),
&model_graph_data);
if (status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
}
}
std::unique_ptr<mace::port::ReadOnlyMemoryRegion> model_weights_data =
make_unique<mace::port::ReadOnlyBufferMemoryRegion>();
if (FLAGS_model_data_file != "") {
auto fs = GetFileSystem();
auto status = fs->NewReadOnlyMemoryRegionFromFile(
FLAGS_model_data_file.c_str(),
&model_weights_data);
if (status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Failed to read file: " << FLAGS_model_data_file;
}
MACE_CHECK(model_weights_data->length() > 0);
}
#ifdef MODEL_GRAPH_FORMAT_CODE
create_engine_status = CreateMaceEngineFromCode(FLAGS_model_name,
reinterpret_cast<const unsigned char *>(model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#else
create_engine_status = CreateMaceEngineFromProto(
reinterpret_cast<const unsigned char *>(model_graph_data->data()),
model_graph_data->length(),
reinterpret_cast<const unsigned char *>(model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#endif
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
LOG(FATAL) << "Create engine error, please check the arguments";
}
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) {
// only support float and int32, use char for generalization
int64_t input_size =
std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 4,
std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<char>(new char[input_size],
std::default_delete<char[]>());
// load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary);
if (in_file.is_open()) {
in_file.read(buffer_in.get(), input_size);
in_file.close();
} else {
LOG(INFO) << "Open input file failed";
return -1;
}
inputs[input_names[i]] = mace::MaceTensor(input_shape_vec[i], buffer_in,
input_data_formats[i]);
}
for (size_t i = 0; i < output_count; ++i) {
// only support float and int32, use char for generalization
int64_t output_size =
std::accumulate(output_shape_vec[i].begin(),
output_shape_vec[i].end(), 4,
std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<char>(new char[output_size],
std::default_delete<char[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i],
buffer_out,
output_data_formats[i]);
}
int64_t warmup_time_us = 0;
int64_t num_warmup_runs = 0;
if (FLAGS_warmup_runs > 0) {
bool status =
Run("Warm Up", engine.get(), inputs, &outputs,
FLAGS_warmup_runs, -1.0,
&warmup_time_us, &num_warmup_runs, nullptr);
if (!status) {
LOG(ERROR) << "Failed at warm up run";
}
}
int64_t no_stat_time_us = 0;
int64_t no_stat_runs = 0;
bool status =
Run("Run without statistics", engine.get(), inputs, &outputs,
FLAGS_max_num_runs, FLAGS_max_seconds,
&no_stat_time_us, &no_stat_runs, nullptr);
if (!status) {
LOG(ERROR) << "Failed at normal no-stat run";
}
int64_t stat_time_us = 0;
int64_t stat_runs = 0;
status = Run("Run with statistics", engine.get(), inputs, &outputs,
FLAGS_max_num_runs, FLAGS_max_seconds,
&stat_time_us, &stat_runs, statistician.get());
if (!status) {
LOG(ERROR) << "Failed at normal stat run";
}
statistician->PrintStat();
return 0;
}
} // namespace benchmark
} // namespace mace
int main(int argc, char **argv) { return mace::benchmark::Main(argc, argv); }
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* Usage:
* throughput_test \
* --input_shape=1,224,224,3 \
* --output_shape=1,224,224,2 \
* --input_file=input_data \
* --cpu_model_data_file=cpu_model_data.data \
* --gpu_model_data_file=gpu_model_data.data \
* --dsp_model_data_file=dsp_model_data.data \
* --run_seconds=10
*/
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <numeric>
#include <thread> // NOLINT(build/c++11)
#include "gflags/gflags.h"
#include "mace/public/mace.h"
#include "mace/port/env.h"
#include "mace/utils/logging.h"
#include "mace/core/types.h"
namespace mace {
#ifdef MACE_CPU_MODEL_TAG
namespace MACE_CPU_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
} // namespace MACE_CPU_MODEL_TAG
#endif
#ifdef MACE_GPU_MODEL_TAG
namespace MACE_GPU_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
} // namespace MACE_GPU_MODEL_TAG
#endif
#ifdef MACE_DSP_MODEL_TAG
namespace MACE_DSP_MODEL_TAG {
extern const unsigned char *LoadModelData(const char *model_data_file);
extern void UnloadModelData(const unsigned char *model_data);
extern NetDef CreateNet(const unsigned char *model_data);
extern const std::string ModelChecksum();
} // namespace MACE_DSP_MODEL_TAG
#endif
namespace benchmark {
void Split(const std::string &str,
char delims,
std::vector<std::string> *result) {
MACE_CHECK_NOTNULL(result);
std::string tmp = str;
while (!tmp.empty()) {
size_t next_offset = tmp.find(delims);
result->push_back(tmp.substr(0, next_offset));
if (next_offset == std::string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
void SplitAndParseToInts(const std::string &str,
char delims,
std::vector<int64_t> *result) {
MACE_CHECK_NOTNULL(result);
std::string tmp = str;
while (!tmp.empty()) {
index_t dim = atoi(tmp.data());
result->push_back(dim);
size_t next_offset = tmp.find(delims);
if (next_offset == std::string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
std::string tmp = str;
while (!tmp.empty()) {
index_t dim = atoi(tmp.data());
shape->push_back(dim);
size_t next_offset = tmp.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp = tmp.substr(next_offset + 1);
}
}
}
std::string FormatName(const std::string input) {
std::string res = input;
for (size_t i = 0; i < input.size(); ++i) {
if (!::isalnum(res[i])) res[i] = '_';
}
return res;
}
DeviceType ParseDeviceType(const std::string &device_str) {
if (device_str.compare("CPU") == 0) {
return DeviceType::CPU;
} else if (device_str.compare("GPU") == 0) {
return DeviceType::GPU;
} else if (device_str.compare("HEXAGON") == 0) {
return DeviceType::HEXAGON;
} else {
return DeviceType::CPU;
}
}
DEFINE_string(input_node, "input_node0,input_node1",
"input nodes, separated by comma");
DEFINE_string(output_node, "output_node0,output_node1",
"output nodes, separated by comma");
DEFINE_string(input_shape, "1,224,224,3", "input shape, separated by comma");
DEFINE_string(output_shape, "1,224,224,2", "output shape, separated by comma");
DEFINE_string(input_file, "", "input file name");
DEFINE_string(cpu_model_data_file, "", "cpu model data file name");
DEFINE_string(gpu_model_data_file, "", "gpu model data file name");
DEFINE_string(dsp_model_data_file, "", "dsp model data file name");
DEFINE_int32(run_seconds, 10, "run seconds");
int Main(int argc, char **argv) {
std::string usage = "model throughput test\nusage: " + std::string(argv[0])
+ " [flags]";
gflags::SetUsageMessage(usage);
gflags::ParseCommandLineFlags(&argc, &argv, true);
LOG(INFO) << "mace version: " << MaceVersion();
#ifdef MACE_CPU_MODEL_TAG
LOG(INFO) << "cpu model checksum: "
<< mace::MACE_CPU_MODEL_TAG::ModelChecksum();
#endif
#ifdef MACE_GPU_MODEL_TAG
LOG(INFO) << "gpu model checksum: "
<< mace::MACE_GPU_MODEL_TAG::ModelChecksum();
#endif
#ifdef MACE_DSP_MODEL_TAG
LOG(INFO) << "dsp model checksum: "
<< mace::MACE_DSP_MODEL_TAG::ModelChecksum();
#endif
LOG(INFO) << "Input node: [" << FLAGS_input_node<< "]";
LOG(INFO) << "input_shape: " << FLAGS_input_shape;
LOG(INFO) << "Output node: [" << FLAGS_output_node<< "]";
LOG(INFO) << "output_shape: " << FLAGS_output_shape;
LOG(INFO) << "input_file: " << FLAGS_input_file;
LOG(INFO) << "cpu_model_data_file: " << FLAGS_cpu_model_data_file;
LOG(INFO) << "gpu_model_data_file: " << FLAGS_gpu_model_data_file;
LOG(INFO) << "dsp_model_data_file: " << FLAGS_dsp_model_data_file;
LOG(INFO) << "run_seconds: " << FLAGS_run_seconds;
std::vector<std::string> input_names;
std::vector<std::string> output_names;
std::vector<std::string> input_shapes;
std::vector<std::string> output_shapes;
Split(FLAGS_input_node, ',', &input_names);
Split(FLAGS_output_node, ',', &output_names);
Split(FLAGS_input_shape, ':', &input_shapes);
Split(FLAGS_output_shape, ':', &output_shapes);
const size_t input_count = input_shapes.size();
const size_t output_count = output_shapes.size();
std::vector<std::vector<int64_t>> input_shape_vec(input_count);
std::vector<std::vector<int64_t>> output_shape_vec(output_count);
for (size_t i = 0; i < input_count; ++i) {
ParseShape(input_shapes[i], &input_shape_vec[i]);
}
for (size_t i = 0; i < output_count; ++i) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> cpu_outputs;
std::map<std::string, mace::MaceTensor> gpu_outputs;
std::map<std::string, mace::MaceTensor> dsp_outputs;
for (size_t i = 0; i < input_count; ++i) {
// Allocate input and output
int64_t input_size =
std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 1,
std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size],
std::default_delete<float[]>());
// load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary);
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(buffer_in.get()),
input_size * sizeof(float));
in_file.close();
} else {
LOG(FATAL) << "Open input file failed";
}
inputs[input_names[i]] = mace::MaceTensor(input_shape_vec[i], buffer_in);
}
for (size_t i = 0; i < output_count; ++i) {
int64_t output_size =
std::accumulate(output_shape_vec[i].begin(),
output_shape_vec[i].end(), 1,
std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<float>(new float[output_size],
std::default_delete<float[]>());
cpu_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i],
buffer_out);
gpu_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i],
buffer_out);
dsp_outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i],
buffer_out);
}
#if defined(MACE_CPU_MODEL_TAG) || \
defined(MACE_GPU_MODEL_TAG) || \
defined(MACE_DSP_MODEL_TAG)
int64_t t0, t1, init_micros;
#endif
#ifdef MACE_CPU_MODEL_TAG
/* --------------------- CPU init ----------------------- */
LOG(INFO) << "Load & init cpu model and warm up";
const unsigned char *cpu_model_data =
mace::MACE_CPU_MODEL_TAG::LoadModelData(
FLAGS_cpu_model_data_file.c_str());
NetDef cpu_net_def = mace::MACE_CPU_MODEL_TAG::CreateNet(cpu_model_data);
mace::MaceEngine cpu_engine(&cpu_net_def, DeviceType::CPU, input_names,
output_names);
LOG(INFO) << "CPU Warm up run";
t0 = NowMicros();
cpu_engine.Run(inputs, &cpu_outputs);
t1 = NowMicros();
LOG(INFO) << "CPU 1st warm up run latency: " << t1 - t0 << " us";
#endif
#ifdef MACE_GPU_MODEL_TAG
/* --------------------- GPU init ----------------------- */
LOG(INFO) << "Load & init gpu model and warm up";
const unsigned char *gpu_model_data =
mace::MACE_GPU_MODEL_TAG::LoadModelData(
FLAGS_gpu_model_data_file.c_str());
NetDef gpu_net_def = mace::MACE_GPU_MODEL_TAG::CreateNet(gpu_model_data);
mace::MaceEngine gpu_engine(&gpu_net_def, DeviceType::GPU, input_names,
output_names);
mace::MACE_GPU_MODEL_TAG::UnloadModelData(gpu_model_data);
LOG(INFO) << "GPU Warm up run";
t0 = NowMicros();
gpu_engine.Run(inputs, &gpu_outputs);
t1 = NowMicros();
LOG(INFO) << "GPU 1st warm up run latency: " << t1 - t0 << " us";
#endif
#ifdef MACE_DSP_MODEL_TAG
/* --------------------- DSP init ----------------------- */
LOG(INFO) << "Load & init dsp model and warm up";
const unsigned char *dsp_model_data =
mace::MACE_DSP_MODEL_TAG::LoadModelData(
FLAGS_dsp_model_data_file.c_str());
NetDef dsp_net_def = mace::MACE_DSP_MODEL_TAG::CreateNet(dsp_model_data);
mace::MaceEngine dsp_engine(&dsp_net_def, DeviceType::HEXAGON, input_names,
output_names);
mace::MACE_DSP_MODEL_TAG::UnloadModelData(dsp_model_data);
LOG(INFO) << "DSP Warm up run";
t0 = NowMicros();
dsp_engine.Run(inputs, &dsp_outputs);
t1 = NowMicros();
LOG(INFO) << "DSP 1st warm up run latency: " << t1 - t0 << " us";
#endif
#if defined(MACE_CPU_MODEL_TAG) || \
defined(MACE_GPU_MODEL_TAG) || \
defined(MACE_DSP_MODEL_TAG)
double cpu_throughput = 0;
double gpu_throughput = 0;
double dsp_throughput = 0;
int64_t run_micros = FLAGS_run_seconds * 1000000;
#endif
#ifdef MACE_CPU_MODEL_TAG
std::thread cpu_thread([&]() {
int64_t frames = 0;
int64_t micros = 0;
int64_t start = NowMicros();
for (; micros < run_micros; ++frames) {
cpu_engine.Run(inputs, &cpu_outputs);
int64_t end = NowMicros();
micros = end - start;
}
cpu_throughput = frames * 1000000.0 / micros;
});
#endif
#ifdef MACE_GPU_MODEL_TAG
std::thread gpu_thread([&]() {
int64_t frames = 0;
int64_t micros = 0;
int64_t start = NowMicros();
for (; micros < run_micros; ++frames) {
gpu_engine.Run(inputs, &gpu_outputs);
int64_t end = NowMicros();
micros = end - start;
}
gpu_throughput = frames * 1000000.0 / micros;
});
#endif
#ifdef MACE_DSP_MODEL_TAG
std::thread dsp_thread([&]() {
int64_t frames = 0;
int64_t micros = 0;
int64_t start = NowMicros();
for (; micros < run_micros; ++frames) {
dsp_engine.Run(inputs, &dsp_outputs);
int64_t end = NowMicros();
micros = end - start;
}
dsp_throughput = frames * 1000000.0 / micros;
});
#endif
double total_throughput = 0;
#ifdef MACE_CPU_MODEL_TAG
cpu_thread.join();
LOG(INFO) << "CPU throughput: " << cpu_throughput << " f/s";
total_throughput += cpu_throughput;
#endif
#ifdef MACE_GPU_MODEL_TAG
gpu_thread.join();
LOG(INFO) << "GPU throughput: " << gpu_throughput << " f/s";
total_throughput += gpu_throughput;
#endif
#ifdef MACE_DSP_MODEL_TAG
dsp_thread.join();
LOG(INFO) << "DSP throughput: " << dsp_throughput << " f/s";
total_throughput += dsp_throughput;
#endif
LOG(INFO) << "Total throughput: " << total_throughput << " f/s";
return 0;
}
} // namespace benchmark
} // namespace mace
int main(int argc, char **argv) { return mace::benchmark::Main(argc, argv); }
......@@ -38,6 +38,7 @@
#include "mace/utils/logging.h"
#include "mace/utils/memory.h"
#include "mace/utils/string_util.h"
#include "mace/utils/statistics.h"
#ifdef MODEL_GRAPH_FORMAT_CODE
#include "mace/codegen/engine/mace_engine_factory.h"
......@@ -45,7 +46,6 @@
namespace mace {
namespace tools {
namespace validation {
void ParseShape(const std::string &str, std::vector<int64_t> *shape) {
std::string tmp = str;
......@@ -124,7 +124,6 @@ DEFINE_string(input_file,
DEFINE_string(output_file,
"",
"output file name | output file prefix for multiple outputs");
// TODO(liyin): support batch validation
DEFINE_string(input_dir,
"",
"input directory name");
......@@ -152,6 +151,7 @@ DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, -1, "num of openmp threads");
DEFINE_int32(cpu_affinity_policy, 1,
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY");
DEFINE_bool(benchmark, false, "enable benchmark op");
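// When --benchmark is set, mace_run collects RunMetadata on every round and
// prints per-op statistics through benchmark::OpStat, replacing the removed
// benchmark_model tool.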
bool RunModel(const std::string &model_name,
const std::vector<std::string> &input_names,
......@@ -352,6 +352,7 @@ bool RunModel(const std::string &model_name,
}
double model_run_millis = -1;
benchmark::OpStat op_stat;
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
int64_t total_run_duration = 0;
......@@ -364,9 +365,15 @@ bool RunModel(const std::string &model_name,
info_log.get(), MakeString(i));
}
MaceStatus run_status;
RunMetadata metadata;
RunMetadata *metadata_ptr = nullptr;
if (FLAGS_benchmark) {
metadata_ptr = &metadata;
}
while (true) {
int64_t t0 = NowMicros();
run_status = engine->Run(inputs, &outputs);
run_status = engine->Run(inputs, &outputs, metadata_ptr);
if (run_status != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Mace run model runtime error, retry ... errcode: "
<< run_status.information();
......@@ -399,6 +406,9 @@ bool RunModel(const std::string &model_name,
} else {
int64_t t1 = NowMicros();
total_run_duration += (t1 - t0);
if (FLAGS_benchmark) {
op_stat.StatMetadata(metadata);
}
break;
}
}
......@@ -407,14 +417,6 @@ bool RunModel(const std::string &model_name,
LOG(INFO) << "Average latency: " << model_run_millis << " ms";
}
// Metrics reporting tools depend on this format; keep it consistent
printf("========================================================\n");
printf(" capability(CPU) init warmup run_avg\n");
printf("========================================================\n");
printf("time %15.3f %11.3f %11.3f %11.3f\n",
cpu_capability, init_millis, warmup_millis, model_run_millis);
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]);
......@@ -431,6 +433,16 @@ bool RunModel(const std::string &model_name,
<< output_size << " done.";
}
// Metrics reporting tools depend on this format; keep it consistent
printf("========================================================\n");
printf(" capability(CPU) init warmup run_avg\n");
printf("========================================================\n");
printf("time %15.3f %11.3f %11.3f %11.3f\n",
cpu_capability, init_millis, warmup_millis, model_run_millis);
if (FLAGS_benchmark) {
op_stat.PrintStat();
}
return true;
}
......@@ -448,6 +460,10 @@ int Main(int argc, char **argv) {
return 0;
}
if (FLAGS_benchmark) {
setenv("MACE_OPENCL_PROFILING", "1", 1);
}
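// MACE reads MACE_OPENCL_PROFILING when the OpenCL runtime is created,
// so it is set here, before engine construction, to capture GPU op timings.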
LOG(INFO) << "model name: " << FLAGS_model_name;
LOG(INFO) << "mace version: " << MaceVersion();
LOG(INFO) << "input node: " << FLAGS_input_node;
......@@ -517,8 +533,9 @@ int Main(int argc, char **argv) {
return -1;
}
} // namespace validation
} // namespace tools
} // namespace mace
int main(int argc, char **argv) { mace::tools::validation::Main(argc, argv); }
int main(int argc, char **argv) {
mace::tools::Main(argc, argv);
}
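With benchmarking folded into mace_run, the old standalone benchmark tool is covered by a plain run invocation plus the new flag. A hedged usage sketch (the config path is a placeholder; flags are as used in the CI scripts):

import subprocess

# --benchmark enables per-op statistics during `run`, replacing the
# removed `benchmark` subcommand; other flags keep their usual meaning.
subprocess.check_call([
    "python", "tools/converter.py", "run",
    "--config=path/to/model.yml",  # placeholder
    "--round=5",
    "--benchmark",
])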
......@@ -447,14 +447,10 @@ BUILD_TMP_DIR_NAME = '_tmp'
BUILD_DOWNLOADS_DIR = BUILD_OUTPUT_DIR + '/downloads'
BUILD_TMP_GENERAL_OUTPUT_DIR_NAME = 'general'
MODEL_OUTPUT_DIR_NAME = 'model'
EXAMPLE_STATIC_NAME = "example_static"
EXAMPLE_DYNAMIC_NAME = "example_dynamic"
EXAMPLE_STATIC_TARGET = "//examples/cli:" + EXAMPLE_STATIC_NAME
EXAMPLE_DYNAMIC_TARGET = "//examples/cli:" + EXAMPLE_DYNAMIC_NAME
MACE_RUN_STATIC_NAME = "mace_run_static"
MACE_RUN_DYNAMIC_NAME = "mace_run_dynamic"
MACE_RUN_STATIC_TARGET = "//mace/tools/validation:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools/validation:" + MACE_RUN_DYNAMIC_NAME
MACE_RUN_STATIC_TARGET = "//mace/tools:" + MACE_RUN_STATIC_NAME
MACE_RUN_DYNAMIC_TARGET = "//mace/tools:" + MACE_RUN_DYNAMIC_NAME
CL_COMPILED_BINARY_FILE_NAME = "mace_cl_compiled_program.bin"
BUILD_TMP_OPENCL_BIN_DIR = 'opencl_bin'
LIBMACE_DYNAMIC_PATH = "bazel-bin/mace/libmace/libmace.so"
......@@ -474,11 +470,6 @@ LIBMACE_STATIC_TARGET = "//mace/libmace:libmace_static"
LIBMACE_STATIC_PATH = "bazel-genfiles/mace/libmace/libmace.a"
MODEL_LIB_TARGET = "//mace/codegen:generated_models"
MODEL_LIB_PATH = "bazel-bin/mace/codegen/libgenerated_models.a"
QUANTIZE_STAT_TARGET = "//mace/tools/quantization:quantize_stat"
BM_MODEL_STATIC_NAME = "benchmark_model_static"
BM_MODEL_DYNAMIC_NAME = "benchmark_model_dynamic"
BM_MODEL_STATIC_TARGET = "//mace/tools/benchmark:" + BM_MODEL_STATIC_NAME
BM_MODEL_DYNAMIC_TARGET = "//mace/tools/benchmark:" + BM_MODEL_DYNAMIC_NAME
################################
......@@ -508,7 +499,6 @@ class ModuleName(object):
YAML_CONFIG = 'YAML CONFIG'
MODEL_CONVERTER = 'Model Converter'
RUN = 'RUN'
BENCHMARK = 'Benchmark'
#################################
......
......@@ -891,7 +891,7 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
mace_check(os.path.exists(ENGINE_CODEGEN_DIR),
ModuleName.RUN,
"You should convert model first.")
build_arg = "--per_file_copt=mace/tools/validation/mace_run.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa
build_arg = "--per_file_copt=mace/tools/mace_run.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa
sh_commands.bazel_build(
mace_run_target,
......@@ -912,86 +912,6 @@ def build_mace_run(configs, target_abi, toolchain, enable_openmp,
mace_lib_type == MACELibType.dynamic)
def build_example(configs, target_abi, toolchain, enable_openmp, mace_lib_type,
cl_binary_to_code, device, debug_mode):
library_name = configs[YAMLKeyword.library_name]
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
if os.path.exists(build_tmp_binary_dir):
sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir)
if cl_binary_to_code:
sh_commands.gen_opencl_binary_cpps(
get_opencl_binary_output_path(
library_name, target_abi, device),
get_opencl_parameter_output_path(
library_name, target_abi, device),
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
else:
sh_commands.gen_opencl_binary_cpps(
"", "",
OPENCL_CODEGEN_DIR + '/opencl_binary.cc',
OPENCL_CODEGEN_DIR + '/opencl_parameter.cc')
libmace_target = LIBMACE_STATIC_TARGET
if mace_lib_type == MACELibType.dynamic:
libmace_target = LIBMACE_SO_TARGET
sh_commands.bazel_build(libmace_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
enable_hexagon=get_hexagon_mode(configs),
enable_hta=get_hta_mode(configs),
enable_apu=get_apu_mode(configs),
address_sanitizer=flags.address_sanitizer,
symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type), # noqa
debug_mode=debug_mode)
if os.path.exists(LIB_CODEGEN_DIR):
sh.rm("-rf", LIB_CODEGEN_DIR)
sh.mkdir("-p", LIB_CODEGEN_DIR)
build_arg = ""
if configs[YAMLKeyword.model_graph_format] == ModelFormat.code:
mace_check(os.path.exists(ENGINE_CODEGEN_DIR),
ModuleName.RUN,
"You should convert model first.")
model_lib_path = get_model_lib_output_path(library_name,
target_abi)
sh.cp("-f", model_lib_path, LIB_CODEGEN_DIR)
build_arg = "--per_file_copt=examples/cli/example.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa
if mace_lib_type == MACELibType.dynamic:
example_target = EXAMPLE_DYNAMIC_TARGET
sh.cp("-f", LIBMACE_DYNAMIC_PATH, LIB_CODEGEN_DIR)
else:
example_target = EXAMPLE_STATIC_TARGET
sh.cp("-f", LIBMACE_STATIC_PATH, LIB_CODEGEN_DIR)
sh_commands.bazel_build(example_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
enable_hexagon=get_hexagon_mode(configs),
enable_hta=get_hta_mode(configs),
enable_apu=get_apu_mode(configs),
address_sanitizer=flags.address_sanitizer,
debug_mode=debug_mode,
extra_args=build_arg)
target_bin = "/".join(sh_commands.bazel_target_to_bin(example_target))
sh.cp("-f", target_bin, build_tmp_binary_dir)
if os.path.exists(LIB_CODEGEN_DIR):
sh.rm("-rf", LIB_CODEGEN_DIR)
def print_package_summary(package_path):
title = "Library"
header = ["key", "value"]
......@@ -1024,23 +944,13 @@ def run_mace(flags):
# get toolchain
toolchain = infer_toolchain(target_abi)
device = DeviceWrapper(dev)
if flags.example:
build_example(configs,
target_abi,
toolchain,
flags.enable_openmp,
flags.mace_lib_type,
flags.cl_binary_to_code,
device,
flags.debug_mode)
else:
build_mace_run(configs,
target_abi,
toolchain,
flags.enable_openmp,
flags.address_sanitizer,
flags.mace_lib_type,
flags.debug_mode)
build_mace_run(configs,
target_abi,
toolchain,
flags.enable_openmp,
flags.address_sanitizer,
flags.mace_lib_type,
flags.debug_mode)
# run
start_time = time.time()
with device.lock():
......@@ -1058,90 +968,6 @@ def run_mace(flags):
print_package_summary(package_path)
################################
# benchmark model
################################
def build_benchmark_model(configs,
target_abi,
toolchain,
enable_openmp,
mace_lib_type,
debug_mode):
library_name = configs[YAMLKeyword.library_name]
link_dynamic = mace_lib_type == MACELibType.dynamic
if link_dynamic:
benchmark_target = BM_MODEL_DYNAMIC_TARGET
else:
benchmark_target = BM_MODEL_STATIC_TARGET
build_arg = ""
if configs[YAMLKeyword.model_graph_format] == ModelFormat.code:
mace_check(os.path.exists(ENGINE_CODEGEN_DIR),
ModuleName.BENCHMARK,
"You should convert model first.")
build_arg = "--per_file_copt=mace/tools/benchmark/benchmark_model.cc@-DMODEL_GRAPH_FORMAT_CODE" # noqa
sh_commands.bazel_build(benchmark_target,
abi=target_abi,
toolchain=toolchain,
enable_openmp=enable_openmp,
enable_opencl=get_opencl_mode(configs),
enable_quantize=get_quantize_mode(configs),
enable_hexagon=get_hexagon_mode(configs),
enable_hta=get_hta_mode(configs),
enable_apu=get_apu_mode(configs),
symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type), # noqa
debug_mode=debug_mode,
extra_args=build_arg)
# clear tmp binary dir
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
if os.path.exists(build_tmp_binary_dir):
sh.rm("-rf", build_tmp_binary_dir)
os.makedirs(build_tmp_binary_dir)
target_bin = "/".join(sh_commands.bazel_target_to_bin(benchmark_target))
sh.cp("-f", target_bin, build_tmp_binary_dir)
def benchmark_model(flags):
configs = format_model_config(flags)
clear_build_dirs(configs[YAMLKeyword.library_name])
target_socs = configs[YAMLKeyword.target_socs]
device_list = DeviceManager.list_devices(flags.device_yml)
if target_socs and TargetSOCTag.all not in target_socs:
device_list = [dev for dev in device_list
if dev[YAMLKeyword.target_socs].lower() in target_socs]
for target_abi in configs[YAMLKeyword.target_abis]:
if flags.target_socs == TargetSOCTag.random:
target_devices = sh_commands.choose_a_random_device(
device_list, target_abi)
else:
target_devices = device_list
# build benchmark_model binary
for dev in target_devices:
if target_abi in dev[YAMLKeyword.target_abis]:
toolchain = infer_toolchain(target_abi)
build_benchmark_model(configs,
target_abi,
toolchain,
flags.enable_openmp,
flags.mace_lib_type,
flags.debug_mode)
device = DeviceWrapper(dev)
start_time = time.time()
with device.lock():
device.bm_specific_target(flags, configs, target_abi)
elapse_minutes = (time.time() - start_time) / 60
print("Elapse time: %f minutes." % elapse_minutes)
else:
six.print_('There is no abi %s with soc %s' %
(target_abi, dev[YAMLKeyword.target_socs]),
file=sys.stderr)
################################
# parsing arguments
################################
......@@ -1210,60 +1036,61 @@ def parse_args():
'--address_sanitizer',
action="store_true",
help="Whether to use address sanitizer to check memory error")
run_bm_parent_parser = argparse.ArgumentParser(add_help=False)
run_bm_parent_parser.add_argument(
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
convert = subparsers.add_parser(
'convert',
parents=[all_type_parent_parser, convert_run_parent_parser],
help='convert to mace model (file or code)')
convert.add_argument(
"--cl_mem_type",
type=str,
default=None,
help="Which type of OpenCL memory type to use [image | buffer].")
convert.set_defaults(func=convert_func)
run = subparsers.add_parser(
'run',
parents=[all_type_parent_parser,
convert_run_parent_parser],
help='run model in command line')
run.set_defaults(func=run_mace)
run.add_argument(
"--mace_lib_type",
type=str_to_mace_lib_type,
default=DefaultValues.mace_lib_type,
help="[static | dynamic], Which type MACE library to use.")
run_bm_parent_parser.add_argument(
run.add_argument(
"--enable_openmp",
action="store_true",
help="Enable openmp for multiple thread.")
run_bm_parent_parser.add_argument(
run.add_argument(
"--omp_num_threads",
type=int,
default=DefaultValues.omp_num_threads,
help="num of openmp threads")
run_bm_parent_parser.add_argument(
run.add_argument(
"--cpu_affinity_policy",
type=int,
default=DefaultValues.cpu_affinity_policy,
help="0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY")
run_bm_parent_parser.add_argument(
run.add_argument(
"--gpu_perf_hint",
type=int,
default=DefaultValues.gpu_perf_hint,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
run_bm_parent_parser.add_argument(
run.add_argument(
"--gpu_priority_hint",
type=int,
default=DefaultValues.gpu_priority_hint,
help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
run_bm_parent_parser.add_argument(
run.add_argument(
"--device_yml",
type=str,
default='',
help='embedded linux device config yml file'
)
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
convert = subparsers.add_parser(
'convert',
parents=[all_type_parent_parser, convert_run_parent_parser],
help='convert to mace model (file or code)')
convert.add_argument(
"--cl_mem_type",
type=str,
default=None,
help="Which type of OpenCL memory type to use [image | buffer].")
convert.set_defaults(func=convert_func)
run = subparsers.add_parser(
'run',
parents=[all_type_parent_parser, run_bm_parent_parser,
convert_run_parent_parser],
help='run model in command line')
run.set_defaults(func=run_mace)
run.add_argument(
"--disable_tuning",
action="store_true",
......@@ -1318,10 +1145,6 @@ def parse_args():
type=float,
default=0.0,
help="[mock runtime failure ratio].")
run.add_argument(
"--example",
action="store_true",
help="whether to run example.")
run.add_argument(
"--quantize_stat",
action="store_true",
......@@ -1340,21 +1163,10 @@ def parse_args():
"--cl_binary_to_code",
action="store_true",
help="convert OpenCL binaries to cpp.")
benchmark = subparsers.add_parser(
'benchmark',
parents=[all_type_parent_parser, run_bm_parent_parser],
help='benchmark model for detail information')
benchmark.set_defaults(func=benchmark_model)
benchmark.add_argument(
"--max_num_runs",
type=int,
default=100,
help="max number of runs.")
benchmark.add_argument(
"--max_seconds",
type=float,
default=10.0,
help="max number of seconds to run.")
run.add_argument(
"--benchmark",
action="store_true",
help="enable op benchmark.")
return parser.parse_known_args()
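The reshuffle above is the standard argparse pattern for folding a subcommand into a flag on the surviving subparser; a self-contained sketch of the same shape (names here are illustrative, not converter.py's):

import argparse

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
run = subparsers.add_parser('run', help='run model in command line')
run.add_argument('--omp_num_threads', type=int, default=-1)
run.add_argument('--benchmark', action='store_true',
                 help='enable op benchmark')

# parse_known_args tolerates extra flags owned by other tool layers.
args, unknown = parser.parse_known_args(['run', '--benchmark'])
assert args.benchmark and args.omp_num_threads == -1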
......
......@@ -186,6 +186,7 @@ class DeviceWrapper:
link_dynamic=False,
quantize_stat=False,
layers_validate_file="",
benchmark=False,
):
six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
"out_of_range_check=%s, omp_num_threads=%s, "
......@@ -343,6 +344,9 @@ class DeviceWrapper:
"--opencl_parameter_file=%s/%s" %
(self.data_dir, os.path.basename(opencl_parameter_file)),
])
if benchmark:
cmd.append("--benchmark=%s" % benchmark)
cmd = ' '.join(cmd)
cmd_file_name = "%s-%s-%s" % ('cmd_file',
model_tag,
......@@ -473,16 +477,10 @@ class DeviceWrapper:
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
# get target name for run
mace_lib_type = flags.mace_lib_type
if flags.example:
if mace_lib_type == MACELibType.static:
target_name = EXAMPLE_STATIC_NAME
else:
target_name = EXAMPLE_DYNAMIC_NAME
if mace_lib_type == MACELibType.static:
target_name = MACE_RUN_STATIC_NAME
else:
if mace_lib_type == MACELibType.static:
target_name = MACE_RUN_STATIC_NAME
else:
target_name = MACE_RUN_DYNAMIC_NAME
target_name = MACE_RUN_DYNAMIC_NAME
link_dynamic = mace_lib_type == MACELibType.dynamic
if target_abi != ABIType.host:
......@@ -557,7 +555,8 @@ class DeviceWrapper:
input_dir=flags.input_dir,
output_dir=flags.output_dir,
layers_validate_file=output_config[
YAMLKeyword.model_file_path]
YAMLKeyword.model_file_path],
benchmark=flags.benchmark,
)
def get_output_map(self,
......@@ -621,7 +620,6 @@ class DeviceWrapper:
tuning = False
if not flags.address_sanitizer \
and not flags.example \
and target_abi != ABIType.host \
and (configs[YAMLKeyword.target_socs]
or flags.target_socs) \
......@@ -859,254 +857,6 @@ class DeviceWrapper:
with open(report_filename, 'a') as f:
f.write(data_str)
def benchmark_model(self,
abi,
benchmark_binary_dir,
benchmark_binary_name,
vlog_level,
embed_model_data,
model_output_dir,
mace_model_dir,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
input_data_formats,
output_data_formats,
max_num_runs,
max_seconds,
model_tag,
device_type,
model_graph_format,
opencl_binary_file,
opencl_parameter_file,
libmace_dynamic_library_path,
omp_num_threads=-1,
cpu_affinity_policy=1,
gpu_perf_hint=3,
gpu_priority_hint=3,
input_file_name='model_input',
link_dynamic=False):
six.print_('* Benchmark for %s' % model_tag)
mace_model_path = ''
if model_graph_format == ModelFormat.file:
mace_model_path = '%s/%s.pb' % (mace_model_dir, model_tag)
model_data_file = ""
if not embed_model_data:
if self.system == SystemType.host:
model_data_file = "%s/%s.data" % (mace_model_dir, model_tag)
else:
model_data_file = "%s/%s.data" % (self.data_dir, model_tag)
if abi == ABIType.host:
libmace_dynamic_lib_dir_path = \
os.path.dirname(libmace_dynamic_library_path)
p = subprocess.Popen(
[
'env',
'LD_LIBRARY_PATH=%s' % libmace_dynamic_lib_dir_path,
'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
'%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
'--model_name=%s' % model_tag,
'--input_node=%s' % ','.join(input_nodes),
'--output_node=%s' % ','.join(output_nodes),
'--input_shape=%s' % ':'.join(input_shapes),
'--output_shape=%s' % ':'.join(output_shapes),
"--input_data_format=%s" % ",".join(input_data_formats),
"--output_data_format=%s" % ",".join(output_data_formats),
'--input_file=%s/%s' % (model_output_dir, input_file_name),
"--model_data_file=%s" % model_data_file,
'--max_num_runs=%d' % max_num_runs,
'--max_seconds=%f' % max_seconds,
'--device=%s' % device_type,
'--omp_num_threads=%s' % omp_num_threads,
'--cpu_affinity_policy=%s' % cpu_affinity_policy,
'--gpu_perf_hint=%s' % gpu_perf_hint,
'--gpu_priority_hint=%s' % gpu_priority_hint,
'--model_file=%s' % mace_model_path
])
p.wait()
elif self.system in [SystemType.android, SystemType.arm_linux]:
self.exec_command('mkdir -p %s' % self.data_dir)
internal_storage_dir = self.create_internal_storage_dir()
for input_name in input_nodes:
formatted_name = formatted_file_name(input_file_name,
input_name)
self.push('%s/%s' % (model_output_dir, formatted_name),
self.data_dir)
if not embed_model_data:
self.push('%s/%s.data' % (mace_model_dir, model_tag),
self.data_dir)
if device_type == common.DeviceType.GPU:
if os.path.exists(opencl_binary_file):
self.push(opencl_binary_file, self.data_dir)
if os.path.exists(opencl_parameter_file):
self.push(opencl_parameter_file, self.data_dir)
mace_model_device_path = ''
if model_graph_format == ModelFormat.file:
mace_model_device_path = '%s/%s.pb' % \
(self.data_dir, model_tag)
self.push(mace_model_path, mace_model_device_path)
if link_dynamic:
self.push(libmace_dynamic_library_path, self.data_dir)
if self.system == SystemType.android:
sh_commands.push_depended_so_libs(
libmace_dynamic_library_path, abi, self.data_dir,
self.address)
self.rm('%s/%s' % (self.data_dir, benchmark_binary_name))
self.push('%s/%s' % (benchmark_binary_dir, benchmark_binary_name),
self.data_dir)
cmd = [
'LD_LIBRARY_PATH=%s' % self.data_dir,
'MACE_CPP_MIN_VLOG_LEVEL=%s' % vlog_level,
'MACE_RUN_PARAMETER_PATH=%s/mace_run.config' % self.data_dir,
'MACE_INTERNAL_STORAGE_PATH=%s' % internal_storage_dir,
'MACE_OPENCL_PROFILING=1',
'%s/%s' % (self.data_dir, benchmark_binary_name),
'--model_name=%s' % model_tag,
'--input_node=%s' % ','.join(input_nodes),
'--output_node=%s' % ','.join(output_nodes),
'--input_shape=%s' % ':'.join(input_shapes),
'--output_shape=%s' % ':'.join(output_shapes),
"--input_data_format=%s" % ",".join(input_data_formats),
"--output_data_format=%s" % ",".join(output_data_formats),
'--input_file=%s/%s' % (self.data_dir, input_file_name),
"--model_data_file=%s" % model_data_file,
'--max_num_runs=%d' % max_num_runs,
'--max_seconds=%f' % max_seconds,
'--device=%s' % device_type,
'--omp_num_threads=%s' % omp_num_threads,
'--cpu_affinity_policy=%s' % cpu_affinity_policy,
'--gpu_perf_hint=%s' % gpu_perf_hint,
'--gpu_priority_hint=%s' % gpu_priority_hint,
'--model_file=%s' % mace_model_device_path,
'--opencl_binary_file=%s/%s' %
(self.data_dir, os.path.basename(opencl_binary_file)),
'--opencl_parameter_file=%s/%s' %
(self.data_dir, os.path.basename(opencl_parameter_file))
]
cmd = ' '.join(cmd)
cmd_file_name = '%s-%s-%s' % \
('cmd_file', model_tag, str(time.time()))
cmd_file_path = '%s/%s' % (self.data_dir, cmd_file_name)
tmp_cmd_file = '%s/%s' % ('/tmp', cmd_file_name)
with open(tmp_cmd_file, 'w') as f:
f.write(cmd)
self.push(tmp_cmd_file, cmd_file_path)
os.remove(tmp_cmd_file)
if self.system == SystemType.android:
sh.adb('-s', self.address, 'shell', 'sh', cmd_file_path,
_fg=True)
elif self.system == SystemType.arm_linux:
sh.ssh('%s@%s' % (self.username, self.address),
'sh', cmd_file_path, _fg=True)
self.rm(cmd_file_path)
six.print_('Benchmark done! \n')
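Both this removed path and the surviving run path ship commands to the device the same way: serialize the full command line into a file, push it, and execute the file with sh. A minimal sketch of that pattern (function name and paths are illustrative):

import os
import time

import sh

def exec_via_cmd_file(serialno, data_dir, cmd_parts):
    # Writing the command to a file sidesteps adb shell quoting and
    # argument-length limits for long mace_run command lines.
    name = 'cmd_file-%s' % time.time()
    tmp = os.path.join('/tmp', name)
    with open(tmp, 'w') as f:
        f.write(' '.join(cmd_parts))
    device_path = '%s/%s' % (data_dir, name)
    sh.adb('-s', serialno, 'push', tmp, device_path)
    os.remove(tmp)
    sh.adb('-s', serialno, 'shell', 'sh', device_path, _fg=True)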
def bm_specific_target(self, flags, configs, target_abi):
library_name = configs[YAMLKeyword.library_name]
embed_model_data = \
configs[YAMLKeyword.model_data_format] == ModelFormat.code
opencl_output_bin_path = ''
opencl_parameter_path = ''
link_dynamic = flags.mace_lib_type == MACELibType.dynamic
if link_dynamic:
bm_model_binary_name = BM_MODEL_DYNAMIC_NAME
else:
bm_model_binary_name = BM_MODEL_STATIC_NAME
build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi)
if (configs[YAMLKeyword.target_socs] or flags.target_socs)\
and target_abi != ABIType.host:
opencl_output_bin_path = get_opencl_binary_output_path(
library_name, target_abi, self
)
opencl_parameter_path = get_opencl_parameter_output_path(
library_name, target_abi, self
)
for model_name in configs[YAMLKeyword.models]:
check_model_converted(library_name,
model_name,
configs[YAMLKeyword.model_graph_format],
configs[YAMLKeyword.model_data_format],
target_abi)
MaceLogger.header(
StringFormatter.block(
'Benchmark model %s on %s' % (model_name,
self.device_name)))
model_config = configs[YAMLKeyword.models][model_name]
model_runtime = model_config[YAMLKeyword.runtime]
subgraphs = model_config[YAMLKeyword.subgraphs]
model_output_base_dir, model_output_dir, mace_model_dir = \
get_build_model_dirs(library_name, model_name,
target_abi, self,
model_config[YAMLKeyword.model_file_path])
if os.path.exists(model_output_dir):
sh.rm('-rf', model_output_dir)
os.makedirs(model_output_dir)
if target_abi != ABIType.host:
self.clear_data_dir()
sh_commands.gen_input(
model_output_dir,
subgraphs[0][YAMLKeyword.input_tensors],
subgraphs[0][YAMLKeyword.input_shapes],
subgraphs[0][YAMLKeyword.validation_inputs_data],
input_ranges=subgraphs[0][YAMLKeyword.input_ranges],
input_data_types=subgraphs[0][YAMLKeyword.input_data_types]
)
runtime_list = []
if target_abi == ABIType.host:
runtime_list.append(RuntimeType.cpu)
elif model_runtime == RuntimeType.cpu_gpu:
runtime_list.extend([RuntimeType.cpu, RuntimeType.gpu])
else:
runtime_list.append(model_runtime)
for runtime in runtime_list:
device_type = parse_device_type(runtime)
if not subgraphs[0][YAMLKeyword.check_tensors]:
output_nodes = subgraphs[0][YAMLKeyword.output_tensors]
output_shapes = subgraphs[0][YAMLKeyword.output_shapes]
else:
output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
self.benchmark_model(
abi=target_abi,
benchmark_binary_dir=build_tmp_binary_dir,
benchmark_binary_name=bm_model_binary_name,
vlog_level=0,
embed_model_data=embed_model_data,
model_output_dir=model_output_dir,
input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
output_nodes=output_nodes,
input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
output_shapes=output_shapes,
input_data_formats=subgraphs[0][
YAMLKeyword.input_data_formats],
output_data_formats=subgraphs[0][
YAMLKeyword.output_data_formats],
max_num_runs=flags.max_num_runs,
max_seconds=flags.max_seconds,
mace_model_dir=mace_model_dir,
model_tag=model_name,
device_type=device_type,
model_graph_format=configs[YAMLKeyword.model_graph_format],
omp_num_threads=flags.omp_num_threads,
cpu_affinity_policy=flags.cpu_affinity_policy,
gpu_perf_hint=flags.gpu_perf_hint,
gpu_priority_hint=flags.gpu_priority_hint,
opencl_binary_file=opencl_output_bin_path,
opencl_parameter_file=opencl_parameter_path,
libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
link_dynamic=link_dynamic)
def run(self,
abi,
host_bin_path,
......
......@@ -661,10 +661,10 @@ def update_mace_run_binary(build_tmp_binary_dir, link_dynamic=False):
if os.path.exists(mace_run_filepath):
sh.rm("-rf", mace_run_filepath)
if link_dynamic:
sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_dynamic",
sh.cp("-f", "bazel-bin/mace/tools/mace_run_dynamic",
build_tmp_binary_dir)
else:
sh.cp("-f", "bazel-bin/mace/tools/validation/mace_run_static",
sh.cp("-f", "bazel-bin/mace/tools/mace_run_static",
build_tmp_binary_dir)
......@@ -865,120 +865,3 @@ def packaging_lib(libmace_output_dir, project_name):
_fg=True)
six.print_("Packaging Done!\n")
return tar_package_path
################################
# benchmark
################################
def build_run_throughput_test(abi,
serialno,
vlog_level,
run_seconds,
merged_lib_file,
model_input_dir,
embed_model_data,
input_nodes,
output_nodes,
input_shapes,
output_shapes,
cpu_model_tag,
gpu_model_tag,
dsp_model_tag,
apu_model_tag,
phone_data_dir,
strip="always",
input_file_name="model_input"):
six.print_("* Build and run throughput_test")
model_tag_build_flag = ""
if cpu_model_tag:
model_tag_build_flag += "--copt=-DMACE_CPU_MODEL_TAG=%s " % \
cpu_model_tag
if gpu_model_tag:
model_tag_build_flag += "--copt=-DMACE_GPU_MODEL_TAG=%s " % \
gpu_model_tag
if dsp_model_tag:
model_tag_build_flag += "--copt=-DMACE_DSP_MODEL_TAG=%s " % \
dsp_model_tag
if apu_model_tag:
model_tag_build_flag += "--copt=-DMACE_APU_MODEL_TAG=%s " % \
apu_model_tag
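# Each tag that is set becomes a -D define, which in turn enables the
# matching #ifdef block in model_throughput_test.cc above.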
sh.cp("-f", merged_lib_file, "mace/benchmark/libmace_merged.a")
sh.bazel(
"build",
"-c",
"opt",
"--strip",
strip,
"--verbose_failures",
"//mace/benchmark:model_throughput_test",
"--crosstool_top=//external:android/crosstool",
"--host_crosstool_top=@bazel_tools//tools/cpp:toolchain",
"--cpu=%s" % abi,
"--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type",
"--copt=-O3",
"--define",
"neon=true",
"--define",
"openmp=true",
model_tag_build_flag,
_fg=True)
sh.rm("mace/benchmark/libmace_merged.a")
sh.adb("-s",
serialno,
"shell",
"mkdir",
"-p",
phone_data_dir)
adb_push("%s/%s_%s" % (model_input_dir, input_file_name,
",".join(input_nodes)),
phone_data_dir,
serialno)
adb_push("bazel-bin/mace/benchmark/model_throughput_test",
phone_data_dir,
serialno)
if not embed_model_data:
adb_push("codegen/models/%s/%s.data" % cpu_model_tag,
phone_data_dir,
serialno)
adb_push("codegen/models/%s/%s.data" % gpu_model_tag,
phone_data_dir,
serialno)
adb_push("codegen/models/%s/%s.data" % dsp_model_tag,
phone_data_dir,
serialno)
adb_push("third_party/nnlib/%s/libhexagon_controller.so" % abi,
phone_data_dir,
serialno)
if apu_model_tag:
adb_push("third_party/apu/libapu-frontend.so",
phone_data_dir,
serialno)
sh.adb(
"-s",
serialno,
"shell",
"LD_LIBRARY_PATH=%s" % phone_data_dir,
"MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level,
"MACE_RUN_PARAMETER_PATH=%s/mace_run.config" %
phone_data_dir,
"%s/model_throughput_test" % phone_data_dir,
"--input_node=%s" % ",".join(input_nodes),
"--output_node=%s" % ",".join(output_nodes),
"--input_shape=%s" % ":".join(input_shapes),
"--output_shape=%s" % ":".join(output_shapes),
"--input_file=%s/%s" % (phone_data_dir, input_file_name),
"--cpu_model_data_file=%s/%s.data" % (phone_data_dir,
cpu_model_tag),
"--gpu_model_data_file=%s/%s.data" % (phone_data_dir,
gpu_model_tag),
"--dsp_model_data_file=%s/%s.data" % (phone_data_dir,
dsp_model_tag),
"--run_seconds=%s" % run_seconds,
_fg=True)
six.print_("throughput_test done!\n")