diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index 7ab701ddd21b15b0bb88b258d9b2f85801b8dda2..a35b2a2eba94686af1eb36e8b1bc286b5cbadeb3 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -25,7 +25,9 @@
 #include "mace/core/macros.h"
 #include "mace/core/registry.h"
 #include "mace/core/types.h"
+#include "mace/core/runtime_failure_mock.h"
 #include "mace/public/mace.h"
+#include "mace/public/mace_runtime.h"
 
 namespace mace {
 
@@ -65,6 +67,11 @@ class CPUAllocator : public Allocator {
     if (nbytes == 0) {
       return MaceStatus::MACE_SUCCESS;
     }
+
+    if (ShouldMockRuntimeFailure()) {
+      return MaceStatus::MACE_OUT_OF_RESOURCES;
+    }
+
     void *data = nullptr;
 #if defined(__ANDROID__) || defined(__hexagon__)
     data = memalign(kMaceAlignment, nbytes);
diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index 36401a9bdbf88cf8dd6dcb13de9c335b74eb60bc..fa06d140fc49c793bf90c542426f3ad97bf7cba3 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -121,14 +121,16 @@ MaceEngine::Impl::Impl(DeviceType device_type)
 #ifdef MACE_ENABLE_HEXAGON
       , hexagon_controller_(nullptr)
 #endif
-{}
+{
+  LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion();
+}
 
 MaceStatus MaceEngine::Impl::Init(
     const NetDef *net_def,
     const std::vector<std::string> &input_nodes,
    const std::vector<std::string> &output_nodes,
     const unsigned char *model_data) {
-  LOG(INFO) << "MACE version: " << MaceVersion();
+  LOG(INFO) << "Initializing MaceEngine";
   // Set storage path for internal usage
   for (auto input_name : input_nodes) {
     ws_->CreateTensor(MakeString("mace_input_node_", input_name),
@@ -158,7 +160,7 @@ MaceStatus MaceEngine::Impl::Init(
   MACE_RETURN_IF_ERROR(ws_->LoadModelTensor(
       *net_def, device_type_, model_data));
 
-  // Init model 
+  // Init model
   auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_,
                        NetMode::INIT);
   MACE_RETURN_IF_ERROR(net->Run());
@@ -170,6 +172,7 @@
 }
 
 MaceEngine::Impl::~Impl() {
+  LOG(INFO) << "Destroying MaceEngine";
 #ifdef MACE_ENABLE_HEXAGON
   if (device_type_ == HEXAGON) {
     if (VLOG_IS_ON(2)) {
diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc
index a110ed19f30a55ac8bf43662d40da846a5d2cc9b..4d356d6f5cb99fd9e848de50740e48b3a8a5047c 100644
--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -45,6 +45,11 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
     return MaceStatus::MACE_SUCCESS;
   }
   VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
+
+  if (ShouldMockRuntimeFailure()) {
+    return MaceStatus::MACE_OUT_OF_RESOURCES;
+  }
+
   cl_int error;
   cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
                                       CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
@@ -68,6 +73,10 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
   VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", "
           << image_shape[1];
 
+  if (ShouldMockRuntimeFailure()) {
+    return MaceStatus::MACE_OUT_OF_RESOURCES;
+  }
+
   cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
   cl_int error;
 
diff --git a/mace/core/runtime_failure_mock.cc b/mace/core/runtime_failure_mock.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b28f51fe903d306c0bac91894300d223508b82da
--- /dev/null
+++ b/mace/core/runtime_failure_mock.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 Xiaomi, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdlib>
+#include <sstream>
+
+#include "mace/core/runtime_failure_mock.h"
+#include "mace/utils/logging.h"
+
+namespace mace {
+
+namespace {
+inline float GetRuntimeFailureRatioFromEnv() {
+  const char *env = getenv("MACE_RUNTIME_FAILURE_RATIO");
+  if (env == nullptr) {
+    return 0;
+  }
+  std::string env_str(env);
+  std::istringstream ss(env_str);
+  float ratio;
+  ss >> ratio;
+  return ratio;
+}
+}  // namespace
+
+bool ShouldMockRuntimeFailure() {
+  static unsigned int seed = time(NULL);
+  static float mock_runtime_failure_ratio = GetRuntimeFailureRatioFromEnv();
+  if (mock_runtime_failure_ratio > 1e-6) {
+    float random_ratio = rand_r(&seed) / static_cast<float>(RAND_MAX);
+    if (random_ratio < mock_runtime_failure_ratio) {
+      VLOG(0) << "Mock runtime failure.";
+      return true;
+    }
+  }
+
+  return false;
+}
+
+}  // namespace mace
diff --git a/mace/core/runtime_failure_mock.h b/mace/core/runtime_failure_mock.h
new file mode 100644
index 0000000000000000000000000000000000000000..178fa62f9c5c86652748de48d521a9267611a223
--- /dev/null
+++ b/mace/core/runtime_failure_mock.h
@@ -0,0 +1,24 @@
+// Copyright 2018 Xiaomi, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_RUNTIME_FAILURE_MOCK_H_
+#define MACE_CORE_RUNTIME_FAILURE_MOCK_H_
+
+namespace mace {
+
+bool ShouldMockRuntimeFailure();
+
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_FAILURE_MOCK_H_
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index 38f18af5a5dd425ee0888ea6450b7494c16ca594..17623ab4093152503b35855cb314d6a498083235 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -120,45 +120,48 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
   struct mallinfo curr = mallinfo();
   if (prev.arena != curr.arena) {
     LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
-              << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);
+              << ", diff: " << ((int64_t) curr.arena - (int64_t) prev.arena);
   }
   if (prev.ordblks != curr.ordblks) {
     LOG(INFO) << "Number of free chunks: " << curr.ordblks
-              << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
+              << ", diff: "
+              << ((int64_t) curr.ordblks - (int64_t) prev.ordblks);
   }
   if (prev.smblks != curr.smblks) {
     LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
-              << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
+              << ", diff: " << ((int64_t) curr.smblks - (int64_t) prev.smblks);
   }
   if (prev.hblks != curr.hblks) {
     LOG(INFO) << "Number of mmapped regions: " << curr.hblks
-              << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
+              << ", diff: " << ((int64_t) curr.hblks - (int64_t) prev.hblks);
   }
   if (prev.hblkhd != curr.hblkhd) {
     LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
-              << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
+              << ", diff: " << ((int64_t) curr.hblkhd - (int64_t) prev.hblkhd);
   }
   if (prev.usmblks != curr.usmblks) {
     LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
-              << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
+              << ", diff: "
+              << ((int64_t) curr.usmblks - (int64_t) prev.usmblks);
   }
   if (prev.fsmblks != curr.fsmblks) {
     LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
-              << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
+              << ", diff: "
+              << ((int64_t) curr.fsmblks - (int64_t) prev.fsmblks);
   }
   if (prev.uordblks != curr.uordblks) {
     LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
               << ", diff: "
-              << ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
+              << ((int64_t) curr.uordblks - (int64_t) prev.uordblks);
   }
   if (prev.fordblks != curr.fordblks) {
     LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
-              << ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
+              << ((int64_t) curr.fordblks - (int64_t) prev.fordblks);
   }
   if (prev.keepcost != curr.keepcost) {
     LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
               << ", diff: "
-              << ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
+              << ((int64_t) curr.keepcost - (int64_t) prev.keepcost);
   }
   return curr;
 }
@@ -227,39 +230,48 @@ bool RunModel(const std::string &model_name,
       new FileStorageFactory(kernel_file_path));
   SetKVStorageFactory(storage_factory);
 
-  std::shared_ptr<mace::MaceEngine> engine;
-  MaceStatus create_engine_status;
-  // Create Engine
-  int64_t t0 = NowMicros();
+  std::vector<unsigned char> model_pb_data;
   if (FLAGS_model_file != "") {
-    std::vector<unsigned char> model_pb_data;
     if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
       LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
     }
-    create_engine_status =
-        CreateMaceEngineFromProto(model_pb_data,
-                                  FLAGS_model_data_file,
-                                  input_names,
-                                  output_names,
-                                  device_type,
-                                  &engine);
-  } else {
-    create_engine_status =
-        CreateMaceEngineFromCode(model_name,
-                                 FLAGS_model_data_file,
-                                 input_names,
-                                 output_names,
-                                 device_type,
-                                 &engine);
   }
 
-  int64_t t1 = NowMicros();
-  if (create_engine_status != MaceStatus::MACE_SUCCESS) {
-    LOG(FATAL) << "Create engine error, please check the arguments";
-  }
+  std::shared_ptr<mace::MaceEngine> engine;
+  MaceStatus create_engine_status;
 
-  double init_millis = (t1 - t0) / 1000.0;
-  LOG(INFO) << "Total init latency: " << init_millis << " ms";
+  double init_millis;
+  while (true) {
+    // Create Engine
+    int64_t t0 = NowMicros();
+    if (FLAGS_model_file != "") {
+      create_engine_status =
+          CreateMaceEngineFromProto(model_pb_data,
+                                    FLAGS_model_data_file,
+                                    input_names,
+                                    output_names,
+                                    device_type,
+                                    &engine);
+    } else {
+      create_engine_status =
+          CreateMaceEngineFromCode(model_name,
+                                   FLAGS_model_data_file,
+                                   input_names,
+                                   output_names,
+                                   device_type,
+                                   &engine);
+    }
+    int64_t t1 = NowMicros();
+
+    if (create_engine_status != MACE_SUCCESS) {
+      LOG(ERROR) << "Create engine runtime error, retry ... errcode: "
+                 << create_engine_status;
+    } else {
+      init_millis = (t1 - t0) / 1000.0;
+      LOG(INFO) << "Total init latency: " << init_millis << " ms";
+      break;
+    }
+  }
 
   const size_t input_count = input_names.size();
   const size_t output_count = output_names.size();
@@ -297,26 +309,84 @@
   }
 
   LOG(INFO) << "Warm up run";
-  int64_t t3 = NowMicros();
-  engine->Run(inputs, &outputs);
-  int64_t t4 = NowMicros();
-  double warmup_millis = (t4 - t3) / 1000.0;
-  LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
+  double warmup_millis;
+  while (true) {
+    int64_t t3 = NowMicros();
+    MaceStatus warmup_status = engine->Run(inputs, &outputs);
+    if (warmup_status != MACE_SUCCESS) {
+      LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
+                 << warmup_status;
+      do {
+        if (FLAGS_model_file != "") {
+          create_engine_status =
+              CreateMaceEngineFromProto(model_pb_data,
+                                        FLAGS_model_data_file,
+                                        input_names,
+                                        output_names,
+                                        device_type,
+                                        &engine);
+        } else {
+          create_engine_status =
+              CreateMaceEngineFromCode(model_name,
+                                       FLAGS_model_data_file,
+                                       input_names,
+                                       output_names,
+                                       device_type,
+                                       &engine);
+        }
+      } while (create_engine_status != MACE_SUCCESS);
+    } else {
+      int64_t t4 = NowMicros();
+      warmup_millis = (t4 - t3) / 1000.0;
+      LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
+      break;
+    }
+  }
 
   double model_run_millis = -1;
   if (FLAGS_round > 0) {
     LOG(INFO) << "Run model";
-    int64_t t0 = NowMicros();
+    int64_t total_run_duration = 0;
     struct mallinfo prev = mallinfo();
     for (int i = 0; i < FLAGS_round; ++i) {
-      engine->Run(inputs, &outputs);
+      MaceStatus run_status;
+      while (true) {
+        int64_t t0 = NowMicros();
+        run_status = engine->Run(inputs, &outputs);
+        if (run_status != MACE_SUCCESS) {
+          LOG(ERROR) << "Mace run model runtime error, retry ... errcode: "
errcode: " + << run_status; + do { + if (FLAGS_model_file != "") { + create_engine_status = + CreateMaceEngineFromProto(model_pb_data, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); + } else { + create_engine_status = + CreateMaceEngineFromCode(model_name, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); + } + } while (create_engine_status != MACE_SUCCESS); + } else { + int64_t t1 = NowMicros(); + total_run_duration += (t1 - t0); + break; + } + } if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { LOG(INFO) << "=== check malloc info change #" << i << " ==="; prev = LogMallinfoChange(prev); } } - int64_t t1 = NowMicros(); - model_run_millis = (t1 - t0) / 1000.0 / FLAGS_round; + model_run_millis = total_run_duration / 1000.0 / FLAGS_round; LOG(INFO) << "Average latency: " << model_run_millis << " ms"; } diff --git a/tools/mace_tools.py b/tools/mace_tools.py index 7bc15c7dac93f03a4596b4b152707c9222a6e87e..f831ba51ef032e46126fae9c96a432c4a7553a17 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -171,7 +171,8 @@ def tuning_run(target_abi, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, - gpu_priority_hint=3): + gpu_priority_hint=3, + runtime_failure_ratio=0.0): stdout = sh_commands.tuning_run( target_abi, serialno, @@ -195,6 +196,7 @@ def tuning_run(target_abi, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint, + runtime_failure_ratio, valgrind=FLAGS.valgrind, valgrind_path=FLAGS.valgrind_path, valgrind_args=FLAGS.valgrind_args @@ -543,6 +545,11 @@ def parse_args(): type=str, default="half", help="[half | float].") + parser.add_argument( + "--runtime_failure_ratio", + type=float, + default=0.0, + help="[mock runtime failure ratio].") return parser.parse_known_args() @@ -632,6 +639,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ FLAGS.mode == "all": + if FLAGS.mode == "run": + runtime_failure_ratio = FLAGS.runtime_failure_ratio + else: + runtime_failure_ratio = 0.0 + tuning_run(target_abi, serialno, vlog_level, @@ -651,7 +663,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level, omp_num_threads=FLAGS.omp_num_threads, cpu_affinity_policy=FLAGS.cpu_affinity_policy, gpu_perf_hint=FLAGS.gpu_perf_hint, - gpu_priority_hint=FLAGS.gpu_priority_hint) + gpu_priority_hint=FLAGS.gpu_priority_hint, + runtime_failure_ratio=runtime_failure_ratio) if FLAGS.mode == "benchmark": gen_opencl_and_tuning_code( diff --git a/tools/sh_commands.py b/tools/sh_commands.py index da51944e046c870fa39cc7f9d6692c76dbd3d2e6..0c61c9d7c05b9c5622c2db324bdcccfcab3420f1 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -597,6 +597,7 @@ def tuning_run(abi, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, + runtime_failure_ratio=0.0, valgrind=False, valgrind_path="/data/local/tmp/valgrind", valgrind_args="", @@ -617,6 +618,7 @@ def tuning_run(abi, [ "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, "%s/mace_run" % model_output_dir, "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), @@ -678,6 +680,7 @@ def tuning_run(abi, "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir, "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir, "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, ] if valgrind: adb_cmd.extend([