diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index 7ab701ddd21b15b0bb88b258d9b2f85801b8dda2..a35b2a2eba94686af1eb36e8b1bc286b5cbadeb3 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -25,7 +25,9 @@
 #include "mace/core/macros.h"
 #include "mace/core/registry.h"
 #include "mace/core/types.h"
+#include "mace/core/runtime_failure_mock.h"
 #include "mace/public/mace.h"
+#include "mace/public/mace_runtime.h"
 
 namespace mace {
 
@@ -65,6 +67,11 @@ class CPUAllocator : public Allocator {
     if (nbytes == 0) {
       return MaceStatus::MACE_SUCCESS;
     }
+
+    if (ShouldMockRuntimeFailure()) {
+      return MaceStatus::MACE_OUT_OF_RESOURCES;
+    }
+
     void *data = nullptr;
 #if defined(__ANDROID__) || defined(__hexagon__)
     data = memalign(kMaceAlignment, nbytes);
diff --git a/mace/core/mace.cc b/mace/core/mace.cc
index 36401a9bdbf88cf8dd6dcb13de9c335b74eb60bc..fa06d140fc49c793bf90c542426f3ad97bf7cba3 100644
--- a/mace/core/mace.cc
+++ b/mace/core/mace.cc
@@ -121,14 +121,16 @@ MaceEngine::Impl::Impl(DeviceType device_type)
 #ifdef MACE_ENABLE_HEXAGON
       , hexagon_controller_(nullptr)
 #endif
-{}
+{
+  LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion();
+}
 
 MaceStatus MaceEngine::Impl::Init(
     const NetDef *net_def,
     const std::vector<std::string> &input_nodes,
    const std::vector<std::string> &output_nodes,
     const unsigned char *model_data) {
-  LOG(INFO) << "MACE version: " << MaceVersion();
+  LOG(INFO) << "Initializing MaceEngine";
   // Set storage path for internal usage
   for (auto input_name : input_nodes) {
     ws_->CreateTensor(MakeString("mace_input_node_", input_name),
@@ -158,7 +160,7 @@ MaceStatus MaceEngine::Impl::Init(
   MACE_RETURN_IF_ERROR(ws_->LoadModelTensor(
       *net_def, device_type_, model_data));
 
-  // Init model 
+  // Init model
   auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_,
                        NetMode::INIT);
   MACE_RETURN_IF_ERROR(net->Run());
@@ -170,6 +172,7 @@
 }
 
 MaceEngine::Impl::~Impl() {
+  LOG(INFO) << "Destroying MaceEngine";
 #ifdef MACE_ENABLE_HEXAGON
   if (device_type_ == HEXAGON) {
     if (VLOG_IS_ON(2)) {
diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc
index a110ed19f30a55ac8bf43662d40da846a5d2cc9b..4d356d6f5cb99fd9e848de50740e48b3a8a5047c 100644
--- a/mace/core/runtime/opencl/opencl_allocator.cc
+++ b/mace/core/runtime/opencl/opencl_allocator.cc
@@ -45,6 +45,11 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
     return MaceStatus::MACE_SUCCESS;
   }
   VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
+
+  if (ShouldMockRuntimeFailure()) {
+    return MaceStatus::MACE_OUT_OF_RESOURCES;
+  }
+
   cl_int error;
   cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
                                       CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
@@ -68,6 +73,10 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
   VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", "
           << image_shape[1];
 
+  if (ShouldMockRuntimeFailure()) {
+    return MaceStatus::MACE_OUT_OF_RESOURCES;
+  }
+
   cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
   cl_int error;
 
diff --git a/mace/core/runtime_failure_mock.cc b/mace/core/runtime_failure_mock.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b28f51fe903d306c0bac91894300d223508b82da
--- /dev/null
+++ b/mace/core/runtime_failure_mock.cc
@@ -0,0 +1,51 @@
+// Copyright 2018 Xiaomi, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdlib>
+#include <sstream>
+
+#include "mace/core/runtime_failure_mock.h"
+#include "mace/utils/logging.h"
+
+namespace mace {
+
+namespace {
+inline float GetRuntimeFailureRatioFromEnv() {
+  const char *env = getenv("MACE_RUNTIME_FAILURE_RATIO");
+  if (env == nullptr) {
+    return 0;
+  }
+  std::string env_str(env);
+  std::istringstream ss(env_str);
+  float ratio;
+  ss >> ratio;
+  return ratio;
+}
+}  // namespace
+
+bool ShouldMockRuntimeFailure() {
+  static unsigned int seed = time(NULL);
+  static float mock_runtime_failure_ratio = GetRuntimeFailureRatioFromEnv();
+  if (mock_runtime_failure_ratio > 1e-6) {
+    float random_ratio = rand_r(&seed) / static_cast<float>(RAND_MAX);
+    if (random_ratio < mock_runtime_failure_ratio) {
+      VLOG(0) << "Mock runtime failure.";
+      return true;
+    }
+  }
+
+  return false;
+}
+
+}  // namespace mace
diff --git a/mace/core/runtime_failure_mock.h b/mace/core/runtime_failure_mock.h
new file mode 100644
index 0000000000000000000000000000000000000000..178fa62f9c5c86652748de48d521a9267611a223
--- /dev/null
+++ b/mace/core/runtime_failure_mock.h
@@ -0,0 +1,24 @@
+// Copyright 2018 Xiaomi, Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_CORE_RUNTIME_FAILURE_MOCK_H_
+#define MACE_CORE_RUNTIME_FAILURE_MOCK_H_
+
+namespace mace {
+
+bool ShouldMockRuntimeFailure();
+
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_FAILURE_MOCK_H_
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index 38f18af5a5dd425ee0888ea6450b7494c16ca594..17623ab4093152503b35855cb314d6a498083235 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -120,45 +120,48 @@ struct mallinfo LogMallinfoChange(struct mallinfo prev) {
   struct mallinfo curr = mallinfo();
   if (prev.arena != curr.arena) {
     LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
-              << ", diff: " << ((int64_t)curr.arena - (int64_t)prev.arena);
+              << ", diff: " << ((int64_t) curr.arena - (int64_t) prev.arena);
   }
   if (prev.ordblks != curr.ordblks) {
     LOG(INFO) << "Number of free chunks: " << curr.ordblks
-              << ", diff: " << ((int64_t)curr.ordblks - (int64_t)prev.ordblks);
+              << ", diff: "
+              << ((int64_t) curr.ordblks - (int64_t) prev.ordblks);
   }
   if (prev.smblks != curr.smblks) {
     LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
-              << ", diff: " << ((int64_t)curr.smblks - (int64_t)prev.smblks);
+              << ", diff: " << ((int64_t) curr.smblks - (int64_t) prev.smblks);
   }
   if (prev.hblks != curr.hblks) {
     LOG(INFO) << "Number of mmapped regions: " << curr.hblks
-              << ", diff: " << ((int64_t)curr.hblks - (int64_t)prev.hblks);
+              << ", diff: " << ((int64_t) curr.hblks - (int64_t) prev.hblks);
   }
   if (prev.hblkhd != curr.hblkhd) {
     LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
-              << ", diff: " << ((int64_t)curr.hblkhd - (int64_t)prev.hblkhd);
+              << ", diff: " << ((int64_t) curr.hblkhd - (int64_t) prev.hblkhd);
   }
   if (prev.usmblks != curr.usmblks) {
     LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
-              << ", diff: " << ((int64_t)curr.usmblks - (int64_t)prev.usmblks);
+              << ", diff: "
+              << ((int64_t) curr.usmblks - (int64_t) prev.usmblks);
   }
   if (prev.fsmblks != curr.fsmblks) {
     LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
-              << ", diff: " << ((int64_t)curr.fsmblks - (int64_t)prev.fsmblks);
+              << ", diff: "
+              << ((int64_t) curr.fsmblks - (int64_t) prev.fsmblks);
   }
   if (prev.uordblks != curr.uordblks) {
     LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
               << ", diff: "
-              << ((int64_t)curr.uordblks - (int64_t)prev.uordblks);
+              << ((int64_t) curr.uordblks - (int64_t) prev.uordblks);
   }
   if (prev.fordblks != curr.fordblks) {
     LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
-              << ((int64_t)curr.fordblks - (int64_t)prev.fordblks);
+              << ((int64_t) curr.fordblks - (int64_t) prev.fordblks);
   }
   if (prev.keepcost != curr.keepcost) {
     LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
               << ", diff: "
-              << ((int64_t)curr.keepcost - (int64_t)prev.keepcost);
+              << ((int64_t) curr.keepcost - (int64_t) prev.keepcost);
   }
   return curr;
 }
@@ -227,39 +230,48 @@ bool RunModel(const std::string &model_name,
       new FileStorageFactory(kernel_file_path));
   SetKVStorageFactory(storage_factory);
 
-  std::shared_ptr<mace::MaceEngine> engine;
-  MaceStatus create_engine_status;
-  // Create Engine
-  int64_t t0 = NowMicros();
+  std::vector<unsigned char> model_pb_data;
   if (FLAGS_model_file != "") {
-    std::vector<unsigned char> model_pb_data;
     if (!mace::ReadBinaryFile(&model_pb_data, FLAGS_model_file)) {
       LOG(FATAL) << "Failed to read file: " << FLAGS_model_file;
     }
-    create_engine_status =
-        CreateMaceEngineFromProto(model_pb_data,
-                                  FLAGS_model_data_file,
-                                  input_names,
-                                  output_names,
-                                  device_type,
-                                  &engine);
-  } else {
-    create_engine_status =
-        CreateMaceEngineFromCode(model_name,
-                                 FLAGS_model_data_file,
-                                 input_names,
-                                 output_names,
-                                 device_type,
-                                 &engine);
   }
 
-  int64_t t1 = NowMicros();
-  if (create_engine_status != MaceStatus::MACE_SUCCESS) {
-    LOG(FATAL) << "Create engine error, please check the arguments";
-  }
+  std::shared_ptr<mace::MaceEngine> engine;
+  MaceStatus create_engine_status;
 
-  double init_millis = (t1 - t0) / 1000.0;
-  LOG(INFO) << "Total init latency: " << init_millis << " ms";
+  double init_millis;
+  while (true) {
+    // Create Engine
+    int64_t t0 = NowMicros();
+    if (FLAGS_model_file != "") {
+      create_engine_status =
+          CreateMaceEngineFromProto(model_pb_data,
+                                    FLAGS_model_data_file,
+                                    input_names,
+                                    output_names,
+                                    device_type,
+                                    &engine);
+    } else {
+      create_engine_status =
+          CreateMaceEngineFromCode(model_name,
+                                   FLAGS_model_data_file,
+                                   input_names,
+                                   output_names,
+                                   device_type,
+                                   &engine);
+    }
+    int64_t t1 = NowMicros();
+
+    if (create_engine_status != MACE_SUCCESS) {
+      LOG(ERROR) << "Create engine runtime error, retry ... errcode: "
+                 << create_engine_status;
+    } else {
+      init_millis = (t1 - t0) / 1000.0;
+      LOG(INFO) << "Total init latency: " << init_millis << " ms";
+      break;
+    }
+  }
 
   const size_t input_count = input_names.size();
   const size_t output_count = output_names.size();
@@ -297,26 +309,84 @@
   }
 
   LOG(INFO) << "Warm up run";
-  int64_t t3 = NowMicros();
-  engine->Run(inputs, &outputs);
-  int64_t t4 = NowMicros();
-  double warmup_millis = (t4 - t3) / 1000.0;
-  LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
+  double warmup_millis;
+  while (true) {
+    int64_t t3 = NowMicros();
+    MaceStatus warmup_status = engine->Run(inputs, &outputs);
+    if (warmup_status != MACE_SUCCESS) {
+      LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
+                 << warmup_status;
+      do {
+        if (FLAGS_model_file != "") {
+          create_engine_status =
+              CreateMaceEngineFromProto(model_pb_data,
+                                        FLAGS_model_data_file,
+                                        input_names,
+                                        output_names,
+                                        device_type,
+                                        &engine);
+        } else {
+          create_engine_status =
+              CreateMaceEngineFromCode(model_name,
+                                       FLAGS_model_data_file,
+                                       input_names,
+                                       output_names,
+                                       device_type,
+                                       &engine);
+        }
+      } while (create_engine_status != MACE_SUCCESS);
+    } else {
+      int64_t t4 = NowMicros();
+      warmup_millis = (t4 - t3) / 1000.0;
+      LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
+      break;
+    }
+  }
 
   double model_run_millis = -1;
   if (FLAGS_round > 0) {
     LOG(INFO) << "Run model";
-    int64_t t0 = NowMicros();
+    int64_t total_run_duration = 0;
     struct mallinfo prev = mallinfo();
     for (int i = 0; i < FLAGS_round; ++i) {
-      engine->Run(inputs, &outputs);
+      MaceStatus run_status;
+      while (true) {
+        int64_t t0 = NowMicros();
+        run_status = engine->Run(inputs, &outputs);
+        if (run_status != MACE_SUCCESS) {
+          LOG(ERROR) << "Mace run model runtime error, retry ... errcode: "
errcode: " + << run_status; + do { + if (FLAGS_model_file != "") { + create_engine_status = + CreateMaceEngineFromProto(model_pb_data, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); + } else { + create_engine_status = + CreateMaceEngineFromCode(model_name, + FLAGS_model_data_file, + input_names, + output_names, + device_type, + &engine); + } + } while (create_engine_status != MACE_SUCCESS); + } else { + int64_t t1 = NowMicros(); + total_run_duration += (t1 - t0); + break; + } + } if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { LOG(INFO) << "=== check malloc info change #" << i << " ==="; prev = LogMallinfoChange(prev); } } - int64_t t1 = NowMicros(); - model_run_millis = (t1 - t0) / 1000.0 / FLAGS_round; + model_run_millis = total_run_duration / 1000.0 / FLAGS_round; LOG(INFO) << "Average latency: " << model_run_millis << " ms"; } diff --git a/tools/mace_tools.py b/tools/mace_tools.py index 7bc15c7dac93f03a4596b4b152707c9222a6e87e..f831ba51ef032e46126fae9c96a432c4a7553a17 100644 --- a/tools/mace_tools.py +++ b/tools/mace_tools.py @@ -171,7 +171,8 @@ def tuning_run(target_abi, omp_num_threads=-1, cpu_affinity_policy=1, gpu_perf_hint=3, - gpu_priority_hint=3): + gpu_priority_hint=3, + runtime_failure_ratio=0.0): stdout = sh_commands.tuning_run( target_abi, serialno, @@ -195,6 +196,7 @@ def tuning_run(target_abi, cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint, + runtime_failure_ratio, valgrind=FLAGS.valgrind, valgrind_path=FLAGS.valgrind_path, valgrind_args=FLAGS.valgrind_args @@ -543,6 +545,11 @@ def parse_args(): type=str, default="half", help="[half | float].") + parser.add_argument( + "--runtime_failure_ratio", + type=float, + default=0.0, + help="[mock runtime failure ratio].") return parser.parse_known_args() @@ -632,6 +639,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level, if FLAGS.mode == "run" or FLAGS.mode == "validate" or \ FLAGS.mode == "all": + if FLAGS.mode == "run": + runtime_failure_ratio = FLAGS.runtime_failure_ratio + else: + runtime_failure_ratio = 0.0 + tuning_run(target_abi, serialno, vlog_level, @@ -651,7 +663,8 @@ def process_models(project_name, configs, embed_model_data, vlog_level, omp_num_threads=FLAGS.omp_num_threads, cpu_affinity_policy=FLAGS.cpu_affinity_policy, gpu_perf_hint=FLAGS.gpu_perf_hint, - gpu_priority_hint=FLAGS.gpu_priority_hint) + gpu_priority_hint=FLAGS.gpu_priority_hint, + runtime_failure_ratio=runtime_failure_ratio) if FLAGS.mode == "benchmark": gen_opencl_and_tuning_code( diff --git a/tools/sh_commands.py b/tools/sh_commands.py index da51944e046c870fa39cc7f9d6692c76dbd3d2e6..0c61c9d7c05b9c5622c2db324bdcccfcab3420f1 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -597,6 +597,7 @@ def tuning_run(abi, cpu_affinity_policy=1, gpu_perf_hint=3, gpu_priority_hint=3, + runtime_failure_ratio=0.0, valgrind=False, valgrind_path="/data/local/tmp/valgrind", valgrind_args="", @@ -617,6 +618,7 @@ def tuning_run(abi, [ "env", "MACE_CPP_MIN_VLOG_LEVEL=%s" % vlog_level, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, "%s/mace_run" % model_output_dir, "--model_name=%s" % model_tag, "--input_node=%s" % ",".join(input_nodes), @@ -678,6 +680,7 @@ def tuning_run(abi, "MACE_RUN_PARAMETER_PATH=%s/mace_run.config" % phone_data_dir, "MACE_INTERNAL_STORAGE_PATH=%s" % internal_storage_dir, "MACE_LIMIT_OPENCL_KERNEL_TIME=%s" % limit_opencl_kernel_time, + "MACE_RUNTIME_FAILURE_RATIO=%f" % runtime_failure_ratio, ] if valgrind: adb_cmd.extend([