diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bde098d7ac59547c02a9e44f6763b5d268db3de6..c7730f99dbe13a7f7eb69de246ba24fe62f8feba 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -147,6 +147,7 @@ python_tools_tests:
       python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
       python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --target_abis=armeabi-v7a,armhf --validate --model_graph_format=file --model_data_format=file || exit 1;
       python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,armhf --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --target_abis=armeabi-v7a,armhf --round=1 --validate_all_layers --model_graph_format=file --model_data_format=file || exit 1;
 
 model_tests:
   stage: model_tests
@@ -189,6 +190,7 @@ quantization_tests:
       python tools/converter.py convert --config=${CONF_FILE} --model_graph_format=file --model_data_format=file || exit 1;
       python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
       python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate --model_graph_format=file --model_data_format=file || exit 1;
+      python tools/converter.py run --config=${CONF_FILE} --device_yml=${DEVICE_CONF_FILE} --example --round=1 --validate_all_layers --model_graph_format=file --model_data_format=file || exit 1;
       done
     - rm -rf mace-models
 
diff --git a/docs/development/how_to_debug.rst b/docs/development/how_to_debug.rst
index b0e2131fa6a6d48b69b444e9578f8a9e81bbba93..b55ed94ffdac90dee0d99681e96a7d5e2cfd001f 100644
--- a/docs/development/how_to_debug.rst
+++ b/docs/development/how_to_debug.rst
@@ -34,6 +34,14 @@ It is usually used to measure classification accuracy. The higher the better.
 where :math:`X` is expected output (from training platform) whereas :math:`X'` is actual output (from MACE) .
 
 
+You can validate these metrics by passing `--validate` when running the model.
+
+    .. code:: sh
+
+        # Validate the correctness by comparing the results against the
+        # original model and framework
+        python tools/converter.py run --config=/path/to/your/model_deployment_file.yml --validate
+
 MACE automatically validate these metrics by running models with synthetic inputs.
 If you want to specify input data to use, you can add an option in yaml config under 'subgraphs', e.g.,
 
@@ -53,13 +61,22 @@ If you want to specify input data to use, you can add an option in yaml config u
 	          - MobilenetV1/Predictions/Reshape_1
 	        output_shapes:
 	          - 1,1001
+	        check_tensors:
+	          - MobilenetV1/Logits/Conv2d_1c_1x1/BiasAdd:0
+	        check_shapes:
+	          - 1,1,1,1001
 	        validation_inputs_data:
 	          - https://cnbj1.fds.api.xiaomi.com/mace/inputs/dog.npy
 
-
 If model's output is suspected to be incorrect, it might be useful to debug your model layer by layer by specifying an intermediate layer as output,
 or use binary search method until suspicious layer is found.
 
+You can also specify `--validate_all_layers` to validate all the layers of the model (excluding some layers changed by MACE, e.g., BatchToSpaceND).
+This option currently supports only TensorFlow models and requires the model graph and data formats to be `file`. You can find the validation results in `builds/your_model/model/runtime_in_yaml/log.csv`.
+
+For a quantized model, if you want to check a single layer, you can add `check_tensors` and `check_shapes` as in the yaml above. Only outputs of
+MACE ops can be specified.
+
 
 Debug memory usage
 --------------------------
diff --git a/mace/python/tools/BUILD b/mace/python/tools/BUILD
index a5a35397f0d1eacdeadccfefc02f75645dc9fbf3..547243b70f72963be6b41ac53aa798a56b072a8c 100644
--- a/mace/python/tools/BUILD
+++ b/mace/python/tools/BUILD
@@ -55,3 +55,15 @@ py_binary(
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
 )
+
+py_binary(
+    name = "layers_validate",
+    srcs = [
+        "layers_validate.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":converter_lib",
+        ":model_saver_lib",
+    ],
+)
diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py
index ea99e8f815314d62dfcd09c9580fc99e6bd688e0..591f055b2408c06a74e12a8152618a3359abf9cd 100644
--- a/mace/python/tools/converter_tool/hexagon_converter.py
+++ b/mace/python/tools/converter_tool/hexagon_converter.py
@@ -14,6 +14,7 @@
 
 import copy
 import numpy as np
+from enum import Enum
 from operator import mul
 
 from mace.proto import mace_pb2
@@ -28,22 +29,44 @@ from mace.python.tools.convert_util import mace_check
 from mace.python.tools import graph_util
 
 
+HexagonSupportedOps = [
+    'BatchToSpaceND_8',
+    'DepthwiseSupernode_8x8p32to8',
+    'DequantizeOUTPUT_8tof',
+    'QuantizedAdd_8p8to8',
+    'QuantizedAvgPool_8',
+    'QuantizedConcat_8',
+    'QuantizedMaxPool_8',
+    'QuantizedResizeBilinear_8',
+    'QuantizedSoftmax_8',
+    'QuantizeINPUT_f_to_8',
+    'SpaceToBatchND_8',
+    'Supernode_8x8p32to8',
+    'Nop',
+]
+# Str-valued enum: every member's name and value equal the op type string.
+HexagonOp = Enum('HexagonOp', [(op, op) for op in HexagonSupportedOps],
+                 type=str)
+
+
 class HexagonOps(object):
     def __init__(self):
         self.hexagon_ops = {
-            'Quantize': 'QuantizeINPUT_f_to_8',
-            'Dequantize': 'DequantizeOUTPUT_8tof',
-            'Concat': 'QuantizedConcat_8',
-            'Conv2D': 'Supernode_8x8p32to8',
-            'DepthwiseConv2d': 'DepthwiseSupernode_8x8p32to8',
-            'ResizeBilinear': 'QuantizedResizeBilinear_8',
-            'SpaceToBatchND': 'SpaceToBatchND_8',
-            'BatchToSpaceND': 'BatchToSpaceND_8',
-            'Softmax': 'QuantizedSoftmax_8',
-            'Eltwise': 'Eltwise',
-            'Pooling': 'Pooling',
-            'Identity': 'Nop',
-            'Squeeze': 'Nop',
+            MaceOp.BatchToSpaceND.name: HexagonOp.BatchToSpaceND_8.name,
+            MaceOp.Concat.name: HexagonOp.QuantizedConcat_8.name,
+            MaceOp.Conv2D.name: HexagonOp.Supernode_8x8p32to8.name,
+            MaceOp.DepthwiseConv2d.name:
+                HexagonOp.DepthwiseSupernode_8x8p32to8.name,
+            MaceOp.Dequantize.name: HexagonOp.DequantizeOUTPUT_8tof.name,
+            MaceOp.Eltwise.name: [HexagonOp.QuantizedAdd_8p8to8.name],
+            MaceOp.Identity.name: HexagonOp.Nop.name,
+            MaceOp.Quantize.name: HexagonOp.QuantizeINPUT_f_to_8.name,
+            MaceOp.Pooling.name: [HexagonOp.QuantizedAvgPool_8.name,
+                                  HexagonOp.QuantizedMaxPool_8.name],
+            MaceOp.ResizeBilinear.name:
+                HexagonOp.QuantizedResizeBilinear_8.name,
+            MaceOp.SpaceToBatchND.name: HexagonOp.SpaceToBatchND_8.name,
+            MaceOp.Softmax.name: HexagonOp.QuantizedSoftmax_8.name,
         }
 
     def has_op(self, tf_op):
@@ -116,7 +139,6 @@ class HexagonConverter(base_converter.ConverterInterface):
         for op in self._model.op:
             if not self._hexagon_ops.has_op(op.type):
                 raise Exception('Unsupported op: ', op)
-            print('Op: %s (%s)' % (op.name, op.type))
             for i in range(len(op.input)):
                 if ':' not in op.input[i]:
                     node_name = op.input[i]
@@ -250,14 +272,14 @@ class HexagonConverter(base_converter.ConverterInterface):
                     and ConverterUtil.get_arg(
                         op, MaceKeyword.mace_element_type_str).i
                     == EltwiseType.SUM.value):
-                op.type = 'QuantizedAdd_8p8to8'
+                op.type = HexagonOp.QuantizedAdd_8p8to8.name
             elif op.type == MaceOp.Pooling.name:
                 pooling_type_arg = ConverterUtil.get_arg(
                     op, MaceKeyword.mace_pooling_type_str)
                 if PoolingType(pooling_type_arg.i) == PoolingType.AVG:
-                    op.type = 'QuantizedAvgPool_8'
+                    op.type = HexagonOp.QuantizedAvgPool_8.name
                 else:
-                    op.type = 'QuantizedMaxPool_8'
+                    op.type = HexagonOp.QuantizedMaxPool_8.name
             else:
                 op.type = self._hexagon_ops.map_nn_op(op.type)
 
@@ -342,8 +364,10 @@ class HexagonConverter(base_converter.ConverterInterface):
             tensor_op, port = get_op_and_port_from_tensor(tensor.name)
             node_id_map[tensor_op] = tensor.node_id
 
+        print("Hexagon op:")
         for op in self._model.op:
             op.node_id = node_id_counter
+            print('Op: %s (%s, %d)' % (op.name, op.type, op.node_id))
             node_id_counter += 1
             node_id_map[op.name] = op.node_id
             for ipt in op.input:
diff --git a/mace/python/tools/layers_validate.py b/mace/python/tools/layers_validate.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb3458ef1c1e113fa181882ba085dc5a95fad80
--- /dev/null
+++ b/mace/python/tools/layers_validate.py
@@ -0,0 +1,171 @@
+# Copyright 2018 Xiaomi, Inc.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import copy
+import os
+import sys
+import yaml
+
+from mace.proto import mace_pb2
+from mace.python.tools.converter_tool.base_converter import ConverterUtil
+from mace.python.tools.converter_tool.base_converter import MaceKeyword
+from mace.python.tools.converter_tool.base_converter import MaceOp
+from mace.python.tools.converter_tool.hexagon_converter import HexagonOp
+from mace.python.tools.convert_util import mace_check
+from mace.python.tools.model_saver import save_model_to_proto
+
+
+def normalize_op_name(name):
+    return '_'.join(name.replace(':', '/').split('/'))  # '/' and ':' -> '_'
+
+
+def main(unused_args):
+    """Write one truncated model per retained op, indexed in outputs.yml."""
+    mace_check(os.path.isfile(FLAGS.model_file),
+               "Input graph file '" + FLAGS.model_file + "' does not exist!")
+    mace_check(os.path.isdir(FLAGS.output_dir),
+               "Output directory '" + FLAGS.output_dir + "' does not exist!")
+    net_def = mace_pb2.NetDef()
+    with open(FLAGS.model_file, "rb") as f:
+        net_def.ParseFromString(f.read())
+
+    quantize_flag = ConverterUtil.get_arg(
+        net_def, MaceKeyword.mace_quantize_flag_arg_str)
+    quantize_flag = False if quantize_flag is None else quantize_flag.i == 1
+    hexagon_flag = False
+    index, end_index = 0, len(net_def.op)
+    if quantize_flag:
+        while index < end_index:
+            # skip the leading quantize ops
+            if net_def.op[index].type == MaceOp.Quantize.name or \
+                    net_def.op[index].type == \
+                    HexagonOp.QuantizeINPUT_f_to_8.name:
+                index += 1
+            # skip the trailing dequantize ops
+            elif net_def.op[end_index - 1].type == MaceOp.Dequantize.name or \
+                    net_def.op[end_index - 1].type == \
+                    HexagonOp.DequantizeOUTPUT_8tof.name:
+                end_index -= 1
+            else:
+                break
+        mace_check(0 < index < end_index < len(net_def.op),
+                   "Wrong number of op quantize(%d) or dequantize(%d)." %
+                   (index, len(net_def.op) - end_index))
+        if net_def.op[-1].type == HexagonOp.DequantizeOUTPUT_8tof.name:
+            hexagon_flag = True
+    # leave out the last remaining op (the model's original output)
+    end_index -= 1
+
+    data_format = net_def.output_info[0].data_format
+    output_configs = {"subgraphs": []}
+    while index < end_index:
+        # skip BatchToSpaceND and the op before it (graph changed by MACE)
+        if net_def.op[index].type == MaceOp.BatchToSpaceND.name or \
+                net_def.op[index].type == HexagonOp.BatchToSpaceND_8.name or \
+                (index + 1 < end_index and
+                 (net_def.op[index + 1].type == MaceOp.BatchToSpaceND.name or
+                  net_def.op[index + 1].type == HexagonOp.BatchToSpaceND_8.name)):  # noqa
+            index += 1
+            continue
+        net = copy.deepcopy(net_def)
+        if hexagon_flag:
+            # reuse dequantize op and its min/max tensor's node_id
+            del net.op[index+1:end_index+1]
+        else:
+            del net.op[index+1:]
+        del net.output_info[:]
+        op = net.op[index]
+        index += 1
+
+        output_tensors = []
+        output_shapes = []
+        op_name = op.name
+        if quantize_flag:
+            op.name = MaceKeyword.mace_output_node_name + '_' + op.name
+        if hexagon_flag:
+            mace_check(len(op.output) == 1,
+                       "Only supports number of outputs of Hexagon op be 1.")
+        for i in range(len(op.output)):
+            output_tensors.append(str(op.output[i]))
+            output_shapes.append(
+                ",".join([str(dim) for dim in op.output_shape[i].dims]))
+            # expose this op's output as an output of the truncated model
+            output_info = net.output_info.add()
+            output_info.name = op.output[i]
+            output_info.data_format = data_format
+            output_info.dims.extend(op.output_shape[i].dims)
+            output_info.data_type = mace_pb2.DT_FLOAT
+            # rename the op's output and dequantize it back to the old name
+            if quantize_flag:
+                output_name = op.output[i]
+                new_output_name = \
+                    MaceKeyword.mace_output_node_name + '_' + op.output[i]
+                op.output[i] = new_output_name
+                if not hexagon_flag:
+                    dequantize_op = net.op.add()
+                    dequantize_op.name = normalize_op_name(output_name)
+                    dequantize_op.type = MaceOp.Dequantize.name
+                    dequantize_op.input.append(new_output_name)
+                    dequantize_op.output.append(output_name)
+                    output_shape = dequantize_op.output_shape.add()
+                    output_shape.dims.extend(op.output_shape[i].dims)
+                    dequantize_op.output_type.append(mace_pb2.DT_FLOAT)
+                    ConverterUtil.add_data_type_arg(dequantize_op,
+                                                    mace_pb2.DT_UINT8)
+                else:
+                    dequantize_op = net.op[-1]
+                    dequantize_op.name = normalize_op_name(output_name)
+                    del dequantize_op.input[:]
+                    del dequantize_op.output[:]
+                    dequantize_op.input.append(new_output_name)
+                    dequantize_op.output.append(output_name)
+                    input_min = new_output_name[:-1] + '1'
+                    input_max = new_output_name[:-1] + '2'
+                    dequantize_op.input.extend([input_min, input_max])
+                    dequantize_op.node_input[0].node_id = op.node_id
+                    dequantize_op.node_input[1].node_id = op.node_id
+                    dequantize_op.node_input[2].node_id = op.node_id
+
+        model_path = save_model_to_proto(net, normalize_op_name(op_name),
+                                         FLAGS.output_dir)
+        output_config = {"model_file_path": str(model_path),
+                         "output_tensors": output_tensors,
+                         "output_shapes": output_shapes}
+        output_configs["subgraphs"].append(output_config)
+
+    output_configs_path = os.path.join(FLAGS.output_dir, "outputs.yml")
+    with open(output_configs_path, "w") as f:
+        yaml.dump(output_configs, f, default_flow_style=False)
+
+
+def parse_args():
+    """Parse --model_file and --output_dir from the command line.
+
+    Returns the (namespace, unrecognized arguments) pair produced by
+    argparse's parse_known_args().
+    """
+    string_options = (
+        ("--model_file", "pb file to load."),
+        ("--output_dir", "Directory to save the output graph to."),
+    )
+    cli = argparse.ArgumentParser()
+    for flag, description in string_options:
+        cli.add_argument(flag, type=str, default="", help=description)
+    return cli.parse_known_args()
+
+
+if __name__ == '__main__':
+    FLAGS, unparsed = parse_args()
+    main(unused_args=[sys.argv[0]] + unparsed)
diff --git a/mace/python/tools/model_saver.py b/mace/python/tools/model_saver.py
index 217b25b6cf82ca398d6c50c544a55bcd9f5ed63c..c854c307ebca8c439990b5b3f66bed16782900c2 100644
--- a/mace/python/tools/model_saver.py
+++ b/mace/python/tools/model_saver.py
@@ -190,6 +190,8 @@ def save_model_to_proto(net_def, model_tag, output_dir):
     with open(proto_file_path + '_txt', "w") as f:
         f.write(str(net_def))
 
+    return proto_file_path
+
 
 def save_model_to_code(net_def, model_tag, device,
                        template_dir, output_dir, embed_model_data,
diff --git a/tools/common.py b/tools/common.py
index fff51e4d080ae27f119acae1bfc80a0e7a90f526..3775e86c118209c2758db96459f63cc8c1816f43 100644
--- a/tools/common.py
+++ b/tools/common.py
@@ -14,6 +14,7 @@
 
 import enum
 import hashlib
+import inspect
 import re
 import os
 
@@ -34,6 +35,12 @@ class CMDColors:
     UNDERLINE = '\033[4m'
 
 
+def get_frame_info(level=2):
+    # stack()[0] is this function itself; level 2 is its caller's caller
+    _, filename, lineno = inspect.stack()[level][:3]
+    return '%s:%d: ' % (filename, lineno)
+
+
 class MaceLogger:
     @staticmethod
     def header(message):
@@ -45,22 +52,25 @@ class MaceLogger:
 
     @staticmethod
     def info(message):
-        six.print_(message)
+        six.print_(get_frame_info() + message)
 
     @staticmethod
     def warning(message):
-        six.print_(CMDColors.YELLOW + 'WARNING:' + message + CMDColors.ENDC)
+        six.print_(CMDColors.YELLOW + 'WARNING:' + get_frame_info() + message
+                   + CMDColors.ENDC)
 
     @staticmethod
-    def error(module, message):
-        six.print_(CMDColors.RED + 'ERROR: [' + module + '] '
+    def error(module, message, location_info=""):
+        if not location_info:
+            location_info = get_frame_info()
+        six.print_(CMDColors.RED + 'ERROR: [' + module + '] ' + location_info
                    + message + CMDColors.ENDC)
         exit(1)
 
 
 def mace_check(condition, module, message):
     if not condition:
-        MaceLogger.error(module, message)
+        MaceLogger.error(module, message, get_frame_info())  # call-site info
 
 
 ################################
diff --git a/tools/converter.py b/tools/converter.py
index fa67ea919e3421e0f3554c1dc53743c3d5c5d7b0..60576f9fb1c4cf9d460bb636e612df781eaf37db 100644
--- a/tools/converter.py
+++ b/tools/converter.py
@@ -1177,6 +1177,11 @@ def parse_args():
         action="store_true",
         help="whether to verify the results are consistent with "
              "the frameworks.")
+    run.add_argument(
+        "--validate_all_layers",
+        action="store_true",
+        help="whether to verify the results of all layers are "
+             "consistent with the frameworks.")
     run.add_argument(
         "--caffe_env",
         type=str_to_caffe_env_type,
diff --git a/tools/device.py b/tools/device.py
index 8c78a09f8195ca8f577d9f1ed016f065fc50aa65..504bf54164e2a7fdea73bc08472d63caba715a5e 100644
--- a/tools/device.py
+++ b/tools/device.py
@@ -179,6 +179,7 @@ class DeviceWrapper:
                    address_sanitizer=False,
                    link_dynamic=False,
                    quantize_stat=False,
+                   layers_validate_file="",
                    ):
         six.print_("* Run '%s' with round=%s, restart_round=%s, tuning=%s, "
                    "out_of_range_check=%s, omp_num_threads=%s, "
@@ -189,7 +190,8 @@ class DeviceWrapper:
                     cpu_affinity_policy, gpu_perf_hint, gpu_priority_hint))
         mace_model_path = ""
         if model_graph_format == ModelFormat.file:
-            mace_model_path = "%s/%s.pb" % (mace_model_dir, model_tag)
+            mace_model_path = layers_validate_file if layers_validate_file \
+                else "%s/%s.pb" % (mace_model_dir, model_tag)
         if self.system == SystemType.host:
             libmace_dynamic_lib_path = \
                 os.path.dirname(libmace_dynamic_library_path)
@@ -413,6 +415,28 @@ class DeviceWrapper:
 
         six.print_('Tuning done! \n')
 
+    @staticmethod
+    def get_layers(model_dir, model_name):
+        """Run layers_validate on the model; return its subgraph configs."""
+        sh_commands.bazel_build_common("//mace/python/tools:layers_validate")
+        model_file = "%s/%s.pb" % (model_dir, model_name)
+        output_dir = "%s/output_models/" % model_dir
+        if os.path.exists(output_dir):
+            sh.rm('-rf', output_dir)
+        os.makedirs(output_dir)
+        sh.python("bazel-bin/mace/python/tools/layers_validate",
+                  "-u",
+                  "--model_file=%s" % model_file,
+                  "--output_dir=%s" % output_dir,
+                  _fg=True)
+
+        # outputs.yml holds only plain dicts/lists/strings, so safe_load is
+        # enough; yaml.load without a Loader fails on PyYAML >= 6.
+        with open(os.path.join(output_dir, "outputs.yml")) as f:
+            output_configs = yaml.safe_load(f)
+
+        return output_configs[YAMLKeyword.subgraphs]
+
     def run_specify_abi(self, flags, configs, target_abi):
         if target_abi not in self.target_abis:
             six.print_('There is no device with soc: %s abi: %s' %
@@ -527,81 +551,111 @@ class DeviceWrapper:
                 else:
                     output_nodes = subgraphs[0][YAMLKeyword.check_tensors]
                     output_shapes = subgraphs[0][YAMLKeyword.check_shapes]
-                run_output = self.tuning_run(
-                    abi=target_abi,
-                    target_dir=build_tmp_binary_dir,
-                    target_name=target_name,
-                    vlog_level=flags.vlog_level,
-                    embed_model_data=embed_model_data,
-                    model_output_dir=model_output_dir,
-                    input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-                    output_nodes=output_nodes,
-                    input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-                    output_shapes=output_shapes,
-                    mace_model_dir=mace_model_dir,
-                    model_tag=model_name,
-                    device_type=device_type,
-                    running_round=flags.round,
-                    restart_round=flags.restart_round,
-                    limit_opencl_kernel_time=model_config[
-                        YAMLKeyword.limit_opencl_kernel_time],
-                    tuning=False,
-                    out_of_range_check=flags.gpu_out_of_range_check,
-                    model_graph_format=configs[YAMLKeyword.model_graph_format],
-                    omp_num_threads=flags.omp_num_threads,
-                    cpu_affinity_policy=flags.cpu_affinity_policy,
-                    gpu_perf_hint=flags.gpu_perf_hint,
-                    gpu_priority_hint=flags.gpu_priority_hint,
-                    runtime_failure_ratio=flags.runtime_failure_ratio,
-                    address_sanitizer=flags.address_sanitizer,
-                    opencl_binary_file=model_opencl_output_bin_path,
-                    opencl_parameter_file=model_opencl_parameter_path,
-                    libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
-                    link_dynamic=link_dynamic,
-                    quantize_stat=flags.quantize_stat,
-                    input_dir=flags.input_dir,
-                    output_dir=flags.output_dir,
-                )
-                if flags.validate:
-                    model_file_path, weight_file_path = get_model_files(
-                        model_config[YAMLKeyword.model_file_path],
-                        model_config[YAMLKeyword.model_sha256_checksum],
-                        BUILD_DOWNLOADS_DIR,
-                        model_config[YAMLKeyword.weight_file_path],
-                        model_config[YAMLKeyword.weight_sha256_checksum]
-                    )
-
-                    validate_type = device_type
-                    if model_config[YAMLKeyword.quantize] == 1:
-                        validate_type = device_type + '_QUANTIZE'
-                    sh_commands.validate_model(
+                output_configs = []
+                log_file = ""
+                if flags.validate_all_layers:
+                    mace_check(configs[YAMLKeyword.model_graph_format] ==
+                               ModelFormat.file and
+                               configs[YAMLKeyword.model_data_format] ==
+                               ModelFormat.file, "Device",
+                               "'--validate_all_layers' only supports model format 'file'.")  # noqa
+                    output_configs = \
+                        self.get_layers(mace_model_dir, model_name)
+                    log_dir = mace_model_dir + "/" + runtime
+                    if os.path.exists(log_dir):
+                        sh.rm('-rf', log_dir)
+                    os.makedirs(log_dir)
+                    log_file = log_dir + "/log.csv"
+                model_path = "%s/%s.pb" % (mace_model_dir, model_name)
+                output_config = {YAMLKeyword.model_file_path: model_path,
+                                 YAMLKeyword.output_tensors: output_nodes,
+                                 YAMLKeyword.output_shapes: output_shapes}
+                output_configs.append(output_config)
+                for output_config in output_configs:
+                    run_output = self.tuning_run(
                         abi=target_abi,
-                        device=self,
-                        model_file_path=model_file_path,
-                        weight_file_path=weight_file_path,
-                        platform=model_config[YAMLKeyword.platform],
-                        device_type=device_type,
+                        target_dir=build_tmp_binary_dir,
+                        target_name=target_name,
+                        vlog_level=flags.vlog_level,
+                        embed_model_data=embed_model_data,
+                        model_output_dir=model_output_dir,
                         input_nodes=subgraphs[0][YAMLKeyword.input_tensors],
-                        output_nodes=output_nodes,
+                        output_nodes=output_config[
+                            YAMLKeyword.output_tensors],
                         input_shapes=subgraphs[0][YAMLKeyword.input_shapes],
-                        output_shapes=output_shapes,
-                        model_output_dir=model_output_dir,
-                        input_data_types=subgraphs[0][
-                            YAMLKeyword.input_data_types],
-                        caffe_env=flags.caffe_env,
-                        validation_threshold=subgraphs[0][
-                            YAMLKeyword.validation_threshold][validate_type],
-                        backend=subgraphs[0][YAMLKeyword.backend]
-                    )
-                if flags.report and flags.round > 0:
-                    tuned = is_tuned and device_type == DeviceType.GPU
-                    self.report_run_statistics(
-                        target_abi=target_abi,
-                        model_name=model_name,
+                        output_shapes=output_config[YAMLKeyword.output_shapes],
+                        mace_model_dir=mace_model_dir,
+                        model_tag=model_name,
                         device_type=device_type,
-                        output_dir=flags.report_dir,
-                        tuned=tuned
+                        running_round=flags.round,
+                        restart_round=flags.restart_round,
+                        limit_opencl_kernel_time=model_config[
+                            YAMLKeyword.limit_opencl_kernel_time],
+                        tuning=False,
+                        out_of_range_check=flags.gpu_out_of_range_check,
+                        model_graph_format=configs[
+                            YAMLKeyword.model_graph_format],
+                        omp_num_threads=flags.omp_num_threads,
+                        cpu_affinity_policy=flags.cpu_affinity_policy,
+                        gpu_perf_hint=flags.gpu_perf_hint,
+                        gpu_priority_hint=flags.gpu_priority_hint,
+                        runtime_failure_ratio=flags.runtime_failure_ratio,
+                        address_sanitizer=flags.address_sanitizer,
+                        opencl_binary_file=model_opencl_output_bin_path,
+                        opencl_parameter_file=model_opencl_parameter_path,
+                        libmace_dynamic_library_path=LIBMACE_DYNAMIC_PATH,
+                        link_dynamic=link_dynamic,
+                        quantize_stat=flags.quantize_stat,
+                        input_dir=flags.input_dir,
+                        output_dir=flags.output_dir,
+                        layers_validate_file=output_config[
+                            YAMLKeyword.model_file_path]
                     )
+                    if flags.validate or flags.validate_all_layers:
+                        model_file_path, weight_file_path = get_model_files(
+                            model_config[YAMLKeyword.model_file_path],
+                            model_config[YAMLKeyword.model_sha256_checksum],
+                            BUILD_DOWNLOADS_DIR,
+                            model_config[YAMLKeyword.weight_file_path],
+                            model_config[YAMLKeyword.weight_sha256_checksum]
+                        )
+                        validate_type = device_type
+                        if model_config[YAMLKeyword.quantize] == 1:
+                            validate_type = device_type + '_QUANTIZE'
+                        sh_commands.validate_model(
+                            abi=target_abi,
+                            device=self,
+                            model_file_path=model_file_path,
+                            weight_file_path=weight_file_path,
+                            platform=model_config[YAMLKeyword.platform],
+                            device_type=device_type,
+                            input_nodes=subgraphs[0][
+                                YAMLKeyword.input_tensors],
+                            output_nodes=output_config[
+                                YAMLKeyword.output_tensors],
+                            input_shapes=subgraphs[0][
+                                YAMLKeyword.input_shapes],
+                            output_shapes=output_config[
+                                YAMLKeyword.output_shapes],
+                            model_output_dir=model_output_dir,
+                            input_data_types=subgraphs[0][
+                                YAMLKeyword.input_data_types],
+                            caffe_env=flags.caffe_env,
+                            validation_threshold=subgraphs[0][
+                                YAMLKeyword.validation_threshold][
+                                validate_type],
+                            backend=subgraphs[0][YAMLKeyword.backend],
+                            log_file=log_file,
+                        )
+                    if flags.report and flags.round > 0:
+                        tuned = is_tuned and device_type == DeviceType.GPU
+                        self.report_run_statistics(
+                            target_abi=target_abi,
+                            model_name=model_name,
+                            device_type=device_type,
+                            output_dir=flags.report_dir,
+                            tuned=tuned
+                        )
         if model_output_dirs:
             opencl_output_bin_path = get_opencl_binary_output_path(
                 library_name, target_abi, self
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index a824ba570ed44b76ec7c803b4645d503d7476594..95628738bfd525296b42169a3cea1c3df1520018 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -622,7 +622,9 @@ def validate_model(abi,
                    input_file_name="model_input",
                    output_file_name="model_out",
                    validation_threshold=0.9,
-                   backend="tensorflow"):
+                   backend="tensorflow",
+                   log_file="",
+                   ):
     six.print_("* Validate with %s" % platform)
     if abi != "host":
         for output_name in output_nodes:
@@ -639,14 +641,16 @@ def validate_model(abi,
                  "%s/%s" % (model_output_dir, output_file_name), device_type,
                  ":".join(input_shapes), ":".join(output_shapes),
                  ",".join(input_nodes), ",".join(output_nodes),
-                 validation_threshold, ",".join(input_data_types), backend)
+                 validation_threshold, ",".join(input_data_types), backend,
+                 log_file)
     elif platform == "onnx":
         validate(platform, model_file_path, "",
                  "%s/%s" % (model_output_dir, input_file_name),
                  "%s/%s" % (model_output_dir, output_file_name), device_type,
                  ":".join(input_shapes), ":".join(output_shapes),
                  ",".join(input_nodes), ",".join(output_nodes),
-                 validation_threshold, ",".join(input_data_types), backend)
+                 validation_threshold, ",".join(input_data_types), backend,
+                 log_file)
     elif platform == "caffe":
         image_name = "mace-caffe:latest"
         container_name = "mace_caffe_validator"
@@ -662,7 +666,8 @@ def validate_model(abi,
                      device_type,
                      ":".join(input_shapes), ":".join(output_shapes),
                      ",".join(input_nodes), ",".join(output_nodes),
-                     validation_threshold, ",".join(input_data_types), backend)
+                     validation_threshold, ",".join(input_data_types), backend,
+                     log_file)
         elif caffe_env == common.CaffeEnvType.DOCKER:
             docker_image_id = sh.docker("images", "-q", image_name)
             if not docker_image_id:
diff --git a/tools/validate.py b/tools/validate.py
index 65e8ba1c4d46896cf71d5cbe97746479f85ec10a..93ce07fd2b967d1a9944e319bb283516b9aca9e8 100644
--- a/tools/validate.py
+++ b/tools/validate.py
@@ -79,7 +79,7 @@ def calculate_pixel_accuracy(out_value, mace_out_value):
 
 
 def compare_output(platform, device_type, output_name, mace_out_value,
-                   out_value, validation_threshold):
+                   out_value, validation_threshold, log_file):
     if mace_out_value.size != 0:
         pixel_accuracy = calculate_pixel_accuracy(out_value, mace_out_value)
         out_value = out_value.reshape(-1)
@@ -91,7 +91,18 @@ def compare_output(platform, device_type, output_name, mace_out_value,
             output_name + ' MACE VS ' + platform.upper()
             + ' similarity: ' + str(similarity) + ' , sqnr: ' + str(sqnr)
             + ' , pixel_accuracy: ' + str(pixel_accuracy))
-        if similarity > validation_threshold:
+        if log_file:
+            if not os.path.exists(log_file):
+                with open(log_file, 'w') as f:
+                    f.write('output_name,similarity,sqnr,pixel_accuracy\n')
+            summary = '{output_name},{similarity},{sqnr},{pixel_accuracy}\n'\
+                .format(output_name=output_name,
+                        similarity=similarity,
+                        sqnr=sqnr,
+                        pixel_accuracy=pixel_accuracy)
+            with open(log_file, "a") as f:
+                f.write(summary)
+        elif similarity > validation_threshold:
             common.MaceLogger.summary(
                 common.StringFormatter.block("Similarity Test Passed"))
         else:
@@ -112,7 +123,8 @@ def normalize_tf_tensor_name(name):
 
 def validate_tf_model(platform, device_type, model_file, input_file,
                       mace_out_file, input_names, input_shapes,
-                      output_names, validation_threshold, input_data_types):
+                      output_names, validation_threshold, input_data_types,
+                      log_file):
     import tensorflow as tf
     if not os.path.isfile(model_file):
         common.MaceLogger.error(
@@ -151,12 +163,13 @@ def validate_tf_model(platform, device_type, model_file, input_file,
                     mace_out_value = load_data(output_file_name)
                     compare_output(platform, device_type, output_names[i],
                                    mace_out_value, output_values[i],
-                                   validation_threshold)
+                                   validation_threshold, log_file)
 
 
 def validate_caffe_model(platform, device_type, model_file, input_file,
                          mace_out_file, weight_file, input_names, input_shapes,
-                         output_names, output_shapes, validation_threshold):
+                         output_names, output_shapes, validation_threshold,
+                         log_file):
     os.environ['GLOG_minloglevel'] = '1'  # suprress Caffe verbose prints
     import caffe
     if not os.path.isfile(model_file):
@@ -201,13 +214,13 @@ def validate_caffe_model(platform, device_type, model_file, input_file,
             mace_out_file, output_names[i])
         mace_out_value = load_data(output_file_name)
         compare_output(platform, device_type, output_names[i], mace_out_value,
-                       value, validation_threshold)
+                       value, validation_threshold, log_file)
 
 
 def validate_onnx_model(platform, device_type, model_file, input_file,
                         mace_out_file, input_names, input_shapes,
                         output_names, output_shapes, validation_threshold,
-                        input_data_types, backend):
+                        input_data_types, backend, log_file):
     import onnx
     if backend == "tensorflow":
         from onnx_tf.backend import prepare
@@ -257,12 +270,12 @@ def validate_onnx_model(platform, device_type, model_file, input_file,
         mace_out_value = load_data(output_file_name)
         compare_output(platform, device_type, output_names[i],
                        mace_out_value, value,
-                       validation_threshold)
+                       validation_threshold, log_file)
 
 
 def validate(platform, model_file, weight_file, input_file, mace_out_file,
              device_type, input_shape, output_shape, input_node, output_node,
-             validation_threshold, input_data_type, backend):
+             validation_threshold, input_data_type, backend, log_file):
     input_names = [name for name in input_node.split(',')]
     input_shape_strs = [shape for shape in input_shape.split(':')]
     input_shapes = [[int(x) for x in shape.split(',')]
@@ -278,7 +291,8 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
     if platform == 'tensorflow':
         validate_tf_model(platform, device_type, model_file, input_file,
                           mace_out_file, input_names, input_shapes,
-                          output_names, validation_threshold, input_data_types)
+                          output_names, validation_threshold, input_data_types,
+                          log_file)
     elif platform == 'caffe':
         output_shape_strs = [shape for shape in output_shape.split(':')]
         output_shapes = [[int(x) for x in shape.split(',')]
@@ -286,7 +300,7 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
         validate_caffe_model(platform, device_type, model_file, input_file,
                              mace_out_file, weight_file, input_names,
                              input_shapes, output_names, output_shapes,
-                             validation_threshold)
+                             validation_threshold, log_file)
     elif platform == 'onnx':
         output_shape_strs = [shape for shape in output_shape.split(':')]
         output_shapes = [[int(x) for x in shape.split(',')]
@@ -295,7 +309,7 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
                             mace_out_file, input_names, input_shapes,
                             output_names, output_shapes,
                             validation_threshold,
-                            input_data_types, backend)
+                            input_data_types, backend, log_file)
 
 
 def parse_args():