diff --git a/mace/libmace/mace_version_script.lds b/mace/libmace/mace_version_script.lds
index 9b7d34538ad20417e59051420048e98998c5afd7..917ecc47464b0851adcd5537b9fb11e0af6ded28 100644
--- a/mace/libmace/mace_version_script.lds
+++ b/mace/libmace/mace_version_script.lds
@@ -7,6 +7,7 @@ mace {
     *CreateMaceEngineFromProto*;
     *GetBigLittleCoreIDs*;
     *MaceVersion*;
+    *GetCapability*;

     # api for static library of models
     *mace*logging*LogMessage*;
diff --git a/mace/public/mace.h b/mace/public/mace.h
index 575ca32877374badf249a3b7bcad89f2e740793e..d71ecc1b4d0935958465ea37507a2ac32524c280 100644
--- a/mace/public/mace.h
+++ b/mace/public/mace.h
@@ -102,7 +102,7 @@ class RunMetadata {

 /// Consistent with Android NNAPI
 struct PerformanceInfo {
-  // Time of executing some workload.
+  // Time of executing some workload (in milliseconds).
   // negative value for unsupported.
   float exec_time;
 };
diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py
index eddb8d8685972a8dbc05070f444653288446657a..c70fcb1d2f4a21e2e5f55d6d7298bb91dd67d42c 100644
--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -123,39 +123,16 @@ TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)

 TFSupportedOps = [six.b(op) for op in TFSupportedOps]

-TFTransformGraphOptions = {
-    base_converter.DeviceType.CPU.value: [
-        'strip_unused_nodes',
-        'remove_nodes(op=Identity, op=CheckNumerics)',
-        'fold_constants(ignore_errors=true)',
-        'fold_batch_norms',
-        'fold_old_batch_norms',
-        'remove_control_dependencies',
-        'strip_unused_nodes',
-        'sort_by_execution_order'
-    ],
-    base_converter.DeviceType.GPU.value: [
-        'strip_unused_nodes',
-        'remove_nodes(op=Identity, op=CheckNumerics)',
-        'fold_constants(ignore_errors=true)',
-        'flatten_atrous_conv',
-        'fold_batch_norms',
-        'fold_old_batch_norms',
-        'remove_control_dependencies',
-        'strip_unused_nodes',
-        'sort_by_execution_order'
-    ],
-    base_converter.DeviceType.HEXAGON.value: [
-        'strip_unused_nodes',
-        'remove_nodes(op=Identity, op=CheckNumerics)',
-        'fold_constants(ignore_errors=true)',
-        'fold_batch_norms',
-        'fold_old_batch_norms',
-        'remove_control_dependencies',
-        'strip_unused_nodes',
-        'sort_by_execution_order'
-    ]
-}
+TFTransformGraphOptions = [
+    'strip_unused_nodes',
+    'remove_nodes(op=Identity, op=CheckNumerics)',
+    'fold_constants(ignore_errors=true)',
+    'fold_batch_norms',
+    'fold_old_batch_norms',
+    'remove_control_dependencies',
+    'strip_unused_nodes',
+    'sort_by_execution_order'
+]


 class TensorflowConverter(base_converter.ConverterInterface):
@@ -289,15 +266,13 @@ class TensorflowConverter(base_converter.ConverterInterface):

         self._placeholders = {}

-        print("Run transform_graph: %s" % TFTransformGraphOptions[
-            option.device])
+        print("Run transform_graph: %s" % TFTransformGraphOptions)
         try:
             print("output keys: ", option.output_nodes.keys())
             transformed_graph_def = TransformGraph(tf_graph_def,
                                                    option.input_nodes.keys(),
                                                    option.output_nodes.keys(),
-                                                   TFTransformGraphOptions[
-                                                       option.device])
+                                                   TFTransformGraphOptions)
         except Exception as ex:
             print("Failed to transform graph using tf tool: %s" % ex)
             transformed_graph_def = tf_graph_def
diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc
index b6ee678600b770cd3cb2793200c51d2833a5c093..cd5be2449eba0de3d05463e3e089282fe9be013b 100644
--- a/mace/tools/validation/mace_run.cc
+++ b/mace/tools/validation/mace_run.cc
@@ -225,7 +225,8 @@ bool RunModel(const std::string &model_name,
               const std::vector<DataFormat> &input_data_formats,
               const std::vector<std::string> &output_names,
               const std::vector<std::vector<int64_t>> &output_shapes,
-              const std::vector<DataFormat> &output_data_formats) {
+              const std::vector<DataFormat> &output_data_formats,
+              float cpu_capability) {
   DeviceType device_type = ParseDeviceType(FLAGS_device);

   int64_t t0 = NowMicros();
@@ -446,11 +447,11 @@ bool RunModel(const std::string &model_name,
   }

   // Metrics reporting tools depends on the format, keep in consistent
-  printf("========================================\n");
-  printf("            init      warmup     run_avg\n");
-  printf("========================================\n");
-  printf("time %11.3f %11.3f %11.3f\n",
-         init_millis, warmup_millis, model_run_millis);
+  printf("========================================================\n");
+  printf("     capability(CPU)        init      warmup     run_avg\n");
+  printf("========================================================\n");
+  printf("time %15.3f %11.3f %11.3f %11.3f\n",
+         cpu_capability, init_millis, warmup_millis, model_run_millis);

   for (size_t i = 0; i < output_count; ++i) {
@@ -532,13 +533,16 @@ int Main(int argc, char **argv) {
   }

+  // get cpu capability
+  Capability cpu_capability = GetCapability(DeviceType::CPU);
+
   bool ret = false;
   for (int i = 0; i < FLAGS_restart_round; ++i) {
     VLOG(0) << "restart round " << i;
-    ret =
-        RunModel(FLAGS_model_name,
-                 input_names, input_shape_vec, input_data_formats,
-                 output_names, output_shape_vec, output_data_formats);
+    ret = RunModel(FLAGS_model_name,
+                   input_names, input_shape_vec, input_data_formats,
+                   output_names, output_shape_vec, output_data_formats,
+                   cpu_capability.float32_performance.exec_time);
   }
   if (ret) {
     return 0;
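Not part of the patch: below is a minimal sketch of how the newly exported GetCapability entry point could be consumed by a client, mirroring the call this patch adds to Main() in mace_run.cc. It assumes Capability, PerformanceInfo, DeviceType and GetCapability are declared in the mace namespace in mace/public/mace.h and that a one-argument GetCapability(DeviceType) call is valid, as the hunks above suggest; treat it as an illustration, not part of the change.

// capability_demo.cc -- hypothetical client, not included in this patch.
#include <cstdio>

#include "mace/public/mace.h"

int main() {
  // GetCapability is newly exported by mace_version_script.lds above.
  mace::Capability capability = mace::GetCapability(mace::DeviceType::CPU);

  // exec_time is reported in milliseconds; a negative value means the
  // workload is unsupported (see the PerformanceInfo comment in mace.h).
  float exec_time = capability.float32_performance.exec_time;
  if (exec_time < 0) {
    std::printf("float32 workload is unsupported on CPU\n");
  } else {
    std::printf("CPU float32 capability: %.3f ms\n", exec_time);
  }
  return 0;
}

mace_run.cc forwards this same exec_time value into RunModel as cpu_capability, which is what the widened timing table prints in its capability(CPU) column.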