From d71985fe40190adf227792703b09713e3c6c5c29 Mon Sep 17 00:00:00 2001
From: Bin Li
Date: Wed, 7 Aug 2019 11:21:11 +0800
Subject: [PATCH] Fix post quantize

---
 .../tools/converter_tool/hexagon_converter.py |  30 +-
 mace/tools/mace_run.cc                        | 297 +++++++++++-------
 tools/image/tensor_to_image.py                |  11 +-
 3 files changed, 209 insertions(+), 129 deletions(-)

diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py
index c30676c1..8a3fd336 100644
--- a/mace/python/tools/converter_tool/hexagon_converter.py
+++ b/mace/python/tools/converter_tool/hexagon_converter.py
@@ -144,18 +144,23 @@ class HexagonConverter(base_converter.ConverterInterface):
 
         return self._model
 
+    def add_port_for_tensors(self, tensors):
+        for i in range(len(tensors)):
+            if ':' not in tensors[i]:
+                node_name = tensors[i]
+                tensors[i] += ':0'
+                if node_name in self._quantize_activation_info:
+                    self._quantize_activation_info[tensors[i]] = \
+                        self._quantize_activation_info[node_name]
+
     def convert_ops(self):
         print("Convert mace graph to hexagon.")
         for op in self._model.op:
             if not self._hexagon_ops.has_op(op.type):
                 raise Exception('Unsupported op: ', op)
-            for i in range(len(op.input)):
-                if ':' not in op.input[i]:
-                    node_name = op.input[i]
-                    op.input[i] += ':0'
-                    if node_name in self._quantize_activation_info:
-                        self._quantize_activation_info[op.input[i]] = \
-                            self._quantize_activation_info[node_name]
+
+            self.add_port_for_tensors(op.input)
+            self.add_port_for_tensors(op.output)
 
             if op.type == MaceOp.Conv2D.name \
                     or op.type == MaceOp.DepthwiseConv2d.name:
@@ -483,13 +488,15 @@ class HexagonConverter(base_converter.ConverterInterface):
         for tensor in self._model.tensors:
             tensor.node_id = node_id_counter
             node_id_counter += 1
-            tensor_op, port = get_op_and_port_from_tensor(tensor.name)
-            node_id_map[tensor_op] = tensor.node_id
+            node_id_map[tensor.name] = tensor.node_id
 
         print("Hexagon op:")
         index = 0
         for op in self._model.op:
             op.node_id = node_id_counter
+            node_id_counter += 1
+            for output in op.output:
+                node_id_map[output] = op.node_id
             if op.type not in [HexagonOp.QuantizeINPUT_f_to_8,
                                HexagonOp.DequantizeOUTPUT_8tof.name]:
                 index_str = str(index)
@@ -498,11 +505,10 @@ class HexagonConverter(base_converter.ConverterInterface):
                 index_str = ''
             print('Op: %s (%s, node_id:%d, index:%s)' %
                   (op.name, op.type, op.node_id, index_str))
-            node_id_counter += 1
-            node_id_map[op.name] = op.node_id
             for ipt in op.input:
                 op_name, port = get_op_and_port_from_tensor(ipt)
-                node_id = node_id_map[op_name]
+                tensor_name = ipt if port == 0 else op_name + ':0'
+                node_id = node_id_map[tensor_name]
                 node_input = op.node_input.add()
                 node_input.node_id = node_id
                 node_input.output_port = int(port)
diff --git a/mace/tools/mace_run.cc b/mace/tools/mace_run.cc
index f43e38d8..7d6e1b2c 100644
--- a/mace/tools/mace_run.cc
+++ b/mace/tools/mace_run.cc
@@ -24,6 +24,8 @@
  *          --model_data_file=model_data.data \
  *          --device=GPU
  */
+#include <dirent.h>
+#include <sys/types.h>
 #include <cstdlib>
 #include <fstream>
 #include <numeric>
@@ -276,6 +278,7 @@ bool RunModel(const std::string &model_name,
 
   std::map<std::string, mace::MaceTensor> inputs;
   std::map<std::string, mace::MaceTensor> outputs;
+  std::map<std::string, int64_t> inputs_size;
   for (size_t i = 0; i < input_count; ++i) {
     // Allocate input and output
     // only support float and int32, use char for generalization
@@ -282,6 +285,7 @@ bool RunModel(const std::string &model_name,
     int64_t input_size =
         std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 4,
                         std::multiplies<int64_t>());
+    inputs_size[input_names[i]] = input_size;
     auto buffer_in = std::shared_ptr<char>(new char[input_size],
                                            std::default_delete<char[]>());
     // load input
@@ -310,90 +314,139 @@ bool RunModel(const std::string &model_name,
                                         output_data_formats[i]);
   }
 
-  LOG(INFO) << "Warm up run";
-  double warmup_millis;
-  while (true) {
-    int64_t t3 = NowMicros();
-    MaceStatus warmup_status = engine->Run(inputs, &outputs);
-    if (warmup_status != MaceStatus::MACE_SUCCESS) {
-      LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
-                 << warmup_status.information();
-      do {
+  if (!FLAGS_input_dir.empty()) {
+    DIR *dir_parent;
+    struct dirent *entry;
+    dir_parent = opendir(FLAGS_input_dir.c_str());
+    if (dir_parent) {
+      while ((entry = readdir(dir_parent))) {
+        std::string file_name = std::string(entry->d_name);
+        std::string prefix = FormatName(input_names[0]);
+        if (file_name.find(prefix) == 0) {
+          std::string suffix = file_name.substr(prefix.size());
+
+          for (size_t i = 0; i < input_count; ++i) {
+            file_name = FLAGS_input_dir + "/" + FormatName(input_names[i])
+                + suffix;
+            std::ifstream in_file(file_name, std::ios::in | std::ios::binary);
+            std::cout << "Read " << file_name << std::endl;
+            if (in_file.is_open()) {
+              in_file.read(reinterpret_cast<char *>(
+                               inputs[input_names[i]].data().get()),
+                           inputs_size[input_names[i]] * sizeof(float));
+              in_file.close();
+            } else {
+              std::cerr << "Open input file failed" << std::endl;
+              return -1;
+            }
+          }
+          engine->Run(inputs, &outputs);
+
+          if (!FLAGS_output_dir.empty()) {
+            for (size_t i = 0; i < output_count; ++i) {
+              std::string output_name =
+                  FLAGS_output_dir + "/" + FormatName(output_names[i])
+                      + suffix;
+              std::ofstream out_file(output_name, std::ios::binary);
+              if (out_file.is_open()) {
+                int64_t output_size =
+                    std::accumulate(output_shapes[i].begin(),
+                                    output_shapes[i].end(),
+                                    1,
+                                    std::multiplies<int64_t>());
+                out_file.write(
+                    reinterpret_cast<char *>(
+                        outputs[output_names[i]].data().get()),
+                    output_size * sizeof(float));
+                out_file.flush();
+                out_file.close();
+              } else {
+                std::cerr << "Open output file failed" << std::endl;
+                return -1;
+              }
+            }
+          }
+        }
+      }
+
+      closedir(dir_parent);
+    } else {
+      std::cerr << "Directory " << FLAGS_input_dir << " does not exist."
+                << std::endl;
+    }
+  } else {
+    LOG(INFO) << "Warm up run";
+    double warmup_millis;
+    while (true) {
+      int64_t t3 = NowMicros();
+      MaceStatus warmup_status = engine->Run(inputs, &outputs);
+      if (warmup_status != MaceStatus::MACE_SUCCESS) {
+        LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
errcode: " + << warmup_status.information(); + do { #ifdef MODEL_GRAPH_FORMAT_CODE - create_engine_status = - CreateMaceEngineFromCode(model_name, - reinterpret_cast( - model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); + create_engine_status = + CreateMaceEngineFromCode(model_name, + reinterpret_cast( + model_weights_data->data()), + model_weights_data->length(), + input_names, + output_names, + config, + &engine); #else - create_engine_status = - CreateMaceEngineFromProto(reinterpret_cast( - model_graph_data->data()), - model_graph_data->length(), - reinterpret_cast( - model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); + create_engine_status = + CreateMaceEngineFromProto(reinterpret_cast( + model_graph_data->data()), + model_graph_data->length(), + reinterpret_cast( + model_weights_data->data()), + model_weights_data->length(), + input_names, + output_names, + config, + &engine); #endif - } while (create_engine_status != MaceStatus::MACE_SUCCESS); - } else { - int64_t t4 = NowMicros(); - warmup_millis = (t4 - t3) / 1000.0; - LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms"; - break; + } while (create_engine_status != MaceStatus::MACE_SUCCESS); + } else { + int64_t t4 = NowMicros(); + warmup_millis = (t4 - t3) / 1000.0; + LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms"; + break; + } } - } - double model_run_millis = -1; - benchmark::OpStat op_stat; - if (FLAGS_round > 0) { - LOG(INFO) << "Run model"; - int64_t total_run_duration = 0; - for (int i = 0; i < FLAGS_round; ++i) { - std::unique_ptr info_log; - std::unique_ptr malloc_logger; - if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { - info_log = LOG_PTR(INFO); - malloc_logger = port::Env::Default()->NewMallocLogger( - info_log.get(), MakeString(i)); - } - MaceStatus run_status; - RunMetadata metadata; - RunMetadata *metadata_ptr = nullptr; - if (FLAGS_benchmark) { - metadata_ptr = &metadata; - } + double model_run_millis = -1; + benchmark::OpStat op_stat; + if (FLAGS_round > 0) { + LOG(INFO) << "Run model"; + int64_t total_run_duration = 0; + for (int i = 0; i < FLAGS_round; ++i) { + std::unique_ptr info_log; + std::unique_ptr malloc_logger; + if (FLAGS_malloc_check_cycle >= 1 + && i % FLAGS_malloc_check_cycle == 0) { + info_log = LOG_PTR(INFO); + malloc_logger = port::Env::Default()->NewMallocLogger( + info_log.get(), MakeString(i)); + } + MaceStatus run_status; + RunMetadata metadata; + RunMetadata *metadata_ptr = nullptr; + if (FLAGS_benchmark) { + metadata_ptr = &metadata; + } - while (true) { - int64_t t0 = NowMicros(); - run_status = engine->Run(inputs, &outputs, metadata_ptr); - if (run_status != MaceStatus::MACE_SUCCESS) { - LOG(ERROR) << "Mace run model runtime error, retry ... errcode: " - << run_status.information(); - do { + while (true) { + int64_t t0 = NowMicros(); + run_status = engine->Run(inputs, &outputs, metadata_ptr); + if (run_status != MaceStatus::MACE_SUCCESS) { + LOG(ERROR) << "Mace run model runtime error, retry ... 
errcode: " + << run_status.information(); + do { #ifdef MODEL_GRAPH_FORMAT_CODE - create_engine_status = - CreateMaceEngineFromCode(model_name, - reinterpret_cast( - model_weights_data->data()), - model_weights_data->length(), - input_names, - output_names, - config, - &engine); -#else - create_engine_status = - CreateMaceEngineFromProto( - reinterpret_cast( - model_graph_data->data()), - model_graph_data->length(), + create_engine_status = + CreateMaceEngineFromCode( + model_name, reinterpret_cast( model_weights_data->data()), model_weights_data->length(), @@ -401,46 +454,60 @@ bool RunModel(const std::string &model_name, output_names, config, &engine); +#else + create_engine_status = + CreateMaceEngineFromProto( + reinterpret_cast( + model_graph_data->data()), + model_graph_data->length(), + reinterpret_cast( + model_weights_data->data()), + model_weights_data->length(), + input_names, + output_names, + config, + &engine); #endif - } while (create_engine_status != MaceStatus::MACE_SUCCESS); - } else { - int64_t t1 = NowMicros(); - total_run_duration += (t1 - t0); - if (FLAGS_benchmark) { - op_stat.StatMetadata(metadata); + } while (create_engine_status != MaceStatus::MACE_SUCCESS); + } else { + int64_t t1 = NowMicros(); + total_run_duration += (t1 - t0); + if (FLAGS_benchmark) { + op_stat.StatMetadata(metadata); + } + break; } - break; } } + model_run_millis = total_run_duration / 1000.0 / FLAGS_round; + LOG(INFO) << "Average latency: " << model_run_millis << " ms"; } - model_run_millis = total_run_duration / 1000.0 / FLAGS_round; - LOG(INFO) << "Average latency: " << model_run_millis << " ms"; - } - for (size_t i = 0; i < output_count; ++i) { - std::string output_name = - FLAGS_output_file + "_" + FormatName(output_names[i]); - std::ofstream out_file(output_name, std::ios::binary); - // only support float and int32 - int64_t output_size = - std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4, - std::multiplies()); - out_file.write( - outputs[output_names[i]].data().get(), output_size); - out_file.flush(); - out_file.close(); - LOG(INFO) << "Write output file " << output_name << " with size " - << output_size << " done."; - } + for (size_t i = 0; i < output_count; ++i) { + std::string output_name = + FLAGS_output_file + "_" + FormatName(output_names[i]); + std::ofstream out_file(output_name, std::ios::binary); + // only support float and int32 + int64_t output_size = + std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4, + std::multiplies()); + out_file.write( + outputs[output_names[i]].data().get(), output_size); + out_file.flush(); + out_file.close(); + LOG(INFO) << "Write output file " << output_name << " with size " + << output_size << " done."; + } - // Metrics reporting tools depends on the format, keep in consistent - printf("========================================================\n"); - printf(" capability(CPU) init warmup run_avg\n"); - printf("========================================================\n"); - printf("time %15.3f %11.3f %11.3f %11.3f\n", - cpu_capability, init_millis, warmup_millis, model_run_millis); - if (FLAGS_benchmark) { - op_stat.PrintStat(); + // Metrics reporting tools depends on the format, keep in consistent + printf("========================================================\n"); + printf(" capability(CPU) init warmup run_avg\n"); + printf("========================================================\n"); + printf("time %15.3f %11.3f %11.3f %11.3f\n", + cpu_capability, init_millis, warmup_millis, model_run_millis); + if 
+      op_stat.PrintStat();
+    }
   }
 
   return true;
@@ -514,10 +581,12 @@ int Main(int argc, char **argv) {
     output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
   }
 
-
-  // get cpu capability
-  Capability cpu_capability = GetCapability(DeviceType::CPU);
-  float cpu_float32_performance = cpu_capability.float32_performance.exec_time;
+  float cpu_float32_performance = 0.0f;
+  if (FLAGS_input_dir.empty()) {
+    // get cpu capability
+    Capability cpu_capability = GetCapability(DeviceType::CPU);
+    cpu_float32_performance = cpu_capability.float32_performance.exec_time;
+  }
 
   bool ret = false;
   for (int i = 0; i < FLAGS_restart_round; ++i) {
diff --git a/tools/image/tensor_to_image.py b/tools/image/tensor_to_image.py
index a4d7cda7..b9d9d4e7 100644
--- a/tools/image/tensor_to_image.py
+++ b/tools/image/tensor_to_image.py
@@ -28,16 +28,21 @@ def parse_args():
         "--image_shape",
         type=str,
         help="target image shape, e.g, 224,224,3")
+    parser.add_argument(
+        "--add_softmax",
+        action="store_true",
+        help="add softmax before convert to image")
     return parser.parse_known_args()
 
 
-def tensors_to_images(input_files, image_shape):
+def tensors_to_images(input_files, image_shape, add_softmax):
     with tf.Graph().as_default():
         input = tf.placeholder(tf.float32, shape=image_shape, name='input')
         output = tf.placeholder(tf.string, name='output_file')
+        if add_softmax:
+            input = tf.nn.softmax(input)
         # use the second channel if it is gray image
         if image_shape[2] == 2:
-            input = tf.nn.softmax(input)
             _, input = tf.split(input, 2, axis=2)
         tensor_data = tf.image.convert_image_dtype(input,
                                                    tf.uint8,
@@ -68,7 +73,7 @@ def main(unused_args):
         input_files.append(FLAGS.input)
 
     image_shape = [int(dim) for dim in FLAGS.image_shape.split(',')]
-    tensors_to_images(input_files, image_shape)
+    tensors_to_images(input_files, image_shape, FLAGS.add_softmax)
 
 
 if __name__ == '__main__':
-- 
GitLab
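
A note on the new batch mode in mace/tools/mace_run.cc: when FLAGS_input_dir is set, mace_run scans that directory for files whose names begin with FormatName(input_names[0]), treats the rest of each matching file name as a per-sample suffix, and then expects one raw float32 file per model input carrying the same suffix. The Python sketch below shows one way to prepare such a directory. It is illustrative only: the input names and shapes are placeholders for a real model's signature, and format_name assumes FormatName() maps non-alphanumeric characters to underscores.

    import os
    import numpy as np

    # Hypothetical model signature -- substitute the real input names/shapes.
    input_names = ["input"]
    input_shapes = [(1, 224, 224, 3)]


    def format_name(name):
        # Assumption: mirrors mace_run's FormatName(), which rewrites
        # characters that are awkward in file names to '_'.
        return ''.join(c if c.isalnum() else '_' for c in name)


    os.makedirs("inputs", exist_ok=True)
    for sample_idx in range(10):
        # One shared suffix per sample: mace_run matches files by the prefix
        # format_name(input_names[0]) and reuses that suffix for the other
        # inputs of the same sample.
        suffix = "_%04d.bin" % sample_idx
        for name, shape in zip(input_names, input_shapes):
            data = np.random.rand(*shape).astype(np.float32)  # stand-in data
            data.tofile(os.path.join("inputs", format_name(name) + suffix))

Pointing mace_run at that directory (the gflags names implied by the FLAGS_ references in the diff would be --input_dir and, for saving raw float32 outputs under the same suffixes, --output_dir) runs one inference per suffix instead of the usual warm-up and benchmark loop.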