提交 6d12272f 编写于 作者: 李寅

Merge branch 'quantize' into 'master'

Fix post quantize

See merge request !1169
......@@ -144,18 +144,23 @@ class HexagonConverter(base_converter.ConverterInterface):
return self._model
def add_port_for_tensors(self, tensors):
    """Normalize tensor names in-place to the Hexagon `name:port` form.

    Any tensor name lacking a ``:port`` suffix gets ``:0`` appended
    (port 0 is the default output port). When quantization activation
    info was recorded under the bare name, it is re-registered under
    the suffixed name so later lookups by the full name succeed.

    Args:
        tensors: a mutable indexable sequence of tensor-name strings
            (e.g. a protobuf repeated string field); modified in place.
    """
    for index, name in enumerate(tensors):
        if ':' in name:
            continue  # already carries an explicit port suffix
        tensors[index] = name + ':0'
        # Mirror the activation info under the canonical suffixed key,
        # keeping the original bare-name entry intact.
        if name in self._quantize_activation_info:
            self._quantize_activation_info[tensors[index]] = \
                self._quantize_activation_info[name]
def convert_ops(self):
print("Convert mace graph to hexagon.")
for op in self._model.op:
if not self._hexagon_ops.has_op(op.type):
raise Exception('Unsupported op: ', op)
for i in range(len(op.input)):
if ':' not in op.input[i]:
node_name = op.input[i]
op.input[i] += ':0'
if node_name in self._quantize_activation_info:
self._quantize_activation_info[op.input[i]] = \
self._quantize_activation_info[node_name]
self.add_port_for_tensors(op.input)
self.add_port_for_tensors(op.output)
if op.type == MaceOp.Conv2D.name \
or op.type == MaceOp.DepthwiseConv2d.name:
......@@ -483,13 +488,15 @@ class HexagonConverter(base_converter.ConverterInterface):
for tensor in self._model.tensors:
tensor.node_id = node_id_counter
node_id_counter += 1
tensor_op, port = get_op_and_port_from_tensor(tensor.name)
node_id_map[tensor_op] = tensor.node_id
node_id_map[tensor.name] = tensor.node_id
print("Hexagon op:")
index = 0
for op in self._model.op:
op.node_id = node_id_counter
node_id_counter += 1
for output in op.output:
node_id_map[output] = op.node_id
if op.type not in [HexagonOp.QuantizeINPUT_f_to_8,
HexagonOp.DequantizeOUTPUT_8tof.name]:
index_str = str(index)
......@@ -498,11 +505,10 @@ class HexagonConverter(base_converter.ConverterInterface):
index_str = ''
print('Op: %s (%s, node_id:%d, index:%s)' %
(op.name, op.type, op.node_id, index_str))
node_id_counter += 1
node_id_map[op.name] = op.node_id
for ipt in op.input:
op_name, port = get_op_and_port_from_tensor(ipt)
node_id = node_id_map[op_name]
tensor_name = ipt if port == 0 else op_name + ':0'
node_id = node_id_map[tensor_name]
node_input = op.node_input.add()
node_input.node_id = node_id
node_input.output_port = int(port)
......@@ -24,6 +24,8 @@
* --model_data_file=model_data.data \
* --device=GPU
*/
#include <sys/types.h>
#include <dirent.h>
#include <stdint.h>
#include <cstdio>
#include <cstdlib>
......@@ -276,6 +278,7 @@ bool RunModel(const std::string &model_name,
std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs;
std::map<std::string, int64_t> inputs_size;
for (size_t i = 0; i < input_count; ++i) {
// Allocate input and output
// only support float and int32, use char for generalization
......@@ -283,6 +286,7 @@ bool RunModel(const std::string &model_name,
int64_t input_size =
std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 4,
std::multiplies<int64_t>());
inputs_size[input_names[i]] = input_size;
auto buffer_in = std::shared_ptr<char>(new char[input_size],
std::default_delete<char[]>());
// load input
......@@ -310,90 +314,139 @@ bool RunModel(const std::string &model_name,
output_data_formats[i]);
}
LOG(INFO) << "Warm up run";
double warmup_millis;
while (true) {
int64_t t3 = NowMicros();
MaceStatus warmup_status = engine->Run(inputs, &outputs);
if (warmup_status != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
<< warmup_status.information();
do {
if (!FLAGS_input_dir.empty()) {
DIR *dir_parent;
struct dirent *entry;
dir_parent = opendir(FLAGS_input_dir.c_str());
if (dir_parent) {
while ((entry = readdir(dir_parent))) {
std::string file_name = std::string(entry->d_name);
std::string prefix = FormatName(input_names[0]);
if (file_name.find(prefix) == 0) {
std::string suffix = file_name.substr(prefix.size());
for (size_t i = 0; i < input_count; ++i) {
file_name = FLAGS_input_dir + "/" + FormatName(input_names[i])
+ suffix;
std::ifstream in_file(file_name, std::ios::in | std::ios::binary);
std::cout << "Read " << file_name << std::endl;
if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(
inputs[input_names[i]].data().get()),
inputs_size[input_names[i]] * sizeof(float));
in_file.close();
} else {
std::cerr << "Open input file failed" << std::endl;
return -1;
}
}
engine->Run(inputs, &outputs);
if (!FLAGS_output_dir.empty()) {
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_dir + "/" + FormatName(output_names[i]) + suffix;
std::ofstream out_file(output_name, std::ios::binary);
if (out_file.is_open()) {
int64_t output_size =
std::accumulate(output_shapes[i].begin(),
output_shapes[i].end(),
1,
std::multiplies<int64_t>());
out_file.write(
reinterpret_cast<char *>(
outputs[output_names[i]].data().get()),
output_size * sizeof(float));
out_file.flush();
out_file.close();
} else {
std::cerr << "Open output file failed" << std::endl;
return -1;
}
}
}
}
}
closedir(dir_parent);
} else {
std::cerr << "Directory " << FLAGS_input_dir << " does not exist."
<< std::endl;
}
} else {
LOG(INFO) << "Warm up run";
double warmup_millis;
while (true) {
int64_t t3 = NowMicros();
MaceStatus warmup_status = engine->Run(inputs, &outputs);
if (warmup_status != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Warmup runtime error, retry ... errcode: "
<< warmup_status.information();
do {
#ifdef MODEL_GRAPH_FORMAT_CODE
create_engine_status =
CreateMaceEngineFromCode(model_name,
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
create_engine_status =
CreateMaceEngineFromCode(model_name,
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#else
create_engine_status =
CreateMaceEngineFromProto(reinterpret_cast<const unsigned char *>(
model_graph_data->data()),
model_graph_data->length(),
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
create_engine_status =
CreateMaceEngineFromProto(reinterpret_cast<const unsigned char *>(
model_graph_data->data()),
model_graph_data->length(),
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#endif
} while (create_engine_status != MaceStatus::MACE_SUCCESS);
} else {
int64_t t4 = NowMicros();
warmup_millis = (t4 - t3) / 1000.0;
LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
break;
} while (create_engine_status != MaceStatus::MACE_SUCCESS);
} else {
int64_t t4 = NowMicros();
warmup_millis = (t4 - t3) / 1000.0;
LOG(INFO) << "1st warm up run latency: " << warmup_millis << " ms";
break;
}
}
}
double model_run_millis = -1;
benchmark::OpStat op_stat;
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
int64_t total_run_duration = 0;
for (int i = 0; i < FLAGS_round; ++i) {
std::unique_ptr<port::Logger> info_log;
std::unique_ptr<port::MallocLogger> malloc_logger;
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
info_log = LOG_PTR(INFO);
malloc_logger = port::Env::Default()->NewMallocLogger(
info_log.get(), MakeString(i));
}
MaceStatus run_status;
RunMetadata metadata;
RunMetadata *metadata_ptr = nullptr;
if (FLAGS_benchmark) {
metadata_ptr = &metadata;
}
double model_run_millis = -1;
benchmark::OpStat op_stat;
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
int64_t total_run_duration = 0;
for (int i = 0; i < FLAGS_round; ++i) {
std::unique_ptr<port::Logger> info_log;
std::unique_ptr<port::MallocLogger> malloc_logger;
if (FLAGS_malloc_check_cycle >= 1
&& i % FLAGS_malloc_check_cycle == 0) {
info_log = LOG_PTR(INFO);
malloc_logger = port::Env::Default()->NewMallocLogger(
info_log.get(), MakeString(i));
}
MaceStatus run_status;
RunMetadata metadata;
RunMetadata *metadata_ptr = nullptr;
if (FLAGS_benchmark) {
metadata_ptr = &metadata;
}
while (true) {
int64_t t0 = NowMicros();
run_status = engine->Run(inputs, &outputs, metadata_ptr);
if (run_status != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Mace run model runtime error, retry ... errcode: "
<< run_status.information();
do {
while (true) {
int64_t t0 = NowMicros();
run_status = engine->Run(inputs, &outputs, metadata_ptr);
if (run_status != MaceStatus::MACE_SUCCESS) {
LOG(ERROR) << "Mace run model runtime error, retry ... errcode: "
<< run_status.information();
do {
#ifdef MODEL_GRAPH_FORMAT_CODE
create_engine_status =
CreateMaceEngineFromCode(model_name,
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#else
create_engine_status =
CreateMaceEngineFromProto(
reinterpret_cast<const unsigned char *>(
model_graph_data->data()),
model_graph_data->length(),
create_engine_status =
CreateMaceEngineFromCode(
model_name,
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
......@@ -401,46 +454,60 @@ bool RunModel(const std::string &model_name,
output_names,
config,
&engine);
#else
create_engine_status =
CreateMaceEngineFromProto(
reinterpret_cast<const unsigned char *>(
model_graph_data->data()),
model_graph_data->length(),
reinterpret_cast<const unsigned char *>(
model_weights_data->data()),
model_weights_data->length(),
input_names,
output_names,
config,
&engine);
#endif
} while (create_engine_status != MaceStatus::MACE_SUCCESS);
} else {
int64_t t1 = NowMicros();
total_run_duration += (t1 - t0);
if (FLAGS_benchmark) {
op_stat.StatMetadata(metadata);
} while (create_engine_status != MaceStatus::MACE_SUCCESS);
} else {
int64_t t1 = NowMicros();
total_run_duration += (t1 - t0);
if (FLAGS_benchmark) {
op_stat.StatMetadata(metadata);
}
break;
}
break;
}
}
model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
LOG(INFO) << "Average latency: " << model_run_millis << " ms";
}
model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
LOG(INFO) << "Average latency: " << model_run_millis << " ms";
}
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]);
std::ofstream out_file(output_name, std::ios::binary);
// only support float and int32
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
std::multiplies<int64_t>());
out_file.write(
outputs[output_names[i]].data<char>().get(), output_size);
out_file.flush();
out_file.close();
LOG(INFO) << "Write output file " << output_name << " with size "
<< output_size << " done.";
}
for (size_t i = 0; i < output_count; ++i) {
std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]);
std::ofstream out_file(output_name, std::ios::binary);
// only support float and int32
int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
std::multiplies<int64_t>());
out_file.write(
outputs[output_names[i]].data<char>().get(), output_size);
out_file.flush();
out_file.close();
LOG(INFO) << "Write output file " << output_name << " with size "
<< output_size << " done.";
}
// Metrics reporting tools depends on the format, keep in consistent
printf("========================================================\n");
printf(" capability(CPU) init warmup run_avg\n");
printf("========================================================\n");
printf("time %15.3f %11.3f %11.3f %11.3f\n",
cpu_capability, init_millis, warmup_millis, model_run_millis);
if (FLAGS_benchmark) {
op_stat.PrintStat();
// Metrics reporting tools depends on the format, keep in consistent
printf("========================================================\n");
printf(" capability(CPU) init warmup run_avg\n");
printf("========================================================\n");
printf("time %15.3f %11.3f %11.3f %11.3f\n",
cpu_capability, init_millis, warmup_millis, model_run_millis);
if (FLAGS_benchmark) {
op_stat.PrintStat();
}
}
return true;
......@@ -514,10 +581,12 @@ int Main(int argc, char **argv) {
output_data_formats[i] = ParseDataFormat(raw_output_data_formats[i]);
}
// get cpu capability
Capability cpu_capability = GetCapability(DeviceType::CPU);
float cpu_float32_performance = cpu_capability.float32_performance.exec_time;
float cpu_float32_performance = 0.0f;
if (FLAGS_input_dir.empty()) {
// get cpu capability
Capability cpu_capability = GetCapability(DeviceType::CPU);
cpu_float32_performance = cpu_capability.float32_performance.exec_time;
}
bool ret = false;
for (int i = 0; i < FLAGS_restart_round; ++i) {
......
......@@ -28,16 +28,21 @@ def parse_args():
"--image_shape",
type=str,
help="target image shape, e.g, 224,224,3")
parser.add_argument(
"--add_softmax",
action="store_true",
help="add softmax before convert to image")
return parser.parse_known_args()
def tensors_to_images(input_files, image_shape):
def tensors_to_images(input_files, image_shape, add_softmax):
with tf.Graph().as_default():
input = tf.placeholder(tf.float32, shape=image_shape, name='input')
output = tf.placeholder(tf.string, name='output_file')
if add_softmax:
input = tf.nn.softmax(input)
# use the second channel if it is gray image
if image_shape[2] == 2:
input = tf.nn.softmax(input)
_, input = tf.split(input, 2, axis=2)
tensor_data = tf.image.convert_image_dtype(input,
tf.uint8,
......@@ -68,7 +73,7 @@ def main(unused_args):
input_files.append(FLAGS.input)
image_shape = [int(dim) for dim in FLAGS.image_shape.split(',')]
tensors_to_images(input_files, image_shape)
tensors_to_images(input_files, image_shape, FLAGS.add_softmax)
if __name__ == '__main__':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册