Commit 7407738c authored by yejianwu

Merge branch 'master' of v9.git.n.xiaomi.com:deep-computing/mace into lock_adb_run

@@ -215,8 +215,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
 DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
 DEFINE_string(model_data_file, "",
               "model data file name, used when EMBED_MODEL_DATA set to 0");
-DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
-DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
+DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
+DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
 DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
 DEFINE_int32(cpu_power_option, 0,
              "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
......
@@ -179,7 +179,7 @@ void OpenCLProfilingTimer::ClearTiming() {
   accumulated_micros_ = 0;
 }
 
-GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_DEFAULT;
+GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_NORMAL;
 GPUPriorityHint OpenCLRuntime::kGPUPriorityHint =
     GPUPriorityHint::PRIORITY_DEFAULT;
......
@@ -450,18 +450,6 @@ class CaffeConverter(object):
         final_op.output_shape_map[final_op.layer.top[0]] = output_shape
         self.resolved_ops.add(activation_op.name)
 
-    if op_def.type in ("Conv2D", "FusedConv2D") and \
-        output_shape[2] == 1 and \
-        ((input_format == 'NCHW' and output_shape[3] == 1) or
-         (input_format == 'NHWC' and output_shape[1] == 1)):
-      print "convert op %s from CONV to FC" % op.name
-      op_def.type = 'FC'
-      filter_shape = weight_data.shape
-      new_shape = [filter_shape[0],
-                   filter_shape[1] * filter_shape[2] * filter_shape[3],
-                   1, 1]
-      weight_data.reshape(new_shape)
-
     op_def.output.extend([final_op.name + ':0'])
     self.add_output_shape(op_def, output_shape)
     self.net_def.op.extend([op_def])
......
@@ -363,17 +363,15 @@ class TFConverter(object):
     op_def.name = op.name
     if op.type == 'DepthwiseConv2dNative':
       op_def.type = 'DepthwiseConv2d'
-      if self.device == 'neon':
-        self.transpose_filter_tensor[get_input_tensor(
-          op, 1).name] = (3, 2, 0, 1)
     else:
       op_def.type = op.type
-      if self.device == 'neon':
-        self.transpose_filter_tensor[get_input_tensor(
-          op, 1).name] = (3, 2, 0, 1)
-      else:
-        self.transpose_filter_tensor[get_input_tensor(
-          op, 1).name] = (0, 1, 3, 2)
+
+    if self.device == 'neon':
+      self.transpose_filter_tensor[get_input_tensor(
+        op, 1).name] = (3, 2, 0, 1)
+    elif op.type == 'Conv2D':
+      self.transpose_filter_tensor[get_input_tensor(
+        op, 1).name] = (0, 1, 3, 2)
     if self.device == 'gpu':
       op_def.input.extend([op.inputs[0].name])
       if op_def.type == 'DepthwiseConv2d':
@@ -402,21 +400,71 @@ class TFConverter(object):
     final_op = op
     self.resolved_ops[op.name] = 1
 
-    # convert global conv to fc
-    filter_shape = get_input_tensor(op, 1).shape.as_list()
-    input_shape = get_input_tensor(op, 0).shape.as_list()
-    if op_def.type == "Conv2D" and input_shape[1] == filter_shape[0] and \
-        input_shape[2] == filter_shape[1] and \
-        (op.get_attr('padding') == 'VALID' or filter_shape[0] == 1 and
-         filter_shape[1] == 1):
-      print "convert op %s from CONV to FC" % op.name
-      op_def.type = 'FC'
-      self.reshape_tensor[get_input_tensor(op, 1).name] = \
-          [filter_shape[3],
-           filter_shape[2] * filter_shape[1] * filter_shape[0], 1, 1]
-
+    if len(self.tf_graph.get(op.name, [])) == 1 and \
+            self.tf_graph[op.name][0].type == 'BiasAdd':
+      bias_add_op = self.tf_graph[op.name][0]
+      if self.device == 'gpu':
+        output_name = self.add_buffer_to_image(
+            get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
+        op_def.input.extend([output_name])
+      else:
+        op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
+      final_op = bias_add_op
+      self.resolved_ops[bias_add_op.name] = 1
+
+    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
+            self.tf_graph[final_op.name][0].type in activation_name_map:
+      activation_op = self.tf_graph[final_op.name][0]
+      if op_def.type == "Conv2D":
+        op_def.type = "FusedConv2D"
+      fused_act_arg = op_def.arg.add()
+      fused_act_arg.name = 'activation'
+      fused_act_arg.s = activation_name_map[activation_op.type]
+      if activation_op.type == 'Relu6':
+        max_limit_arg = op_def.arg.add()
+        max_limit_arg.name = 'max_limit'
+        max_limit_arg.f = 6
+      final_op = activation_op
+      self.resolved_ops[activation_op.name] = 1
+
+    op_def.output.extend([output.name for output in final_op.outputs])
+    self.add_output_shape(final_op.outputs, op_def)
+    self.net_def.op.extend([op_def])
+
+  def check_conv_to_fc(self, op):
+    if self.device != 'neon' or op.type != "Conv2D":
+      return False
+    filter_shape = get_input_tensor(op, 1).shape.as_list()
+    input_shape = get_input_tensor(op, 0).shape.as_list()
+    return input_shape[1] == filter_shape[0] \
+        and input_shape[2] == filter_shape[1] \
+        and (op.get_attr('padding') == 'VALID' or filter_shape[0] == 1
+             and filter_shape[1] == 1)
+
+  def convert_global_conv_to_fc(self, op):
+    op_def = mace_pb2.OperatorDef()
+    arg = op_def.arg.add()
+    arg.name = 'T'
+    arg.i = self.dt
+    op_def.name = op.name
+    op_def.type = 'FC'
+    self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
+        (3, 2, 0, 1)
+    filter_shape = get_input_tensor(op, 1).shape.as_list()
+    self.reshape_tensor[get_input_tensor(op, 1).name] = \
+        [filter_shape[3],
+         filter_shape[2] * filter_shape[1] * filter_shape[0], 1, 1]
+    op_def.input.extend(
+        [get_input_tensor(op, i).name for i in range(len(op.inputs))])
+    data_format_arg = op_def.arg.add()
+    data_format_arg.name = 'data_format'
+    data_format_arg.s = 'NCHW'
+    final_op = op
+    self.resolved_ops[op.name] = 1
+
     if len(self.tf_graph.get(op.name, [])) == 1 and \
             self.tf_graph[op.name][0].type == 'BiasAdd':
       bias_add_op = self.tf_graph[op.name][0]
       if self.device == 'gpu':
         output_name = self.add_buffer_to_image(
@@ -428,10 +476,8 @@ class TFConverter(object):
       self.resolved_ops[bias_add_op.name] = 1
 
     if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
       activation_op = self.tf_graph[final_op.name][0]
-      if op_def.type == "Conv2D":
-        op_def.type = "FusedConv2D"
       fused_act_arg = op_def.arg.add()
       fused_act_arg.name = 'activation'
       fused_act_arg.s = activation_name_map[activation_op.type]
@@ -985,6 +1031,8 @@ class TFConverter(object):
         self.convert_reshape(op)
       elif self.is_atrous_conv2d(op):
         self.convert_atrous_conv2d(op)
+      elif self.check_conv_to_fc(op):
+        self.convert_global_conv_to_fc(op)
       elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
         if self.check_winograd_conv(op):
           self.convert_winograd_conv(op)
......
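
For context on the conv-to-FC rewrite above: when a Conv2D filter has the same spatial size as its input (or the padding is VALID with a 1x1 filter), the convolution produces a 1x1 spatial output and degenerates into a matrix multiply, which is why `check_conv_to_fc` lets the converter emit an FC op and flatten the filter to `[out_channels, in_channels * filter_h * filter_w, 1, 1]`. The following standalone numpy sketch is illustrative only (the sizes are made up and it is not part of the patch); it assumes TensorFlow's NHWC input and HWIO filter layouts.

```python
# Illustrative sketch: a "global" convolution (VALID padding, filter covering
# the whole input spatially) is numerically a fully-connected layer.
import numpy as np

H, W, C, O = 7, 7, 16, 32           # made-up sizes
x = np.random.rand(1, H, W, C)      # NHWC input feature map
f = np.random.rand(H, W, C, O)      # HWIO filter, same spatial size as input

# Global conv with VALID padding yields a single 1x1xO output per batch item.
conv_out = np.einsum('nhwc,hwco->no', x, f)

# The same numbers come out of a plain matrix multiply over the flattened input.
fc_out = x.reshape(1, H * W * C) @ f.reshape(H * W * C, O)

print(np.allclose(conv_out, fc_out))  # True
```

The `(3, 2, 0, 1)` transpose registered for the filter tensor is the usual HWIO-to-OIHW reordering, which lines up with the `NCHW` data format that the new `convert_global_conv_to_fc` attaches to the op.
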
@@ -188,8 +188,8 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
 DEFINE_int32(round, 1, "round");
 DEFINE_int32(restart_round, 1, "restart round");
 DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
-DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
-DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
+DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
+DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
 DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
 DEFINE_int32(cpu_power_option,
              0,
......
@@ -30,9 +30,13 @@ def adb_split_stdout(stdout_str):
 
 def adb_devices(target_socs=None):
-  outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
-  raw_lists = sh.cut(outputs, "-f1")
-  device_ids = adb_split_stdout(raw_lists)
+  device_ids = []
+  p = re.compile(r'(\w+)\s+device')
+  for line in adb_split_stdout(sh.adb("devices")):
+    m = p.match(line)
+    if m:
+      device_ids.append(m.group(1))
   if target_socs is not None:
     target_socs_set = set(target_socs)
     target_devices = []
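
As a quick reference for the new `adb_devices` parsing above, here is a self-contained sketch of how the `r'(\w+)\s+device'` pattern behaves on typical `adb devices` output; the sample text and serial numbers are made up and the snippet is not part of the patch.

```python
# Illustrative sketch of the regex-based serial-number extraction.
import re

sample_output = """List of devices attached
0123456789ABCDEF\tdevice
HT7521A01234\tdevice
"""

p = re.compile(r'(\w+)\s+device')
device_ids = []
for line in sample_output.splitlines():
    m = p.match(line)  # the header line does not match, device lines do
    if m:
        device_ids.append(m.group(1))

print(device_ids)  # ['0123456789ABCDEF', 'HT7521A01234']
```

A practical difference from the previous `sh.grep`/`sh.cut` pipeline: `grep` exits non-zero when nothing matches, which the `sh` module surfaces as an exception, while the regex loop simply leaves the list empty when no device is attached.
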
@@ -49,7 +53,7 @@ def adb_getprop_by_serialno(serialno):
   outputs = sh.adb("-s", serialno, "shell", "getprop")
   raw_props = adb_split_stdout(outputs)
   props = {}
-  p = re.compile("\[(.+)\]: \[(.+)\]")
+  p = re.compile(r'\[(.+)\]: \[(.+)\]')
   for raw_prop in raw_props:
     m = p.match(raw_prop)
     if m:
......
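
Similarly, here is a self-contained sketch of the `adb_getprop_by_serialno` pattern after the raw-string change; the property lines are typical `getprop` output with made-up values, and the snippet is not part of the patch. The raw string literal keeps the backslashes intact without relying on Python passing unrecognized escapes such as `\[` through unchanged.

```python
# Illustrative sketch of parsing `adb shell getprop` lines into a dict.
import re

raw_props = [
    "[ro.board.platform]: [msm8998]",   # made-up example values
    "[ro.product.model]: [MI 6]",
]

p = re.compile(r'\[(.+)\]: \[(.+)\]')
props = {}
for raw_prop in raw_props:
    m = p.match(raw_prop)
    if m:
        props[m.group(1)] = m.group(2)

print(props)  # {'ro.board.platform': 'msm8998', 'ro.product.model': 'MI 6'}
```
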