提交 7407738c 编写于 作者: Y yejianwu

Merge branch 'master' of v9.git.n.xiaomi.com:deep-computing/mace into lock_adb_run

@@ -215,8 +215,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
DEFINE_int32(cpu_power_option, 0,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
......
@@ -179,7 +179,7 @@ void OpenCLProfilingTimer::ClearTiming() {
accumulated_micros_ = 0;
}
GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_DEFAULT;
GPUPerfHint OpenCLRuntime::kGPUPerfHint = GPUPerfHint::PERF_NORMAL;
GPUPriorityHint OpenCLRuntime::kGPUPriorityHint =
GPUPriorityHint::PRIORITY_DEFAULT;
......
@@ -450,18 +450,6 @@ class CaffeConverter(object):
final_op.output_shape_map[final_op.layer.top[0]] = output_shape
self.resolved_ops.add(activation_op.name)
if op_def.type in ("Conv2D", "FusedConv2D") and \
output_shape[2] == 1 and \
((input_format == 'NCHW' and output_shape[3] == 1) or
(input_format == 'NHWC' and output_shape[1] == 1)):
print "convert op %s from CONV to FC" % op.name
op_def.type = 'FC'
filter_shape = weight_data.shape
new_shape = [filter_shape[0],
filter_shape[1] * filter_shape[2] * filter_shape[3],
1, 1]
weight_data.reshape(new_shape)
op_def.output.extend([final_op.name + ':0'])
self.add_output_shape(op_def, output_shape)
self.net_def.op.extend([op_def])
......
@@ -363,17 +363,15 @@ class TFConverter(object):
op_def.name = op.name
if op.type == 'DepthwiseConv2dNative':
op_def.type = 'DepthwiseConv2d'
if self.device == 'neon':
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (3, 2, 0, 1)
else:
op_def.type = op.type
if self.device == 'neon':
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (3, 2, 0, 1)
else:
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (0, 1, 3, 2)
if self.device == 'neon':
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (3, 2, 0, 1)
elif op.type == 'Conv2D':
self.transpose_filter_tensor[get_input_tensor(
op, 1).name] = (0, 1, 3, 2)
if self.device == 'gpu':
op_def.input.extend([op.inputs[0].name])
if op_def.type == 'DepthwiseConv2d':
@@ -402,21 +400,71 @@ class TFConverter(object):
final_op = op
self.resolved_ops[op.name] = 1
# convert global conv to fc
if len(self.tf_graph.get(op.name, [])) == 1 and \
self.tf_graph[op.name][0].type == 'BiasAdd':
bias_add_op = self.tf_graph[op.name][0]
if self.device == 'gpu':
output_name = self.add_buffer_to_image(
get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
op_def.input.extend([output_name])
else:
op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
final_op = bias_add_op
self.resolved_ops[bias_add_op.name] = 1
if len(self.tf_graph.get(final_op.name, [])) == 1 and \
self.tf_graph[final_op.name][0].type in activation_name_map:
activation_op = self.tf_graph[final_op.name][0]
if op_def.type == "Conv2D":
op_def.type = "FusedConv2D"
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
fused_act_arg.s = activation_name_map[activation_op.type]
if activation_op.type == 'Relu6':
max_limit_arg = op_def.arg.add()
max_limit_arg.name = 'max_limit'
max_limit_arg.f = 6
final_op = activation_op
self.resolved_ops[activation_op.name] = 1
op_def.output.extend([output.name for output in final_op.outputs])
self.add_output_shape(final_op.outputs, op_def)
self.net_def.op.extend([op_def])
def check_conv_to_fc(self, op):
if self.device != 'neon' or op.type != "Conv2D":
return False
filter_shape = get_input_tensor(op, 1).shape.as_list()
input_shape = get_input_tensor(op, 0).shape.as_list()
if op_def.type == "Conv2D" and input_shape[1] == filter_shape[0] and \
input_shape[2] == filter_shape[1] and \
(op.get_attr('padding') == 'VALID' or filter_shape[0] == 1 and
filter_shape[1] == 1):
print "convert op %s from CONV to FC" % op.name
op_def.type = 'FC'
self.reshape_tensor[get_input_tensor(op, 1).name] = \
[filter_shape[3],
filter_shape[2] * filter_shape[1] * filter_shape[0], 1, 1]
return input_shape[1] == filter_shape[0] \
and input_shape[2] == filter_shape[1] \
and (op.get_attr('padding') == 'VALID' or filter_shape[0] == 1
and filter_shape[1] == 1)
def convert_global_conv_to_fc(self, op):
op_def = mace_pb2.OperatorDef()
arg = op_def.arg.add()
arg.name = 'T'
arg.i = self.dt
op_def.name = op.name
op_def.type = 'FC'
self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
(3, 2, 0, 1)
filter_shape = get_input_tensor(op, 1).shape.as_list()
self.reshape_tensor[get_input_tensor(op, 1).name] = \
[filter_shape[3],
filter_shape[2] * filter_shape[1] * filter_shape[0], 1, 1]
op_def.input.extend(
[get_input_tensor(op, i).name for i in range(len(op.inputs))])
data_format_arg = op_def.arg.add()
data_format_arg.name = 'data_format'
data_format_arg.s = 'NCHW'
final_op = op
self.resolved_ops[op.name] = 1
if len(self.tf_graph.get(op.name, [])) == 1 and \
self.tf_graph[op.name][0].type == 'BiasAdd':
self.tf_graph[op.name][0].type == 'BiasAdd':
bias_add_op = self.tf_graph[op.name][0]
if self.device == 'gpu':
output_name = self.add_buffer_to_image(
@@ -428,10 +476,8 @@ class TFConverter(object):
self.resolved_ops[bias_add_op.name] = 1
if len(self.tf_graph.get(final_op.name, [])) == 1 and \
self.tf_graph[final_op.name][0].type in activation_name_map:
self.tf_graph[final_op.name][0].type in activation_name_map:
activation_op = self.tf_graph[final_op.name][0]
if op_def.type == "Conv2D":
op_def.type = "FusedConv2D"
fused_act_arg = op_def.arg.add()
fused_act_arg.name = 'activation'
fused_act_arg.s = activation_name_map[activation_op.type]
@@ -985,6 +1031,8 @@ class TFConverter(object):
self.convert_reshape(op)
elif self.is_atrous_conv2d(op):
self.convert_atrous_conv2d(op)
elif self.check_conv_to_fc(op):
self.convert_global_conv_to_fc(op)
elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
if self.check_winograd_conv(op):
self.convert_winograd_conv(op)
......
@@ -188,8 +188,8 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_perf_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 0, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 4, "num of openmp threads");
DEFINE_int32(cpu_power_option,
0,
......
@@ -30,9 +30,13 @@ def adb_split_stdout(stdout_str):
def adb_devices(target_socs=None):
outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
raw_lists = sh.cut(outputs, "-f1")
device_ids = adb_split_stdout(raw_lists)
device_ids = []
p = re.compile(r'(\w+)\s+device')
for line in adb_split_stdout(sh.adb("devices")):
m = p.match(line)
if m:
device_ids.append(m.group(1))
if target_socs is not None:
target_socs_set = set(target_socs)
target_devices = []
@@ -49,7 +53,7 @@ def adb_getprop_by_serialno(serialno):
outputs = sh.adb("-s", serialno, "shell", "getprop")
raw_props = adb_split_stdout(outputs)
props = {}
p = re.compile("\[(.+)\]: \[(.+)\]")
p = re.compile(r'\[(.+)\]: \[(.+)\]')
for raw_prop in raw_props:
m = p.match(raw_prop)
if m:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册