Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
84bf8f31
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
“61f94f00027fc4e6e6558303316c0972856e3bea”上不存在“paddle/fluid/operators/lrn_op.cu”
提交
84bf8f31
编写于
4月 11, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Change the default gpu perf hint to normal but benchmark with high.
上级
be9d1001
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
75 addition
and
39 deletion
+75
-39
mace/benchmark/benchmark_model.cc
mace/benchmark/benchmark_model.cc
+2
-2
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+1
-1
mace/python/tools/caffe_converter_lib.py
mace/python/tools/caffe_converter_lib.py
+0
-12
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+70
-22
mace/tools/validation/mace_run.cc
mace/tools/validation/mace_run.cc
+2
-2
未找到文件。
mace/benchmark/benchmark_model.cc
浏览文件 @
84bf8f31
...
...
@@ -215,8 +215,8 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
DEFINE_int32
(
warmup_runs
,
1
,
"how many runs to initialize model"
);
DEFINE_string
(
model_data_file
,
""
,
"model data file name, used when EMBED_MODEL_DATA set to 0"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_perf_hint
,
0
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
0
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
4
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_power_option
,
0
,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"
);
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
84bf8f31
...
...
@@ -179,7 +179,7 @@ void OpenCLProfilingTimer::ClearTiming() {
accumulated_micros_
=
0
;
}
GPUPerfHint
OpenCLRuntime
::
kGPUPerfHint
=
GPUPerfHint
::
PERF_
DEFAULT
;
GPUPerfHint
OpenCLRuntime
::
kGPUPerfHint
=
GPUPerfHint
::
PERF_
NORMAL
;
GPUPriorityHint
OpenCLRuntime
::
kGPUPriorityHint
=
GPUPriorityHint
::
PRIORITY_DEFAULT
;
...
...
mace/python/tools/caffe_converter_lib.py
浏览文件 @
84bf8f31
...
...
@@ -450,18 +450,6 @@ class CaffeConverter(object):
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
if
op_def
.
type
in
(
"Conv2D"
,
"FusedConv2D"
)
and
\
output_shape
[
2
]
==
1
and
\
((
input_format
==
'NCHW'
and
output_shape
[
3
]
==
1
)
or
(
input_format
==
'NHWC'
and
output_shape
[
1
]
==
1
)):
print
"convert op %s from CONV to FC"
%
op
.
name
op_def
.
type
=
'FC'
filter_shape
=
weight_data
.
shape
new_shape
=
[
filter_shape
[
0
],
filter_shape
[
1
]
*
filter_shape
[
2
]
*
filter_shape
[
3
],
1
,
1
]
weight_data
.
reshape
(
new_shape
)
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
...
...
mace/python/tools/tf_converter_lib.py
浏览文件 @
84bf8f31
...
...
@@ -363,17 +363,15 @@ class TFConverter(object):
op_def
.
name
=
op
.
name
if
op
.
type
==
'DepthwiseConv2dNative'
:
op_def
.
type
=
'DepthwiseConv2d'
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
op_def
.
type
=
op
.
type
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
elif
op
.
type
==
'Conv2D'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
op_def
.
type
==
'DepthwiseConv2d'
:
...
...
@@ -402,19 +400,6 @@ class TFConverter(object):
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
# convert global conv to fc
filter_shape
=
get_input_tensor
(
op
,
1
).
shape
.
as_list
()
input_shape
=
get_input_tensor
(
op
,
0
).
shape
.
as_list
()
if
op_def
.
type
==
"Conv2D"
and
input_shape
[
1
]
==
filter_shape
[
0
]
and
\
input_shape
[
2
]
==
filter_shape
[
1
]
and
\
(
op
.
get_attr
(
'padding'
)
==
'VALID'
or
filter_shape
[
0
]
==
1
and
filter_shape
[
1
]
==
1
):
print
"convert op %s from CONV to FC"
%
op
.
name
op_def
.
type
=
'FC'
self
.
reshape_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
\
[
filter_shape
[
3
],
filter_shape
[
2
]
*
filter_shape
[
1
]
*
filter_shape
[
0
],
1
,
1
]
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
...
...
@@ -446,6 +431,67 @@ class TFConverter(object):
self
.
add_output_shape
(
final_op
.
outputs
,
op_def
)
self
.
net_def
.
op
.
extend
([
op_def
])
def
check_conv_to_fc
(
self
,
op
):
if
self
.
device
!=
'neon'
or
op
.
type
!=
"Conv2D"
:
return
False
filter_shape
=
get_input_tensor
(
op
,
1
).
shape
.
as_list
()
input_shape
=
get_input_tensor
(
op
,
0
).
shape
.
as_list
()
return
input_shape
[
1
]
==
filter_shape
[
0
]
and
\
input_shape
[
2
]
==
filter_shape
[
1
]
and
\
(
op
.
get_attr
(
'padding'
)
==
'VALID'
or
filter_shape
[
0
]
==
1
and
filter_shape
[
1
]
==
1
)
def
convert_global_conv_to_fc
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
op_def
.
name
=
op
.
name
op_def
.
type
=
'FC'
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
\
(
3
,
2
,
0
,
1
)
filter_shape
=
get_input_tensor
(
op
,
1
).
shape
.
as_list
()
self
.
reshape_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
\
[
filter_shape
[
3
],
filter_shape
[
2
]
*
filter_shape
[
1
]
*
filter_shape
[
0
],
1
,
1
]
op_def
.
input
.
extend
(
[
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
data_format_arg
.
s
=
'NCHW'
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
if
activation_op
.
type
==
'Relu6'
:
max_limit_arg
=
op_def
.
arg
.
add
()
max_limit_arg
.
name
=
'max_limit'
max_limit_arg
.
f
=
6
final_op
=
activation_op
self
.
resolved_ops
[
activation_op
.
name
]
=
1
op_def
.
output
.
extend
([
output
.
name
for
output
in
final_op
.
outputs
])
self
.
add_output_shape
(
final_op
.
outputs
,
op_def
)
self
.
net_def
.
op
.
extend
([
op_def
])
def
convert_fused_batchnorm
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
...
...
@@ -985,6 +1031,8 @@ class TFConverter(object):
self
.
convert_reshape
(
op
)
elif
self
.
is_atrous_conv2d
(
op
):
self
.
convert_atrous_conv2d
(
op
)
elif
self
.
check_conv_to_fc
(
op
):
self
.
convert_global_conv_to_fc
(
op
)
elif
op
.
type
==
'Conv2D'
or
op
.
type
==
'DepthwiseConv2dNative'
:
if
self
.
check_winograd_conv
(
op
):
self
.
convert_winograd_conv
(
op
)
...
...
mace/tools/validation/mace_run.cc
浏览文件 @
84bf8f31
...
...
@@ -188,8 +188,8 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
DEFINE_int32
(
round
,
1
,
"round"
);
DEFINE_int32
(
restart_round
,
1
,
"restart round"
);
DEFINE_int32
(
malloc_check_cycle
,
-
1
,
"malloc debug check cycle, -1 to disable"
);
DEFINE_int32
(
gpu_perf_hint
,
2
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
1
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_perf_hint
,
0
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
gpu_priority_hint
,
0
,
"0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"
);
DEFINE_int32
(
omp_num_threads
,
4
,
"num of openmp threads"
);
DEFINE_int32
(
cpu_power_option
,
0
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录