Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
0436bb8e
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
0436bb8e
编写于
1月 31, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix openmp bugs for cpu kernel.
上级
c5d83495
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
49 addition
and
34 deletion
+49
-34
mace/BUILD
mace/BUILD
+8
-0
mace/examples/BUILD
mace/examples/BUILD
+3
-3
mace/kernels/BUILD
mace/kernels/BUILD
+2
-2
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+2
-3
mace/kernels/bias_add.h
mace/kernels/bias_add.h
+2
-3
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+4
-6
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+1
-1
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+4
-0
mace/kernels/pooling.h
mace/kernels/pooling.h
+6
-6
mace/kernels/softmax.h
mace/kernels/softmax.h
+5
-6
mace/mace.bzl
mace/mace.bzl
+6
-0
mace/ops/BUILD
mace/ops/BUILD
+2
-2
tools/bazel-adb-run.sh
tools/bazel-adb-run.sh
+2
-1
tools/export_local_lib.sh
tools/export_local_lib.sh
+2
-1
未找到文件。
mace/BUILD
浏览文件 @
0436bb8e
...
...
@@ -51,3 +51,11 @@ config_setting(
},
visibility
=
[
"//visibility:public"
],
)
config_setting
(
name
=
"openmp_enabled"
,
define_values
=
{
"openmp"
:
"true"
,
},
visibility
=
[
"//visibility:public"
],
)
mace/examples/BUILD
浏览文件 @
0436bb8e
# Examples
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
,
"if_openmp_enabled"
)
cc_binary
(
name
=
"helloworld"
,
srcs
=
[
"helloworld.cc"
,
],
linkopts
=
if_
neon
_enabled
([
"-fopenmp"
]),
linkopts
=
if_
openmp
_enabled
([
"-fopenmp"
]),
deps
=
[
"//mace/core"
,
"//mace/ops"
,
...
...
@@ -17,7 +17,7 @@ cc_test(
name
=
"benchmark_example"
,
testonly
=
1
,
srcs
=
[
"benchmark_example.cc"
],
linkopts
=
if_
neon
_enabled
([
"-fopenmp"
]),
linkopts
=
if_
openmp
_enabled
([
"-fopenmp"
]),
linkstatic
=
1
,
deps
=
[
"//mace/core"
,
...
...
mace/kernels/BUILD
浏览文件 @
0436bb8e
...
...
@@ -7,7 +7,7 @@ package(
licenses
([
"notice"
])
# Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
,
"if_openmp_enabled"
)
cc_library
(
name
=
"kernels"
,
...
...
@@ -23,7 +23,7 @@ cc_library(
])
+
if_neon_enabled
(
glob
([
"neon/*.h"
,
])),
copts
=
if_
neon
_enabled
([
"-fopenmp"
]),
copts
=
if_
openmp
_enabled
([
"-fopenmp"
]),
linkopts
=
if_android
([
"-lm"
]),
deps
=
[
"//mace/core"
,
...
...
mace/kernels/batch_norm.h
浏览文件 @
0436bb8e
...
...
@@ -86,19 +86,18 @@ struct BatchNormFunctor : BatchNormFunctorBase {
}
}
index_t
pos
=
0
;
#pragma omp parallel for
#pragma omp parallel for
collapse(4)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
<
width
;
++
w
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
pos
=
(((
n
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
if
(
folded_constant_
)
{
output_ptr
[
pos
]
=
scale_ptr
[
c
]
*
input_ptr
[
pos
]
+
offset_ptr
[
c
];
}
else
{
output_ptr
[
pos
]
=
new_scale
[
c
]
*
input_ptr
[
pos
]
+
new_offset
[
c
];
}
++
pos
;
}
}
}
...
...
mace/kernels/bias_add.h
浏览文件 @
0436bb8e
...
...
@@ -33,14 +33,13 @@ struct BiasAddFunctor {
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
index_t
pos
=
0
;
#pragma omp parallel for
#pragma omp parallel for collapse(4)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
h
=
0
;
h
<
height
;
++
h
)
{
for
(
index_t
w
=
0
;
w
<
width
;
++
w
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
pos
=
(((
n
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
output_ptr
[
pos
]
=
input_ptr
[
pos
]
+
bias_ptr
[
c
];
++
pos
;
}
}
}
...
...
mace/kernels/conv_2d.h
浏览文件 @
0436bb8e
...
...
@@ -103,13 +103,15 @@ struct Conv2dFunctor : Conv2dFunctorBase {
auto
bias_data
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
T
>
();
auto
output_data
=
output
->
mutable_data
<
T
>
();
#pragma omp parallel for collapse(4)
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
const
int
out_idx
=
((
n
*
height
+
h
)
*
width
+
w
)
*
channels
+
c
;
T
bias_channel
=
0.0
f
;
if
(
bias
)
bias_channel
=
bias_data
[
c
];
*
output_data
=
bias_channel
;
output_data
[
out_idx
]
=
bias_channel
;
T
sum
=
0.0
f
;
const
T
*
filter_ptr
=
filter_data
+
c
;
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
...
...
@@ -123,8 +125,6 @@ struct Conv2dFunctor : Conv2dFunctorBase {
inw
>=
padded_w_start
&&
inw
<
padded_w_stop
,
"Out of range read from input: "
,
inh
,
", "
,
inw
);
// else padding with 0:
// sum += 0;
}
else
{
index_t
input_offset
=
n
*
input_height
*
input_width
*
input_channels
+
...
...
@@ -136,13 +136,11 @@ struct Conv2dFunctor : Conv2dFunctorBase {
}
}
}
*
output_data
+=
sum
;
output_data
++
;
output_data
[
out_idx
]
+=
sum
;
}
}
}
}
output_data
=
output
->
mutable_data
<
T
>
();
DoActivation
(
output_data
,
output_data
,
output
->
NumElements
(),
activation_
,
relux_max_limit_
,
prelu_alpha_
);
}
...
...
mace/kernels/depthwise_conv2d.h
浏览文件 @
0436bb8e
...
...
@@ -114,7 +114,7 @@ struct DepthwiseConv2dFunctor : public DepthwiseConv2dFunctorBase {
const
T
*
bias_ptr
=
bias
==
nullptr
?
nullptr
:
bias
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
#pragma omp parallel for collapse(
2
)
#pragma omp parallel for collapse(
4
)
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
0436bb8e
...
...
@@ -191,6 +191,10 @@ void TuningOrRun3DKernel(cl::Kernel &kernel,
{
1
,
kwg_size
/
32
,
32
,
1
},
{
1
,
kwg_size
/
64
,
64
,
1
},
{
1
,
kwg_size
/
128
,
128
,
1
},
{
4
,
kwg_size
/
16
,
4
,
1
},
{
4
,
kwg_size
/
28
,
7
,
1
},
{
4
,
kwg_size
/
32
,
8
,
1
},
{
4
,
kwg_size
/
56
,
14
,
1
},
{
3
,
15
,
9
,
1
},
{
7
,
15
,
9
,
1
},
{
9
,
7
,
15
,
1
},
...
...
mace/kernels/pooling.h
浏览文件 @
0436bb8e
...
...
@@ -96,11 +96,12 @@ struct PoolingFunctor : PoolingFunctorBase {
int
padded_w_start
=
0
-
paddings
[
1
]
/
2
;
if
(
pooling_type_
==
MAX
)
{
#pragma omp parallel for collapse(
2
)
#pragma omp parallel for collapse(
4
)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
in_offset
=
b
*
in_image_size
*
input_channels
+
c
;
T
res
=
std
::
numeric_limits
<
T
>::
lowest
();
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
...
...
@@ -114,18 +115,18 @@ struct PoolingFunctor : PoolingFunctorBase {
}
}
}
*
output
=
res
;
output
++
;
output
[
out_offset
]
=
res
;
}
}
}
}
}
else
if
(
pooling_type_
==
AVG
)
{
#pragma omp parallel for collapse(
2
)
#pragma omp parallel for collapse(
4
)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
(((
b
*
height
)
+
h
)
*
width
+
w
)
*
channels
+
c
;
index_t
in_offset
=
b
*
in_image_size
*
input_channels
+
c
;
T
sum
=
0
;
int
block_size
=
0
;
...
...
@@ -141,8 +142,7 @@ struct PoolingFunctor : PoolingFunctorBase {
}
}
}
*
output
=
sum
/
block_size
;
output
++
;
output
[
out_offset
]
=
sum
/
block_size
;
}
}
}
...
...
mace/kernels/softmax.h
浏览文件 @
0436bb8e
...
...
@@ -29,21 +29,20 @@ struct SoftmaxFunctor {
const
index_t
num_classes
=
logits_shape
.
back
();
#pragma omp parallel for
for
(
index_t
i
=
0
;
i
<
batch_size
;
++
i
)
{
T
max_value
=
*
logits_ptr
;
const
index_t
pos
=
i
*
num_classes
;
T
max_value
=
logits_ptr
[
pos
];
for
(
index_t
c
=
1
;
c
<
num_classes
;
++
c
)
{
max_value
=
std
::
max
(
max_value
,
logits_ptr
[
c
]);
max_value
=
std
::
max
(
max_value
,
logits_ptr
[
pos
+
c
]);
}
// TODO: check overflow?
T
sum
=
0
;
std
::
vector
<
T
>
exp_data
(
num_classes
);
for
(
index_t
c
=
0
;
c
<
num_classes
;
++
c
)
{
exp_data
[
c
]
=
::
exp
((
*
logits_ptr
-
max_value
));
exp_data
[
c
]
=
::
exp
((
logits_ptr
[
pos
+
c
]
-
max_value
));
sum
+=
exp_data
[
c
];
logits_ptr
++
;
}
for
(
index_t
c
=
0
;
c
<
num_classes
;
++
c
)
{
*
output_ptr
=
exp_data
[
c
]
/
sum
;
output_ptr
++
;
output_ptr
[
pos
+
c
]
=
exp_data
[
c
]
/
sum
;
}
}
}
...
...
mace/mace.bzl
浏览文件 @
0436bb8e
...
...
@@ -53,3 +53,9 @@ def if_not_hexagon_enabled(a):
"//mace:hexagon_enabled"
:
[],
"//conditions:default"
:
a
,
})
def
if_openmp_enabled
(
a
):
return
select
({
"//mace:openmp_enabled"
:
a
,
"//conditions:default"
:
[],
})
mace/ops/BUILD
浏览文件 @
0436bb8e
...
...
@@ -7,7 +7,7 @@ package(
licenses
([
"notice"
])
# Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_neon_enabled"
,
"if_openmp_enabled"
)
cc_library
(
name
=
"test"
,
...
...
@@ -34,7 +34,7 @@ cc_library(
[
"*.h"
],
exclude
=
[
"ops_test_util.h"
],
),
copts
=
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
]),
copts
=
if_
openmp_enabled
([
"-fopenmp"
])
+
if_
neon_enabled
([
"-DMACE_ENABLE_NEON"
]),
deps
=
[
"//mace/kernels"
,
],
...
...
tools/bazel-adb-run.sh
浏览文件 @
0436bb8e
...
...
@@ -43,7 +43,8 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--copt
=
"-D_GLIBCXX_USE_C99_MATH_TR1"
\
--copt
=
"-DMACE_DISABLE_NO_TUNING_WARNING"
\
--copt
=
"-Werror=return-type"
\
--define
neon
=
false
--define
neon
=
false
\
--define
openmp
=
true
if
[
$?
-ne
0
]
;
then
exit
1
...
...
tools/export_local_lib.sh
浏览文件 @
0436bb8e
...
...
@@ -55,7 +55,8 @@ build_target()
--copt
=
"-std=c++11"
\
--copt
=
"-D_GLIBCXX_USE_C99_MATH_TR1"
\
--copt
=
"-Werror=return-type"
\
--copt
=
"-DMACE_OBFUSCATE_LITERALS"
||
exit
-1
--copt
=
"-DMACE_OBFUSCATE_LITERALS"
\
--define
openmp
=
true
||
exit
-1
}
merge_libs
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录