Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
e34d6183
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e34d6183
编写于
4月 20, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix BUILD warning and conv test precision.
上级
b0bfe7f6
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
98 additions
and
78 deletions
+98
-78
mace/core/mace.cc
mace/core/mace.cc
+5
-3
mace/kernels/BUILD
mace/kernels/BUILD
+12
-6
mace/ops/BUILD
mace/ops/BUILD
+18
-9
mace/ops/fused_conv_2d_test.cc
mace/ops/fused_conv_2d_test.cc
+60
-58
mace/test/BUILD
mace/test/BUILD
+3
-2
未找到文件。
mace/core/mace.cc
浏览文件 @
e34d6183
...
...
@@ -193,9 +193,11 @@ MaceStatus MaceEngine::Impl::Run(
input_tensors
.
push_back
(
input_tensor
);
}
for
(
auto
&
output
:
*
outputs
)
{
MACE_CHECK
(
output
.
second
.
shape
().
size
()
==
4
,
"The outputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions"
);
if
(
device_type_
==
DeviceType
::
OPENCL
)
{
MACE_CHECK
(
output
.
second
.
shape
().
size
()
==
4
,
"The outputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions"
);
}
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
output_tensors
.
push_back
(
output_tensor
);
...
...
mace/kernels/BUILD
浏览文件 @
e34d6183
...
...
@@ -28,9 +28,12 @@ cc_library(
"opencl/*.h"
,
"arm/*.h"
,
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
linkopts
=
if_android
([
"-lm"
]),
deps
=
[
"//mace/core"
,
...
...
@@ -48,9 +51,12 @@ cc_test(
"opencl/*_test.cc"
,
],
),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
linkopts
=
[
"-fopenmp"
],
linkstatic
=
1
,
deps
=
[
...
...
mace/ops/BUILD
浏览文件 @
e34d6183
...
...
@@ -34,9 +34,12 @@ cc_library(
[
"*.h"
],
exclude
=
[
"ops_test_util.h"
],
),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
deps
=
[
"//mace/kernels"
,
],
...
...
@@ -49,9 +52,12 @@ cc_test(
srcs
=
glob
(
[
"*_test.cc"
],
),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
linkopts
=
[
"-fopenmp"
],
linkstatic
=
1
,
deps
=
[
...
...
@@ -65,9 +71,12 @@ cc_test(
name
=
"ops_benchmark"
,
testonly
=
1
,
srcs
=
glob
([
"*_benchmark.cc"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
linkopts
=
[
"-fopenmp"
],
linkstatic
=
1
,
deps
=
[
...
...
mace/ops/fused_conv_2d_test.cc
浏览文件 @
e34d6183
...
...
@@ -375,90 +375,92 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
namespace
{
template
<
DeviceType
D
>
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
,
const
int
kernel
,
const
int
stride
,
Padding
type
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
// generate random input
static
unsigned
int
seed
=
time
(
NULL
);
index_t
batch
=
3
+
(
rand_r
(
&
seed
)
%
10
);
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
index_t
input_channels
=
shape
[
2
]
+
(
rand_r
(
&
seed
)
%
10
);
index_t
output_channels
=
shape
[
3
]
+
(
rand_r
(
&
seed
)
%
10
);
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
// generate random input
srand
(
time
(
NULL
));
index_t
batch
=
3
;
index_t
height
=
shape
[
0
];
index_t
width
=
shape
[
1
];
index_t
input_channels
=
shape
[
2
];
index_t
output_channels
=
shape
[
3
];
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
stride
_h
,
stride_w
})
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
&
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
(
{
kernel
_h
,
kernel_w
,
output_channels
,
input_channels
},
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
&
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
(
{
kernel
,
kernel
,
output_channels
,
input_channels
},
&
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
&
float_bias_data
);
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
&
float_bias_data
);
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel
_h
,
kernel_w
,
output_channels
,
input_channels
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel
,
kernel
,
output_channels
,
input_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on gpu
BufferToImage
<
D
,
half
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
&
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
half
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
// run on cpu
net
.
RunOp
();
// Check
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
half
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
BufferToImage
<
D
,
half
>
(
&
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
CONV2D_FILTER
);
BufferToImage
<
D
,
half
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
stride
_h
,
stride_w
})
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataType
::
DT_HALF
))
.
Finalize
(
net
.
NewOperatorDef
());
// Run on device
net
.
RunOp
(
D
);
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
,
1e-1
);
};
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
for
(
int
kernel_size
:
{
1
,
3
})
{
for
(
int
stride
:
{
1
,
2
})
{
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
VALID
);
}
}
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
,
1e-1
);
}
}
// namespace
TEST_F
(
FusedConv2dOpTest
,
OPENCLHalfAlignedConvNxNS12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
});
TEST_F
(
FusedConv2dOpTest
,
OPENCLHalfAlignedConv1x1S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
},
1
,
1
,
VALID
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
31
,
37
,
31
,
37
},
1
,
1
,
SAME
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
},
1
,
2
,
VALID
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
31
,
37
,
31
,
37
},
1
,
2
,
SAME
);
}
TEST_F
(
FusedConv2dOpTest
,
OPENCLHalfAlignedConv3x3S12
)
{
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
},
3
,
1
,
VALID
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
31
,
37
,
31
,
37
},
3
,
1
,
SAME
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
},
3
,
2
,
VALID
);
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
31
,
37
,
31
,
37
},
3
,
2
,
SAME
);
}
namespace
{
...
...
mace/test/BUILD
浏览文件 @
e34d6183
...
...
@@ -15,8 +15,9 @@ cc_test(
srcs
=
[
"mace_api_test.cc"
],
copts
=
if_openmp_enabled
([
"-fopenmp"
])
+
if_neon_enabled
([
"-DMACE_ENABLE_NEON"
])
+
if_android_armv7
([
"-mfpu=neon -mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
,])
+
if_android_armv7
([
"-mfpu=neon"
])
+
if_android_armv7
([
"-mfloat-abi=softfp"
])
+
if_android
([
"-DMACE_ENABLE_OPENCL"
])
+
if_hexagon_enabled
([
"-DMACE_ENABLE_HEXAGON"
]),
linkopts
=
[
"-fopenmp"
],
linkstatic
=
1
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录