Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
51f366fb
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
51f366fb
编写于
9月 19, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Reformat the code.
上级
5232b85e
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
100 addition
and
101 deletion
+100
-101
mace/kernels/depthwise_conv2d.h
mace/kernels/depthwise_conv2d.h
+25
-25
mace/kernels/neon/conv_2d_neon_1x1.cc
mace/kernels/neon/conv_2d_neon_1x1.cc
+21
-21
mace/kernels/neon/conv_2d_neon_3x3.cc
mace/kernels/neon/conv_2d_neon_3x3.cc
+4
-5
mace/kernels/neon/conv_2d_neon_5x5.cc
mace/kernels/neon/conv_2d_neon_5x5.cc
+28
-28
mace/kernels/neon/depthwise_conv_neon.cc
mace/kernels/neon/depthwise_conv_neon.cc
+7
-7
mace/ops/conv_2d.h
mace/ops/conv_2d.h
+7
-7
mace/ops/depthwise_conv2d.h
mace/ops/depthwise_conv2d.h
+8
-8
未找到文件。
mace/kernels/depthwise_conv2d.h
浏览文件 @
51f366fb
...
...
@@ -15,30 +15,30 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
class
DepthwiseConv2dFunctor
{
public:
DepthwiseConv2dFunctor
(
const
index_t
*
input_shape
,
const
index_t
*
filter_shape
,
const
int
*
strides
,
DepthwiseConv2dFunctor
(
const
index_t
*
input_shape
,
const
index_t
*
filter_shape
,
const
int
*
strides
,
const
Padding
padding
,
const
int
*
dilations
)
:
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
2
,
0
),
dilations_
(
dilations
)
{
CalPaddingSize
(
input_shape
,
filter_shape
,
dilations_
,
strides_
,
padding
,
paddings_
.
data
());
}
DepthwiseConv2dFunctor
(
const
int
*
strides
,
const
std
::
vector
<
int
>
&
paddings
,
const
int
*
dilations
)
:
DepthwiseConv2dFunctor
(
const
int
*
strides
,
const
std
::
vector
<
int
>
&
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
T
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
T
*
bias
,
// c_out
T
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
void
operator
()(
const
T
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
T
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
T
*
bias
,
// c_out
T
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
MACE_CHECK_NOTNULL
(
output
);
...
...
@@ -80,7 +80,7 @@ class DepthwiseConv2dFunctor {
index_t
offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
+
h
*
width
+
w
;
T
sum
=
0
;
const
T
*
filter_ptr
=
filter
+
c
*
kernel_size
;
const
T
*
filter_ptr
=
filter
+
c
*
kernel_size
;
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
...
...
@@ -110,19 +110,19 @@ class DepthwiseConv2dFunctor {
}
}
private:
const
int
*
strides_
;
// [stride_h, stride_w]
const
int
*
strides_
;
// [stride_h, stride_w]
std
::
vector
<
int
>
paddings_
;
// [padding_h, padding_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
};
template
<
>
void
DepthwiseConv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
template
<
>
void
DepthwiseConv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/neon/conv_2d_neon_1x1.cc
浏览文件 @
51f366fb
...
...
@@ -8,13 +8,13 @@
namespace
mace
{
namespace
kernels
{
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
height
=
output_shape
[
2
];
...
...
@@ -26,7 +26,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
const
index_t
input_width
=
input_shape
[
3
];
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
input_width
==
width
);
const
index_t
total_pixels
=
height
*
width
;
// Process 4 * 2 = 8 pixels for each innermost loop
...
...
@@ -36,17 +36,17 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// benchmark omp collapsed(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
const
float
*
filter_ptr
=
filter
;
const
float
*
filter_ptr
=
filter
;
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
// TODO Will GCC opt these out?
float
*
channel_output_start
=
float
*
channel_output_start
=
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
// Fill with bias
float
*
output_ptr
=
channel_output_start
;
float
*
output_ptr
=
channel_output_start
;
for
(
index_t
ptr
=
0
;
ptr
<
total_pixels
;
++
ptr
)
{
output_ptr
[
ptr
]
=
bias
[
c
];
// TODO can we avoid this?
}
...
...
@@ -54,15 +54,15 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
index_t
inc
=
0
;
// Process 4 input channels in batch
for
(;
inc
+
3
<
input_channels
;
inc
+=
4
)
{
float
*
output_ptr
=
channel_output_start
;
float
*
output_ptr
=
channel_output_start
;
// The begining of each input feature map channel
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr2
=
input_ptr1
+
total_pixels
;
const
float
*
input_ptr3
=
input_ptr2
+
total_pixels
;
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr2
=
input_ptr1
+
total_pixels
;
const
float
*
input_ptr3
=
input_ptr2
+
total_pixels
;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
...
...
@@ -140,10 +140,10 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
}
// Process the remaining channels
for
(;
inc
<
input_channels
;
++
inc
)
{
float
*
output_ptr
=
channel_output_start
;
float
*
output_ptr
=
channel_output_start
;
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
...
...
mace/kernels/neon/conv_2d_neon_3x3.cc
浏览文件 @
51f366fb
...
...
@@ -20,19 +20,18 @@ namespace kernels {
int multiplier = filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); \
int filter_in_channels = filter_shape == nullptr ? input_channels : filter_shape[1]; \
for (int b = 0; b < output_batch; ++b) { \
float
*
output_ptr_base = output + b * output_channels * output_height * output_width; \
float
*
output_ptr_base = output + b * output_channels * output_height * output_width; \
for (int oc = 0; oc < output_channels; ++oc) { \
const float
*
filter_ptr = filter + oc * filter_in_channels * kFilterSize; \
const float
*
input_ptr = input + b * input_channels * input_height * input_width; \
const float
*
filter_ptr = filter + oc * filter_in_channels * kFilterSize; \
const float
*
input_ptr = input + b * input_channels * input_height * input_width; \
if (filter_shape != nullptr) { \
input_ptr += (oc / multiplier) * input_height * input_width; \
} \
float
*
output_ptr = output_ptr_base + oc * output_height * output_width; \
float
*
output_ptr = output_ptr_base + oc * output_height * output_width; \
std::fill(output_ptr, output_ptr + output_height * output_width, bias[oc]); \
for (int ic = 0; ic < filter_in_channels; ++ic) { \
float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), vld1q_f32(filter_ptr+3), vld1q_f32(filter_ptr+6)};
#define KERNEL_TAIL_CODE \
filter_ptr += kFilterSize; \
input_ptr += input_height * input_width; \
...
...
mace/kernels/neon/conv_2d_neon_5x5.cc
浏览文件 @
51f366fb
...
...
@@ -10,13 +10,13 @@
namespace
mace
{
namespace
kernels
{
void
Conv2dNeonK5x5S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
void
Conv2dNeonK5x5S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
height
=
output_shape
[
2
];
...
...
@@ -40,9 +40,9 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
#pragma omp parallel for collapse(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
const
float
*
input_ptr
=
input
+
n
*
input_total_pixels_per_batch
;
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
const
float
*
input_ptr
=
input
+
n
*
input_total_pixels_per_batch
;
// Fill with bias
for
(
index_t
i
=
0
;
i
<
output_total_pixels_per_channel
;
++
i
)
{
...
...
@@ -50,24 +50,24 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
}
for
(
index_t
inc
=
0
;
inc
<
input_channels
;
++
inc
)
{
float
*
outptr
=
output_ptr
;
float
*
outptr2
=
outptr
+
width
;
const
float
*
inptr
=
input_ptr
+
inc
*
input_total_pixels_per_channel
;
const
float
*
filter_ptr
=
filter
+
c
*
patch_size
+
inc
*
25
;
const
float
*
r0
=
inptr
;
const
float
*
r1
=
inptr
+
input_width
;
const
float
*
r2
=
inptr
+
input_width
*
2
;
const
float
*
r3
=
inptr
+
input_width
*
3
;
const
float
*
r4
=
inptr
+
input_width
*
4
;
const
float
*
r5
=
inptr
+
input_width
*
5
;
const
float
*
k0
=
filter_ptr
;
const
float
*
k1
=
filter_ptr
+
5
;
const
float
*
k2
=
filter_ptr
+
10
;
const
float
*
k3
=
filter_ptr
+
15
;
const
float
*
k4
=
filter_ptr
+
20
;
float
*
outptr
=
output_ptr
;
float
*
outptr2
=
outptr
+
width
;
const
float
*
inptr
=
input_ptr
+
inc
*
input_total_pixels_per_channel
;
const
float
*
filter_ptr
=
filter
+
c
*
patch_size
+
inc
*
25
;
const
float
*
r0
=
inptr
;
const
float
*
r1
=
inptr
+
input_width
;
const
float
*
r2
=
inptr
+
input_width
*
2
;
const
float
*
r3
=
inptr
+
input_width
*
3
;
const
float
*
r4
=
inptr
+
input_width
*
4
;
const
float
*
r5
=
inptr
+
input_width
*
5
;
const
float
*
k0
=
filter_ptr
;
const
float
*
k1
=
filter_ptr
+
5
;
const
float
*
k2
=
filter_ptr
+
10
;
const
float
*
k3
=
filter_ptr
+
15
;
const
float
*
k4
=
filter_ptr
+
20
;
float32x4_t
_k0123
=
vld1q_f32
(
filter_ptr
);
float32x4_t
_k4567
=
vld1q_f32
(
filter_ptr
+
4
);
...
...
mace/kernels/neon/depthwise_conv_neon.cc
浏览文件 @
51f366fb
...
...
@@ -25,13 +25,13 @@ extern void Conv2dNeonK3x3S2(const float *input,
const
index_t
*
output_shape
);
template
<
>
void
DepthwiseConv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
void
DepthwiseConv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
typedef
void
(
*
Conv2dNeonFunction
)(
const
float
*
input
,
const
index_t
*
input_shape
,
...
...
mace/ops/conv_2d.h
浏览文件 @
51f366fb
...
...
@@ -13,17 +13,17 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
Conv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
Conv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
){};
Conv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
)
{};
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
const
Tensor
*
filter
=
this
->
Input
(
FILTER
);
const
Tensor
*
bias
=
this
->
Input
(
BIAS
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
const
Tensor
*
filter
=
this
->
Input
(
FILTER
);
const
Tensor
*
bias
=
this
->
Input
(
BIAS
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
...
...
mace/ops/depthwise_conv2d.h
浏览文件 @
51f366fb
...
...
@@ -14,25 +14,25 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
DepthwiseConv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
DepthwiseConv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
DepthwiseConv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
),
functor_
(
this
->
Input
(
INPUT
)
->
shape
().
data
(),
this
->
Input
(
FILTER
)
->
shape
().
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
this
->
dilations_
.
data
()){};
this
->
strides_
.
data
(),
this
->
padding_
,
this
->
dilations_
.
data
())
{};
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
const
Tensor
*
filter
=
this
->
Input
(
FILTER
);
const
Tensor
*
bias
=
this
->
Input
(
BIAS
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
const
Tensor
*
filter
=
this
->
Input
(
FILTER
);
const
Tensor
*
bias
=
this
->
Input
(
BIAS
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
// resize filter shape.
std
::
vector
<
index_t
>
filter_shape
(
filter
->
shape
().
begin
(),
filter
->
shape
().
end
());
filter_shape
[
0
]
*=
filter_shape
[
1
];
filter_shape
[
1
]
=
1
;
filter_shape
[
1
]
=
1
;
std
::
vector
<
index_t
>
output_shape
(
4
);
this
->
CalOutputSize
(
input
->
shape
().
data
(),
filter_shape
.
data
(),
output_shape
.
data
());
output
->
Resize
(
output_shape
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录