慢慢CG / Mace (forked from Xiaomi / Mace)
Commit 7e9d1442, authored by liuqi on September 13, 2017 (parent: 23f6c70c)

Change conv2d neon kernel logic.
Showing 3 changed files with 70 additions and 43 deletions:

  mace/kernels/neon/conv_2d_neon.cc       +64  -16
  mace/kernels/neon/conv_2d_neon_3x3.h     +6   -3
  mace/kernels/neon/conv_2d_neon_base.h    +0  -24
mace/kernels/neon/conv_2d_neon.cc

@@ -4,16 +4,42 @@
 #include <arm_neon.h>
 #include "mace/kernels/conv_2d.h"
-#include "mace/kernels/neon/conv_2d_neon_base.h"
+#include "mace/kernels/neon/conv_2d_neon_3x3.h"
 
 namespace mace {
 namespace kernels {
 
 static inline void ConstructInputWithPadding(const float* input, const index_t* input_shape,
-                                             const int* padding,
-                                             std::unique_ptr<float>& output,
-                                             index_t* output_shape) {
+                                             const int* paddings,
+                                             Tensor& output_tensor,
+                                             std::vector<index_t>& output_shape) {
+  index_t batch = input_shape[0];
+  index_t channels = input_shape[1];
+  index_t height = input_shape[2];
+  index_t width = input_shape[3];
+
+  output_shape[0] = batch;
+  output_shape[1] = channels;
+  output_shape[2] = paddings[0] + height;
+  output_shape[3] = paddings[1] + width;
+  index_t output_width = output_shape[3];
+  int padded_left = paddings[1] / 2;
+
+  output_tensor.Resize(output_shape);
+  float* output_ptr = output_tensor.mutable_data<float>();
+  memset(output_ptr, 0, output_tensor.size() * sizeof(float));
+
+  output_ptr += paddings[0] / 2 * output_width;
+  for (; batch > 0; --batch) {
+    for (; channels > 0; --channels) {
+      for (; height > 0; --height) {
+        memcpy(output_ptr + padded_left, input, width * sizeof(float));
+        input += width;
+        output_ptr += output_width;
+      }
+      output_ptr += paddings[0] * output_width;
+    }
+  }
+}
 
 template <>
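The added ConstructInputWithPadding body above builds a zero-filled NCHW buffer and copies each input row into it at a left/top offset. For reference, here is a standalone sketch of the same row-copy arithmetic on plain buffers; PadNCHW, the int64_t stand-in for index_t, and the use of std::vector in place of the MACE Tensor are illustrative assumptions, not code from this repository. As in the diff (output_shape[2] = paddings[0] + height), the padding amounts are totals per axis, split between the two sides by integer division.

#include <cstdint>
#include <cstring>
#include <vector>

// Standalone illustration of the row-copy padding logic above. pad_h / pad_w
// are the *total* padding per axis.
std::vector<float> PadNCHW(const float* input, const int64_t shape[4],
                           int64_t pad_h, int64_t pad_w,
                           std::vector<int64_t>& out_shape) {
  const int64_t batch = shape[0], channels = shape[1];
  const int64_t height = shape[2], width = shape[3];
  out_shape = {batch, channels, height + pad_h, width + pad_w};

  const int64_t out_width = out_shape[3];
  const int64_t padded_left = pad_w / 2;
  std::vector<float> output(batch * channels * out_shape[2] * out_width, 0.0f);

  // Skip the top padding rows, then copy one input row per padded output row.
  int64_t out_offset = (pad_h / 2) * out_width;
  for (int64_t b = 0; b < batch; ++b) {
    for (int64_t c = 0; c < channels; ++c) {
      for (int64_t h = 0; h < height; ++h) {
        std::memcpy(output.data() + out_offset + padded_left, input,
                    width * sizeof(float));
        input += width;
        out_offset += out_width;
      }
      // Jump over this image's bottom padding plus the next image's top padding.
      out_offset += pad_h * out_width;
    }
  }
  return output;
}

int main() {
  const float input[2 * 3] = {1, 2, 3, 4, 5, 6};  // a 1x1x2x3 NCHW tensor
  const int64_t shape[4] = {1, 1, 2, 3};
  std::vector<int64_t> out_shape;
  std::vector<float> padded = PadNCHW(input, shape, /*pad_h=*/2, /*pad_w=*/2, out_shape);
  // padded is now 1x1x4x5: the two input rows sit centered, surrounded by zeros.
  return 0;
}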
@@ -25,18 +51,39 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input,  // NCHW
                                                         float* output,  // NCHW
                                                         const index_t* output_shape) {
-  static const bool selector[5][4] = {
-      {true, false, false, false},
-      {false, false, false, false},
-      {true, true, false, false},
-      {false, false, false, false},
-      {true, false, false, false},
+  typedef void (*Conv2dNeonFunction)(const float* input,  // NCHW
+                                     const index_t* input_shape,
+                                     const float* filter,  // c_out, c_in, kernel_h, kernel_w
+                                     const float* bias,  // c_out
+                                     float* output,  // NCHW
+                                     const index_t* output_shape);
+  static const Conv2dNeonFunction selector[5][2] = {
+      {nullptr, nullptr},
+      {nullptr, nullptr},
+      {Conv2dNeonK3x3S1, nullptr},
+      {nullptr, nullptr},
+      {nullptr, nullptr}
   };
   // not implement yet
   if (paddings_[0] != paddings_[1] || paddings_[0] > 5 ||
       strides_[0] != strides_[1] || strides_[0] > 4 ||
       dilations_[0] != 1 || dilations_[1] != 1 ||
-      !selector[paddings_[0] - 1, strides_[0] - 1]) {
+      selector[paddings_[0] - 1][strides_[0] - 1] == nullptr) {
     Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
         input,
         input_shape,
@@ -47,12 +94,13 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input,  // NCHW
         output_shape);
   }
-  std::unique_ptr<float> padded_input;
-  index_t padded_input_shape[4];
+  Tensor padded_input;
+  std::vector<index_t> padded_input_shape(4);
   ConstructInputWithPadding(input, input_shape, paddings_, padded_input, padded_input_shape);
-  Conv2dNeon<paddings_[0], paddings_[1], strides_[0], strides_[1]>(
-      padded_input.get(),
-      padded_input_shape,
+  auto conv2d_neon_func = selector[paddings_[0] - 1][strides_[0] - 1];
+  conv2d_neon_func(padded_input.data<float>(),
+                   padded_input_shape.data(),
      filter,
      bias,
      output,
...
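Net effect of the changes in this file: the compile-time Conv2dNeon<kernel_h, kernel_w, stride_h, stride_w> call is replaced by a runtime lookup in a selector table of function pointers, falling back to the generic CPU functor when the entry is nullptr. Below is a minimal, self-contained sketch of that dispatch pattern; the trimmed-down signature, the stand-in kernel bodies, and the explicit range check are simplifications for illustration, not the actual MACE implementations.

#include <cstdio>

// Same shape of typedef as the one added in the commit, with the tensor
// arguments trimmed down so the sketch stays short.
typedef void (*Conv2dNeonFunction)(const float* input, float* output);

// Stand-in for Conv2dNeonK3x3S1; the real kernel lives in conv_2d_neon_3x3.h.
static void K3x3S1(const float* /*input*/, float* /*output*/) {
  std::printf("3x3, stride-1 NEON path\n");
}

// Stand-in for the Conv2dFunctor<DeviceType::CPU, float> fallback.
static void CpuFallback(const float* /*input*/, float* /*output*/) {
  std::printf("generic CPU path\n");
}

int main() {
  // Rows are indexed by padding - 1 and columns by stride - 1, mirroring
  // selector[paddings_[0] - 1][strides_[0] - 1] in the diff. Only the
  // 3x3 / stride-1 entry is populated.
  static const Conv2dNeonFunction selector[5][2] = {
      {nullptr, nullptr},
      {nullptr, nullptr},
      {K3x3S1, nullptr},
      {nullptr, nullptr},
      {nullptr, nullptr},
  };

  const int padding = 3, stride = 1;  // example runtime parameters
  Conv2dNeonFunction func = nullptr;
  if (padding >= 1 && padding <= 5 && stride >= 1 && stride <= 2) {
    func = selector[padding - 1][stride - 1];
  }
  if (func == nullptr) {
    CpuFallback(nullptr, nullptr);  // unsupported configuration
  } else {
    func(nullptr, nullptr);
  }
  return 0;
}

In the actual functor, the same nullptr test is part of the guard that routes unsupported paddings, strides, and dilations to Conv2dFunctor<DeviceType::CPU, float> before the padded input is constructed.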
mace/kernels/neon/conv_2d_neon_3x3.cc → mace/kernels/neon/conv_2d_neon_3x3.h (renamed)
 //
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
+#ifndef MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
+#define MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
+
 #include <arm_neon.h>
-#include "mace/kernels/neon/conv_2d_neon_base.h"
+#include "mace/core/common.h"
 
 namespace mace {
 namespace kernels {
 
-template <>
-void Conv2dNeon<3, 3, 1, 1>(const float* input,  // NCHW
+void Conv2dNeonK3x3S1(const float* input,  // NCHW
                             const index_t* input_shape,
                             const float* filter,  // c_out, c_in, kernel_h, kernel_w
                             const float* bias,  // c_out
@@ -20,3 +21,5 @@ void Conv2dNeon<3, 3, 1, 1>(const float* input,  // NCHW
 }  // namespace kernels
 }  // namespace mace
+
+#endif  // MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
mace/kernels/neon/conv_2d_neon_base.h (deleted, file mode 100644 → 0)
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-#ifndef MACE_KERNELS_NEON_CONV_2D_NEON_BASE_H_
-#define MACE_KERNELS_NEON_CONV_2D_NEON_BASE_H_
-
-#include "mace/core/common.h"
-
-namespace mace {
-namespace kernels {
-
-template <index_t kernel_h, index_t kernel_w,
-          index_t stride_h, index_t stride_w>
-inline void Conv2dNeon(const float* input,  // NCHW
-                       const index_t* input_shape,
-                       const float* filter,  // c_out, c_in, kernel_h, kernel_w
-                       const float* bias,  // c_out
-                       float* output,  // NCHW
-                       const index_t* output_shape);
-
-}  // namespace kernels
-}  // namespace mace
-
-#endif  // MACE_KERNELS_NEON_CONV_2D_NEON_BASE_H_
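One point worth noting about the deleted declaration: kernel_h, kernel_w, stride_h, and stride_w were non-type template parameters, which must be compile-time constants, while the previous call site in conv_2d_neon.cc instantiated it with runtime values (Conv2dNeon<paddings_[0], paddings_[1], strides_[0], strides_[1]>). The sketch below only illustrates that language constraint with hypothetical names (ConvTemplated, ConvRuntime); it is not MACE code.

#include <cstdio>

// Template form, as in the deleted header: sizes are baked in at compile time,
// so each (kernel, stride) combination is a distinct instantiation.
template <int kernel_h, int kernel_w, int stride_h, int stride_w>
void ConvTemplated(const float* /*input*/, float* /*output*/) {
  std::printf("templated %dx%d kernel, stride %dx%d\n",
              kernel_h, kernel_w, stride_h, stride_w);
}

// Runtime form: the sizes are ordinary values, so selection can happen at run
// time (the commit instead keeps per-size kernels and picks one through a
// function-pointer table).
void ConvRuntime(int kernel_h, int kernel_w, int stride_h, int stride_w,
                 const float* /*input*/, float* /*output*/) {
  std::printf("runtime %dx%d kernel, stride %dx%d\n",
              kernel_h, kernel_w, stride_h, stride_w);
}

int main() {
  int stride = 1;  // a runtime value
  ConvTemplated<3, 3, 1, 1>(nullptr, nullptr);  // OK: template arguments are constants
  // ConvTemplated<3, 3, stride, stride>(nullptr, nullptr);  // would not compile:
  //                                                         // 'stride' is not a constant expression
  ConvRuntime(3, 3, stride, stride, nullptr, nullptr);
  return 0;
}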