Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
b9492125
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
b9492125
编写于
9月 13, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'conv2d-neon' into 'master'
Add conv2d neon api. See merge request !31
上级
dbb49292
7e9d1442
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
146 additions
and
0 deletions
+146
-0
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+9
-0
mace/kernels/neon/conv_2d_neon.cc
mace/kernels/neon/conv_2d_neon.cc
+112
-0
mace/kernels/neon/conv_2d_neon_3x3.h
mace/kernels/neon/conv_2d_neon_3x3.h
+25
-0
未找到文件。
mace/kernels/conv_2d.h
浏览文件 @
b9492125
...
...
@@ -108,6 +108,15 @@ class Conv2dFunctor {
const
int
*
dilations_
;
// [dilation_h, dilation_w]
};
// Declares the explicit specialization of Conv2dFunctor::operator() for
// DeviceType::NEON with float data, so that translation units including this
// header do not implicitly instantiate the generic version for that
// combination.
template <>
void Conv2dFunctor<DeviceType::NEON, float>::operator()(
    const float *input,           // NCHW
    const index_t *input_shape,
    const float *filter,          // c_out, c_in, kernel_h, kernel_w
    const index_t *filter_shape,
    const float *bias,            // c_out
    float *output,                // NCHW
    const index_t *output_shape);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/neon/conv_2d_neon.cc
0 → 100644
浏览文件 @
b9492125
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/conv_2d.h"
#include "mace/kernels/neon/conv_2d_neon_3x3.h"
namespace
mace
{
namespace
kernels
{
static
inline
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
&
output_tensor
,
std
::
vector
<
index_t
>&
output_shape
)
{
index_t
batch
=
input_shape
[
0
];
index_t
channels
=
input_shape
[
1
];
index_t
height
=
input_shape
[
2
];
index_t
width
=
input_shape
[
3
];
output_shape
[
0
]
=
batch
;
output_shape
[
1
]
=
channels
;
output_shape
[
2
]
=
paddings
[
0
]
+
height
;
output_shape
[
3
]
=
paddings
[
1
]
+
width
;
index_t
output_width
=
output_shape
[
3
];
int
padded_left
=
paddings
[
1
]
/
2
;
output_tensor
.
Resize
(
output_shape
);
float
*
output_ptr
=
output_tensor
.
mutable_data
<
float
>
();
memset
(
output_ptr
,
0
,
output_tensor
.
size
()
*
sizeof
(
float
));
output_ptr
+=
paddings
[
0
]
/
2
*
output_width
;
for
(;
batch
>
0
;
--
batch
)
{
for
(;
channels
>
0
;
--
channels
)
{
for
(;
height
>
0
;
--
height
)
{
memcpy
(
output_ptr
+
padded_left
,
input
,
width
*
sizeof
(
float
));
input
+=
width
;
output_ptr
+=
output_width
;
}
output_ptr
+=
paddings
[
0
]
*
output_width
;
}
}
}
// NEON float specialization of the 2-D convolution functor.
//
// Looks up a specialised NEON kernel in a small dispatch table. When no kernel
// is registered for the current configuration, the work is delegated to the
// DeviceType::CPU functor and this function returns. Otherwise the input is
// copied into a zero-padded temporary tensor and the selected kernel runs on
// that padded copy.
//
//   input / input_shape    source data and its four extents (NCHW).
//   filter / filter_shape  weights (c_out, c_in, kernel_h, kernel_w) and their
//                          extents; filter_shape is only forwarded to the CPU
//                          fallback.
//   bias                   one value per output channel.
//   output / output_shape  destination buffer and its four extents (NCHW).
template <>
void Conv2dFunctor<DeviceType::NEON, float>::operator()(
    const float *input,           // NCHW
    const index_t *input_shape,
    const float *filter,          // c_out, c_in, kernel_h, kernel_w
    const index_t *filter_shape,
    const float *bias,            // c_out
    float *output,                // NCHW
    const index_t *output_shape) {
  using Conv2dNeonFunction = void (*)(
      const float *input,           // NCHW
      const index_t *input_shape,
      const float *filter,          // c_out, c_in, kernel_h, kernel_w
      const float *bias,            // c_out
      float *output,                // NCHW
      const index_t *output_shape);

  constexpr int kSelectorRows = 5;
  constexpr int kSelectorCols = 2;
  static const Conv2dNeonFunction selector[kSelectorRows][kSelectorCols] = {
      {nullptr, nullptr},
      {nullptr, nullptr},
      {Conv2dNeonK3x3S1, nullptr},
      {nullptr, nullptr},
      {nullptr, nullptr}};

  // NOTE(review): the table is indexed by paddings_[0] - 1, yet the only
  // registered entry is named for a 3x3 kernel and sits in row 2, which looks
  // like a kernel-size index (filter_shape[2] - 1). Confirm the intended key
  // before registering more kernels.
  const int row = paddings_[0] - 1;
  const int col = strides_[0] - 1;

  // Validate both indices before touching the table so that zero/negative
  // paddings or strides and strides above kSelectorCols cannot read outside
  // the array.
  const bool has_neon_kernel =
      paddings_[0] == paddings_[1] &&
      strides_[0] == strides_[1] &&
      dilations_[0] == 1 && dilations_[1] == 1 &&
      row >= 0 && row < kSelectorRows &&
      col >= 0 && col < kSelectorCols &&
      selector[row][col] != nullptr;

  if (!has_neon_kernel) {
    // Not implemented for NEON yet: use the generic CPU implementation and
    // stop here so the (possibly null) NEON entry is never invoked.
    Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
        input, input_shape, filter, filter_shape, bias, output, output_shape);
    return;
  }

  Tensor padded_input;
  std::vector<index_t> padded_input_shape(4);
  ConstructInputWithPadding(input, input_shape, paddings_, padded_input,
                            padded_input_shape);

  const Conv2dNeonFunction conv2d_neon_func = selector[row][col];
  conv2d_neon_func(padded_input.data<float>(), padded_input_shape.data(),
                   filter, bias, output, output_shape);
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/neon/conv_2d_neon_3x3.h
0 → 100644
浏览文件 @
b9492125
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
#define MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
#include <arm_neon.h>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
// NEON convolution kernel entry point used by the conv2d dispatch table
// (the name indicates a 3x3 kernel with stride 1).
//
// The body is currently an empty placeholder: none of the arguments are read
// and `output` is left untouched.
//
// Declared `inline` because the definition lives in a header; without it,
// including this header from more than one translation unit produces
// duplicate-symbol link errors.
inline void Conv2dNeonK3x3S1(
    const float *input,           // NCHW
    const index_t *input_shape,
    const float *filter,          // c_out, c_in, kernel_h, kernel_w
    const float *bias,            // c_out
    float *output,                // NCHW
    const index_t *output_shape) {
  // TODO: implement the 3x3 stride-1 NEON kernel.
}
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_NEON_CONV_2D_NEON_3X3_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录