Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
f6c669f6
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f6c669f6
编写于
9月 17, 2018
作者:
李
李滨
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'sgemm_conv' into 'master'
Replace gemm with sgemm. See merge request !794
上级
df23f428
961dddd9
变更
9
显示空白变更内容
内联
并排
Showing
9 changed files
with
93 additions
and
42 deletions
+93
-42
mace/core/buffer.h
mace/core/buffer.h
+7
-2
mace/kernels/arm/conv_2d_neon.h
mace/kernels/arm/conv_2d_neon.h
+4
-1
mace/kernels/arm/conv_2d_neon_1x1.cc
mace/kernels/arm/conv_2d_neon_1x1.cc
+16
-5
mace/kernels/arm/conv_winograd.cc
mace/kernels/arm/conv_winograd.cc
+36
-26
mace/kernels/arm/conv_winograd.h
mace/kernels/arm/conv_winograd.h
+7
-2
mace/kernels/arm/conv_winograd_test.cc
mace/kernels/arm/conv_winograd_test.cc
+2
-1
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+17
-2
mace/kernels/matmul.h
mace/kernels/matmul.h
+4
-1
mace/kernels/sgemm.cc
mace/kernels/sgemm.cc
+0
-2
未找到文件。
mace/core/buffer.h
浏览文件 @
f6c669f6
...
...
@@ -469,6 +469,7 @@ class ScratchBuffer: public Buffer {
MaceStatus
GrowSize
(
index_t
size
)
{
if
(
size
>
size_
)
{
MACE_CHECK
(
offset_
==
0
,
"scratch is being used, cannot grow size"
);
return
Resize
(
size
);
}
return
MaceStatus
::
MACE_SUCCESS
;
...
...
@@ -487,8 +488,12 @@ class ScratchBuffer: public Buffer {
return
slice
;
}
void
Rewind
()
{
offset_
=
0
;
void
Rewind
(
index_t
offset
=
0
)
{
offset_
=
offset
;
}
index_t
offset
()
const
{
return
offset_
;
}
private:
...
...
mace/kernels/arm/conv_2d_neon.h
浏览文件 @
f6c669f6
...
...
@@ -16,6 +16,7 @@
#define MACE_KERNELS_ARM_CONV_2D_NEON_H_
#include "mace/core/types.h"
#include "mace/kernels/sgemm.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -27,7 +28,9 @@ void Conv2dNeonK1x1S1(const float *input,
const
index_t
width
,
const
index_t
in_channels
,
const
index_t
out_channels
,
float
*
output
);
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
);
void
Conv2dNeonK3x3S1
(
const
float
*
input
,
const
float
*
filter
,
...
...
mace/kernels/arm/conv_2d_neon_1x1.cc
浏览文件 @
f6c669f6
...
...
@@ -13,7 +13,6 @@
// limitations under the License.
#include "mace/kernels/arm/conv_2d_neon.h"
#include "mace/kernels/gemm.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -25,11 +24,23 @@ void Conv2dNeonK1x1S1(const float *input,
const
index_t
width
,
const
index_t
in_channels
,
const
index_t
out_channels
,
float
*
output
)
{
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
)
{
for
(
index_t
b
=
0
;
b
<
batch
;
++
b
)
{
Gemm
(
filter
,
input
+
b
*
in_channels
*
height
*
width
,
1
,
out_channels
,
in_channels
,
height
*
width
,
output
+
b
*
out_channels
*
height
*
width
);
sgemm
->
Run
(
filter
,
input
+
b
*
in_channels
*
height
*
width
,
1
,
out_channels
,
in_channels
,
in_channels
,
height
*
width
,
false
,
false
,
true
,
false
,
output
+
b
*
out_channels
*
height
*
width
,
scratch_buffer
);
}
}
...
...
mace/kernels/arm/conv_winograd.cc
浏览文件 @
f6c669f6
...
...
@@ -12,13 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <math.h>
#include <algorithm>
#include "mace/kernels/arm/conv_winograd.h"
#include "mace/kernels/gemm.h"
#include "mace/utils/logging.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -247,30 +244,38 @@ void BatchGemm(const float *input,
index_t
out_channels
,
index_t
tile_count
,
int
out_tile_size
,
float
*
output
)
{
const
index_t
filter_stride
=
out_channels
*
in_channels
;
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
)
{
const
int
in_tile_area
=
(
out_tile_size
+
2
)
*
(
out_tile_size
+
2
);
const
index_t
in_batch_size
=
in_tile_area
*
in_channels
*
tile_count
;
const
index_t
in_stride
=
in_channels
*
tile_count
;
const
index_t
out_batch_size
=
in_tile_area
*
out_channels
*
tile_count
;
const
index_t
out_stride
=
out_channels
*
tile_count
;
if
(
batch
==
1
)
{
Gemm
(
filter
,
input
,
in_tile_area
,
out_channels
,
in_channels
,
tile_count
,
output
);
}
else
{
#pragma omp parallel for collapse(2)
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int
i
=
0
;
i
<
in_tile_area
;
++
i
)
{
const
float
*
in_ptr
=
input
+
b
*
in_batch_size
+
i
*
in_stride
;
const
float
*
filter_ptr
=
filter
+
i
*
filter_stride
;
float
*
out_ptr
=
output
+
b
*
out_batch_size
+
i
*
out_stride
;
Gemm
(
filter_ptr
,
in_ptr
,
1
,
out_channels
,
/* rows */
in_channels
,
/* K */
tile_count
,
/* cols */
out_ptr
);
}
index_t
scratch_buffer_offset
=
0
;
if
(
scratch_buffer
)
{
scratch_buffer_offset
=
scratch_buffer
->
offset
();
}
// 'batch' is not the gemm batch; 'in_tile_area' is. gemm is not thread safe,
// so we loop over batch using a single thread.
// The scratch buffer should be rewound to its initial position so that the
// same scratch memory is reused for each batch.
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
if
(
scratch_buffer
)
{
scratch_buffer
->
Rewind
(
scratch_buffer_offset
);
}
sgemm
->
Run
(
filter
,
input
+
b
*
in_batch_size
,
in_tile_area
,
out_channels
,
in_channels
,
in_channels
,
tile_count
,
false
,
false
,
true
,
false
,
output
+
b
*
out_batch_size
,
scratch_buffer
);
}
}
...
...
@@ -613,7 +618,9 @@ void WinoGradConv3x3s1(const float *input,
const
int
out_tile_size
,
float
*
transformed_input
,
float
*
transformed_output
,
float
*
output
)
{
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
)
{
index_t
out_height
=
in_height
-
2
;
index_t
out_width
=
in_width
-
2
;
index_t
tile_height_count
=
...
...
@@ -636,7 +643,8 @@ void WinoGradConv3x3s1(const float *input,
}
BatchGemm
(
transformed_input
,
transformed_filter
,
batch
,
in_channels
,
out_channels
,
tile_count
,
out_tile_size
,
transformed_output
);
out_channels
,
tile_count
,
out_tile_size
,
transformed_output
,
sgemm
,
scratch_buffer
);
switch
(
out_tile_size
)
{
case
2
:
...
...
@@ -660,7 +668,9 @@ void WinoGradConv3x3s1(const float *input,
const
index_t
in_channels
,
const
index_t
out_channels
,
const
int
out_tile_size
,
float
*
output
)
{
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
)
{
index_t
out_height
=
in_height
-
2
;
index_t
out_width
=
in_width
-
2
;
index_t
tile_height_count
=
...
...
@@ -692,7 +702,7 @@ void WinoGradConv3x3s1(const float *input,
WinoGradConv3x3s1
(
input
,
transformed_filter
,
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
out_tile_size
,
transformed_input
,
transformed_output
,
output
);
transformed_output
,
output
,
sgemm
,
scratch_buffer
);
delete
[]
transformed_input
;
delete
[]
transformed_filter
;
...
...
mace/kernels/arm/conv_winograd.h
浏览文件 @
f6c669f6
...
...
@@ -20,6 +20,7 @@
#endif
#include "mace/core/types.h"
#include "mace/kernels/sgemm.h"
namespace
mace
{
namespace
kernels
{
...
...
@@ -42,7 +43,9 @@ void WinoGradConv3x3s1(const float *input,
const
index_t
in_channels
,
const
index_t
out_channels
,
const
int
out_tile_size
,
float
*
output
);
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
);
void
WinoGradConv3x3s1
(
const
float
*
input
,
const
float
*
transformed_filter
,
...
...
@@ -54,7 +57,9 @@ void WinoGradConv3x3s1(const float *input,
const
int
out_tile_size
,
float
*
transformed_input
,
float
*
transformed_output
,
float
*
output
);
float
*
output
,
SGemm
*
sgemm
,
ScratchBuffer
*
scratch_buffer
);
void
ConvRef3x3s1
(
const
float
*
input
,
const
float
*
filter
,
...
...
mace/kernels/arm/conv_winograd_test.cc
浏览文件 @
f6c669f6
...
...
@@ -65,9 +65,10 @@ TEST(ConvWinogradTest, winograd) {
kernels
::
ConvRef3x3s1
(
input_data
,
filter_data
,
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
output_data_ref
);
SGemm
sgemm
;
kernels
::
WinoGradConv3x3s1
(
input_data
,
filter_data
,
batch
,
in_height
,
in_width
,
in_channels
,
out_channels
,
6
,
output_data
);
output_data
,
&
sgemm
,
nullptr
);
// test
for
(
index_t
i
=
0
;
i
<
output_size
;
++
i
)
{
...
...
mace/kernels/conv_2d.h
浏览文件 @
f6c669f6
...
...
@@ -483,6 +483,16 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
*
sizeof
(
float
);
total_scratch_size
+=
padded_output_size
;
}
// scratch for sgemm
if
(
use_neon_1x1_s1
)
{
total_scratch_size
+=
(
input_batch
*
input_height
*
input_width
*
(
input_channels
+
channels
))
*
sizeof
(
float
);
}
else
if
(
use_winograd
)
{
total_scratch_size
+=
(
transformed_input_size
+
transformed_output_size
)
*
sizeof
(
float
);
}
// Init scratch buffer
scratch_
->
Rewind
();
scratch_
->
GrowSize
(
total_scratch_size
);
...
...
@@ -547,7 +557,9 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
winograd_out_tile_size
,
transformed_input_data
,
transformed_output_data
,
pad_output
);
pad_output
,
&
sgemm_
,
scratch_
);
};
}
else
if
(
use_neon_3x3_s1
)
{
conv_func
=
[
=
](
const
float
*
pad_input
,
float
*
pad_output
)
{
...
...
@@ -574,7 +586,9 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
extra_input_width
,
input_channels
,
channels
,
pad_output
);
pad_output
,
&
sgemm_
,
scratch_
);
};
}
else
if
(
use_neon_5x5_s1
)
{
conv_func
=
[
=
](
const
float
*
pad_input
,
float
*
pad_output
)
{
...
...
@@ -722,6 +736,7 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
Tensor
transformed_filter_
;
bool
is_filter_transformed_
;
ScratchBuffer
*
scratch_
;
SGemm
sgemm_
;
};
template
<
>
...
...
mace/kernels/matmul.h
浏览文件 @
f6c669f6
...
...
@@ -89,6 +89,9 @@ struct MatMulFunctor : OpKernel {
const
index_t
height_b
=
B
->
dim
(
rank
-
2
);
const
index_t
width_b
=
B
->
dim
(
rank
-
1
);
auto
scratch_buffer
=
context_
->
workspace
()
->
GetScratchBuffer
(
D
);
scratch_buffer
->
Rewind
();
sgemm_
.
Run
(
a_ptr_base
,
b_ptr_base
,
batch
,
...
...
@@ -101,7 +104,7 @@ struct MatMulFunctor : OpKernel {
A
->
is_weight
(),
B
->
is_weight
(),
c_ptr_base
,
context_
->
workspace
()
->
GetScratchBuffer
(
D
)
);
scratch_buffer
);
return
MACE_SUCCESS
;
}
...
...
mace/kernels/sgemm.cc
浏览文件 @
f6c669f6
...
...
@@ -44,7 +44,6 @@ void SGemm::operator()(const MatrixMap<const float> &lhs,
}
if
(
scratch_buffer
!=
nullptr
)
{
scratch_buffer
->
Rewind
();
index_t
total_size
=
result
->
size
();
if
(
!
lhs
.
is_const
())
{
total_size
+=
lhs
.
size
();
...
...
@@ -54,7 +53,6 @@ void SGemm::operator()(const MatrixMap<const float> &lhs,
}
scratch_buffer
->
GrowSize
(
total_size
*
sizeof
(
float
));
scratch_buffer
->
Rewind
();
if
(
!
lhs
.
is_const
())
{
packed_lhs_
.
reset
(
new
Tensor
(
scratch_buffer
->
Scratch
(
lhs
.
size
()
*
sizeof
(
float
)),
DT_FLOAT
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录