Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
74a309cb
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
74a309cb
编写于
7月 17, 2019
作者:
S
StarryRain
提交者:
Yanzhan Yang
7月 17, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add CPU_ARCH info, improve the performance of GEMM1*1s1 (#1751)
上级
497bf326
变更
17
展开全部
隐藏空白更改
内联
并排
Showing
17 changed files
with
2420 additions
and
4 deletions
+2420
-4
src/common/types.h
src/common/types.h
+12
-0
src/framework/context.cpp
src/framework/context.cpp
+45
-4
src/framework/context.h
src/framework/context.h
+2
-0
src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
...rators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_add_kernel.cpp
src/operators/kernel/arm/convolution/conv_add_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
...operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
...rators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_common.cpp
src/operators/kernel/arm/convolution/conv_common.cpp
+20
-0
src/operators/kernel/arm/convolution/conv_kernel.cpp
src/operators/kernel/arm/convolution/conv_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
+3
-0
src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
...perators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
+3
-0
src/operators/kernel/central-arm-func/conv_arm_func.cpp
src/operators/kernel/central-arm-func/conv_arm_func.cpp
+58
-0
src/operators/kernel/central-arm-func/conv_arm_func.h
src/operators/kernel/central-arm-func/conv_arm_func.h
+3
-0
src/operators/math/gemm/gemm1x1s1.cpp
src/operators/math/gemm/gemm1x1s1.cpp
+2198
-0
src/operators/math/gemm/gemm1x1s1.h
src/operators/math/gemm/gemm1x1s1.h
+57
-0
src/operators/op_param.h
src/operators/op_param.h
+1
-0
未找到文件。
src/common/types.h
浏览文件 @
74a309cb
...
...
@@ -145,6 +145,18 @@ struct PaddleMobileConfigInternal {
std
::
string
model_obfuscate_key
=
""
;
};
// Tag describing which ARM core family the CPU context detected.
// Every enumerator carries an explicit value, so declaration order is not
// significant. For the A-series entries the value equals the number in the
// name; ARM_UNKOWN (spelling kept as-is because callers reference it) is the
// only negative value.
enum ARMArch {
  ARM_UNKOWN = -1,  // architecture could not be determined
  APPLE = 0,
  A53 = 53,
  A55 = 55,
  A57 = 57,
  A72 = 72,
  A73 = 73,
  A75 = 75,
  A76 = 76
};
extern
const
char
*
G_OP_TYPE_CONV
;
extern
const
char
*
G_OP_TYPE_BATCHNORM
;
extern
const
char
*
G_OP_TYPE_BOX_CODER
;
...
...
src/framework/context.cpp
浏览文件 @
74a309cb
...
...
@@ -261,7 +261,8 @@ int set_sched_affinity(const std::vector<int> &cpu_ids) {
return
0
;
}
int
get_cpu_info_by_name
(
int
*
cpu_num
,
std
::
vector
<
int
>
*
big_core_ids
,
int
get_cpu_info_by_name
(
int
*
cpu_num
,
ARMArch
*
arch
,
std
::
vector
<
int
>
*
big_core_ids
,
std
::
vector
<
int
>
*
little_core_ids
,
std
::
vector
<
int
>
*
l1_cache_sizes
,
std
::
vector
<
int
>
*
l2_cache_sizes
,
...
...
@@ -270,6 +271,7 @@ int get_cpu_info_by_name(int *cpu_num, std::vector<int> *big_core_ids,
/* Snapdragon */
if
(
hardware_name
.
find
(
"SDM845"
)
!=
std
::
string
::
npos
)
{
// 845
*
cpu_num
=
8
;
*
arch
=
A75
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
l1_cache_sizes
->
resize
(
*
cpu_num
);
...
...
@@ -282,6 +284,7 @@ int get_cpu_info_by_name(int *cpu_num, std::vector<int> *big_core_ids,
return
0
;
}
else
if
(
hardware_name
.
find
(
"SDM710"
)
!=
std
::
string
::
npos
)
{
// 710
*
cpu_num
=
8
;
*
arch
=
A75
;
*
big_core_ids
=
{
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
,
4
,
5
};
l1_cache_sizes
->
resize
(
*
cpu_num
);
...
...
@@ -295,6 +298,7 @@ int get_cpu_info_by_name(int *cpu_num, std::vector<int> *big_core_ids,
return
0
;
}
else
if
(
hardware_name
.
find
(
"MSM8998"
)
!=
std
::
string
::
npos
)
{
// 835
*
cpu_num
=
8
;
*
arch
=
A73
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
l1_cache_sizes
->
resize
(
*
cpu_num
);
...
...
@@ -313,8 +317,9 @@ int get_cpu_info_by_name(int *cpu_num, std::vector<int> *big_core_ids,
return
0
;
}
else
if
(
hardware_name
.
find
(
"MSM8976"
)
!=
std
::
string
::
npos
)
{
// 652,653
*
cpu_num
=
8
;
*
big_core_ids
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
*
little_core_ids
=
{};
*
arch
=
A72
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
l1_cache_sizes
->
resize
(
*
cpu_num
);
l2_cache_sizes
->
resize
(
*
cpu_num
);
l3_cache_sizes
->
resize
(
*
cpu_num
);
...
...
@@ -322,6 +327,42 @@ int get_cpu_info_by_name(int *cpu_num, std::vector<int> *big_core_ids,
fill_cpu_cache_size
(
l2_cache_sizes
,
1024
*
1024
);
fill_cpu_cache_size
(
l3_cache_sizes
,
0
);
return
0
;
}
else
if
(
hardware_name
.
find
(
"SDM660"
)
!=
std
::
string
::
npos
||
hardware_name
.
find
(
"SDM636"
)
!=
std
::
string
::
npos
)
{
// 660, 636
*
cpu_num
=
8
;
*
arch
=
A73
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
l1_cache_sizes
->
resize
(
*
cpu_num
);
l2_cache_sizes
->
resize
(
*
cpu_num
);
l3_cache_sizes
->
resize
(
*
cpu_num
);
fill_cpu_cache_size
(
l1_cache_sizes
,
64
*
1024
);
fill_cpu_cache_size
(
l2_cache_sizes
,
1024
*
1024
);
fill_cpu_cache_size
(
l3_cache_sizes
,
0
);
return
0
;
/* MediaTek */
}
else
if
(
hardware_name
.
find
(
"MT6799"
)
!=
std
::
string
::
npos
)
{
// X30
*
cpu_num
=
10
;
*
arch
=
A73
;
*
big_core_ids
=
{
8
,
9
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
};
return
0
;
}
else
if
(
hardware_name
.
find
(
"MT6771"
)
!=
std
::
string
::
npos
)
{
// P60
*
cpu_num
=
8
;
*
arch
=
A73
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
return
0
;
/* Kirin */
}
else
if
(
hardware_name
.
find
(
"KIRIN970"
)
!=
std
::
string
::
npos
)
{
// Kirin 970
*
cpu_num
=
8
;
*
arch
=
A73
;
*
big_core_ids
=
{
4
,
5
,
6
,
7
};
*
little_core_ids
=
{
0
,
1
,
2
,
3
};
return
0
;
}
return
-
1
;
}
...
...
@@ -410,7 +451,7 @@ CPUContext::CPUContext() {
// probe cpu info, and set big & little clusters, L1, L2 and L3 cache sizes
std
::
string
cpu_name
=
get_cpu_name
();
bool
failed
=
get_cpu_info_by_name
(
&
_cpu_num
,
&
_big_core_ids
,
&
_little_core_ids
,
get_cpu_info_by_name
(
&
_cpu_num
,
&
_
arch
,
&
_
big_core_ids
,
&
_little_core_ids
,
&
_l1_cache_sizes
,
&
_l2_cache_sizes
,
&
_l3_cache_sizes
,
cpu_name
)
!=
0
;
if
(
failed
)
{
...
...
src/framework/context.h
浏览文件 @
74a309cb
...
...
@@ -43,12 +43,14 @@ struct CPUContext {
int
get_thread_num
();
PowerMode
get_power_mode
()
const
{
return
_power_mode
;
}
int
get_cache_size
(
int
level
);
// Returns the ARM architecture tag currently stored in _arch.
ARMArch
get_arch
()
const
{
return
_arch
;
}
int
get_l1_cache_size
()
{
return
get_cache_size
(
1
);
}
int
get_l2_cache_size
()
{
return
get_cache_size
(
2
);
}
int
get_l3_cache_size
()
{
return
get_cache_size
(
3
);
}
void
*
get_work_space
(
int
size_in_byte
);
int
_cpu_num
;
ARMArch
_arch
;
PowerMode
_power_mode
;
std
::
vector
<
int
>
_big_core_ids
;
std
::
vector
<
int
>
_little_core_ids
;
...
...
src/operators/kernel/arm/convolution/conv_add_bn_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -126,6 +126,9 @@ void ConvAddBNReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_add_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -44,6 +44,9 @@ void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> ¶m) {
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_add_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -45,6 +45,9 @@ void ConvAddReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_bn_add_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -64,6 +64,9 @@ void ConvBNAddReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_bn_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -77,6 +77,9 @@ void ConvBNReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_common.cpp
浏览文件 @
74a309cb
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/arm/convolution/conv_common.h"
#include "framework/context.h"
#include "operators/math/gemm/gemm1x1s1.h"
#include "operators/math/slidingwindow_utils.h"
#include "operators/math/winograd/winograd_transform.h"
...
...
@@ -20,6 +22,8 @@ namespace paddle_mobile {
namespace
operators
{
void
InitBaseConvKernel
(
ConvParam
<
CPU
>
*
param
)
{
bool
conv1x1
=
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
1
;
bool
conv3x3
=
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
3
;
bool
conv5x5
=
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
...
...
@@ -83,6 +87,22 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
math
::
slidingwindow_transform_weight
<
float
>
(
*
param
->
Filter
(),
param
->
transformed_filter_
);
param
->
ExecMode
()
=
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
;
}
else
if
(
conv1x1
&&
param
->
Groups
()
==
1
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
]
&&
param
->
Paddings
()[
0
]
==
0
&&
param
->
Input
()
->
dims
()[
1
]
>
1
&&
param
->
Strides
()[
0
]
==
param
->
Strides
()[
1
]
&&
param
->
Dilations
()[
0
]
==
param
->
Dilations
()[
1
]
&&
param
->
Strides
()[
0
]
==
1
&&
param
->
Dilations
()[
0
]
==
1
&&
param
->
Output
()
->
dims
()[
2
]
*
param
->
Output
()
->
dims
()[
3
]
>
1
)
{
// transform weight
Variable
*
transformed_var
=
param
->
GetScope
()
->
Var
();
ARMArch
arch
=
framework
::
CPUContext
::
Context
()
->
get_arch
();
param
->
transformed_filter_
=
transformed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
math
::
gemm1x1s1_transform_weight
(
*
param
->
Filter
(),
*
param
->
Output
(),
param
->
transformed_filter_
,
param
->
groups
,
arch
);
param
->
ExecMode
()
=
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
;
}
else
{
param
->
ExecMode
()
=
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
;
}
...
...
src/operators/kernel/arm/convolution/conv_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -54,6 +54,9 @@ void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> ¶m) {
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/conv_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -45,6 +45,9 @@ void ConvReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S1_FLOAT
:
case
ConvParam
<
CPU
>::
EXEC_SLIDINGWINDOW3x3S2_FLOAT
:
SlidingwindowConv3x3
<
float
,
float
>
(
param
);
...
...
src/operators/kernel/arm/convolution/dwconv_bn_relu_kernel.cpp
浏览文件 @
74a309cb
...
...
@@ -76,6 +76,9 @@ void DWConvBNReluKernel<CPU, float>::Compute(
case
ConvParam
<
CPU
>::
EXEC_GEMM_FLOAT
:
GemmConv
<
float
,
float
>
(
param
);
break
;
case
ConvParam
<
CPU
>::
EXEC_GEMM1x1s1_FLOAT
:
GemmConv1x1s1
<
float
,
float
>
(
param
);
break
;
default:
PADDLE_MOBILE_THROW_EXCEPTION
(
"Invalid convolution execute mode %d"
,
param
.
ExecMode
());
...
...
src/operators/kernel/central-arm-func/conv_arm_func.cpp
浏览文件 @
74a309cb
...
...
@@ -14,9 +14,11 @@ limitations under the License. */
#include "operators/kernel/central-arm-func/conv_arm_func.h"
#include <vector>
#include "framework/context.h"
#include "operators/math/depthwise/faster_depthwise_conv3x3.h"
#include "operators/math/depthwise_conv3x3.h"
#include "operators/math/depthwise_conv5x5.h"
#include "operators/math/gemm/gemm1x1s1.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/pad.h"
...
...
@@ -137,6 +139,61 @@ void GemmConv(const ConvParam<CPU> ¶m) {
}
}
template
<
typename
Itype
,
typename
Otype
>
void
GemmConv1x1s1
(
const
ConvParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
transformed_filter_
;
Tensor
*
output
=
param
.
Output
();
output
->
mutable_data
<
Otype
>
();
const
float
*
din
=
input
->
data
<
Itype
>
();
float
*
dout
=
output
->
mutable_data
<
Otype
>
();
const
int
num
=
input
->
dims
()[
0
];
const
int
chin
=
input
->
dims
()[
1
];
const
int
hin
=
input
->
dims
()[
2
];
const
int
win
=
input
->
dims
()[
3
];
const
int
chout
=
output
->
dims
()[
1
];
const
int
hout
=
output
->
dims
()[
2
];
const
int
wout
=
output
->
dims
()[
3
];
const
float
*
weights
=
filter
.
mutable_data
<
float
>
();
const
float
*
bias
=
nullptr
;
int
channel_size_out
=
wout
*
hout
;
int
channel_size_in
=
win
*
hin
;
const
int
group
=
param
.
Groups
();
const
int
m
=
chout
/
group
;
const
int
n
=
hout
*
wout
;
const
int
k
=
chin
/
group
;
bool
flag_relu
=
false
;
bool
flag_bias
=
false
;
ARMArch
arch
=
framework
::
CPUContext
::
Context
()
->
get_arch
();
int
hblock
=
math
::
get_hblock
(
arch
);
int
m_roundup
=
hblock
*
((
m
+
hblock
-
1
)
/
hblock
);
int
weights_size_per_group
=
m
*
k
;
if
(
n
>
1
)
{
weights_size_per_group
=
((
m_roundup
*
k
+
15
)
/
16
)
*
16
;
}
for
(
int
b
=
0
;
b
<
num
;
++
b
)
{
// dC
for
(
int
g
=
0
;
g
<
group
;
++
g
)
{
float
*
dout_group
=
static_cast
<
float
*>
(
dout
)
+
(
b
*
chout
+
g
*
m
)
*
channel_size_out
;
const
float
*
din_group
=
static_cast
<
const
float
*>
(
din
)
+
(
b
*
chin
+
g
*
k
)
*
channel_size_in
;
const
float
*
weights_group
=
static_cast
<
const
float
*>
(
weights
)
+
g
*
weights_size_per_group
;
const
float
*
bias_group
=
static_cast
<
const
float
*>
(
bias
)
+
g
*
m
;
if
(
n
>
1
)
{
math
::
sgemm_prepack
(
weights_group
,
din_group
,
bias_group
,
dout_group
,
m
,
n
,
k
,
flag_bias
,
flag_relu
,
false
,
arch
);
}
}
}
}
template
<
int
tile
,
int
kernel
>
void
WinogradConv3x3
(
const
ConvParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
...
...
@@ -293,6 +350,7 @@ void SlidingwindowConv3x3(const ConvParam<CPU> ¶m) {
}
template
void
GemmConv
<
float
,
float
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
GemmConv1x1s1
<
float
,
float
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
WinogradConv3x3
<
8
,
3
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
DepthwiseConv3x3
<
float
,
float
>(
const
ConvParam
<
CPU
>
&
param
);
template
void
DepthwiseConv5x5
<
float
,
float
>(
const
ConvParam
<
CPU
>
&
param
);
...
...
src/operators/kernel/central-arm-func/conv_arm_func.h
浏览文件 @
74a309cb
...
...
@@ -32,6 +32,9 @@ bool IsExpand(const std::vector<int64_t> &filter_dim,
template
<
typename
Itype
,
typename
Otype
>
void
GemmConv
(
const
ConvParam
<
CPU
>
&
param
);
template
<
typename
Itype
,
typename
Otype
>
void
GemmConv1x1s1
(
const
ConvParam
<
CPU
>
&
param
);
template
<
int
tile
,
int
kernel
>
void
WinogradConv3x3
(
const
ConvParam
<
CPU
>
&
param
);
...
...
src/operators/math/gemm/gemm1x1s1.cpp
0 → 100644
浏览文件 @
74a309cb
此差异已折叠。
点击以展开。
src/operators/math/gemm/gemm1x1s1.h
0 → 100644
浏览文件 @
74a309cb
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {
namespace math {

// Blocking constants for the 1x1 stride-1 GEMM path. The set of constants
// differs between 64-bit and 32-bit ARM builds.
// NOTE(review): presumably these are the M/N/K tile sizes of the packed GEMM
// micro-kernels - confirm against gemm1x1s1.cpp.
#ifdef __aarch64__
const int MBLOCK = 8;
const int NBLOCK = 12;
const int KBLOCK = 4;

// Row-block size used when rounding up / packing the weight matrix.
// On aarch64 the value does not depend on the core architecture.
inline int get_hblock(ARMArch arch) {
  (void)arch;  // intentionally unused on aarch64
  return MBLOCK;
}
#else
const int MBLOCK_A73 = 4;
const int MBLOCK_OTH = 6;
const int NBLOCK = 8;
const int KBLOCK = 4;

// Row-block size used when rounding up / packing the weight matrix.
// A73 uses MBLOCK_A73; every other architecture uses MBLOCK_OTH.
inline int get_hblock(ARMArch arch) {
  return arch == A73 ? MBLOCK_A73 : MBLOCK_OTH;
}
#endif  // __aarch64__

// Writes the transformed form of `weight` into `trans_weight` for later use
// by sgemm_prepack. Declared here, defined elsewhere.
// NOTE(review): the exact packed layout and the role of `output` are not
// visible from this header - see the implementation.
void gemm1x1s1_transform_weight(const framework::Tensor &weight,
                                const framework::Tensor &output,
                                framework::Tensor *trans_weight,
                                const int group, ARMArch arch);

// GEMM taking a pre-packed A matrix (A_packed), a B matrix and an output C
// with dimensions M, N, K; is_bias / is_relu toggle the optional bias and
// ReLU, is_transB flags a transposed B. Declared here, defined elsewhere.
void sgemm_prepack(const float *A_packed, const float *B, const float *bias,
                   float *C, int M, int N, int K, bool is_bias, bool is_relu,
                   bool is_transB, ARMArch arch);

}  // namespace math
}  // namespace operators
}  // namespace paddle_mobile
#endif // CONV_OP
src/operators/op_param.h
浏览文件 @
74a309cb
...
...
@@ -467,6 +467,7 @@ class ConvParam : public OpParam {
EXEC_SLIDINGWINDOW3x3_FLOAT
,
EXEC_SLIDINGWINDOW5x5_FLOAT
,
EXEC_SLIDINGWINDOW7x7_FLOAT
,
EXEC_GEMM1x1s1_FLOAT
,
};
// Accessor for the convolution execution mode stored in exec_mode_.
// It hands out a non-const reference from a const method, which is what lets
// kernel-init code assign through it (`param->ExecMode() = ...`).
// NOTE(review): exec_mode_ is presumably declared mutable - confirm.
ExecMode
&
ExecMode
()
const
{
return
exec_mode_
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录