Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
73a7f138
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
337
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
73a7f138
编写于
3月 08, 2019
作者:
H
hjchen2
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert package lhs in sgemm and depthwise conv5x5 to make it no problem on ios
上级
269a3a37
变更
11
展开全部
隐藏空白更改
内联
并排
Showing
11 changed file
with
543 addition
and
353 deletion
+543
-353
src/framework/data_layout.h
src/framework/data_layout.h
+1
-0
src/framework/dim.h
src/framework/dim.h
+4
-0
src/operators/kernel/central-arm-func/conv_arm_func.h
src/operators/kernel/central-arm-func/conv_arm_func.h
+10
-10
src/operators/math/depthwise_conv5x5.cpp
src/operators/math/depthwise_conv5x5.cpp
+15
-14
src/operators/math/gemm/executor.h
src/operators/math/gemm/executor.h
+6
-3
src/operators/math/gemm/pack_kernel.h
src/operators/math/gemm/pack_kernel.h
+484
-266
src/operators/math/gemm/strategy.h
src/operators/math/gemm/strategy.h
+3
-2
src/operators/math/gru_compute.cpp
src/operators/math/gru_compute.cpp
+9
-24
src/operators/math/math_function.cpp
src/operators/math/math_function.cpp
+4
-27
src/operators/math/winograd/winograd_transform_f6k3.cpp
src/operators/math/winograd/winograd_transform_f6k3.cpp
+6
-6
tools/pre-commit.hooks/cpplint.hook
tools/pre-commit.hooks/cpplint.hook
+1
-1
未找到文件。
src/framework/data_layout.h
浏览文件 @
73a7f138
...
...
@@ -42,6 +42,7 @@ inline DataLayout StringToDataLayout(const std::string &str) {
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
"Unknown storage order string: %s"
,
s
.
c_str
())
}
return
DataLayout
::
kNCHW
;
}
inline
std
::
string
DataLayoutToString
(
const
DataLayout
&
data_layout
)
{
...
...
src/framework/dim.h
浏览文件 @
73a7f138
...
...
@@ -82,6 +82,8 @@ struct Dim<0> {
int64_t
&
operator
[](
int
idx
);
int64_t
operator
[](
int
idx
)
const
;
int64_t
head
;
};
namespace
{
...
...
@@ -131,6 +133,7 @@ int64_t &indexer(Dim<D> &dim, int idx) {
template
<
>
int64_t
&
indexer
<
0
>
(
Dim
<
0
>
&
dim
,
int
idx
)
{
PADDLE_MOBILE_THROW_EXCEPTION
(
"Invalid index"
)
return
dim
.
head
;
}
template
<
int
D
>
...
...
@@ -147,6 +150,7 @@ int64_t indexer(const Dim<D> &dim, int idx) {
template
<
>
int64_t
indexer
<
0
>
(
const
Dim
<
0
>
&
dim
,
int
idx
)
{
PADDLE_MOBILE_THROW_EXCEPTION
(
"Invalid index"
)
return
dim
.
head
;
}
}
// namespace
...
...
src/operators/kernel/central-arm-func/conv_arm_func.h
浏览文件 @
73a7f138
...
...
@@ -201,16 +201,16 @@ inline void DepthwiseConv5x5(const ConvParam<CPU> ¶m) {
Tensor
*
output
=
param
.
Output
();
output
->
mutable_data
<
Otype
>
();
if
(
strides
[
0
]
==
1
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
);
math
::
DepthwiseConv5x5S1
<
Itype
,
Otype
>
(
in_batch
,
*
filter
,
paddings
,
&
out_batch
);
}
}
else
{
GemmConv
<
Itype
,
Otype
>
(
param
);
}
//
if (strides[0] == 1) {
//
for (int i = 0; i < batch_size; i++) {
//
Tensor in_batch = input->Slice(i, i + 1);
//
Tensor out_batch = output->Slice(i, i + 1);
//
math::DepthwiseConv5x5S1<Itype, Otype>(in_batch, *filter, paddings,
//
&out_batch);
//
}
//
} else {
GemmConv
<
Itype
,
Otype
>
(
param
);
//
}
}
template
<
typename
ParamType
>
...
...
src/operators/math/depthwise_conv5x5.cpp
浏览文件 @
73a7f138
...
...
@@ -144,20 +144,21 @@ void DepthwiseConv5x5S1<float, float>(const framework::Tensor &input,
const
float
*
input_data
=
input
.
data
<
float
>
();
const
float
*
filter_data
=
filter
.
data
<
float
>
();
float
*
out_data
=
output
->
mutable_data
<
float
>
();
int
input_h
=
input
.
dims
()[
2
];
int
input_w
=
input
.
dims
()[
3
];
int
output_h
=
output
->
dims
()[
2
];
int
output_w
=
output
->
dims
()[
3
];
int
padding_h
=
paddings
[
0
];
int
padding_w
=
paddings
[
1
];
int
image_size
=
input_h
*
input_w
;
int
out_image_size
=
output_h
*
output_w
;
int
valid_h_start
=
padding_h
;
int
valid_h_end
=
output_h
-
valid_h_start
;
int
valid_h
=
valid_h_end
-
valid_h_start
;
int
valid_w_start
=
padding_w
;
int
valid_w_end
=
output_w
-
valid_w_start
;
int
valid_w
=
valid_w_end
-
valid_w_start
;
const
int
input_h
=
input
.
dims
()[
2
];
const
int
input_w
=
input
.
dims
()[
3
];
const
int
output_h
=
output
->
dims
()[
2
];
const
int
output_w
=
output
->
dims
()[
3
];
const
int
padding_h
=
paddings
[
0
];
const
int
padding_w
=
paddings
[
1
];
const
int
image_size
=
input_h
*
input_w
;
const
int
out_image_size
=
output_h
*
output_w
;
const
int
valid_h_start
=
padding_h
;
const
int
valid_h_end
=
output_h
-
valid_h_start
;
const
int
valid_h
=
valid_h_end
-
valid_h_start
;
const
int
valid_w_start
=
padding_w
;
const
int
valid_w_end
=
output_w
-
valid_w_start
;
const
int
valid_w
=
valid_w_end
-
valid_w_start
;
#pragma omp parallel for
for
(
int
g
=
0
;
g
<
input
.
dims
()[
1
];
++
g
)
{
...
...
src/operators/math/gemm/executor.h
浏览文件 @
73a7f138
...
...
@@ -18,7 +18,8 @@ limitations under the License. */
#ifdef _OPENMP
#include <omp.h>
#endif
#include <sys/time.h>
// #include <sys/time.h>
// #include <iostream>
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm/cpu_info.h"
...
...
@@ -158,7 +159,8 @@ class GemmExecutor : public Executor {
}
}
}
strategy_
.
write
(
lhs_range
,
N_
,
local_C
,
ldc_
,
C
+
lhs_block
*
ldc
,
ldc
);
strategy_
.
write
(
lhs_range
,
N_
,
alpha
,
local_C
,
ldc_
,
beta
,
C
+
lhs_block
*
ldc
,
ldc
);
}
}
else
{
strategy_
.
pack_lhs
(
M_
,
K_
,
A
,
lda
,
lhs_workspace_
,
true
);
...
...
@@ -188,7 +190,8 @@ class GemmExecutor : public Executor {
}
}
}
strategy_
.
write
(
M_
,
rhs_range
,
local_C
,
ldc_
,
C
+
rhs_block
,
ldc
);
strategy_
.
write
(
M_
,
rhs_range
,
alpha
,
local_C
,
ldc_
,
beta
,
C
+
rhs_block
,
ldc
);
}
}
...
...
src/operators/math/gemm/pack_kernel.h
浏览文件 @
73a7f138
此差异已折叠。
点击以展开。
src/operators/math/gemm/strategy.h
浏览文件 @
73a7f138
...
...
@@ -31,8 +31,9 @@ struct SgemmStrategy {
Itype
*
,
const
bool
);
typedef
void
(
*
kernelFunc
)(
const
Itype
*
,
const
Itype
*
,
const
int
,
Otype
*
,
const
int
);
typedef
void
(
*
WriteFunc
)(
const
int
,
const
int
,
const
Otype
*
,
const
int
,
Otype
*
,
const
int
);
typedef
void
(
*
WriteFunc
)(
const
int
,
const
int
,
const
float
alpha
,
const
Otype
*
,
const
int
,
const
float
beta
,
Otype
*
,
const
int
);
packLhsFunc
pack_lhs
;
packRhsFunc
pack_rhs
;
...
...
src/operators/math/gru_compute.cpp
浏览文件 @
73a7f138
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
#include "operators/math/gru_compute.h"
#include "common/types.h"
#include "operators/math/activation.h"
#include "operators/math/gemm.h"
#include "operators/math/gemm
/cblas
.h"
#include "operators/math/gru_cpu_kernel.h"
namespace
paddle_mobile
{
...
...
@@ -29,35 +29,19 @@ struct GRUUnitFunctor<CPU, T> {
static
void
compute
(
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
const
ActivationType
active_node
,
const
ActivationType
active_gate
)
{
Gemm
gemm
;
if
(
value
.
prev_out_value
)
{
#ifdef _OPENMP
gemm
.
Sgemm_omp
(
batch_size
,
frame_size
*
2
,
frame_size
,
1
,
value
.
prev_out_value
,
frame_size
,
value
.
gate_weight
,
frame_size
*
2
,
1
,
value
.
gate_value
,
frame_size
*
3
,
false
,
static_cast
<
float
*>
(
nullptr
));
#else
gemm
.
Sgemm
(
batch_size
,
frame_size
*
2
,
frame_size
,
1
,
value
.
prev_out_value
,
frame_size
,
value
.
gate_weight
,
frame_size
*
2
,
1
,
value
.
gate_value
,
frame_size
*
3
,
false
,
static_cast
<
float
*>
(
nullptr
));
#endif
cblas_sgemm
(
false
,
false
,
batch_size
,
frame_size
*
2
,
frame_size
,
1.
f
,
value
.
prev_out_value
,
frame_size
,
value
.
gate_weight
,
frame_size
*
2
,
1.
f
,
value
.
gate_value
,
frame_size
*
3
);
}
forward_reset_output
(
value
,
frame_size
,
batch_size
,
active_gate
);
if
(
value
.
prev_out_value
)
{
#ifdef _OPENMP
gemm
.
Sgemm_omp
(
batch_size
,
frame_size
,
frame_size
,
1
,
value
.
reset_output_value
,
frame_size
,
value
.
state_weight
,
frame_size
,
1
,
value
.
gate_value
+
frame_size
*
2
,
frame_size
*
3
,
false
,
static_cast
<
float
*>
(
nullptr
));
#else
gemm
.
Sgemm
(
batch_size
,
frame_size
,
frame_size
,
1
,
value
.
reset_output_value
,
frame_size
,
value
.
state_weight
,
frame_size
,
1
,
value
.
gate_value
+
frame_size
*
2
,
frame_size
*
3
,
false
,
static_cast
<
float
*>
(
nullptr
));
#endif
cblas_sgemm
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1.
f
,
value
.
reset_output_value
,
frame_size
,
value
.
state_weight
,
frame_size
,
1.
f
,
value
.
gate_value
+
frame_size
*
2
,
frame_size
*
3
);
}
forward_final_output
(
value
,
frame_size
,
batch_size
,
active_node
);
...
...
@@ -65,6 +49,7 @@ struct GRUUnitFunctor<CPU, T> {
};
template
struct
GRUUnitFunctor
<
CPU
,
float
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/math/math_function.cpp
浏览文件 @
73a7f138
...
...
@@ -71,34 +71,11 @@ void MatMul<float, float>(const framework::Tensor &matrix_a, bool trans_a,
a
[
index
++
]
=
tmp
[
i
*
n
+
j
];
}
}
if
(
M
==
1
)
{
#ifdef _OPENMP
gemm
.
Sgemm_omp
(
M
,
N
,
K
,
alpha
,
a
,
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
,
relu
,
bias
);
#else
gemm
.
Sgemm
(
M
,
N
,
K
,
alpha
,
a
,
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
,
relu
,
bias
);
#endif
}
else
{
cblas_sgemm
(
false
,
false
,
M
,
N
,
K
,
alpha
,
a
,
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
);
}
cblas_sgemm
(
false
,
false
,
M
,
N
,
K
,
alpha
,
a
,
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
);
}
else
{
if
(
M
==
1
)
{
#ifdef _OPENMP
gemm
.
Sgemm_omp
(
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
(),
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
,
relu
,
bias
);
#else
gemm
.
Sgemm
(
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
(),
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
,
relu
,
bias
);
#endif
}
else
{
cblas_sgemm
(
false
,
false
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
(),
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
);
}
cblas_sgemm
(
false
,
false
,
M
,
N
,
K
,
alpha
,
matrix_a
.
data
<
float
>
(),
K
,
matrix_b
.
data
<
float
>
(),
N
,
beta
,
matrix_out
->
data
<
float
>
(),
N
);
}
}
...
...
src/operators/math/winograd/winograd_transform_f6k3.cpp
浏览文件 @
73a7f138
...
...
@@ -803,9 +803,9 @@ void winograd_transform_output<8, 3>(const framework::Tensor &input,
"dup v15.4s, wzr
\n
"
"cmp %[inter], #0
\n
"
"ble
loop_1c_%=
\n
"
"ble
2f
\n
"
// loop 2 channels
"
loop_2c_%=:
\n
"
"
1:
\n
"
"ld1 {v0.4s, v1.4s}, [%[w_ptr]], #32
\n
"
"ld1 {v2.4s, v3.4s}, [%[in_ptr]], #32
\n
"
"ld1 {v4.4s, v5.4s}, [%[in_ptr]], #32
\n
"
...
...
@@ -829,12 +829,12 @@ void winograd_transform_output<8, 3>(const framework::Tensor &input,
"fmla v15.4s, v5.4s, v1.s[3]
\n
"
"subs %[inter], %[inter], #1
\n
"
"bne
loop_2c_%=
\n
"
"bne
1b
\n
"
// loop 1 channel
"
loop_1c_%=:
\n
"
"
2:
\n
"
"cmp %[remain], #0
\n
"
"ble
store_res_%=
\n
"
"ble
3f
\n
"
"ld1 {v0.4s, v1.4s}, [%[w_ptr]], #32
\n
"
"ld1 {v2.4s, v3.4s}, [%[in_ptr]], #32
\n
"
...
...
@@ -847,7 +847,7 @@ void winograd_transform_output<8, 3>(const framework::Tensor &input,
"fmla v14.4s, v2.4s, v0.s[3]
\n
"
"fmla v15.4s, v3.4s, v0.s[3]
\n
"
"
store_res_%=:
\n
"
"
3:
\n
"
"st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%[uv_ptr]], #64
\n
"
"st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [%[uv_ptr]], #64
\n
"
:
[
w_ptr
]
"+r"
(
w_ptr
),
[
in_ptr
]
"+r"
(
in_ptr
),
[
uv_ptr
]
"+r"
(
uv_ptr
),
...
...
tools/pre-commit.hooks/cpplint.hook
浏览文件 @
73a7f138
...
...
@@ -5,7 +5,7 @@ TOTAL_ERRORS=0
# The trick to remove deleted files: https://stackoverflow.com/a/2413151
for
file
in
$(
git diff
--cached
--name-status
|
awk
'$1 != "D" {print $2}'
|
\
grep
-v
".pb.cpp"
|
grep
-v
".pb.h"
|
grep
-v
".pb-c.h"
|
grep
-v
".pb-c.c"
|
\
grep
-v
"protobuf-c.h"
|
grep
-v
"protobuf-c.c"
)
;
do
grep
-v
"protobuf-c.h"
|
grep
-v
"protobuf-c.c"
|
grep
-v
"dim.h"
)
;
do
cpplint
$file
;
TOTAL_ERRORS
=
$(
expr
$TOTAL_ERRORS
+
$?
)
;
done
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录