Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b16ae4e2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b16ae4e2
编写于
5月 29, 2019
作者:
T
tensor-tang
提交者:
GitHub
5月 29, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[lite] fix fc bias and enable armv7 fc (#17695)
上级
4b253569
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
19 addition
and
21 deletion
+19
-21
cmake/cross_compiling/android.cmake
cmake/cross_compiling/android.cmake
+2
-1
paddle/fluid/lite/arm/math/funcs.cc
paddle/fluid/lite/arm/math/funcs.cc
+6
-7
paddle/fluid/lite/arm/math/packed_sgemm.cc
paddle/fluid/lite/arm/math/packed_sgemm.cc
+2
-2
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+1
-1
paddle/fluid/lite/core/cpu_info.h
paddle/fluid/lite/core/cpu_info.h
+3
-3
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
+1
-1
paddle/fluid/lite/kernels/arm/CMakeLists.txt
paddle/fluid/lite/kernels/arm/CMakeLists.txt
+1
-1
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+3
-5
未找到文件。
cmake/cross_compiling/android.cmake
浏览文件 @
b16ae4e2
...
...
@@ -45,7 +45,8 @@ if (NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST)
endif
()
if
(
ANDROID_ARCH_ABI STREQUAL
"armeabi-v7a"
)
message
(
STATUS
"NEON is enabled on arm-v7a"
)
set
(
CMAKE_ANDROID_ARM_NEON ON
)
message
(
STATUS
"NEON is enabled on arm-v7a with softfp"
)
endif
()
set
(
ANDROID_STL_TYPE_LITS
"gnustl_static"
"c++_static"
)
...
...
paddle/fluid/lite/arm/math/funcs.cc
浏览文件 @
b16ae4e2
...
...
@@ -21,14 +21,14 @@ namespace arm {
namespace
math
{
template
<
>
void
fill_bias_fc
<
float
>
(
float
*
tensor
,
const
float
*
bias
,
const
int
num
,
void
fill_bias_fc
<
float
>
(
float
*
out
,
const
float
*
bias
,
const
int
num
,
const
int
channel
)
{
int
cnt
=
channel
>>
4
;
int
remain
=
channel
&
15
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
const
float
*
ptr_bias
=
bias
;
float
*
ptr_out
=
tensor
+
j
*
channel
;
float
*
ptr_out
=
out
+
j
*
channel
;
float32x4_t
vout1
;
float32x4_t
vout2
;
...
...
@@ -61,7 +61,6 @@ void fill_bias_fc<float>(float *tensor, const float *bias, const int num,
ptr_out
+=
16
;
ptr_bias
+=
16
;
}
#if 0
if (cnt > 0) {
asm(
...
...
@@ -79,21 +78,21 @@ void fill_bias_fc<float>(float *tensor, const float *bias, const int num,
);
}
#endif
for
(
;
remain
>
0
;
remain
--
)
{
for
(
int
i
=
0
;
i
<
remain
;
++
i
)
{
*
(
ptr_out
++
)
+=
*
(
ptr_bias
++
);
}
}
}
template
<
>
void
fill_bias_fc
<
int
>
(
int
*
tensor
,
const
int
*
bias
,
const
int
num
,
void
fill_bias_fc
<
int
>
(
int
*
out
,
const
int
*
bias
,
const
int
num
,
const
int
channel
)
{
int
cnt
=
channel
>>
4
;
int
remain
=
channel
&
15
;
for
(
int
j
=
0
;
j
<
num
;
++
j
)
{
const
int
*
ptr_bias
=
bias
;
int
*
ptr_out
=
tensor
+
j
*
channel
;
int
*
ptr_out
=
out
+
j
*
channel
;
int32x4_t
vout1
;
int32x4_t
vout2
;
...
...
@@ -144,7 +143,7 @@ void fill_bias_fc<int>(int *tensor, const int *bias, const int num,
);
}
#endif
for
(
;
remain
>
0
;
remain
--
)
{
for
(
int
i
=
0
;
i
<
remain
;
++
i
)
{
*
(
ptr_out
++
)
+=
*
(
ptr_bias
++
);
}
}
...
...
paddle/fluid/lite/arm/math/packed_sgemm.cc
浏览文件 @
b16ae4e2
...
...
@@ -2751,7 +2751,7 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
bool
transB
,
ARMContext
*
ctx
)
{
size_t
l2_cache
=
ctx
->
l2_cache_size
()
>
0
?
ctx
->
l2_cache_size
()
:
512
*
1024
;
void
*
workspace
=
ctx
->
get_work_space
();
auto
*
workspace
=
ctx
->
workspace_data
<
float
>
();
int
threads
=
ctx
->
threads
();
//! MBLOCK * x (result) + MBLOCK * k (A) + x * k (B) = l2
int
x_block
=
...
...
@@ -2785,7 +2785,7 @@ void sgemm_conv_4x8(const float* A_packed, const float* B, const float* bias,
flag_p_remain
=
true
;
}
//! load bpanel
float
*
b_pannel
=
static_cast
<
float
*>
(
workspace
)
;
float
*
b_pannel
=
workspace
;
if
(
transB
)
{
loadb_trans
(
b_pannel
,
B
,
K
,
0
,
K
,
x0
,
xmax
);
}
else
{
...
...
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
b16ae4e2
...
...
@@ -20,7 +20,7 @@ namespace lite {
#ifdef LITE_WITH_ARM
void
DeviceInfo
::
get_info
(
DeviceInfo
*
dev
)
{
void
DeviceInfo
::
InitInternal
(
DeviceInfo
*
dev
)
{
set_default_cache
(
dev
);
dev
->
compute_core_num_
=
arm_get_cpucount
();
dev
->
max_memory_
=
arm_get_meminfo
();
...
...
paddle/fluid/lite/core/cpu_info.h
浏览文件 @
b16ae4e2
...
...
@@ -85,14 +85,14 @@ class DeviceInfo {
return
*
x
;
}
static
void
init_info
()
{
static
void
Init
()
{
auto
&
info
=
Global
();
get_info
(
&
info
);
InitInternal
(
&
info
);
}
private:
DeviceInfo
()
=
default
;
static
void
get_info
(
DeviceInfo
*
dev
);
static
void
InitInternal
(
DeviceInfo
*
dev
);
};
size_t
arm_get_meminfo
();
...
...
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
浏览文件 @
b16ae4e2
...
...
@@ -64,7 +64,7 @@ class RuntimeContextAssignPass : public StmtPass {
#ifdef LITE_WITH_ARM
std
::
unique_ptr
<
KernelContext
>
NewARMContext
()
{
DeviceInfo
::
init_info
();
DeviceInfo
::
Init
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
return
ctx
;
...
...
paddle/fluid/lite/kernels/arm/CMakeLists.txt
浏览文件 @
b16ae4e2
...
...
@@ -9,7 +9,7 @@ cc_library(relu_compute_arm SRCS relu_compute.cc DEPS ${lite_kernel_deps})
cc_library
(
mul_compute_arm SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
eigen3
)
cc_library
(
scale_compute_arm SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
eigen3
)
lite_cc_test
(
test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm
eigen3
)
lite_cc_test
(
test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm
math_arm
)
set
(
arm_kernels
fc_compute_arm
...
...
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
浏览文件 @
b16ae4e2
...
...
@@ -61,8 +61,6 @@ TEST(fc_arm, compare_test) {
b_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
// TODO(TJ): enable bias soon
b_data
=
nullptr
;
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
batch_size
,
3
,
//
w_data
,
3
,
4
,
//
b_data
,
ref_data
);
...
...
@@ -74,11 +72,11 @@ TEST(fc_arm, compare_test) {
param
.
in_num_col_dims
=
1
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
nullptr
;
param
.
bias
=
&
b
;
param
.
output
=
&
out
;
param
.
in_mat_dims
=
x
.
dims
();
DeviceInfo
::
init_info
();
DeviceInfo
::
Init
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
...
...
@@ -136,7 +134,7 @@ TEST(fc_arm, num_col_dims) {
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
init_info
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录