Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
ace19269
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ace19269
编写于
6月 15, 2019
作者:
S
superjomn
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'incubate/lite' of
http://10.87.145.36/inference/paddlelite
into HEAD
上级
0d6c897b
8749f4fc
变更
52
展开全部
隐藏空白更改
内联
并排
Showing
52 changed file
with
1990 addition
and
746 deletion
+1990
-746
.gitlab-ci.yml
.gitlab-ci.yml
+74
-0
CMakeLists.txt
CMakeLists.txt
+1
-0
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+1
-0
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+1
-1
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+1
-1
paddle/fluid/lite/arm/CMakeLists.txt
paddle/fluid/lite/arm/CMakeLists.txt
+1
-0
paddle/fluid/lite/arm/math/CMakeLists.txt
paddle/fluid/lite/arm/math/CMakeLists.txt
+3
-1
paddle/fluid/lite/arm/math/scale.cc
paddle/fluid/lite/arm/math/scale.cc
+105
-0
paddle/fluid/lite/arm/math/scale.h
paddle/fluid/lite/arm/math/scale.h
+8
-0
paddle/fluid/lite/arm/math/type_trans.cpp
paddle/fluid/lite/arm/math/type_trans.cpp
+492
-501
paddle/fluid/lite/core/CMakeLists.txt
paddle/fluid/lite/core/CMakeLists.txt
+4
-2
paddle/fluid/lite/core/cpu_info.cc
paddle/fluid/lite/core/cpu_info.cc
+5
-5
paddle/fluid/lite/core/hvy_tensor.h
paddle/fluid/lite/core/hvy_tensor.h
+2
-0
paddle/fluid/lite/core/mir/CMakeLists.txt
paddle/fluid/lite/core/mir/CMakeLists.txt
+10
-9
paddle/fluid/lite/core/naive_test_model.py
paddle/fluid/lite/core/naive_test_model.py
+6
-6
paddle/fluid/lite/core/profile/CMakeLists.txt
paddle/fluid/lite/core/profile/CMakeLists.txt
+1
-0
paddle/fluid/lite/cuda/CMakeLists.txt
paddle/fluid/lite/cuda/CMakeLists.txt
+1
-0
paddle/fluid/lite/gen_code/CMakeLists.txt
paddle/fluid/lite/gen_code/CMakeLists.txt
+6
-5
paddle/fluid/lite/host/CMakeLists.txt
paddle/fluid/lite/host/CMakeLists.txt
+1
-0
paddle/fluid/lite/kernels/CMakeLists.txt
paddle/fluid/lite/kernels/CMakeLists.txt
+1
-0
paddle/fluid/lite/kernels/arm/CMakeLists.txt
paddle/fluid/lite/kernels/arm/CMakeLists.txt
+6
-1
paddle/fluid/lite/kernels/arm/batch_norm_compute.cc
paddle/fluid/lite/kernels/arm/batch_norm_compute.cc
+114
-0
paddle/fluid/lite/kernels/arm/batch_norm_compute.h
paddle/fluid/lite/kernels/arm/batch_norm_compute.h
+42
-0
paddle/fluid/lite/kernels/arm/batch_norm_compute_test.cc
paddle/fluid/lite/kernels/arm/batch_norm_compute_test.cc
+221
-0
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
+15
-0
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+37
-20
paddle/fluid/lite/kernels/arm/fc_compute.h
paddle/fluid/lite/kernels/arm/fc_compute.h
+10
-3
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+122
-97
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+38
-38
paddle/fluid/lite/kernels/arm/mul_compute.h
paddle/fluid/lite/kernels/arm/mul_compute.h
+39
-0
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
+152
-0
paddle/fluid/lite/kernels/arm/pool_compute_test.cc
paddle/fluid/lite/kernels/arm/pool_compute_test.cc
+1
-1
paddle/fluid/lite/kernels/arm/scale_compute_test.cc
paddle/fluid/lite/kernels/arm/scale_compute_test.cc
+11
-0
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
+1
-0
paddle/fluid/lite/kernels/host/CMakeLists.txt
paddle/fluid/lite/kernels/host/CMakeLists.txt
+1
-0
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+1
-0
paddle/fluid/lite/kernels/x86/sgd_compute.cc
paddle/fluid/lite/kernels/x86/sgd_compute.cc
+7
-7
paddle/fluid/lite/model_parser/CMakeLists.txt
paddle/fluid/lite/model_parser/CMakeLists.txt
+1
-0
paddle/fluid/lite/model_parser/cpp/CMakeLists.txt
paddle/fluid/lite/model_parser/cpp/CMakeLists.txt
+1
-0
paddle/fluid/lite/model_parser/pb/CMakeLists.txt
paddle/fluid/lite/model_parser/pb/CMakeLists.txt
+1
-0
paddle/fluid/lite/operators/CMakeLists.txt
paddle/fluid/lite/operators/CMakeLists.txt
+4
-0
paddle/fluid/lite/operators/batch_norm_op.cc
paddle/fluid/lite/operators/batch_norm_op.cc
+110
-0
paddle/fluid/lite/operators/batch_norm_op.h
paddle/fluid/lite/operators/batch_norm_op.h
+46
-0
paddle/fluid/lite/operators/batch_norm_op_test.cc
paddle/fluid/lite/operators/batch_norm_op_test.cc
+139
-0
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+20
-0
paddle/fluid/lite/operators/pool_op_test.cc
paddle/fluid/lite/operators/pool_op_test.cc
+4
-0
paddle/fluid/lite/operators/split_op.cc
paddle/fluid/lite/operators/split_op.cc
+1
-1
paddle/fluid/lite/tools/Dockerfile.mobile
paddle/fluid/lite/tools/Dockerfile.mobile
+1
-1
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+102
-46
paddle/fluid/lite/utils/CMakeLists.txt
paddle/fluid/lite/utils/CMakeLists.txt
+1
-0
paddle/fluid/lite/x86/CMakeLists.txt
paddle/fluid/lite/x86/CMakeLists.txt
+1
-0
python/paddle/proto/__init__.py
python/paddle/proto/__init__.py
+16
-0
未找到文件。
.gitlab-ci.yml
0 → 100755
浏览文件 @
ace19269
before_script
:
-
env
image
:
$SERVER_LITE_DOCKER_IMAGE
stages
:
-
ci
-
build_server
-
build_mobile
check:prebuilt:
tags
:
-
lite
stage
:
ci
script
:
#- pip3 install pre-commit
#- alias python=python3
-
rm -rf ~/.pip
-
pip install pre-commit
-
pre-commit install
-
./paddle/fluid/lite/tools/build.sh check_style
#- ./paddle/fluid/lite/tools/build.sh check_need_ci
cache
:
key
:
check_style
paths
:
-
/root/.cache
build:server:
tags
:
-
lite
image
:
$SERVER_LITE_DOCKER_IMAGE
stage
:
build_server
cache
:
key
:
server_thirdparty
paths
:
-
build/third_party
-
/root/.ccache
script
:
-
apt install ccache
-
export http_proxy=http://172.19.57.45:3128
-
export https_proxy=http://172.19.57.45:3128
#- export http_proxy=http://agent.baidu.com:8118
#- export https_proxy=http://agent.baidu.com:8118
-
mkdir -p build
-
cd build
-
../paddle/fluid/lite/tools/build.sh cmake_x86
-
make extern_eigen3
-
make extern_boost
-
make framework_proto
-
make extern_warpctc
-
cd ..
-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/build/third_party/install/mklml/lib
-
./paddle/fluid/lite/tools/build.sh build_test_server
dependencies
:
-
check:prebuilt
build:mobile:
tags
:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
/root/.ccache
script
:
-
apt install ccache
-
export http_proxy=http://172.19.57.45:3128
-
export https_proxy=http://172.19.57.45:3128
-
./paddle/fluid/lite/tools/build.sh build_test_arm
dependencies
:
-
build:server
CMakeLists.txt
浏览文件 @
ace19269
...
@@ -166,6 +166,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -166,6 +166,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
#include(external/zlib) # download, build, install gtest
#include(external/zlib) # download, build, install gtest
include
(
external/protobuf
)
# download, build, install protobuf
include
(
external/protobuf
)
# download, build, install protobuf
include
(
external/eigen
)
# download eigen3
include
(
external/eigen
)
# download eigen3
include
(
ccache
)
# set ccache for compilation
include
(
generic
)
# simplify cmake module
include
(
generic
)
# simplify cmake module
include
(
configure
)
# add paddle env configuration
include
(
configure
)
# add paddle env configuration
...
...
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -172,3 +172,4 @@ add_subdirectory(model_parser)
...
@@ -172,3 +172,4 @@ add_subdirectory(model_parser)
add_subdirectory
(
utils
)
add_subdirectory
(
utils
)
add_subdirectory
(
api
)
add_subdirectory
(
api
)
add_subdirectory
(
gen_code
)
add_subdirectory
(
gen_code
)
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -14,7 +14,7 @@ if(LITE_WITH_CUDA)
...
@@ -14,7 +14,7 @@ if(LITE_WITH_CUDA)
set
(
light_api_deps
${
light_api_deps
}
target_wrapper_cuda
)
set
(
light_api_deps
${
light_api_deps
}
target_wrapper_cuda
)
endif
()
endif
()
cc_library
(
light_api_lite SRCS light_api.cc DEPS
${
light_api_deps
}
${
ops_lite
}
${
host_kernels
}
)
#
cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})
message
(
STATUS
"get ops
${
ops_lite
}
"
)
message
(
STATUS
"get ops
${
ops_lite
}
"
)
message
(
STATUS
"get Host kernels
${
host_kernels
}
"
)
message
(
STATUS
"get Host kernels
${
host_kernels
}
"
)
...
...
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
ace19269
...
@@ -66,7 +66,7 @@ USE_LITE_OP(fetch);
...
@@ -66,7 +66,7 @@ USE_LITE_OP(fetch);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
con2d
);
USE_LITE_OP
(
con2d
);
USE_LITE_OP
(
batch_norm
);
//
USE_LITE_OP(batch_norm);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
depthwise_conv2d
);
USE_LITE_OP
(
depthwise_conv2d
);
USE_LITE_OP
(
pool2d
);
USE_LITE_OP
(
pool2d
);
...
...
paddle/fluid/lite/arm/CMakeLists.txt
浏览文件 @
ace19269
add_subdirectory
(
math
)
add_subdirectory
(
math
)
paddle/fluid/lite/arm/math/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -32,5 +32,7 @@ cc_library(math_arm SRCS
...
@@ -32,5 +32,7 @@ cc_library(math_arm SRCS
conv_winograd_3x3.cc
conv_winograd_3x3.cc
conv_winograd.cc
conv_winograd.cc
split.cc
split.cc
DEPS
${
lite_kernel_deps
}
eigen3
)
DEPS
${
lite_kernel_deps
}
eigen3 framework_proto_lite
)
# TODO(TJ): fix me do not deps proto
paddle/fluid/lite/arm/math/scale.cc
浏览文件 @
ace19269
...
@@ -58,6 +58,111 @@ void scale<float>(const float* din, float* dout, int num, float scale,
...
@@ -58,6 +58,111 @@ void scale<float>(const float* din, float* dout, int num, float scale,
}
}
}
}
template
<
>
void
scale
<
float
>
(
const
float
*
din
,
float
*
dout
,
int
outer_dim
,
int
scale_dim
,
int
inner_dim
,
const
float
*
scale_data
,
const
float
*
bias_data
)
{
int
cnt
=
inner_dim
>>
4
;
int
remain
=
inner_dim
%
16
;
int
size
=
inner_dim
*
scale_dim
;
for
(
int
n
=
0
;
n
<
outer_dim
;
n
++
)
{
const
float
*
din_ptr_n
=
din
+
n
*
size
;
float
*
dout_ptr_n
=
dout
+
n
*
size
;
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
scale_dim
;
i
++
)
{
const
float
*
din_ptr
=
din_ptr_n
+
i
*
inner_dim
;
float
*
dout_ptr
=
dout_ptr_n
+
i
*
inner_dim
;
float
scale
=
scale_data
[
i
];
float32x4_t
vscale
=
vdupq_n_f32
(
scale
);
float
bias
=
bias_data
[
i
];
float32x4_t
vbias
=
vdupq_n_f32
(
bias
);
for
(
int
j
=
0
;
j
<
cnt
;
j
++
)
{
float32x4_t
din0
=
vld1q_f32
(
din_ptr
);
float32x4_t
din1
=
vld1q_f32
(
din_ptr
+
4
);
float32x4_t
din2
=
vld1q_f32
(
din_ptr
+
8
);
float32x4_t
din3
=
vld1q_f32
(
din_ptr
+
12
);
float32x4_t
vsum1
=
vmlaq_f32
(
vbias
,
din0
,
vscale
);
float32x4_t
vsum2
=
vmlaq_f32
(
vbias
,
din1
,
vscale
);
float32x4_t
vsum3
=
vmlaq_f32
(
vbias
,
din2
,
vscale
);
float32x4_t
vsum4
=
vmlaq_f32
(
vbias
,
din3
,
vscale
);
din_ptr
+=
16
;
vst1q_f32
(
dout_ptr
,
vsum1
);
vst1q_f32
(
dout_ptr
+
4
,
vsum2
);
vst1q_f32
(
dout_ptr
+
8
,
vsum3
);
vst1q_f32
(
dout_ptr
+
12
,
vsum4
);
dout_ptr
+=
16
;
}
for
(
int
j
=
0
;
j
<
remain
;
j
++
)
{
*
dout_ptr
=
*
din_ptr
*
scale
+
bias
;
dout_ptr
++
;
din_ptr
++
;
}
}
}
}
template
<
>
void
scale
<
float
>
(
const
float
*
din
,
float
*
dout
,
int
outer_dim
,
int
scale_dim
,
const
float
*
scale_data
,
const
float
*
bias_data
)
{
int
cnt
=
scale_dim
>>
4
;
int
remain
=
scale_dim
%
16
;
for
(
int
n
=
0
;
n
<
outer_dim
;
n
++
)
{
const
float
*
din_ptr_n
=
din
+
n
*
scale_dim
;
float
*
dout_ptr_n
=
dout
+
n
*
scale_dim
;
#pragma omp parallel for
for
(
int
i
=
0
;
i
<
cnt
;
i
++
)
{
int
idx
=
i
<<
4
;
const
float
*
din_ptr
=
din_ptr_n
+
idx
;
const
float
*
scale_ptr
=
scale_data
+
idx
;
const
float
*
bias_ptr
=
bias_data
+
idx
;
float
*
dout_ptr
=
dout_ptr_n
+
idx
;
float32x4_t
din0
=
vld1q_f32
(
din_ptr
);
float32x4_t
vscale0
=
vld1q_f32
(
scale_ptr
);
float32x4_t
vbias0
=
vld1q_f32
(
bias_ptr
);
float32x4_t
din1
=
vld1q_f32
(
din_ptr
+
4
);
float32x4_t
vscale1
=
vld1q_f32
(
scale_ptr
+
4
);
float32x4_t
vbias1
=
vld1q_f32
(
bias_ptr
+
4
);
float32x4_t
din2
=
vld1q_f32
(
din_ptr
+
8
);
float32x4_t
vscale2
=
vld1q_f32
(
scale_ptr
+
8
);
float32x4_t
vbias2
=
vld1q_f32
(
bias_ptr
+
8
);
float32x4_t
vsum1
=
vmlaq_f32
(
vbias0
,
din0
,
vscale0
);
float32x4_t
vsum2
=
vmlaq_f32
(
vbias1
,
din1
,
vscale1
);
float32x4_t
din3
=
vld1q_f32
(
din_ptr
+
12
);
float32x4_t
vscale3
=
vld1q_f32
(
scale_ptr
+
12
);
float32x4_t
vbias3
=
vld1q_f32
(
bias_ptr
+
12
);
vst1q_f32
(
dout_ptr
,
vsum1
);
vst1q_f32
(
dout_ptr
+
4
,
vsum2
);
float32x4_t
vsum3
=
vmlaq_f32
(
vbias2
,
din2
,
vscale2
);
float32x4_t
vsum4
=
vmlaq_f32
(
vbias3
,
din3
,
vscale3
);
vst1q_f32
(
dout_ptr
+
8
,
vsum3
);
vst1q_f32
(
dout_ptr
+
12
,
vsum4
);
}
int
idx
=
cnt
<<
4
;
const
float
*
din_ptr
=
din_ptr_n
+
idx
;
float
*
dout_ptr
=
dout_ptr_n
+
idx
;
const
float
*
scale_ptr
=
scale_data
+
idx
;
const
float
*
bias_ptr
=
bias_data
+
idx
;
for
(
int
j
=
0
;
j
<
remain
;
j
++
)
{
*
dout_ptr
=
*
din_ptr
*
(
*
scale_ptr
)
+
(
*
bias_ptr
);
dout_ptr
++
;
din_ptr
++
;
scale_ptr
++
;
bias_ptr
++
;
}
}
}
}
// namespace math
}
// namespace math
}
// namespace arm
}
// namespace arm
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/arm/math/scale.h
浏览文件 @
ace19269
...
@@ -22,6 +22,14 @@ namespace math {
...
@@ -22,6 +22,14 @@ namespace math {
template
<
typename
T
>
template
<
typename
T
>
void
scale
(
const
T
*
din
,
T
*
dout
,
int
num
,
float
scale
,
float
bias
);
void
scale
(
const
T
*
din
,
T
*
dout
,
int
num
,
float
scale
,
float
bias
);
template
<
typename
T
>
void
scale
(
const
T
*
din
,
T
*
dout
,
int
outer_dim
,
int
scale_dim
,
int
inner_dim
,
const
float
*
scale_data
,
const
float
*
bias_data
);
template
<
typename
T
>
void
scale
(
const
T
*
din
,
T
*
dout
,
int
outer_dim
,
int
scale_dim
,
const
float
*
scale_data
,
const
float
*
bias_data
);
}
// namespace math
}
// namespace math
}
// namespace arm
}
// namespace arm
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/arm/math/type_trans.cpp
浏览文件 @
ace19269
此差异已折叠。
点击以展开。
paddle/fluid/lite/core/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -24,13 +24,14 @@ cc_library(variable_lite SRCS variable.cc)
...
@@ -24,13 +24,14 @@ cc_library(variable_lite SRCS variable.cc)
cc_library
(
op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite
)
cc_library
(
op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite
)
cc_library
(
scope_lite SRCS scope.cc DEPS
${
tensor_lite
}
)
cc_library
(
scope_lite SRCS scope.cc DEPS
${
tensor_lite
}
)
cc_library
(
cpu_info_lite SRCS cpu_info.cc
)
cc_library
(
cpu_info_lite SRCS cpu_info.cc
)
cc_library
(
context_lite SRCS context.cc DEPS
${
tensor_lite
}
any_lite cpu_info_lite
)
lite_cc_library
(
context_lite SRCS context.cc DEPS
${
tensor_lite
}
any_lite cpu_info_lite eigen3
)
cc_library
(
op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
cc_library
(
op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
cpp_op_desc_lite
${
tensor_lite
}
)
cpp_op_desc_lite
${
tensor_lite
}
)
cc_library
(
types_lite SRCS types.cc
)
cc_library
(
types_lite SRCS types.cc
)
cc_library
(
type_system SRCS type_system.cc DEPS
${
tensor_lite
}
target_wrapper_lite
)
cc_library
(
type_system SRCS type_system.cc DEPS
${
tensor_lite
}
target_wrapper_lite
)
lite_cc_library
(
program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto
)
lite_cc_library
(
program_lite SRCS program.cc DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite HVY_DEPS framework_proto
PROFILE_DEPS basic_profiler_lite
)
cc_library
(
optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite
)
cc_library
(
optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite
)
add_subdirectory
(
mir
)
add_subdirectory
(
mir
)
...
@@ -56,3 +57,4 @@ lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_li
...
@@ -56,3 +57,4 @@ lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_li
lite_cc_test
(
test_types_lite SRCS types_test.cc DEPS types_lite
)
lite_cc_test
(
test_types_lite SRCS types_test.cc DEPS types_lite
)
lite_cc_test
(
test_memory_lite SRCS memory_test.cc DEPS memory_lite
)
lite_cc_test
(
test_memory_lite SRCS memory_test.cc DEPS memory_lite
)
lite_cc_test
(
test_context_lite SRCS context_test.cc DEPS context_lite X86_DEPS operator
)
lite_cc_test
(
test_context_lite SRCS context_test.cc DEPS context_lite X86_DEPS operator
)
paddle/fluid/lite/core/cpu_info.cc
浏览文件 @
ace19269
...
@@ -54,15 +54,15 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
...
@@ -54,15 +54,15 @@ void DeviceInfo::InitInternal(DeviceInfo* dev) {
<<
", cluster ID: "
<<
dev
->
cluster_ids_
[
dev
->
core_ids_
[
i
]]
<<
", cluster ID: "
<<
dev
->
cluster_ids_
[
dev
->
core_ids_
[
i
]]
<<
", CPU ARCH: A"
<<
dev
->
archs_
[
i
];
<<
", CPU ARCH: A"
<<
dev
->
archs_
[
i
];
}
}
LOG
(
INFO
)
<<
"L1 DataCache size is: "
;
VLOG
(
1
)
<<
"L1 DataCache size is: "
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
dev
->
L1_cache_
[
i
]
/
1024
<<
" KB"
;
VLOG
(
1
)
<<
dev
->
L1_cache_
[
i
]
/
1024
<<
" KB"
;
}
}
LOG
(
INFO
)
<<
"L2 Cache size is: "
;
VLOG
(
1
)
<<
"L2 Cache size is: "
;
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
for
(
int
i
=
0
;
i
<
dev
->
compute_core_num_
;
++
i
)
{
LOG
(
INFO
)
<<
dev
->
L2_cache_
[
i
]
/
1024
<<
" KB"
;
VLOG
(
1
)
<<
dev
->
L2_cache_
[
i
]
/
1024
<<
" KB"
;
}
}
LOG
(
INFO
)
<<
"Total memory: "
<<
dev
->
max_memory_
<<
"KB"
;
VLOG
(
1
)
<<
"Total memory: "
<<
dev
->
max_memory_
<<
"KB"
;
dev
->
max_freq_
=
max_freq
[
0
];
dev
->
max_freq_
=
max_freq
[
0
];
for
(
int
j
=
1
;
j
<
dev
->
compute_core_num_
;
++
j
)
{
for
(
int
j
=
1
;
j
<
dev
->
compute_core_num_
;
++
j
)
{
...
...
paddle/fluid/lite/core/hvy_tensor.h
浏览文件 @
ace19269
...
@@ -107,6 +107,8 @@ class TensorHvy : public TensorBase<TensorHvy> {
...
@@ -107,6 +107,8 @@ class TensorHvy : public TensorBase<TensorHvy> {
data_
.
Resize
(
framework
::
make_ddim
(
dims
.
Vectorize
()));
data_
.
Resize
(
framework
::
make_ddim
(
dims
.
Vectorize
()));
}
}
void
Resize
(
const
std
::
vector
<
int64_t
>&
x
)
{
Resize
(
DDimHvy
(
x
));
}
void
ShareDataWith
(
const
TensorHvy
&
other
)
{
void
ShareDataWith
(
const
TensorHvy
&
other
)
{
data_
.
ShareDataWith
(
other
.
data_
);
data_
.
ShareDataWith
(
other
.
data_
);
}
}
...
...
paddle/fluid/lite/core/mir/CMakeLists.txt
浏览文件 @
ace19269
cc_library
(
mir_node SRCS node.cc DEPS framework_proto_lite
)
cc_library
(
mir_node SRCS node.cc DEPS framework_proto_lite
)
cc_library
(
mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node
)
cc_library
(
mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node
program_lite
)
cc_library
(
mir_pass SRCS pass.cc DEPS mir_ssa_graph
)
cc_library
(
mir_pass SRCS pass.cc DEPS mir_ssa_graph
)
cc_library
(
mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes
)
cc_library
(
mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes
)
cc_library
(
mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager
)
cc_library
(
mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager
)
...
@@ -20,14 +20,14 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -20,14 +20,14 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
return
()
return
()
endif
()
endif
()
cc_test
(
test_mir_pass_manager SRCS pass_manager_test.cc DEPS mir_pass_manager mir_passes
)
cc_test
(
test_mir_pass_manager SRCS pass_manager_test.cc DEPS mir_pass_manager mir_passes
)
cc_test
(
test_ssa_graph SRCS ssa_graph_test.cc DEPS
#
cc_test(test_ssa_graph SRCS ssa_graph_test.cc DEPS
mir_ssa_graph scope_lite op_lite
#
mir_ssa_graph scope_lite op_lite
fc_op_lite
#
fc_op_lite
${
host_kernels
}
#
${host_kernels}
mir_passes
#
mir_passes
mir_pass_manager
#
mir_pass_manager
program_fake_utils
#
program_fake_utils
)
#
)
# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
# lite_cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc
# DEPS
# DEPS
# mul_op_lite
# mul_op_lite
...
@@ -59,3 +59,4 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -59,3 +59,4 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
pattern_matcher_high_api proto_desc mir_pass_manager fc_op_lite mul_op_lite elementwise_ops_lite
mir_passes compatible_pb_lite program_lite
${
ops_lite
}
)
mir_passes compatible_pb_lite program_lite
${
ops_lite
}
)
endif
()
endif
()
paddle/fluid/lite/core/naive_test_model.py
浏览文件 @
ace19269
...
@@ -18,10 +18,10 @@ import numpy as np
...
@@ -18,10 +18,10 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
a
=
fluid
.
layers
.
data
(
name
=
"a"
,
shape
=
[
100
],
dtype
=
'float32'
)
a
=
fluid
.
layers
.
data
(
name
=
"a"
,
shape
=
[
2
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
10
0
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
10
],
dtype
=
'float32'
)
a1
=
fluid
.
layers
.
fc
(
input
=
a
,
size
=
500
,
act
=
None
,
bias_attr
=
False
)
a1
=
fluid
.
layers
.
fc
(
input
=
a
,
size
=
3
,
act
=
None
,
bias_attr
=
False
)
cost
=
fluid
.
layers
.
square_error_cost
(
a1
,
label
)
cost
=
fluid
.
layers
.
square_error_cost
(
a1
,
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
...
@@ -36,7 +36,7 @@ exe.run(fluid.default_startup_program())
...
@@ -36,7 +36,7 @@ exe.run(fluid.default_startup_program())
with
open
(
'startup_program.pb'
,
'wb'
)
as
f
:
with
open
(
'startup_program.pb'
,
'wb'
)
as
f
:
f
.
write
(
fluid
.
default_startup_program
().
desc
.
serialize_to_string
())
f
.
write
(
fluid
.
default_startup_program
().
desc
.
serialize_to_string
())
data_1
=
np
.
array
(
numpy
.
random
.
random
([
100
,
100
]),
dtype
=
'float32'
)
#
data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
#fluid.default_main_program().desc.
#fluid.default_main_program().desc.
...
@@ -50,7 +50,7 @@ with open('main_program.pb', 'wb') as f:
...
@@ -50,7 +50,7 @@ with open('main_program.pb', 'wb') as f:
#outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
#outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
sys
.
exit
(
0
)
#
sys.exit(0)
fluid
.
io
.
save_inference_model
(
"./model2"
,
[
a
.
name
],
[
a1
],
exe
)
fluid
.
io
.
save_inference_model
(
"./model2"
,
[
a
.
name
],
[
a1
],
exe
)
print
(
numpy
.
array
(
outs
))
#
print(numpy.array(outs))
paddle/fluid/lite/core/profile/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -4,3 +4,4 @@ endif()
...
@@ -4,3 +4,4 @@ endif()
lite_cc_library
(
basic_profiler_lite SRCS basic_profiler.cc
)
lite_cc_library
(
basic_profiler_lite SRCS basic_profiler.cc
)
lite_cc_test
(
test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler_lite
)
lite_cc_test
(
test_basic_profiler SRCS basic_profiler_test.cc DEPS basic_profiler_lite
)
paddle/fluid/lite/cuda/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -4,3 +4,4 @@ endif()
...
@@ -4,3 +4,4 @@ endif()
nv_library
(
target_wrapper_cuda SRCS target_wrapper.cc
)
nv_library
(
target_wrapper_cuda SRCS target_wrapper.cc
)
nv_library
(
cuda_blas_lite SRCS blas.cc
)
nv_library
(
cuda_blas_lite SRCS blas.cc
)
paddle/fluid/lite/gen_code/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -18,10 +18,11 @@ if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...
@@ -18,10 +18,11 @@ if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
DEPS scope_lite op_lite kernel_lite paddle_infer_gencode
DEPS scope_lite op_lite kernel_lite paddle_infer_gencode
)
)
lite_cc_test
(
test_generated_code SRCS generated_code_test.cc DEPS __generated_code__
#
lite_cc_test(test_generated_code SRCS generated_code_test.cc DEPS __generated_code__
${
ops_lite
}
${
host_kernels
}
#
${ops_lite} ${host_kernels}
X86_DEPS
${
x86_kernels
}
#
X86_DEPS ${x86_kernels}
)
#
)
add_dependencies
(
__generated_code__ test_gen_code_lite
)
#
add_dependencies(__generated_code__ test_gen_code_lite)
endif
()
endif
()
paddle/fluid/lite/host/CMakeLists.txt
浏览文件 @
ace19269
cc_library
(
target_wrapper_host SRCS target_wrapper.cc
)
cc_library
(
target_wrapper_host SRCS target_wrapper.cc
)
paddle/fluid/lite/kernels/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -5,3 +5,4 @@ add_subdirectory(arm)
...
@@ -5,3 +5,4 @@ add_subdirectory(arm)
add_subdirectory
(
cuda
)
add_subdirectory
(
cuda
)
add_subdirectory
(
x86
)
add_subdirectory
(
x86
)
paddle/fluid/lite/kernels/arm/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -6,10 +6,11 @@ message(STATUS "compile with lite ARM kernels")
...
@@ -6,10 +6,11 @@ message(STATUS "compile with lite ARM kernels")
cc_library
(
fc_compute_arm SRCS fc_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
fc_compute_arm SRCS fc_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
relu_compute_arm SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
relu_compute_arm SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_arm SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
eigen3
)
cc_library
(
mul_compute_arm SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
scale_compute_arm SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
scale_compute_arm SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
softmax_compute_arm SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
softmax_compute_arm SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
conv_compute_arm SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
conv_compute_arm SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
elementwise_add_compute_arm SRCS elementwise_add_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
elementwise_add_compute_arm SRCS elementwise_add_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
pool_compute_arm SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
pool_compute_arm SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
split_compute_arm SRCS split_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
split_compute_arm SRCS split_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
...
@@ -18,8 +19,10 @@ lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm mat
...
@@ -18,8 +19,10 @@ lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm mat
lite_cc_test
(
test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm
)
lite_cc_test
(
test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm
)
lite_cc_test
(
test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm
)
lite_cc_test
(
test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm
)
lite_cc_test
(
test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm
)
lite_cc_test
(
test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm
)
lite_cc_test
(
test_batch_norm_compute_arm SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_arm
)
lite_cc_test
(
test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm
)
lite_cc_test
(
test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm
)
lite_cc_test
(
test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm
)
lite_cc_test
(
test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm
)
lite_cc_test
(
test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm
)
lite_cc_test
(
test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm
)
lite_cc_test
(
test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm
)
set
(
arm_kernels
set
(
arm_kernels
...
@@ -29,6 +32,7 @@ set(arm_kernels
...
@@ -29,6 +32,7 @@ set(arm_kernels
scale_compute_arm
scale_compute_arm
softmax_compute_arm
softmax_compute_arm
conv_compute_arm
conv_compute_arm
batch_norm_compute_arm
elementwise_add_compute_arm
elementwise_add_compute_arm
pool_compute_arm
pool_compute_arm
split_compute_arm
split_compute_arm
...
@@ -36,3 +40,4 @@ set(arm_kernels
...
@@ -36,3 +40,4 @@ set(arm_kernels
set
(
arm_kernels
"
${
arm_kernels
}
"
CACHE INTERNAL
"arm kernels"
)
set
(
arm_kernels
"
${
arm_kernels
}
"
CACHE INTERNAL
"arm kernels"
)
paddle/fluid/lite/kernels/arm/batch_norm_compute.cc
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/batch_norm_compute.h"
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
void
BatchNormCompute
::
PrepareForRun
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
x_dims
=
param
.
x
->
dims
();
bool
global_stats
=
param
.
is_test
||
param
.
use_global_stats
;
if
(
global_stats
)
{
int64_t
channel_size
=
0
;
switch
(
param
.
data_layout
)
{
case
DATALAYOUT
(
kNCHW
):
channel_size
=
x_dims
[
1
];
break
;
// case DATALAYOUT(kNHWC):
// channel_size = x_dims[x_dims.size() - 1];
// break;
default:
LOG
(
FATAL
)
<<
"Unknown storage order: "
<<
DataLayoutToStr
(
param
.
data_layout
);
break
;
}
new_scale
.
Resize
({
channel_size
});
new_bias
.
Resize
({
channel_size
});
auto
*
scale_data
=
param
.
scale
->
mutable_data
<
float
>
();
auto
*
bias_data
=
param
.
bias
->
mutable_data
<
float
>
();
auto
*
mean_data
=
param
.
mean
->
mutable_data
<
float
>
();
auto
*
variance_data
=
param
.
variance
->
mutable_data
<
float
>
();
auto
*
new_scale_data
=
new_scale
.
mutable_data
<
float
>
();
auto
*
new_bias_data
=
new_bias
.
mutable_data
<
float
>
();
for
(
int
c
=
0
;
c
<
channel_size
;
c
++
)
{
float
inv_scale
=
1.
f
/
(
std
::
sqrt
(
variance_data
[
c
]
+
param
.
epsilon
));
new_bias_data
[
c
]
=
bias_data
[
c
]
-
inv_scale
*
scale_data
[
c
]
*
mean_data
[
c
];
new_scale_data
[
c
]
=
inv_scale
*
scale_data
[
c
];
}
}
}
void
BatchNormCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
x_dims
=
param
.
x
->
dims
();
auto
x_data
=
param
.
x
->
mutable_data
<
float
>
();
auto
y_data
=
param
.
y
->
mutable_data
<
float
>
();
bool
global_stats
=
param
.
is_test
||
param
.
use_global_stats
;
if
(
global_stats
)
{
auto
*
new_scale_data
=
new_scale
.
mutable_data
<
float
>
();
auto
*
new_bias_data
=
new_bias
.
mutable_data
<
float
>
();
int64_t
outer_size
=
0
;
int64_t
channel_size
=
0
;
int64_t
inner_size
=
0
;
switch
(
param
.
data_layout
)
{
case
DATALAYOUT
(
kNCHW
):
outer_size
=
x_dims
[
0
];
channel_size
=
x_dims
[
1
];
inner_size
=
x_dims
.
Slice
(
2
,
x_dims
.
size
()).
production
();
lite
::
arm
::
math
::
scale
(
x_data
,
y_data
,
outer_size
,
channel_size
,
inner_size
,
new_scale_data
,
new_bias_data
);
break
;
// case DATALAYOUT(kNHWC):
// outer_size = x_dims.Slice(0, x_dims.size() - 1).production();
// channel_size = x_dims[x_dims.size() - 1];
// lite::arm::math::scale(x_data, y_data, outer_size, channel_size,
// new_scale_data, new_bias_data);
// break;
default:
LOG
(
FATAL
)
<<
"Unknown storage order: "
<<
DataLayoutToStr
(
param
.
data_layout
);
break
;
}
}
else
{
// TODO(hong19860320) calculate mean_out, variance_out, saved_mean and
// saved_variance
}
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers BatchNormCompute as the "batch_norm" kernel for the
// kARM / kFloat / kNCHW combination under the alias `def`, and declares every
// input and output of the op as an ARM-target tensor.
REGISTER_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW,
                     paddle::lite::kernels::arm::BatchNormCompute, def)
    // Inputs.
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Scale", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Mean", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindInput("Variance", {LiteType::GetTensorTy(TARGET(kARM))})
    // Outputs.
    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("MeanOut", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("VarianceOut", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("SavedMean", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("SavedVariance", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
paddle/fluid/lite/kernels/arm/batch_norm_compute.h
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
// ARM float kernel for the batch_norm operator.
//
// The kernel keeps two per-instance tensors, `new_scale` and `new_bias`,
// which Run() feeds to the ARM scale routine.
class BatchNormCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  // Parameter bundle this kernel reads through Param<param_t>().
  using param_t = operators::BatchNormParam;

  virtual ~BatchNormCompute() = default;

  // One-time preparation hook invoked before the first Run().
  void PrepareForRun() override;

  // Executes the kernel on the currently set parameters.
  void Run() override;

 private:
  // Per-channel multiplier passed to lite::arm::math::scale() in Run().
  Tensor new_scale;
  // Per-channel offset passed to lite::arm::math::scale() in Run().
  Tensor new_bias;
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/arm/batch_norm_compute_test.cc
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/batch_norm_compute.h"
#include <gtest/gtest.h>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
// Straightforward scalar reference implementation of batch normalization,
// used to validate the ARM kernel.
//
// With global statistics (`is_test` or `use_global_stats` set) every element
// of X is mapped to
//
//   y = (x - mean[c]) / sqrt(variance[c] + epsilon) * scale[c] + bias[c]
//
// where `c` is the channel the element belongs to. Only the NCHW layout is
// supported: dim 0 is the outer extent, dim 1 the channel extent, and all
// remaining dims are flattened into the inner extent. Any other layout aborts
// through LOG(FATAL).
//
// The training path (mean_out, variance_out, saved_mean, saved_variance) is
// not implemented; in that case the function leaves the outputs untouched.
//
// Template parameter:
//   dtype - element type of all tensors referenced by `param`.
template <typename dtype>
void batch_norm_compute_ref(const operators::BatchNormParam& param) {
  DDim x_dims = param.x->dims();
  // Inputs are only read, so request read-only access to them.
  const dtype* x_data = param.x->template data<dtype>();
  const dtype* scale_data = param.scale->template data<dtype>();
  const dtype* bias_data = param.bias->template data<dtype>();
  const dtype* mean_data = param.mean->template data<dtype>();
  const dtype* variance_data = param.variance->template data<dtype>();
  auto y_data = param.y->template mutable_data<dtype>();
  float epsilon = param.epsilon;
  DataLayoutType data_layout = param.data_layout;

  bool global_stats = param.is_test || param.use_global_stats;
  if (!global_stats) {
    // TODO(hong19860320) calculate mean_out, variance_out, saved_mean and
    // saved_variance
    return;
  }

  int64_t outer_size = 0;
  int64_t channel_size = 0;
  int64_t inner_size = 0;
  switch (data_layout) {
    case DATALAYOUT(kNCHW):
      outer_size = x_dims[0];
      channel_size = x_dims[1];
      inner_size = x_dims.Slice(2, x_dims.size()).production();
      break;
    // NHWC is not supported yet. The disabled sketch was:
    //   outer_size = x_dims.Slice(0, x_dims.size() - 1).production();
    //   channel_size = x_dims[x_dims.size() - 1];
    //   inner_size = 1;
    default:
      LOG(FATAL) << "Unknown storage order: " << DataLayoutToStr(data_layout);
      break;
  }

  const dtype* x_ptr = x_data;
  auto y_ptr = y_data;
  // The loop bounds are int64_t, so the counters use the same type to avoid
  // signed-width mismatches on large tensors.
  for (int64_t o = 0; o < outer_size; ++o) {
    for (int64_t c = 0; c < channel_size; ++c) {
      // The denominator depends only on the channel; compute it once per
      // channel instead of once per element. The division itself is kept so
      // the per-element arithmetic is unchanged.
      const auto std_dev = std::sqrt(variance_data[c] + epsilon);
      for (int64_t i = 0; i < inner_size; ++i) {
        dtype norm_x = (*x_ptr - mean_data[c]) / std_dev;
        *y_ptr = norm_x * scale_data[c] + bias_data[c];
        ++x_ptr;
        ++y_ptr;
      }
    }
  }
}
// The kernel registry must hand back at least one non-null kARM/kFloat
// kernel for the op name "batch_norm".
TEST(batch_norm_arm, retrive_op) {
  auto kernels =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>(
          "batch_norm");
  ASSERT_FALSE(kernels.empty());
  ASSERT_TRUE(kernels.front());
}
// A default-constructed kernel must report float precision and the ARM
// target.
TEST(batch_norm_arm, init) {
  BatchNormCompute kernel;
  ASSERT_EQ(kernel.precision(), PRECISION(kFloat));
  ASSERT_EQ(kernel.target(), TARGET(kARM));
}
// Runs BatchNormCompute over a small grid of shapes and attributes and checks
// every output element against batch_norm_compute_ref<float>.
// Larger shapes, is_test == false and the NHWC layout are currently disabled
// (see the commented-out values in the loop headers).
TEST(batch_norm_arm, compute) {
  DeviceInfo::Init();
  for (auto n : {1, 2}) {
    for (auto c : {6, 32 /*, 128*/}) {
      for (auto h : {9, 18 /*, 56 , 112, 224, 512*/}) {
        for (auto w : {9, 18 /*, 56, 112, 224, 512*/}) {
          for (auto is_test : {/*false, */ true}) {
            for (auto use_global_stats : {false, true}) {
              for (auto epsilon : {1e-4f, 1e-5f}) {
                for (auto momentum : {0.9f, 0.99f}) {
                  for (auto data_layout :
                       {DATALAYOUT(kNCHW) /*, DATALAYOUT(kNHWC)*/}) {
                    // Kernel inputs.
                    Tensor x, scale, bias, mean, variance;
                    // Kernel outputs.
                    Tensor y, mean_out, variance_out;
                    Tensor saved_mean, saved_variance;
                    // Outputs of the reference implementation.
                    Tensor y_ref, mean_out_ref, variance_out_ref;
                    Tensor saved_mean_ref, saved_variance_ref;

                    // Shape shared by X, Y and the reference Y.
                    std::vector<int64_t> in_out_shape;
                    if (data_layout == DATALAYOUT(kNCHW)) {
                      in_out_shape = {n, c, h, w};
                    } else {
                      // NHWC ({n, h, w, c}) is not enabled yet.
                      LOG(FATAL) << "Unknown storage order: "
                                 << DataLayoutToStr(data_layout);
                    }

                    // Full-sized tensors.
                    for (Tensor* full : {&x, &y, &y_ref}) {
                      full->Resize(in_out_shape);
                    }
                    // Per-channel tensors.
                    for (Tensor* per_channel :
                         {&scale, &bias, &mean, &variance, &mean_out,
                          &variance_out, &saved_mean, &saved_variance,
                          &mean_out_ref, &variance_out_ref, &saved_mean_ref,
                          &saved_variance_ref}) {
                      per_channel->Resize({c});
                    }

                    // Allocate the buffers and fill the inputs with simple
                    // deterministic patterns.
                    auto* x_data = x.mutable_data<float>();
                    auto* scale_data = scale.mutable_data<float>();
                    auto* bias_data = bias.mutable_data<float>();
                    auto* mean_data = mean.mutable_data<float>();
                    auto* variance_data = variance.mutable_data<float>();
                    auto* y_data = y.mutable_data<float>();

                    const auto x_count = x.dims().production();
                    for (int i = 0; i < x_count; ++i) {
                      x_data[i] = static_cast<float>(i % 64);
                    }
                    const auto scale_count = scale.dims().production();
                    for (int i = 0; i < scale_count; ++i) {
                      scale_data[i] = static_cast<float>(i) * 0.01f + 0.03f;
                    }
                    const auto bias_count = bias.dims().production();
                    for (int i = 0; i < bias_count; ++i) {
                      bias_data[i] = static_cast<float>(i) * 0.065f + 0.1f;
                    }
                    const auto mean_count = mean.dims().production();
                    for (int i = 0; i < mean_count; ++i) {
                      mean_data[i] = static_cast<float>(i) * 0.0565f;
                    }
                    const auto variance_count = variance.dims().production();
                    for (int i = 0; i < variance_count; ++i) {
                      variance_data[i] = static_cast<float>(i) * 2.08f + 1.5f;
                    }

                    // Set up the kernel with an ARM context.
                    BatchNormCompute batch_norm;
                    std::unique_ptr<KernelContext> ctx(new KernelContext);
                    ctx->As<ARMContext>();
                    batch_norm.SetContext(std::move(ctx));

                    operators::BatchNormParam param;
                    param.x = &x;
                    param.scale = &scale;
                    param.bias = &bias;
                    param.mean = &mean;
                    param.variance = &variance;
                    param.is_test = is_test;
                    param.use_global_stats = use_global_stats;
                    param.epsilon = epsilon;
                    param.momentum = momentum;
                    param.data_layout = data_layout;
                    param.y = &y;
                    param.mean_out = &mean_out;
                    param.variance_out = &variance_out;
                    param.saved_mean = &saved_mean;
                    param.saved_variance = &saved_variance;
                    batch_norm.SetParam(param);
                    batch_norm.Launch();

                    // Point the outputs at the reference tensors and run the
                    // reference implementation on the same inputs.
                    param.y = &y_ref;
                    param.mean_out = &mean_out_ref;
                    param.variance_out = &variance_out_ref;
                    param.saved_mean = &saved_mean_ref;
                    param.saved_variance = &saved_variance_ref;
                    batch_norm_compute_ref<float>(param);

                    auto* y_ref_data = y_ref.mutable_data<float>();
                    const auto y_count = y.dims().production();
                    for (int i = 0; i < y_count; ++i) {
                      EXPECT_NEAR(y_data[i], y_ref_data[i], 1e-5);
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// References the batch_norm kernel registered for kARM / kFloat / kNCHW under
// the alias `def`. NOTE(review): presumably this forces the kernel's
// registration object to be linked into this test binary -- confirm against
// the macro definition in op_registry.h.
USE_LITE_KERNEL
(
batch_norm
,
kARM
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/arm/conv_compute_test.cc
浏览文件 @
ace19269
...
@@ -124,6 +124,20 @@ TEST(conv_arm, init) {
...
@@ -124,6 +124,20 @@ TEST(conv_arm, init) {
TEST
(
conv_arm
,
compute
)
{
TEST
(
conv_arm
,
compute
)
{
DeviceInfo
::
Init
();
DeviceInfo
::
Init
();
#if 1
for
(
auto
n
:
{
2
})
{
for
(
auto
ic
:
{
6
})
{
for
(
auto
oc
:
{
6
})
{
for
(
auto
ih
:
{
9
})
{
for
(
auto
iw
:
{
9
})
{
for
(
auto
flag_bias
:
{
false
,
true
})
{
for
(
auto
flag_relu
:
{
false
,
true
})
{
for
(
auto
depthwise
:
{
false
,
true
})
{
for
(
auto
dilation
:
{
1
})
{
for
(
auto
stride
:
{
1
,
2
})
{
for
(
auto
padding
:
{
0
,
1
,
2
})
{
for
(
auto
ks
:
{
1
,
3
,
5
})
{
#else
for
(
auto
n
:
{
1
,
2
})
{
for
(
auto
n
:
{
1
,
2
})
{
for
(
auto
ic
:
{
6
,
32
/*, 128*/
})
{
for
(
auto
ic
:
{
6
,
32
/*, 128*/
})
{
for
(
auto
oc
:
{
6
,
32
/*, 128*/
})
{
for
(
auto
oc
:
{
6
,
32
/*, 128*/
})
{
...
@@ -136,6 +150,7 @@ TEST(conv_arm, compute) {
...
@@ -136,6 +150,7 @@ TEST(conv_arm, compute) {
for
(
auto
stride
:
{
1
,
2
})
{
for
(
auto
stride
:
{
1
,
2
})
{
for
(
auto
padding
:
{
0
,
1
,
2
})
{
for
(
auto
padding
:
{
0
,
1
,
2
})
{
for
(
auto
ks
:
{
1
,
3
,
5
})
{
for
(
auto
ks
:
{
1
,
3
,
5
})
{
#endif
int
group
=
1
;
int
group
=
1
;
if
(
depthwise
)
{
// depthwise convolution ?
if
(
depthwise
)
{
// depthwise convolution ?
group
=
oc
=
ic
;
group
=
oc
=
ic
;
...
...
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
ace19269
...
@@ -22,7 +22,7 @@ namespace lite {
...
@@ -22,7 +22,7 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
void
FcCompute
::
Run
()
{
void
FcCompute
::
PrepareFor
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
auto
x_dims
=
param
.
input
->
dims
();
auto
x_dims
=
param
.
input
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
...
@@ -31,39 +31,56 @@ void FcCompute::Run() {
...
@@ -31,39 +31,56 @@ void FcCompute::Run() {
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
m_
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
k_
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
n_
=
w_dims
[
1
];
CHECK_EQ
(
k_
,
static_cast
<
int
>
(
w_dims
[
0
]));
if
(
m_
==
1
)
{
if
(
!
transed_weight_
)
{
transed_weight_
=
new
Tensor
;
}
transed_weight_
->
Resize
({
n_
,
k_
});
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
auto
*
t_data
=
transed_weight_
->
mutable_data
<
float
>
();
int
i
=
0
;
for
(
int
nn
=
0
;
nn
<
n_
;
++
nn
)
{
for
(
int
kk
=
0
;
kk
<
k_
;
++
kk
)
{
t_data
[
i
++
]
=
w_data
[
kk
*
n_
+
nn
];
}
}
}
}
void
FcCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
const
auto
*
i_data
=
param
.
input
->
data
<
float
>
();
const
auto
*
i_data
=
param
.
input
->
data
<
float
>
();
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
int
x_h
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
int
x_w
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
int
n
=
w_dims
[
1
];
CHECK_EQ
(
x_w
,
static_cast
<
int
>
(
w_dims
[
0
]));
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
if
(
x_h
>
1
)
{
if
(
m_
>
1
)
{
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
x_w
,
0
,
x_h
,
0
,
x_w
,
false
,
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
k_
,
0
,
m_
,
0
,
k_
,
false
,
&
ctx
);
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
m_
,
n_
,
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
x_h
,
n
,
k_
,
false
,
false
,
false
,
&
ctx
);
x_w
,
false
,
false
,
false
,
&
ctx
);
if
(
param
.
bias
)
{
if
(
param
.
bias
)
{
CHECK_EQ
(
param
.
bias
->
numel
(),
n
);
CHECK_EQ
(
param
.
bias
->
numel
(),
n
_
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
x_h
,
n
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
m_
,
n_
);
}
}
}
else
{
}
else
{
// use sgemmv
CHECK
(
transed_weight_
);
// sgemv((const float*)weights, (const float*)din, (float*)dout,
const
auto
*
t_data
=
transed_weight_
->
data
<
float
>
();
// false, n, x_w, _param->_flag_bias, (float*)bias, false);
lite
::
arm
::
math
::
sgemv
(
t_data
,
i_data
,
o_data
,
false
,
n_
,
k_
,
b_data
!=
nullptr
,
b_data
,
false
);
}
}
}
}
TargetType
FcCompute
::
target
()
const
{
return
TARGET
(
kARM
);
}
PrecisionType
FcCompute
::
precision
()
const
{
return
PRECISION
(
kFloat
);
}
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/kernels/arm/fc_compute.h
浏览文件 @
ace19269
...
@@ -25,12 +25,19 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
...
@@ -25,12 +25,19 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
public:
public:
using
param_t
=
operators
::
FcParam
;
using
param_t
=
operators
::
FcParam
;
void
PrepareForRun
()
override
;
void
Run
()
override
;
void
Run
()
override
;
TargetType
target
()
const
override
;
~
FcCompute
()
override
{
PrecisionType
precision
()
const
override
;
if
(
transed_weight_
)
{
delete
transed_weight_
;
}
};
virtual
~
FcCompute
()
=
default
;
private:
lite
::
Tensor
*
transed_weight_
{
nullptr
};
int
m_
,
n_
,
k_
;
};
};
}
// namespace arm
}
// namespace arm
...
...
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
浏览文件 @
ace19269
...
@@ -14,6 +14,11 @@
...
@@ -14,6 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
...
@@ -23,6 +28,17 @@ namespace lite {
...
@@ -23,6 +28,17 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
template
<
typename
T
>
void
FillData
(
T
*
a
,
const
int
n
,
const
T
lower
=
static_cast
<
T
>
(
-
2.
f
),
const
T
upper
=
static_cast
<
T
>
(
2.
f
))
{
static
unsigned
int
seed
=
100
;
std
::
mt19937
rng
(
seed
++
);
std
::
uniform_real_distribution
<
double
>
uniform_dist
(
0
,
1
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
a
[
i
]
=
static_cast
<
T
>
(
uniform_dist
(
rng
)
*
(
upper
-
lower
)
+
lower
);
}
}
TEST
(
fc_arm
,
retrive_op
)
{
TEST
(
fc_arm
,
retrive_op
)
{
auto
fc
=
auto
fc
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fc"
);
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fc"
);
...
@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
...
@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
}
}
TEST
(
fc_arm
,
compare_test
)
{
TEST
(
fc_arm
,
compare_test
)
{
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
using
T
=
float
;
constexpr
int
batch_size
=
2
;
x
.
Resize
({
batch_size
,
3
});
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
w
.
Resize
({
3
,
4
});
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
b
.
Resize
({
1
,
4
});
for
(
int
k
:
{
1
,
2
,
3
,
4
})
{
out
.
Resize
({
batch_size
,
4
});
for
(
bool
with_bias
:
{
true
,
false
})
{
ref
.
Resize
({
batch_size
,
4
});
VLOG
(
3
)
<<
"m: "
<<
m
<<
", n: "
<<
n
<<
", k: "
<<
k
<<
(
with_bias
?
", with bias"
:
""
);
auto
x_data
=
x
.
mutable_data
<
float
>
();
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
auto
w_data
=
w
.
mutable_data
<
float
>
();
auto
b_data
=
b
.
mutable_data
<
float
>
();
x
.
Resize
({
m
,
k
});
auto
out_data
=
out
.
mutable_data
<
float
>
();
w
.
Resize
({
k
,
n
});
auto
ref_data
=
ref
.
mutable_data
<
float
>
();
b
.
Resize
({
1
,
n
});
out
.
Resize
({
m
,
n
});
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
ref
.
Resize
({
m
,
n
});
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
auto
*
x_data
=
x
.
mutable_data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
auto
*
w_data
=
w
.
mutable_data
<
T
>
();
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
}
for
(
int64_t
i
=
0
;
i
<
b
.
dims
().
product
();
i
++
)
{
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
b_data
[
i
]
=
static_cast
<
float
>
(
i
);
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
}
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
batch_size
,
3
,
//
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
w_data
,
3
,
4
,
//
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
b_data
,
ref_data
);
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
// fc compute kernel
if
(
with_bias
)
{
FcCompute
fc
;
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
operators
::
FcParam
param
;
}
param
.
in_num_col_dims
=
1
;
FcCompute
fc
;
param
.
input
=
&
x
;
operators
::
FcParam
param
;
param
.
w
=
&
w
;
param
.
bias
=
&
b
;
param
.
input
=
&
x
;
param
.
output
=
&
out
;
param
.
w
=
&
w
;
param
.
in_mat_dims
=
x
.
dims
();
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
DeviceInfo
::
Init
();
param
.
in_num_col_dims
=
1
;
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
param
.
in_mat_dims
=
x
.
dims
();
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
DeviceInfo
::
Init
();
fc
.
SetContext
(
std
::
move
(
ctx
));
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
fc
.
Run
();
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
VLOG
(
3
)
<<
"output vs ref"
;
fc
.
SetContext
(
std
::
move
(
ctx
));
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
i
++
)
{
fc
.
PrepareForRun
();
VLOG
(
3
)
<<
out_data
[
i
]
<<
" vs "
<<
ref_data
[
i
];
fc
.
Run
();
}
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
m
,
k
,
w_data
,
k
,
n
,
b_data
,
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
++
i
)
{
ref_data
);
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-5
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
}
}
}
}
}
}
TEST
(
fc_arm
,
num_col_dims
)
{
TEST
(
fc_arm
,
num_col_dims
)
{
FcCompute
fc
;
using
T
=
float
;
operators
::
FcParam
param
;
for
(
bool
with_bias
:
{
true
,
false
})
{
lite
::
Tensor
x
;
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
lite
::
Tensor
w
;
lite
::
Tensor
bias
;
x
.
Resize
({
1
,
2
,
3
});
lite
::
Tensor
output
;
w
.
Resize
({
3
,
4
});
b
.
Resize
({
1
,
4
});
x
.
Resize
({
1
,
2
,
3
});
out
.
Resize
({
2
,
4
});
w
.
Resize
({
3
,
4
});
ref
.
Resize
({
2
,
4
});
bias
.
Resize
({
1
,
4
});
output
.
Resize
({
2
,
4
});
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
bias_data
=
bias
.
mutable_data
<
float
>
();
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
auto
*
output_data
=
output
.
mutable_data
<
float
>
();
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
}
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
if
(
with_bias
)
{
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
}
FcCompute
fc
;
operators
::
FcParam
param
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
param
.
in_num_col_dims
=
2
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
PrepareForRun
();
fc
.
Run
();
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
2
,
3
,
w_data
,
3
,
4
,
b_data
,
ref_data
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
}
for
(
int64_t
i
=
0
;
i
<
bias
.
dims
().
product
();
i
++
)
{
bias_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
output
.
dims
().
product
();
i
++
)
{
output_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
param
.
in_num_col_dims
=
2
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
&
bias
;
param
.
output
=
&
output
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
}
}
}
// namespace arm
}
// namespace arm
...
...
paddle/fluid/lite/kernels/arm/mul_compute.cc
浏览文件 @
ace19269
...
@@ -12,57 +12,57 @@
...
@@ -12,57 +12,57 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include
<Eigen/Core>
#include
"paddle/fluid/lite/kernels/arm/mul_compute.h"
#include "paddle/fluid/lite/
core/kernel
.h"
#include "paddle/fluid/lite/
arm/math/funcs
.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type
s
.h"
#include "paddle/fluid/lite/core/type
_system
.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
template
<
typename
T
>
void
MulCompute
::
PrepareForRun
()
{
void
mul_compute_eigen
(
const
T
*
x
,
int
x_h
,
int
x_w
,
const
T
*
y
,
int
y_h
,
// TODO(TJ): transpose x or y if necessary
int
y_w
,
T
*
out
)
{
}
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
void
MulCompute
::
Run
()
{
Eigen
::
Map
<
const
matrix_t
>
Y
(
y
,
y_h
,
y_w
);
auto
&
param
=
Param
<
param_t
>
();
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
y_w
);
Out
=
X
*
Y
;
const
auto
*
x_data
=
param
.
x
->
data
<
float
>
();
}
const
auto
*
y_data
=
param
.
y
->
data
<
float
>
();
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
class
MulCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
int
m
=
static_cast
<
int
>
(
public:
param
.
x
->
dims
().
Slice
(
0
,
param
.
x_num_col_dims
).
production
());
using
param_t
=
operators
::
MulParam
;
int
x_w
=
static_cast
<
int
>
(
param
.
x
->
dims
()
.
Slice
(
param
.
x_num_col_dims
,
param
.
x
->
dims
().
size
())
.
production
());
int
y_h
=
static_cast
<
int
>
(
param
.
y
->
dims
().
Slice
(
0
,
param
.
y_num_col_dims
).
production
());
int
n
=
static_cast
<
int
>
(
param
.
y
->
dims
()
.
Slice
(
param
.
y_num_col_dims
,
param
.
y
->
dims
().
size
())
.
production
());
void
Run
()
override
{
CHECK_EQ
(
x_w
,
y_h
)
<<
"x_w must be equal with y_h"
;
auto
&
param
=
Param
<
operators
::
MulParam
>
();
auto
k
=
x_w
;
core
::
dim2
x_shape
(
if
(
n
==
1
)
{
{
static_cast
<
int
>
(
lite
::
arm
::
math
::
sgemv
(
x_data
,
y_data
,
o_data
,
false
,
m
,
k
,
false
,
nullptr
,
param
.
x
->
dims
().
Slice
(
0
,
param
.
x_num_col_dims
).
production
()),
false
);
static_cast
<
int
>
(
param
.
x
->
dims
()
.
Slice
(
param
.
x_num_col_dims
,
param
.
x
->
dims
().
size
())
.
production
())});
core
::
dim2
y_shape
(
{
static_cast
<
int
>
(
param
.
y
->
dims
().
Slice
(
0
,
param
.
y_num_col_dims
).
production
()),
static_cast
<
int
>
(
param
.
y
->
dims
()
.
Slice
(
param
.
y_num_col_dims
,
param
.
y
->
dims
().
size
())
.
production
())});
mul_compute_eigen
(
param
.
x
->
data
<
float
>
(),
x_shape
.
x
,
x_shape
.
y
,
//
}
else
{
param
.
y
->
data
<
float
>
(),
y_shape
.
x
,
y_shape
.
y
,
//
constexpr
bool
is_tranposed_y
=
false
;
param
.
output
->
mutable_data
<
float
>
());
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
}
virtual
~
MulCompute
()
=
default
;
float
*
packed_x
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
};
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
lite
::
arm
::
math
::
prepackA
(
packed_x
,
x_data
,
k
,
0
,
m
,
0
,
k
,
false
,
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_x
,
y_data
,
nullptr
,
o_data
,
m
,
n
,
k
,
false
,
false
,
is_tranposed_y
,
&
ctx
);
}
}
}
// namespace arm
}
// namespace arm
}
// namespace kernels
}
// namespace kernels
...
...
paddle/fluid/lite/kernels/arm/mul_compute.h
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
// ARM float kernel for the mul operator.
class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  // Parameter bundle this kernel reads through Param<param_t>().
  using param_t = operators::MulParam;

  virtual ~MulCompute() = default;

  // One-time preparation hook invoked before the first Run().
  void PrepareForRun() override;

  // Executes the kernel on the currently set parameters.
  void Run() override;
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/mul_compute.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
// Fills a[0 .. n) with pseudo-random values scaled into the range spanned by
// `lower` and `upper` (defaults: -2 and 2).
//
// Every call seeds a fresh std::mt19937 from a function-local static counter
// that starts at 100 and is incremented per call, so successive calls produce
// different but reproducible data. When `lower == upper` every element is
// set to that value.
//
//   a      - destination buffer with room for at least n elements
//   n      - number of elements to write; nothing is written when n <= 0
//   lower  - value produced when the underlying uniform sample is 0
//   upper  - value approached as the underlying uniform sample nears 1
template <typename T>
void FillData(T* a, const int n, const T lower = static_cast<T>(-2.f),
              const T upper = static_cast<T>(2.f)) {
  static unsigned int seed = 100;
  std::mt19937 engine(seed++);
  std::uniform_real_distribution<double> unit_interval(0, 1);
  // Width of the target range, evaluated in T exactly as the expression
  // `upper - lower` would be inside the loop.
  const T span = upper - lower;
  T* out = a;
  for (int remaining = n; remaining > 0; --remaining) {
    *out++ = static_cast<T>(unit_interval(engine) * span + lower);
  }
}
// The kernel registry must hand back at least one non-null kARM/kFloat
// kernel for the op name "mul".
TEST(mul_arm, retrive_op) {
  auto kernels =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("mul");
  ASSERT_FALSE(kernels.empty());
  ASSERT_TRUE(kernels.front());
}
// A default-constructed kernel must report float precision and the ARM
// target.
TEST(mul_arm, init) {
  MulCompute kernel;
  ASSERT_EQ(kernel.precision(), PRECISION(kFloat));
  ASSERT_EQ(kernel.target(), TARGET(kARM));
}
// Multiplies an (m x k) matrix by a (k x n) matrix with MulCompute for every
// m, n, k in {1, 2, 3, 4} and compares the result element-wise against
// lite::arm::math::mul_compute_eigen.
TEST(mul_arm, compare_test) {
  using T = float;

  for (int m : {1, 2, 3, 4}) {
    for (int n : {1, 2, 3, 4}) {
      for (int k : {1, 2, 3, 4}) {
        VLOG(3) << "m: " << m << ", n: " << n << ", k: " << k;

        lite::Tensor lhs, rhs, actual, expected;
        lhs.Resize({m, k});
        rhs.Resize({k, n});
        actual.Resize({m, n});
        expected.Resize({m, n});

        auto* lhs_data = lhs.mutable_data<T>();
        auto* rhs_data = rhs.mutable_data<T>();
        auto* actual_data = actual.mutable_data<T>();
        auto* expected_data = expected.mutable_data<T>();

        // Random operands; both result buffers start out zeroed
        // (lower == upper == 0).
        FillData<T>(lhs_data, lhs.dims().production());
        FillData<T>(rhs_data, rhs.dims().production());
        FillData<T>(actual_data, actual.dims().production(), 0, 0);
        FillData<T>(expected_data, expected.dims().production(), 0, 0);

        MulCompute mul;
        operators::MulParam param;
        param.x = &lhs;
        param.y = &rhs;
        param.output = &actual;

        DeviceInfo::Init();
        std::unique_ptr<KernelContext> ctx(new KernelContext);
        ctx->As<ARMContext>();
        mul.SetParam(param);
        mul.SetContext(std::move(ctx));

        mul.PrepareForRun();
        mul.Run();

        lite::arm::math::mul_compute_eigen(lhs_data, m, k, rhs_data, k, n,
                                           expected_data);

        const auto out_count = actual.dims().production();
        for (int i = 0; i < out_count; ++i) {
          EXPECT_NEAR(actual_data[i], expected_data[i], 1e-3);
        }
      }
    }
  }
}
// Checks the flattening controlled by x_num_col_dims / y_num_col_dims:
// X of shape {2, 3, 4} with x_num_col_dims = 1 and Y of shape {3, 4, 5} with
// y_num_col_dims = 2 are compared against a plain (2 x 12) * (12 x 5)
// product computed by lite::arm::math::mul_compute_eigen.
TEST(mul_arm, num_col_dims) {
  using T = float;

  lite::Tensor lhs, rhs, actual, expected;
  lhs.Resize({2, 3, 4});
  rhs.Resize({3, 4, 5});
  actual.Resize({2, 5});
  expected.Resize({2, 5});

  auto* lhs_data = lhs.mutable_data<T>();
  auto* rhs_data = rhs.mutable_data<T>();
  auto* actual_data = actual.mutable_data<T>();
  auto* expected_data = expected.mutable_data<T>();

  // Note: unlike compare_test, both result buffers are pre-filled with
  // random values here; the element count for both comes from the kernel
  // output tensor.
  FillData<T>(lhs_data, lhs.dims().production());
  FillData<T>(rhs_data, rhs.dims().production());
  FillData<T>(actual_data, actual.dims().production());
  FillData<T>(expected_data, actual.dims().production());

  MulCompute mul;
  operators::MulParam param;
  param.x = &lhs;
  param.y = &rhs;
  param.output = &actual;
  param.x_num_col_dims = 1;
  param.y_num_col_dims = 2;

  DeviceInfo::Init();
  std::unique_ptr<KernelContext> ctx(new KernelContext);
  ctx->As<ARMContext>();
  mul.SetParam(param);
  mul.SetContext(std::move(ctx));

  mul.PrepareForRun();
  mul.Run();

  lite::arm::math::mul_compute_eigen(lhs_data, 2, 12, rhs_data, 12, 5,
                                     expected_data);

  const auto out_count = actual.dims().production();
  for (int i = 0; i < out_count; ++i) {
    EXPECT_NEAR(actual_data[i], expected_data[i], 1e-3);
  }
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/arm/pool_compute_test.cc
浏览文件 @
ace19269
...
@@ -182,7 +182,7 @@ TEST(pool_arm, compute) {
...
@@ -182,7 +182,7 @@ TEST(pool_arm, compute) {
for
(
auto
stride
:
{
2
})
{
for
(
auto
stride
:
{
2
})
{
for
(
auto
pad
:
{
0
})
{
for
(
auto
pad
:
{
0
})
{
for
(
auto
n
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
n
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
c
:
{
1
,
3
,
11
,
4
,
1024
})
{
for
(
auto
c
:
{
1
,
3
,
11
/* ,1024 */
})
{
// speedup for ci
for
(
auto
h
:
{
3
,
1
,
11
,
4
,
1
})
{
for
(
auto
h
:
{
3
,
1
,
11
,
4
,
1
})
{
for
(
auto
w
:
{
1
,
3
,
4
,
12
,
1
})
{
for
(
auto
w
:
{
1
,
3
,
4
,
12
,
1
})
{
VLOG
(
3
)
<<
"n:"
<<
n
<<
" c:"
<<
c
<<
" h:"
<<
h
<<
" w:"
<<
w
VLOG
(
3
)
<<
"n:"
<<
n
<<
" c:"
<<
c
<<
" h:"
<<
h
<<
" w:"
<<
w
...
...
paddle/fluid/lite/kernels/arm/scale_compute_test.cc
浏览文件 @
ace19269
...
@@ -54,6 +54,15 @@ TEST(scale_arm, compute) {
...
@@ -54,6 +54,15 @@ TEST(scale_arm, compute) {
lite
::
Tensor
output
;
lite
::
Tensor
output
;
lite
::
Tensor
output_ref
;
lite
::
Tensor
output_ref
;
#if 1 // for ci speedup
for
(
auto
n
:
{
1
,
3
})
{
for
(
auto
c
:
{
1
,
3
})
{
for
(
auto
h
:
{
3
,
4
})
{
for
(
auto
w
:
{
4
,
3
})
{
for
(
auto
bias_after_scale
:
{
true
,
false
})
{
for
(
auto
s
:
{
-
1.0
f
,
0.13
f
})
{
for
(
auto
b
:
{
-
15.
f
,
0.11234
f
})
{
#else
for
(
auto
n
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
n
:
{
1
,
3
,
4
,
11
})
{
for
(
auto
c
:
{
1
,
3
,
11
,
4
})
{
for
(
auto
c
:
{
1
,
3
,
11
,
4
})
{
for
(
auto
h
:
{
3
,
1
,
11
,
4
})
{
for
(
auto
h
:
{
3
,
1
,
11
,
4
})
{
...
@@ -61,6 +70,8 @@ TEST(scale_arm, compute) {
...
@@ -61,6 +70,8 @@ TEST(scale_arm, compute) {
for
(
auto
bias_after_scale
:
{
true
,
false
})
{
for
(
auto
bias_after_scale
:
{
true
,
false
})
{
for
(
auto
s
:
{
-
100.25
f
,
-
1.0
f
,
0.13
f
,
3840.975
f
})
{
for
(
auto
s
:
{
-
100.25
f
,
-
1.0
f
,
0.13
f
,
3840.975
f
})
{
for
(
auto
b
:
{
-
3075.495
f
,
-
15.
f
,
0.11234
f
,
128.15
f
})
{
for
(
auto
b
:
{
-
3075.495
f
,
-
15.
f
,
0.11234
f
,
128.15
f
})
{
#endif
x
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
x
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
output
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
output
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
output_ref
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
output_ref
.
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})));
...
...
paddle/fluid/lite/kernels/cuda/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -9,3 +9,4 @@ cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
...
@@ -9,3 +9,4 @@ cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
nv_library
(
kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite
)
nv_library
(
kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite
)
paddle/fluid/lite/kernels/host/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -13,3 +13,4 @@ set(host_kernels
...
@@ -13,3 +13,4 @@ set(host_kernels
)
)
set
(
host_kernels
"
${
host_kernels
}
"
CACHE GLOBAL
"host kernels"
)
set
(
host_kernels
"
${
host_kernels
}
"
CACHE GLOBAL
"host kernels"
)
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -35,3 +35,4 @@ set(x86_kernels
...
@@ -35,3 +35,4 @@ set(x86_kernels
)
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
paddle/fluid/lite/kernels/x86/sgd_compute.cc
浏览文件 @
ace19269
...
@@ -29,9 +29,9 @@ class SGDCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -29,9 +29,9 @@ class SGDCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
using
param_t
=
operators
::
ActivationParam
;
using
param_t
=
operators
::
ActivationParam
;
void
Run
()
override
{
void
Run
()
override
{
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
auto
&
sgd_param
=
*
param_
.
get_mutable
<
operators
::
SGDParam
>
();
auto
&
sgd_param
=
*
param_
.
get_mutable
<
operators
::
SGDParam
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
()
);
// param.Out->template mutable_data<T>();
// param.Out->template mutable_data<T>();
...
@@ -45,12 +45,12 @@ class SGDCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -45,12 +45,12 @@ class SGDCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
PADDLE_ENFORCE_EQ
(
grad
->
numel
(),
sz
);
PADDLE_ENFORCE_EQ
(
grad
->
numel
(),
sz
);
paddle
::
operators
::
jit
::
sgd_attr_t
attr
(
1
,
sz
,
1
,
sz
,
1
);
paddle
::
operators
::
jit
::
sgd_attr_t
attr
(
1
,
sz
,
1
,
sz
,
1
);
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
const
T
*
lr
=
learning_rate
->
template
data
<
T
>();
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
param_data
=
param
->
template
data
<
T
>();
const
T
*
grad_data
=
grad
->
data
<
T
>
();
const
T
*
grad_data
=
grad
->
template
data
<
T
>();
int64_t
rows_idx
=
0
;
int64_t
rows_idx
=
0
;
T
*
out_data
=
T
*
out_data
=
param_out
->
template
mutable_data
<
T
>(
param_out
->
mutable_data
<
T
>
(
context
.
x86_device_context
->
GetPlace
());
context
.
x86_device_context
()
->
GetPlace
());
auto
sgd
=
auto
sgd
=
paddle
::
operators
::
jit
::
KernelFuncs
<
paddle
::
operators
::
jit
::
SgdTuple
<
T
>
,
paddle
::
operators
::
jit
::
KernelFuncs
<
paddle
::
operators
::
jit
::
SgdTuple
<
T
>
,
...
...
paddle/fluid/lite/model_parser/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -27,3 +27,4 @@ lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_des
...
@@ -27,3 +27,4 @@ lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_des
add_subdirectory
(
pb
)
add_subdirectory
(
pb
)
add_subdirectory
(
cpp
)
add_subdirectory
(
cpp
)
paddle/fluid/lite/model_parser/cpp/CMakeLists.txt
浏览文件 @
ace19269
cc_library
(
cpp_op_desc_lite SRCS op_desc.cc DEPS any_lite
)
cc_library
(
cpp_op_desc_lite SRCS op_desc.cc DEPS any_lite
)
paddle/fluid/lite/model_parser/pb/CMakeLists.txt
浏览文件 @
ace19269
cc_library
(
var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite
)
cc_library
(
var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite
)
cc_library
(
op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite
)
cc_library
(
op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite
)
paddle/fluid/lite/operators/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -8,6 +8,7 @@ cc_library(mul_op_lite SRCS mul_op.cc DEPS ${op_DEPS})
...
@@ -8,6 +8,7 @@ cc_library(mul_op_lite SRCS mul_op.cc DEPS ${op_DEPS})
cc_library
(
scale_op_lite SRCS scale_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
scale_op_lite SRCS scale_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
softmax_op_lite SRCS softmax_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
softmax_op_lite SRCS softmax_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
reshape_op_lite SRCS reshape_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
reshape_op_lite SRCS reshape_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
batch_norm_op_lite SRCS batch_norm_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
feed_op_lite SRCS feed_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
feed_op_lite SRCS feed_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fetch_op_lite SRCS fetch_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fetch_op_lite SRCS fetch_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
io_copy_op_lite SRCS io_copy_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
io_copy_op_lite SRCS io_copy_op.cc DEPS
${
op_DEPS
}
)
...
@@ -30,6 +31,7 @@ set(ops_lite
...
@@ -30,6 +31,7 @@ set(ops_lite
scale_op_lite
scale_op_lite
softmax_op_lite
softmax_op_lite
reshape_op_lite
reshape_op_lite
batch_norm_op_lite
feed_op_lite
feed_op_lite
fetch_op_lite
fetch_op_lite
io_copy_op_lite
io_copy_op_lite
...
@@ -52,4 +54,6 @@ lite_cc_test(test_pool_op_lite SRCS pool_op_test.cc
...
@@ -52,4 +54,6 @@ lite_cc_test(test_pool_op_lite SRCS pool_op_test.cc
lite_cc_test
(
test_scale_op_lite SRCS scale_op_test.cc DEPS scale_op_lite memory_lite
)
lite_cc_test
(
test_scale_op_lite SRCS scale_op_test.cc DEPS scale_op_lite memory_lite
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
lite_cc_test
(
test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite
)
lite_cc_test
(
test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite
)
lite_cc_test
(
test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite
)
lite_cc_test
(
test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite
)
lite_cc_test
(
test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite
)
paddle/fluid/lite/operators/batch_norm_op.cc
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/batch_norm_op.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Validates the tensors attached to this batch_norm op before shape inference.
//
// Checks that every required input/output pointer is set, then checks the
// rank of X (2 to 5) and that Scale, Bias, Mean and Variance are rank 1.
// Returns true when all checks pass.
// NOTE(review): CHECK_OR_FALSE presumably makes this function return false
// when its argument is null, while CHECK/CHECK_EQ presumably abort - confirm
// against the macro definitions.
bool BatchNormOp::CheckShape() const {
  // Tensors needed in both inference and training mode.
  CHECK_OR_FALSE(param_.x);
  CHECK_OR_FALSE(param_.bias);
  CHECK_OR_FALSE(param_.scale);
  CHECK_OR_FALSE(param_.mean);
  CHECK_OR_FALSE(param_.variance);
  CHECK_OR_FALSE(param_.y);
  // The statistics outputs are only required when is_test is false.
  if (!param_.is_test) {
    CHECK_OR_FALSE(param_.mean_out);
    CHECK_OR_FALSE(param_.variance_out);
    CHECK_OR_FALSE(param_.saved_mean);
    CHECK_OR_FALSE(param_.saved_variance);
  }
  auto x_dims = param_.x->dims();
  auto scale_dims = param_.scale->dims();
  auto bias_dims = param_.bias->dims();
  auto mean_dims = param_.mean->dims();
  auto variance_dims = param_.variance->dims();
  // Rank checks only; the lengths of the rank-1 tensors are not compared
  // against the channel count of X here.
  CHECK(x_dims.size() >= 2 && x_dims.size() <= 5)
      << "Input X must have 2 to 5 dimensions.";
  CHECK_EQ(scale_dims.size(), 1UL) << "Input Scale must have 1 dimensions.";
  CHECK_EQ(bias_dims.size(), 1UL) << "Input Bias must have 1 dimensions.";
  CHECK_EQ(mean_dims.size(), 1UL) << "Input Mean must have 1 dimensions.";
  CHECK_EQ(variance_dims.size(), 1UL)
      << "Input Variance must have 1 dimensions.";
  return true;
}
bool
BatchNormOp
::
InferShape
()
const
{
auto
x_dims
=
param_
.
x
->
dims
();
int64_t
channel_size
=
0
;
switch
(
param_
.
data_layout
)
{
case
DATALAYOUT
(
kNCHW
):
channel_size
=
x_dims
[
1
];
break
;
// case DATALAYOUT(kNHWC):
// channel_size = x_dims[x_dims.size() - 1];
// break;
default:
LOG
(
FATAL
)
<<
"Unknown storage order: "
<<
DataLayoutToStr
(
param_
.
data_layout
);
break
;
}
if
(
!
param_
.
is_test
)
{
param_
.
mean_out
->
Resize
({
channel_size
});
param_
.
variance_out
->
Resize
({
channel_size
});
param_
.
saved_mean
->
Resize
({
channel_size
});
param_
.
saved_variance
->
Resize
({
channel_size
});
}
param_
.
y
->
Resize
(
x_dims
);
return
true
;
}
// Fills param_ from the op description: looks up the input/output tensors in
// `scope` by the variable names recorded in `op_desc` and copies the
// attributes (is_test, use_global_stats, epsilon, momentum).
//
// The statistics outputs (MeanOut, VarianceOut, SavedMean, SavedVariance) are
// only looked up when is_test is false. Only the "NCHW" data_layout is
// accepted; param_.data_layout itself is left at its default. Returns true on
// success; a variable that cannot be found in the scope fails a CHECK.
bool BatchNormOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
  // Resolve a variable name to its tensor. A failed lookup is reported with
  // the offending name instead of dereferencing a null pointer.
  auto find_tensor = [scope](const std::string &var_name) -> Tensor * {
    auto *var = scope->FindVar(var_name);
    CHECK(var) << "batch_norm: variable '" << var_name
               << "' is not found in the scope.";
    return var->GetMutable<Tensor>();
  };

  param_.x = find_tensor(op_desc.Input("X").front());
  param_.bias = find_tensor(op_desc.Input("Bias").front());
  param_.scale = find_tensor(op_desc.Input("Scale").front());
  param_.mean = find_tensor(op_desc.Input("Mean").front());
  param_.variance = find_tensor(op_desc.Input("Variance").front());
  param_.y = find_tensor(op_desc.Output("Y").front());

  param_.is_test = op_desc.GetAttr<bool>("is_test");
  param_.use_global_stats = op_desc.GetAttr<bool>("use_global_stats");

  // The running/saved statistics are only produced outside test mode.
  if (!param_.is_test) {
    param_.mean_out = find_tensor(op_desc.Output("MeanOut").front());
    param_.variance_out = find_tensor(op_desc.Output("VarianceOut").front());
    param_.saved_mean = find_tensor(op_desc.Output("SavedMean").front());
    param_.saved_variance =
        find_tensor(op_desc.Output("SavedVariance").front());
  }

  param_.epsilon = op_desc.GetAttr<float>("epsilon");
  param_.momentum = op_desc.GetAttr<float>("momentum");

  std::string data_layout = op_desc.GetAttr<std::string>("data_layout");
  CHECK_EQ(data_layout, "NCHW") << "TODO(hong19860320): Only support NCHW.";
  // param_.data_layout = StringToDataLayout(data_layout);
  return true;
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
batch_norm
,
paddle
::
lite
::
operators
::
BatchNormOp
);
paddle/fluid/lite/operators/batch_norm_op.h
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/scope.h"
#include "paddle/fluid/lite/utils/all.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Lite operator wrapper for batch_norm: validates and infers tensor shapes,
// binds tensors/attributes from an op description, and passes the resulting
// BatchNormParam to the kernel.
class BatchNormOp : public OpLite {
 public:
  BatchNormOp() {}
  // Constructs the op with the given op type name, forwarded to OpLite.
  explicit BatchNormOp(const std::string &op_type) : OpLite(op_type) {}

  // Checks that the required tensors are set and have the expected ranks.
  bool CheckShape() const override;

  // Resizes the output tensors based on the shape of input X.
  bool InferShape() const override;

  // Populates param_ from `opdesc`, resolving variables through `scope`.
  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;

  // Passes this op's parameters to `kernel` via SetParam.
  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }

  // Returns the fixed string "batch_norm".
  std::string DebugString() const override { return "batch_norm"; }

 private:
  // Declared mutable, so it can be modified from const member functions.
  mutable BatchNormParam param_;
};
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/batch_norm_op_test.cc
0 → 100644
浏览文件 @
ace19269
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/batch_norm_op.h"
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
// Inference-mode (is_test = true) shape test: after CheckShape/InferShape the
// output Y must have exactly the shape of input X.
TEST(batch_norm_op_lite, test) {
  // prepare variables
  Scope scope;
  auto* x = scope.Var("x")->GetMutable<Tensor>();
  auto* scale = scope.Var("scale")->GetMutable<Tensor>();
  auto* bias = scope.Var("bias")->GetMutable<Tensor>();
  auto* mean = scope.Var("mean")->GetMutable<Tensor>();
  auto* variance = scope.Var("variance")->GetMutable<Tensor>();
  auto* y = scope.Var("y")->GetMutable<Tensor>();
  x->Resize({2, 32, 10, 20});
  auto x_dims = x->dims();
  const int64_t channel_size = x_dims[1];  // NCHW
  scale->Resize({channel_size});
  bias->Resize({channel_size});
  mean->Resize({channel_size});
  variance->Resize({channel_size});

  // prepare op desc
  cpp::OpDesc desc;
  desc.SetType("batch_norm");
  desc.SetInput("X", {"x"});
  desc.SetInput("Scale", {"scale"});
  desc.SetInput("Bias", {"bias"});
  desc.SetInput("Mean", {"mean"});
  desc.SetInput("Variance", {"variance"});
  desc.SetOutput("Y", {"y"});
  desc.SetAttr("is_test", true);
  desc.SetAttr("use_global_stats", false);
  desc.SetAttr("epsilon", 1e-5f);
  desc.SetAttr("momentum", 0.9f);
  desc.SetAttr("data_layout", std::string("NCHW"));

  BatchNormOp batch_norm("batch_norm");

  batch_norm.SetValidPlaces({Place{TARGET(kHost), PRECISION(kFloat)}});
  batch_norm.Attach(desc, &scope);
  // Both calls report success through their bool result; a false return must
  // fail the test instead of being silently dropped.
  ASSERT_TRUE(batch_norm.CheckShape());
  ASSERT_TRUE(batch_norm.InferShape());

  // check output dims
  auto y_dims = y->dims();
  // Stop here on a rank mismatch so the loop below never indexes past x_dims.
  ASSERT_EQ(y_dims.size(), x_dims.size());
  for (size_t i = 0; i < y_dims.size(); i++) {
    EXPECT_EQ(y_dims[i], x_dims[i]);
  }
}
// Training-mode shape test. Despite the name, this case sets is_test to
// false, so the statistics outputs (MeanOut, VarianceOut, SavedMean,
// SavedVariance) are attached and must come out as rank-1 tensors of
// channel_size elements, while Y keeps the shape of X.
TEST(batch_norm_op_lite, test_enable_is_test) {
  // prepare variables
  Scope scope;
  auto* x = scope.Var("x")->GetMutable<Tensor>();
  auto* scale = scope.Var("scale")->GetMutable<Tensor>();
  auto* bias = scope.Var("bias")->GetMutable<Tensor>();
  auto* mean = scope.Var("mean")->GetMutable<Tensor>();
  auto* variance = scope.Var("variance")->GetMutable<Tensor>();
  auto* y = scope.Var("y")->GetMutable<Tensor>();
  auto* mean_out = scope.Var("mean_out")->GetMutable<Tensor>();
  auto* variance_out = scope.Var("variance_out")->GetMutable<Tensor>();
  auto* saved_mean = scope.Var("saved_mean")->GetMutable<Tensor>();
  auto* saved_variance = scope.Var("saved_variance")->GetMutable<Tensor>();
  x->Resize({2, 32, 10, 20});
  auto x_dims = x->dims();
  const int64_t channel_size = x_dims[1];  // NCHW
  scale->Resize({channel_size});
  bias->Resize({channel_size});
  mean->Resize({channel_size});
  variance->Resize({channel_size});

  // prepare op desc
  cpp::OpDesc desc;
  desc.SetType("batch_norm");
  desc.SetInput("X", {"x"});
  desc.SetInput("Scale", {"scale"});
  desc.SetInput("Bias", {"bias"});
  desc.SetInput("Mean", {"mean"});
  desc.SetInput("Variance", {"variance"});
  desc.SetOutput("Y", {"y"});
  desc.SetOutput("MeanOut", {"mean_out"});
  desc.SetOutput("VarianceOut", {"variance_out"});
  desc.SetOutput("SavedMean", {"saved_mean"});
  desc.SetOutput("SavedVariance", {"saved_variance"});
  desc.SetAttr("is_test", false);
  desc.SetAttr("use_global_stats", false);
  desc.SetAttr("epsilon", 1e-5f);
  desc.SetAttr("momentum", 0.9f);
  desc.SetAttr("data_layout", std::string("NCHW"));

  BatchNormOp batch_norm("batch_norm");

  batch_norm.SetValidPlaces({Place{TARGET(kHost), PRECISION(kFloat)}});
  batch_norm.Attach(desc, &scope);
  // Both calls report success through their bool result; a false return must
  // fail the test instead of being silently dropped.
  ASSERT_TRUE(batch_norm.CheckShape());
  ASSERT_TRUE(batch_norm.InferShape());

  // check output dims
  auto y_dims = y->dims();
  // Stop here on a rank mismatch so the loop below never indexes past x_dims.
  ASSERT_EQ(y_dims.size(), x_dims.size());
  for (size_t i = 0; i < y_dims.size(); i++) {
    EXPECT_EQ(y_dims[i], x_dims[i]);
  }

  // check the statistics outputs: each must be rank 1 with channel_size
  // elements. The rank is asserted before element 0 is read.
  auto mean_out_dims = mean_out->dims();
  auto variance_out_dims = variance_out->dims();
  auto saved_mean_dims = saved_mean->dims();
  auto saved_variance_dims = saved_variance->dims();
  ASSERT_EQ(mean_out_dims.size(), 1UL);
  ASSERT_EQ(variance_out_dims.size(), 1UL);
  ASSERT_EQ(saved_mean_dims.size(), 1UL);
  ASSERT_EQ(saved_variance_dims.size(), 1UL);
  EXPECT_EQ(mean_out_dims[0], channel_size);
  EXPECT_EQ(variance_out_dims[0], channel_size);
  EXPECT_EQ(saved_mean_dims[0], channel_size);
  EXPECT_EQ(saved_variance_dims[0], channel_size);
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/operators/op_params.h
浏览文件 @
ace19269
...
@@ -57,6 +57,7 @@ struct FcParam {
...
@@ -57,6 +57,7 @@ struct FcParam {
lite
::
Tensor
*
output
{};
lite
::
Tensor
*
output
{};
lite
::
DDim
in_mat_dims
;
lite
::
DDim
in_mat_dims
;
int
in_num_col_dims
{
1
};
int
in_num_col_dims
{
1
};
bool
weight_transposed
{
false
};
};
};
struct
ReluParam
{
struct
ReluParam
{
...
@@ -145,6 +146,25 @@ struct ConvParam {
...
@@ -145,6 +146,25 @@ struct ConvParam {
std
::
string
data_format
{
"Anylayout"
};
std
::
string
data_format
{
"Anylayout"
};
};
};
// For BatchNorm op
// Parameter bundle passed from BatchNormOp to its kernels. Every member has a
// default, so a default-constructed param never carries indeterminate values.
struct BatchNormParam {
  // Inputs.
  lite::Tensor* x{};
  lite::Tensor* bias{};
  lite::Tensor* scale{};
  lite::Tensor* mean{};
  lite::Tensor* variance{};
  // Output.
  lite::Tensor* y{};
  // Statistics outputs; BatchNormOp only attaches them when is_test is false.
  lite::Tensor* mean_out{};
  lite::Tensor* variance_out{};
  lite::Tensor* saved_mean{};
  lite::Tensor* saved_variance{};
  bool is_test{true};
  bool use_global_stats{false};
  // Previously uninitialized. The defaults below are the values the op test
  // in this change passes as attributes; BatchNormOp::AttachImpl overwrites
  // them from the op description.
  float epsilon{1e-5f};
  float momentum{0.9f};
  DataLayoutType data_layout{DATALAYOUT(kNCHW)};
};
// For Pooling op
// For Pooling op
struct
PoolParam
{
struct
PoolParam
{
lite
::
Tensor
*
x
{};
lite
::
Tensor
*
x
{};
...
...
paddle/fluid/lite/operators/pool_op_test.cc
浏览文件 @
ace19269
...
@@ -74,7 +74,11 @@ TEST(pool_op_lite, test) {
...
@@ -74,7 +74,11 @@ TEST(pool_op_lite, test) {
pool
.
Attach
(
desc
,
&
scope
);
pool
.
Attach
(
desc
,
&
scope
);
auto
kernels
=
pool
.
CreateKernels
({
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
auto
kernels
=
pool
.
CreateKernels
({
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
LOG
(
INFO
)
<<
"kernels.size(): "
<<
kernels
.
size
();
LOG
(
INFO
)
<<
"kernels.size(): "
<<
kernels
.
size
();
#ifdef LITE_WITH_ARM
ASSERT_FALSE
(
kernels
.
empty
());
ASSERT_FALSE
(
kernels
.
empty
());
#else
ASSERT_TRUE
(
kernels
.
empty
());
#endif
}
}
}
// namespace operators
}
// namespace operators
...
...
paddle/fluid/lite/operators/split_op.cc
浏览文件 @
ace19269
...
@@ -37,7 +37,7 @@ bool SplitOp::InferShape() const {
...
@@ -37,7 +37,7 @@ bool SplitOp::InferShape() const {
const
auto
&
sections
=
param_
.
sections
;
const
auto
&
sections
=
param_
.
sections
;
const
int
outs_number
=
outs
.
size
();
const
int
outs_number
=
outs
.
size
();
std
::
vector
<
lite
::
DDim
Lite
>
outs_dims
;
std
::
vector
<
lite
::
DDim
Hvy
>
outs_dims
;
outs_dims
.
reserve
(
outs_number
);
outs_dims
.
reserve
(
outs_number
);
if
(
num
>
0
)
{
if
(
num
>
0
)
{
...
...
paddle/fluid/lite/tools/Dockerfile.mobile
浏览文件 @
ace19269
...
@@ -88,4 +88,4 @@ RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
...
@@ -88,4 +88,4 @@ RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit
RUN apt-get autoremove -y && apt-get clean
RUN apt-get autoremove -y && apt-get clean
RUN rm -rf /sdk-tools-linux-4333796.zip /tmp/android-ndk-r17c-linux-x86_64.zip /cmake-3.10.3-Linux-x86_64.tar.gz
RUN rm -rf /sdk-tools-linux-4333796.zip /tmp/android-ndk-r17c-linux-x86_64.zip /cmake-3.10.3-Linux-x86_64.tar.gz
\ No newline at end of file
paddle/fluid/lite/tools/build.sh
浏览文件 @
ace19269
...
@@ -13,6 +13,11 @@ function prepare_for_codegen {
...
@@ -13,6 +13,11 @@ function prepare_for_codegen {
mkdir
-p
./paddle/fluid/lite/gen_code
mkdir
-p
./paddle/fluid/lite/gen_code
touch
./paddle/fluid/lite/gen_code/__generated_code__.cc
touch
./paddle/fluid/lite/gen_code/__generated_code__.cc
}
}
# Exits the script with status -1 unless the one-line summary of the latest
# commit contains "test=develop". The matching line, if any, is echoed by grep.
function check_need_ci {
    if ! git log -1 --oneline | grep "test=develop"; then
        exit -1
    fi
}
function
cmake_x86
{
function
cmake_x86
{
prepare_for_codegen
prepare_for_codegen
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
...
@@ -28,6 +33,17 @@ function cmake_gpu {
...
@@ -28,6 +33,17 @@ function cmake_gpu {
cmake ..
" -DWITH_GPU=ON {common_flags} -DLITE_WITH_GPU=ON"
cmake ..
" -DWITH_GPU=ON {common_flags} -DLITE_WITH_GPU=ON"
}
}
# Runs every pre-commit hook over the whole tree. When a hook fails, prints
# the resulting diff and exits the script with status 1.
function check_style {
    # Look in /usr/bin first when resolving the tools below.
    export PATH=/usr/bin:$PATH
    #pre-commit install
    clang-format --version

    # Success: nothing more to do.
    pre-commit run -a && return

    git diff
    exit 1
}
function
cmake_arm
{
function
cmake_arm
{
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
# $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
...
@@ -43,10 +59,15 @@ function cmake_arm {
...
@@ -43,10 +59,15 @@ function cmake_arm {
-DARM_TARGET_OS
=
$1
-DARM_TARGET_ARCH_ABI
=
$2
-DARM_TARGET_OS
=
$1
-DARM_TARGET_ARCH_ABI
=
$2
}
}
# build_single <make_target>
# Builds one make target with a fixed 8 parallel jobs.
function build_single {
    # An earlier variant sized the job count from the machine:
    #make $1 -j$(expr $(nproc) - 2)
    local target=$1
    make $target -j8
}
function
build
{
function
build
{
file
=
$1
file
=
$1
for
_test
in
$(
cat
$file
)
;
do
for
_test
in
$(
cat
$file
)
;
do
make
$_test
-j
$(
expr
$(
nproc
)
- 2
)
build_single
$_test
done
done
}
}
...
@@ -58,44 +79,12 @@ function test_lite {
...
@@ -58,44 +79,12 @@ function test_lite {
for
_test
in
$(
cat
$file
)
;
do
for
_test
in
$(
cat
$file
)
;
do
# We move the build phase here to make the 'gen_code' test compiles after the
# We move the build phase here to make the 'gen_code' test compiles after the
# corresponding test is executed and the C++ code generates.
# corresponding test is executed and the C++ code generates.
make
$_test
-j
$(
expr
$(
nproc
)
- 2
)
#make $_test -j$(expr $(nproc) - 2)
make
$_test
-j8
ctest
-R
$_test
-V
ctest
-R
$_test
-V
done
done
}
}
port_armv8
=
5554
port_armv7
=
5556
# Run test on android
function
test_lite_android
{
local
file
=
$1
local
adb_abi
=
$2
local
port
=
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
port
=
${
port_armv7
}
fi
if
[[
${
adb_abi
}
==
"arm64-v8a"
]]
;
then
port
=
${
port_armv8
}
fi
if
[[
"
${
port
}
x"
==
"x"
]]
;
then
echo
"Port can not be empty"
exit
1
fi
echo
"file:
${
file
}
"
# push all to adb and test
adb_work_dir
=
"/data/local/tmp"
skip_list
=
"test_model_parser_lite"
for
_test
in
$(
cat
$file
)
;
do
[[
$skip_list
=
~
(
^|[[:space:]]
)
$_test
(
$|
[[
:space:]]
)
]]
&&
continue
||
echo
'skip $_test'
testpath
=
$(
find ./paddle/fluid
-name
${
_test
}
)
adb
-s
emulator-
${
port
}
push
${
testpath
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
shell
chmod
+x
"
${
adb_work_dir
}
/
${
_test
}
"
adb
-s
emulator-
${
port
}
shell
"./
${
adb_work_dir
}
/
${
_test
}
"
done
}
# Build the code and run lite server tests. This is executed in the CI system.
# Build the code and run lite server tests. This is executed in the CI system.
function
build_test_server
{
function
build_test_server
{
mkdir
-p
./build
mkdir
-p
./build
...
@@ -108,8 +97,34 @@ function build_test_server {
...
@@ -108,8 +97,34 @@ function build_test_server {
build
$LIBS_FILE
build
$LIBS_FILE
}
}
# Build the code and run lite server tests. This is executed in the CI system.
# test_arm_android <some_test_name> <adb_port_number>
# Pushes one test binary to the Android emulator reachable as
# emulator-<adb_port_number> and runs it there. Exits the script with status 1
# when either argument is empty. Tests named in skip_list are not run.
function test_arm_android {
    local test_name=$1
    local port=$2
    if [[ "${test_name}x" == "x" ]]; then
        echo "test_name can not be empty"
        exit 1
    fi
    if [[ "${port}x" == "x" ]]; then
        echo "Port can not be empty"
        exit 1
    fi

    echo "test name: ${test_name}"
    local adb_work_dir="/data/local/tmp"
    local skip_list="test_model_parser_lite" # add more with space

    # Match the test name as a whole word of skip_list. Leave with `return`:
    # this function is not a loop body, so `continue` is not meaningful here.
    if [[ $skip_list =~ (^|[[:space:]])$test_name($|[[:space:]]) ]]; then
        echo "skip ${test_name}"
        return 0
    fi

    local testpath
    testpath=$(find ./paddle/fluid -name ${test_name})
    adb -s emulator-${port} push ${testpath} ${adb_work_dir}
    adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
    # adb_work_dir is absolute; run the binary by that path so the result does
    # not depend on the remote shell's working directory.
    adb -s emulator-${port} shell "${adb_work_dir}/${test_name}"
}
# Build the code and run lite arm tests. This is executed in the CI system.
function
build_test_arm
{
function
build_test_arm
{
port_armv8
=
5554
port_armv7
=
5556
adb kill-server
adb kill-server
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
# start android arm64-v8a armeabi-v7a emulators first
# start android arm64-v8a armeabi-v7a emulators first
...
@@ -122,6 +137,7 @@ function build_test_arm {
...
@@ -122,6 +137,7 @@ function build_test_arm {
for
os
in
"android"
"armlinux"
;
do
for
os
in
"android"
"armlinux"
;
do
for
abi
in
"arm64-v8a"
"armeabi-v7a"
"armeabi-v7a-hf"
;
do
for
abi
in
"arm64-v8a"
"armeabi-v7a"
"armeabi-v7a-hf"
;
do
# TODO(TJ): enable compile on v7-hf on android and all v7 on armlinux
if
[[
${
abi
}
==
"armeabi-v7a-hf"
]]
;
then
if
[[
${
abi
}
==
"armeabi-v7a-hf"
]]
;
then
echo
"armeabi-v7a-hf is not supported on both android and armlinux"
echo
"armeabi-v7a-hf is not supported on both android and armlinux"
continue
continue
...
@@ -138,17 +154,30 @@ function build_test_arm {
...
@@ -138,17 +154,30 @@ function build_test_arm {
cmake_arm
${
os
}
${
abi
}
cmake_arm
${
os
}
${
abi
}
build
$TESTS_FILE
build
$TESTS_FILE
# armlinux need in another docker
# TODO(TJ): enable test with armlinux
if
[[
${
os
}
==
"android"
]]
;
then
if
[[
${
os
}
==
"android"
]]
;
then
adb_abi
=
${
abi
}
adb_abi
=
${
abi
}
if
[[
${
adb_abi
}
==
"armeabi-v7a-hf"
]]
;
then
if
[[
${
adb_abi
}
==
"armeabi-v7a-hf"
]]
;
then
adb_abi
=
"armeabi-v7a"
adb_abi
=
"armeabi-v7a"
fi
fi
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
# skip v7 tests
# skip all armv7 tests
# TODO(TJ): enable test with armv7
continue
continue
fi
fi
test_lite_android
$TESTS_FILE
${
adb_abi
}
local
port
=
# armlinux need in another docker
if
[[
${
adb_abi
}
==
"armeabi-v7a"
]]
;
then
port
=
${
port_armv7
}
fi
if
[[
${
adb_abi
}
==
"arm64-v8a"
]]
;
then
port
=
${
port_armv8
}
fi
echo
"test file:
${
TESTS_FILE
}
"
for
_test
in
$(
cat
$TESTS_FILE
)
;
do
test_arm_android
$_test
$port
done
fi
fi
cd
-
cd
-
done
done
...
@@ -164,12 +193,13 @@ function print_usage {
...
@@ -164,12 +193,13 @@ function print_usage {
echo
"----------------------------------------"
echo
"----------------------------------------"
echo
-e
"cmake_x86: run cmake with X86 mode"
echo
-e
"cmake_x86: run cmake with X86 mode"
echo
-e
"cmake_cuda: run cmake with CUDA mode"
echo
-e
"cmake_cuda: run cmake with CUDA mode"
echo
-e
"cmake_arm: run cmake with ARM mode"
echo
-e
"
--arm_os=<os> --arm_abi=<abi>
cmake_arm: run cmake with ARM mode"
echo
echo
echo
-e
"build: compile the tests"
echo
-e
"build: compile the tests"
echo
-e
"--test_name=<test_name> build_single: compile single test"
echo
echo
echo
-e
"test_server: run server tests"
echo
-e
"test_server: run server tests"
echo
-e
"
test_mobile: run mobile tests
"
echo
-e
"
--test_name=<test_name> --adb_port_number=<adb_port_number> test_arm_android: run arm test
"
echo
"----------------------------------------"
echo
"----------------------------------------"
echo
echo
}
}
...
@@ -182,11 +212,31 @@ function main {
...
@@ -182,11 +212,31 @@ function main {
TESTS_FILE
=
"
${
i
#*=
}
"
TESTS_FILE
=
"
${
i
#*=
}
"
shift
shift
;;
;;
--test_name
=
*
)
TEST_NAME
=
"
${
i
#*=
}
"
shift
;;
--arm_os
=
*
)
ARM_OS
=
"
${
i
#*=
}
"
shift
;;
--arm_abi
=
*
)
ARM_ABI
=
"
${
i
#*=
}
"
shift
;;
--arm_port
=
*
)
ARM_PORT
=
"
${
i
#*=
}
"
shift
;;
build
)
build
)
build
$TESTS_FILE
build
$TESTS_FILE
build
$LIBS_FILE
build
$LIBS_FILE
shift
shift
;;
;;
build_single
)
build_single
$TEST_NAME
shift
;;
cmake_x86
)
cmake_x86
)
cmake_x86
cmake_x86
shift
shift
...
@@ -196,15 +246,15 @@ function main {
...
@@ -196,15 +246,15 @@ function main {
shift
shift
;;
;;
cmake_arm
)
cmake_arm
)
cmake_arm
$
2
$3
cmake_arm
$
ARM_OS
$ARM_ABI
shift
shift
;;
;;
test_server
)
test_server
)
test_lite
$TESTS_FILE
test_lite
$TESTS_FILE
shift
shift
;;
;;
test_
mobile
)
test_
arm_android
)
test_
lite
$TESTS_FILE
test_
arm_android
$TEST_NAME
$ARM_PORT
shift
shift
;;
;;
build_test_server
)
build_test_server
)
...
@@ -215,6 +265,14 @@ function main {
...
@@ -215,6 +265,14 @@ function main {
build_test_arm
build_test_arm
shift
shift
;;
;;
check_style
)
check_style
shift
;;
check_need_ci
)
check_need_ci
shift
;;
*
)
*
)
# unknown option
# unknown option
print_usage
print_usage
...
@@ -224,7 +282,5 @@ function main {
...
@@ -224,7 +282,5 @@ function main {
done
done
}
}
print_usage
main
$@
main
$@
paddle/fluid/lite/utils/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -9,3 +9,4 @@ set(utils_DEPS glog)
...
@@ -9,3 +9,4 @@ set(utils_DEPS glog)
lite_cc_test
(
test_varient SRCS varient_test.cc DEPS utils_lite
)
lite_cc_test
(
test_varient SRCS varient_test.cc DEPS utils_lite
)
cc_library
(
any_lite SRCS any.cc
)
cc_library
(
any_lite SRCS any.cc
)
cc_library
(
utils_lite SRCS cp_logging.cc string.cc DEPS
${
utils_DEPS
}
any_lite
)
cc_library
(
utils_lite SRCS cp_logging.cc string.cc DEPS
${
utils_DEPS
}
any_lite
)
paddle/fluid/lite/x86/CMakeLists.txt
浏览文件 @
ace19269
...
@@ -4,3 +4,4 @@ endif()
...
@@ -4,3 +4,4 @@ endif()
cc_library
(
target_wrapper_x86 SRCS target_wrapper.cc
)
cc_library
(
target_wrapper_x86 SRCS target_wrapper.cc
)
python/paddle/proto/__init__.py
0 → 100644
浏览文件 @
ace19269
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle.proto.TrainerConfig_pb2
import
OptimizationConfig
,
TrainerConfig
from
paddle.proto.ModelConfig_pb2
import
ModelConfig
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录