Commit a95dcea5 — PaddlePaddle / Paddle

Authored June 23, 2019 by hong19860320

    Merge branch 'incubate/lite' of http://10.87.145.36/inference/paddlelite into hongming/fix_pooling

Parents: 54f421c6, c8deaaa9
Showing 63 changed files with 4,083 additions and 296 deletions (+4083, -296).
.gitlab-ci.yml (+11, -34)
CMakeLists.txt (+7, -0)
cmake/configure.cmake (+4, -0)
cmake/external/opencl-clhpp.cmake (+36, -0)
cmake/external/opencl-headers.cmake (+33, -0)
paddle/fluid/lite/CMakeLists.txt (+4, -2)
paddle/fluid/lite/api/CMakeLists.txt (+26, -27)
paddle/fluid/lite/api/apis_test.cc (+25, -10)
paddle/fluid/lite/api/cxx_api.cc (+54, -7)
paddle/fluid/lite/api/cxx_api.h (+30, -57)
paddle/fluid/lite/api/cxx_api_bin.cc (+1, -1)
paddle/fluid/lite/api/cxx_api_test.cc (+1, -1)
paddle/fluid/lite/api/inceptionv4_test.cc (+17, -5)
paddle/fluid/lite/api/light_api.cc (+64, -0)
paddle/fluid/lite/api/light_api.h (+11, -52)
paddle/fluid/lite/api/light_api_test.cc (+4, -2)
paddle/fluid/lite/api/lite_api_test_helper.cc (+1, -1)
paddle/fluid/lite/api/mobilenetv1_test.cc (+17, -5)
paddle/fluid/lite/api/mobilenetv2_test.cc (+17, -5)
paddle/fluid/lite/api/resnet50_test.cc (+17, -5)
paddle/fluid/lite/api/test_helper.h (+20, -9)
paddle/fluid/lite/arm/math/CMakeLists.txt (+2, -0)
paddle/fluid/lite/core/CMakeLists.txt (+1, -1)
paddle/fluid/lite/core/kernel.cc (+31, -0)
paddle/fluid/lite/core/kernel.h (+2, -24)
paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc (+2, -2)
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h (+1, -1)
paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc (+1, -1)
paddle/fluid/lite/core/op_registry.h (+2, -0)
paddle/fluid/lite/core/optimizer.h (+0, -1)
paddle/fluid/lite/core/target_wrapper.h (+15, -1)
paddle/fluid/lite/kernels/arm/CMakeLists.txt (+0, -2)
paddle/fluid/lite/kernels/arm/conv_compute.cc (+23, -0)
paddle/fluid/lite/kernels/arm/conv_compute.h (+19, -0)
paddle/fluid/lite/kernels/use_kernels.h (+20, -16)
paddle/fluid/lite/kernels/x86/CMakeLists.txt (+3, -4)
paddle/fluid/lite/opencl/CMakeLists.txt (+19, -0)
paddle/fluid/lite/opencl/cl2_header.h (+21, -0)
paddle/fluid/lite/opencl/cl_caller.cc (+88, -0)
paddle/fluid/lite/opencl/cl_caller.h (+30, -0)
paddle/fluid/lite/opencl/cl_context.cc (+73, -0)
paddle/fluid/lite/opencl/cl_context.h (+43, -0)
paddle/fluid/lite/opencl/cl_engine.cc (+171, -0)
paddle/fluid/lite/opencl/cl_engine.h (+96, -0)
paddle/fluid/lite/opencl/cl_half.cc (+518, -0)
paddle/fluid/lite/opencl/cl_half.h (+32, -0)
paddle/fluid/lite/opencl/cl_helper.cc (+90, -0)
paddle/fluid/lite/opencl/cl_helper.h (+52, -0)
paddle/fluid/lite/opencl/cl_image.cc (+164, -0)
paddle/fluid/lite/opencl/cl_image.h (+118, -0)
paddle/fluid/lite/opencl/cl_image_converter.cc (+450, -0)
paddle/fluid/lite/opencl/cl_image_converter.h (+115, -0)
paddle/fluid/lite/opencl/cl_kernel/cl_common.h (+34, -0)
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl (+27, -0)
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl (+91, -0)
paddle/fluid/lite/opencl/cl_test.cc (+154, -0)
paddle/fluid/lite/opencl/cl_tool.cc (+84, -0)
paddle/fluid/lite/opencl/cl_tool.h (+32, -0)
paddle/fluid/lite/opencl/cl_wrapper.cc (+962, -0)
paddle/fluid/lite/operators/use_ops.h (+4, -3)
paddle/fluid/lite/tools/build.sh (+71, -16)
paddle/fluid/lite/utils/io.h (+12, -1)
paddle/fluid/lite/utils/string.h (+10, -0)
.gitlab-ci.yml

@@ -76,8 +76,8 @@ build:mobile_android:
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - build.lite.android.armv8.gcc/third_party
      - build.lite.android.armv7.gcc/third_party
      - ~/.ccache
      - $CI_PROJECT_DIR/build_mobile_ccache
  script:

@@ -96,8 +96,9 @@ build:mobile_armlinux:
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - build.lite.armlinux.armv8.gcc
      - build.lite.armlinux.armv7.gcc
      - build.lite.armlinux.armv7hf.gcc
      - ~/.ccache
      - $CI_PROJECT_DIR/build_mobile_ccache2
  script:

@@ -107,24 +108,13 @@ build:mobile_armlinux:
  dependencies:
    - build:server
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - ~/.ccache

build:mobile_model_mobilenetv1:
  tags:
    - lite
  stage: build_mobile
  image: $MOBILE_LITE_DOCKER_IMAGE
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - ~/.ccache
  script:
    - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv1
    - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv1

@@ -135,8 +125,7 @@ build:mobile_model_mobilenetv1:
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - build.lite.android.armv8.gcc
      - ~/.ccache
      - $CI_PROJECT_DIR/build_mobile_model_mobilenetv1

@@ -145,12 +134,7 @@ build:mobile_model_mobilenetv2:
    - lite
  stage: build_mobile
  image: $MOBILE_LITE_DOCKER_IMAGE
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - ~/.ccache
  script:
    - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv2
    - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv2

@@ -161,8 +145,7 @@ build:mobile_model_mobilenetv2:
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - build.lite.android.armv8.gcc
      - ~/.ccache
      - $CI_PROJECT_DIR/build_mobile_model_mobilenetv2

@@ -171,12 +154,7 @@ build:mobile_model_resnet50:
    - lite
  stage: build_mobile
  image: $MOBILE_LITE_DOCKER_IMAGE
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - ~/.ccache
  script:
    - export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_resnet50
    - ./paddle/fluid/lite/tools/build.sh build_test_arm_model_resnet50

@@ -187,8 +165,7 @@ build:mobile_model_resnet50:
  cache:
    key: mobile_thirdparty
    paths:
      - $MOBILE_LITE_CACHE0
      - $MOBILE_LITE_CACHE1
      - build.lite.android.armv8.gcc
      - ~/.ccache
      - $CI_PROJECT_DIR/build_mobile_model_resnet50
CMakeLists.txt

@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
 option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
 option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
 option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
+option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
 option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
 option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)

@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
   include(external/eigen)  # download eigen3
   include(ccache)          # set ccache for compilation
+
+  # for opencl
+  if (LITE_WITH_OPENCL)
+    include(external/opencl-headers)
+    include(external/opencl-clhpp)
+  endif()

   include(generic)    # simplify cmake module
   include(configure)  # add paddle env configuration
cmake/configure.cmake

@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
   add_definitions("-DLITE_WITH_ARM")
 endif()

+if (LITE_WITH_OPENCL)
+  add_definitions("-DLITE_WITH_OPENCL")
+endif()
+
 if (LITE_WITH_PROFILE)
   add_definitions("-DLITE_WITH_PROFILE")
 endif()
cmake/external/opencl-clhpp.cmake (new file, mode 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

INCLUDE(ExternalProject)

SET(OPENCL_CLHPP_SRCS_DIR    ${THIRD_PARTY_PATH}/opencl-clhpp)
SET(OPENCL_CLHPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencl-clhpp)
SET(OPENCL_CLHPP_INCLUDE_DIR "${OPENCL_CLHPP_INSTALL_DIR}" CACHE PATH
    "opencl-clhpp include directory." FORCE)

INCLUDE_DIRECTORIES(${OPENCL_CLHPP_INCLUDE_DIR})

ExternalProject_Add(
  opencl_clhpp
  GIT_REPOSITORY   "https://github.com/KhronosGroup/OpenCL-CLHPP.git"
  GIT_TAG          "v2.0.10"
  PREFIX           "${OPENCL_CLHPP_SRCS_DIR}"
  CMAKE_ARGS       -DBUILD_DOCS=OFF
                   -DBUILD_EXAMPLES=OFF
                   -DBUILD_TESTS=OFF
                   -DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
  CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
                   -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)

ADD_DEPENDENCIES(opencl_clhpp opencl_headers)
cmake/external/opencl-headers.cmake (new file, mode 100644)

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License").

INCLUDE(ExternalProject)

SET(OPENCL_HEADERS_SRCS_DIR ${THIRD_PARTY_PATH}/opencl-headers)
SET(OPENCL_HEADERS_INCLUDE_DIR "${OPENCL_HEADERS_SRCS_DIR}/src/opencl_headers"
    CACHE PATH "opencl-headers include directory." FORCE)

INCLUDE_DIRECTORIES(${OPENCL_HEADERS_INCLUDE_DIR})

ExternalProject_Add(
  opencl_headers
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/KhronosGroup/OpenCL-Headers.git"
  GIT_TAG           "c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
  PREFIX            ${OPENCL_HEADERS_SRCS_DIR}
  DOWNLOAD_NAME     "OpenCL-Headers"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
)
paddle/fluid/lite/CMakeLists.txt

@@ -24,8 +24,7 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
       ${EXTERNAL_PROJECT_NAME}
       ${EXTERNAL_PROJECT_LOG_ARGS}
       PREFIX ${INSTALL_DIR}
       DOWNLOAD_COMMAND wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME}
                        && ${CMAKE_COMMAND} -E tar xzf ${INSTALL_DIR}/${FILENAME}
       DOWNLOAD_DIR ${INSTALL_DIR}
       DOWNLOAD_NO_PROGRESS 1
       CONFIGURE_COMMAND ""

@@ -143,6 +142,8 @@ function(lite_cc_binary TARGET)
       HVY_DEPS ${args_HVY_DEPS}
       )
   cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+  # collect targets need to compile for lite
+  add_dependencies(lite_compile_deps ${TARGET})
 endfunction()

 # Add a unit-test name to file for latter offline manual test.

@@ -181,6 +182,7 @@ add_subdirectory(x86)
 add_subdirectory(arm)
 add_subdirectory(host)
 add_subdirectory(cuda)
+add_subdirectory(opencl)
 add_subdirectory(model_parser)
 add_subdirectory(utils)
 add_subdirectory(api)
paddle/fluid/lite/api/CMakeLists.txt

@@ -12,7 +12,6 @@
     CUDA_DEPS kernels_cuda
     X86_DEPS ${x86_kernels})
-lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper)
 set(light_api_deps
     scope_lite target_wrapper_host model_parser_lite program_lite)

@@ -21,27 +20,34 @@
 if(LITE_WITH_CUDA)
   set(light_api_deps ${light_api_deps} target_wrapper_cuda)
 endif()
-lite_cc_library(light_api_lite SRCS light_api.cc
-    DEPS ${light_api_deps} ${ops_lite} ${host_kernels})

 message(STATUS "get ops ${ops_lite}")
 message(STATUS "get Host kernels ${host_kernels}")
 message(STATUS "get ARM kernels ${arm_kernels}")

+lite_cc_library(cxx_api_lite SRCS cxx_api.cc
+    DEPS ${cxx_api_lite_deps} ${ops_lite} ${host_kernels} program_lite)
+lite_cc_library(light_api_lite SRCS light_api.cc
+    DEPS scope_lite target_wrapper_host model_parser_lite
+         ${light_api_deps} ${ops_lite} ${host_kernels} program_lite
+    CUDA_DEPS target_wrapper_cuda
+    X86_DEPS ${x86_kernels} operator
+    ARM_DEPS ${arm_kernels})

 include(ExternalProject)
 set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING
     "A path setting inference demo download directories.")

 if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
   lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
       DEPS cxx_api_lite mir_passes lite_api_test_helper
            ${ops_lite} ${host_kernels} ${x86_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
            --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
   add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz)
 endif()
 if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
   set(lite_model_test_DEPS cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${arm_kernels})

@@ -68,25 +74,18 @@
 # These tests needs CLI arguments, and is not supported in ARM CI.
 # TODO(Superjomn) support latter.
 if (NOT LITE_ON_MOBILE)
   lite_cc_test(test_light_api SRCS light_api_test.cc
       DEPS light_api_lite mir_passes
       X86_DEPS ${x86_kernels}
       ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
   lite_cc_test(test_apis_lite SRCS apis_test.cc
       DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes
       X86_DEPS ${x86_kernels}
       ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
            --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
 endif()

-lite_cc_test(test_light_api_lite SRCS light_api_test.cc
-    DEPS light_api_lite program_lite mir_passes
-    ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-lite_cc_test(test_apis_lite SRCS apis_test.cc
-    DEPS cxx_api_lite light_api_lite ${ops_lite}
-    X86_DEPS ${x86_kernels} operator
-    ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
-         --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
-lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
-    DEPS cxx_api_lite model_parser_lite target_wrapper_host mir_passes
-         ${ops_lite} ${host_kernels}
-    ARM_DEPS ${arm_kernels})
 #lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
 #   X86_DEPS operator
 #   DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
 #   ARM_DEPS ${arm_kernels})
paddle/fluid/lite/api/apis_test.cc

@@ -39,23 +39,41 @@ void SetConstInput(lite::Tensor* x) {
   }
 }

-bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api,
+bool CompareTensors(const std::string& name, const Predictor& cxx_api,
                     const LightPredictor& light_api) {
   const auto* a = cxx_api.GetTensor(name);
   const auto* b = light_api.GetTensor(name);
   return TensorCompareWith(*a, *b);
 }

+#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+TEST(CXXApi_LightApi, optim_model) {
+  lite::Predictor cxx_api;
+  std::vector<Place> valid_places({
+      Place{TARGET(kHost), PRECISION(kFloat)},
+      Place{TARGET(kX86), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kFloat)},  // Both works on X86 and ARM
+  });
+  // On ARM devices, the preferred X86 target not works, but it can still
+  // select ARM kernels.
+  cxx_api.Build(FLAGS_model_dir, Place{TARGET(kX86), PRECISION(kFloat)},
+                valid_places);
+  cxx_api.SaveModel(FLAGS_optimized_model);
+}
+
 TEST(CXXApi_LightApi, save_and_load_model) {
-  lite::ExecutorLite cxx_api;
-  lite::LightPredictor light_api;
+  lite::Predictor cxx_api;
+  lite::LightPredictor light_api(FLAGS_optimized_model);

   // CXXAPi
   {
-    std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
-                                     Place{TARGET(kX86), PRECISION(kFloat)}});
-    cxx_api.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
-                  valid_places);
+    std::vector<Place> valid_places({
+        Place{TARGET(kHost), PRECISION(kFloat)},
+        Place{TARGET(kX86), PRECISION(kFloat)},
+        Place{TARGET(kARM), PRECISION(kFloat)},  // Both works on X86 and ARM
+    });
+    // On ARM devices, the preferred X86 target not works, but it can still
+    // select ARM kernels.
+    cxx_api.Build(FLAGS_model_dir, Place{TARGET(kX86), PRECISION(kFloat)},
+                  valid_places);

     auto* x = cxx_api.GetInput(0);

@@ -69,8 +87,6 @@ TEST(CXXApi_LightApi, save_and_load_model) {
   // LightApi
   {
-    light_api.Build(FLAGS_optimized_model);
-
     auto* x = light_api.GetInput(0);
     SetConstInput(x);

@@ -89,7 +105,6 @@ TEST(CXXApi_LightApi, save_and_load_model) {
     ASSERT_TRUE(CompareTensors(tensor_name, cxx_api, light_api));
   }
 }
+#endif  // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK

 }  // namespace lite
 }  // namespace paddle
paddle/fluid/lite/api/cxx_api.cc

@@ -17,19 +17,66 @@
 #include <string>
 #include <utility>
 #include <vector>
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-#include "paddle/fluid/platform/port.h"
-#endif
+#include "paddle/fluid/lite/utils/io.h"

 namespace paddle {
 namespace lite {

 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-void ExecutorLite::SaveModel(const std::string& dir) {
-  MkDirRecursively(dir.c_str());
+void Predictor::SaveModel(const std::string& dir) {
+#ifndef LITE_WITH_ARM
+  MkDirRecur(dir);
+#else
+#endif
   program_->PersistModel(dir, program_desc_);
+  LOG(INFO) << "Save model to " << dir;
 }

+lite::Tensor* Predictor::GetInput(size_t offset) {
+  auto* _feed_list = program_->exec_scope()->FindVar("feed");
+  CHECK(_feed_list) << "no feed variable in exec_scope";
+  auto* feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
+  if (offset >= feed_list->size()) {
+    feed_list->resize(offset + 1);
+  }
+  return &feed_list->at(offset);
+}
+
+const lite::Tensor* Predictor::GetOutput(size_t offset) {
+  auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
+  CHECK(_fetch_list) << "no fatch variable in exec_scope";
+  auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
+  CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
+  return &fetch_list.at(offset);
+}
+
+void Predictor::Build(const std::string& model_path, const Place& prefer_place,
+                      const std::vector<Place>& valid_places) {
+  LoadModel(model_path, scope_.get(), &program_desc_);
+  Build(program_desc_, prefer_place, valid_places);
+}
+
+const framework::proto::ProgramDesc& Predictor::program_desc() const {
+  return program_desc_;
+}
+
+void Predictor::Build(const framework::proto::ProgramDesc& desc,
+                      const Place& prefer_place,
+                      const std::vector<Place>& valid_places) {
+  program_desc_ = desc;
+  Program program(desc, scope_, valid_places);
+
+  optimizer_.KernelPickPreferPlace(prefer_place);
+  core::KernelPickFactor factor;
+  factor.ConsiderTarget();
+  factor.ConsiderPrecision();
+  optimizer_.Run(std::move(program), valid_places, factor);
+  program_ = optimizer_.GenRuntimeProgram();
+}
+
+const lite::Tensor* Predictor::GetTensor(const std::string& name) const {
+  auto* var = program_->exec_scope()->FindVar(name);
+  return &var->Get<lite::Tensor>();
+}
 #endif

 }  // namespace lite
 }  // namespace paddle
paddle/fluid/lite/api/cxx_api.h

@@ -26,68 +26,39 @@
 namespace paddle {
 namespace lite {

 struct Config {};

-class ExecutorLite {
+/*
+ * Predictor for inference, input a model, it will optimize and execute it.
+ */
+class Predictor {
  public:
-  ExecutorLite() { scope_ = std::make_shared<Scope>(); }
-  explicit ExecutorLite(const std::shared_ptr<lite::Scope>& root_scope) {
-    scope_ = root_scope;
-  }
+  // Create an empty predictor.
+  Predictor() { scope_ = std::make_shared<Scope>(); }
+  // Create a predictor with the weight variable scope set.
+  explicit Predictor(const std::shared_ptr<lite::Scope>& root_scope)
+      : scope_(root_scope) {}

+  // Build from a model, with places set for hardware config.
   void Build(const std::string& model_path, const Place& prefer_place,
-             const std::vector<Place>& valid_places) {
-    LoadModel(model_path, scope_.get(), &program_desc_);
-    Build(program_desc_, prefer_place, valid_places);
-  }
+             const std::vector<Place>& valid_places);

   void Build(const framework::proto::ProgramDesc& desc,
-             const Place& prefer_place,
-             const std::vector<Place>& valid_places) {
-    program_desc_ = desc;
-    Program program(desc, scope_, valid_places);
-    optimizer_.KernelPickPreferPlace(prefer_place);
-    core::KernelPickFactor factor;
-    factor.ConsiderTarget();
-    optimizer_.Run(std::move(program), valid_places, factor);
-    program_ = optimizer_.GenRuntimeProgram();
-  }
-
-// This method is disabled in mobile, or unnecessary dependencies required.
-#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-  void SaveModel(const std::string& dir);
-#endif
+             const Place& prefer_place,
+             const std::vector<Place>& valid_places);

-  // Get offset-th col of feed.
-  lite::Tensor* GetInput(size_t offset) {
-    auto* _feed_list = program_->exec_scope()->FindVar("feed");
-    CHECK(_feed_list) << "no feed variable in exec_scope";
-    auto* feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
-    if (offset >= feed_list->size()) {
-      feed_list->resize(offset + 1);
-    }
-    return &feed_list->at(offset);
-  }
+  // Run the predictor for a single batch of data.
+  void Run() { program_->Run(); }

-  const lite::Tensor* GetOutput(size_t offset) {
-    auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
-    CHECK(_fetch_list) << "no fatch variable in exec_scope";
-    auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
-    CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
-    return &fetch_list.at(offset);
-  }
+  // Get offset-th col of feed inputs.
+  lite::Tensor* GetInput(size_t offset);

-  const lite::Tensor* GetTensor(const std::string& name) const {
-    auto* var = program_->exec_scope()->FindVar(name);
-    return &var->Get<lite::Tensor>();
-  }
+  // Get offset-th col of fetch results.
+  const lite::Tensor* GetOutput(size_t offset);

-  void Run() { program_->Run(); }
+  // Return the program desc for debug.
+  const framework::proto::ProgramDesc& program_desc() const;
+  const lite::Tensor* GetTensor(const std::string& name) const;

-  const framework::proto::ProgramDesc& program_desc() const {
-    return program_desc_;
-  }
+  // This method is disabled in mobile, for unnecessary dependencies required.
+  void SaveModel(const std::string& dir);

  private:
   Optimizer optimizer_;

@@ -96,6 +67,7 @@
   std::unique_ptr<RuntimeProgram> program_;
 };

+#ifdef LITE_WITH_X86
 /*
  * An executor for training.
  *

@@ -119,13 +91,13 @@ class CXXTrainer {
       : scope_(root_scope),
         preferred_place_(preferred_place),
         valid_places_(valid_places),
-        main_program_executor_(ExecutorLite(scope_)) {}
+        main_program_executor_(Predictor(scope_)) {}

   // Build the RuntimeProgram cache for the main program. The cache will run
   // multiple times for the epoches.
   // NOTE Just support to execute the 0-th block currently.
-  ExecutorLite& BuildMainProgramExecutor(
-      const framework::proto::ProgramDesc& desc, int block_id = 0) {
+  Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc,
+                                      int block_id = 0) {
     main_program_executor_.Build(desc, preferred_place_, valid_places_);
     return main_program_executor_;
   }

@@ -133,7 +105,7 @@ class CXXTrainer {
   // Run the startup program. It just executes once, no cache needed.
   void RunStartupProgram(const framework::proto::ProgramDesc& desc,
                          int block_id = 0) {
-    ExecutorLite exe(scope_);
+    Predictor exe(scope_);
     exe.Build(desc, preferred_place_, valid_places_);
     exe.Run();
   }

@@ -145,8 +117,9 @@ class CXXTrainer {
   std::vector<Place> valid_places_;

   // The training program.
-  ExecutorLite main_program_executor_;
+  Predictor main_program_executor_;
 };
+#endif

 }  // namespace lite
 }  // namespace paddle
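Taken together, the renamed class reduces inference to: build with a preferred Place plus the set of valid places, fill the feed columns, run, read the fetch columns. A minimal usage sketch along the lines of the tests in this commit — the model path, input shape, and the mutable_data<float>() accessor are illustrative assumptions, not taken from this diff:

#include <string>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"

namespace paddle {
namespace lite {

void RunModelOnce(const std::string& model_dir) {
  Predictor predictor;
  // Places the optimizer is allowed to pick kernels from.
  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                   Place{TARGET(kX86), PRECISION(kFloat)}});
  predictor.Build(model_dir, Place{TARGET(kX86), PRECISION(kFloat)},
                  valid_places);

  // Fill the 0-th feed column; shape and accessor are placeholders.
  auto* input = predictor.GetInput(0);
  input->Resize(DDim(std::vector<int64_t>({100, 100})));
  auto* data = input->mutable_data<float>();
  for (int i = 0; i < 100 * 100; ++i) data[i] = 1.f;

  predictor.Run();
  const auto* output = predictor.GetOutput(0);  // 0-th fetch column
  (void)output;
}

}  // namespace lite
}  // namespace paddle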
paddle/fluid/lite/api/cxx_api_bin.cc

@@ -34,7 +34,7 @@ void Run(const char* model_dir, int repeat, int thread_num) {
   DeviceInfo::Init();
   DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
 #endif
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;

   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});
paddle/fluid/lite/api/cxx_api_test.cc

@@ -42,7 +42,7 @@ TEST(CXXApi, test) {
 }

 TEST(CXXApi, save_model) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)},
paddle/fluid/lite/api/inceptionv4_test.cc

@@ -16,21 +16,20 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
+#include "paddle/fluid/lite/api/test_helper.h"
 #include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/kernels/use_kernels.h"
 #include "paddle/fluid/lite/operators/use_ops.h"

-// for eval
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {

 #ifdef LITE_WITH_ARM
 TEST(InceptionV4, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});

@@ -44,7 +43,20 @@ TEST(InceptionV4, test) {
     data[i] = 1;
   }

-  predictor.Run();
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

   auto* out = predictor.GetOutput(0);
   std::vector<float> results({0.00078033, 0.00083865, 0.00060029, 0.00057083,
paddle/fluid/lite/api/light_api.cc

@@ -13,3 +13,67 @@
 // limitations under the License.

 #include "paddle/fluid/lite/api/light_api.h"

+namespace paddle {
+namespace lite {
+
+void LightPredictor::Build(const std::string& model_dir) {
+  framework::proto::ProgramDesc desc;
+  LoadModel(model_dir, scope_.get(), &desc);
+  BuildRuntimeProgram(desc);
+}
+
+Tensor* LightPredictor::GetInput(size_t offset) {
+  auto* _feed_list = program_->exec_scope()->FindVar("feed");
+  CHECK(_feed_list) << "no feed variable in exec_scope";
+  auto* feed_list = _feed_list->GetMutable<std::vector<Tensor>>();
+  if (offset >= feed_list->size()) {
+    feed_list->resize(offset + 1);
+  }
+  return &feed_list->at(offset);
+}
+
+const Tensor* LightPredictor::GetOutput(size_t offset) {
+  auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
+  CHECK(_fetch_list) << "no fatch variable in exec_scope";
+  auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
+  CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
+  return &fetch_list.at(offset);
+}
+
+void LightPredictor::BuildRuntimeProgram(
+    const framework::proto::ProgramDesc& prog) {
+  std::vector<Instruction> insts;
+  // 1. Create op first
+  Program program(prog, scope_, {});
+
+  // 2. Create Instructs
+  // Create the kernels of the target places, and filter out the specific
+  // kernel with the target alias.
+  for (auto& op : program.ops()) {
+    auto kernel_type = op->op_info()->GetAttr<std::string>(kKernelTypeAttr);
+    std::string op_type, alias;
+    Place place;
+    KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
+    auto kernels = op->CreateKernels({place});
+    // filter out a kernel
+    auto it = std::find_if(kernels.begin(), kernels.end(),
+                           [&](std::unique_ptr<KernelBase>& it) {
+                             return it->alias() == alias;
+                           });
+    CHECK(it != kernels.end());
+    (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
+    insts.emplace_back(op, std::move(*it));
+  }
+  program_.reset(new RuntimeProgram(std::move(insts)));
+  CHECK(program.exec_scope());
+  program_->set_exec_scope(program.exec_scope());
+}
+
+LightPredictor::LightPredictor(const std::string& model_dir) {
+  scope_ = std::make_shared<Scope>();
+  Build(model_dir);
+}
+
+}  // namespace lite
+}  // namespace paddle
paddle/fluid/lite/api/light_api.h

@@ -32,36 +32,21 @@
 namespace paddle {
 namespace lite {

 /*
  * The light weight predictor, mainly for mobile. It loads an optimized model,
  * and will not depend on the MIR or perform latter optimization.
  */
 class LightPredictor {
  public:
-  LightPredictor() { scope_ = std::make_shared<Scope>(); }
-
-  void Build(const std::string& model_dir) {
-    framework::proto::ProgramDesc desc;
-    LoadModel(model_dir, scope_.get(), &desc);
-    BuildRuntimeProgram(desc);
-  }
+  explicit LightPredictor(const std::string& model_dir);

   void Run() { program_->Run(); }

-  // Get offset-th col of feed.
-  Tensor* GetInput(size_t offset) {
-    auto* _feed_list = program_->exec_scope()->FindVar("feed");
-    CHECK(_feed_list) << "no feed variable in exec_scope";
-    auto* feed_list = _feed_list->GetMutable<std::vector<Tensor>>();
-    if (offset >= feed_list->size()) {
-      feed_list->resize(offset + 1);
-    }
-    return &feed_list->at(offset);
-  }
+  // Get offset-th col of feed inputs.
+  Tensor* GetInput(size_t offset);

-  const Tensor* GetOutput(size_t offset) {
-    auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
-    CHECK(_fetch_list) << "no fatch variable in exec_scope";
-    auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
-    CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
-    return &fetch_list.at(offset);
-  }
+  // Get offset-th col of fetch outputs.
+  const Tensor* GetOutput(size_t offset);

   const lite::Tensor* GetTensor(const std::string& name) const {
     auto* var = program_->exec_scope()->FindVar(name);

@@ -69,34 +54,8 @@ class LightPredictor {
   }

  private:
-  void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog) {
-    std::vector<Instruction> insts;
-    // 1. Create op first
-    Program program(prog, scope_, {});
-
-    // 2. Create Instructs
-    // Create the kernels of the target places, and filter out the specific
-    // kernel with the target alias.
-    for (auto& op : program.ops()) {
-      auto kernel_type = op->op_info()->GetAttr<std::string>(kKernelTypeAttr);
-      std::string op_type, alias;
-      Place place;
-      KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
-      auto kernels = op->CreateKernels({place});
-      // filter out a kernel
-      auto it = std::find_if(kernels.begin(), kernels.end(),
-                             [&](std::unique_ptr<KernelBase>& it) {
-                               return it->alias() == alias;
-                             });
-      CHECK(it != kernels.end());
-      (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
-      insts.emplace_back(op, std::move(*it));
-    }
-    program_.reset(new RuntimeProgram(std::move(insts)));
-    CHECK(program.exec_scope());
-    program_->set_exec_scope(program.exec_scope());
-  }
+  void Build(const std::string& model_dir);
+  void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog);

  private:
   std::shared_ptr<Scope> scope_;
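On the deployment side the same flow is shorter still, since the constructor now loads the model and builds the runtime program in one step, with no MIR passes involved. A sketch under the same assumptions as above (model path and shape are illustrative, mutable_data<float>() assumed):

#include <vector>
#include "paddle/fluid/lite/api/light_api.h"

namespace paddle {
namespace lite {

void RunOptimizedModel() {
  // The constructor loads the optimized model and builds the runtime
  // program directly.
  LightPredictor predictor("lite_naive_model_opt");

  auto* input = predictor.GetInput(0);
  input->Resize(DDim(std::vector<int64_t>({100, 100})));
  auto* data = input->mutable_data<float>();
  for (int i = 0; i < 100 * 100; ++i) data[i] = 1.f;

  predictor.Run();
  const auto* output = predictor.GetOutput(0);
  (void)output;
}

}  // namespace lite
}  // namespace paddle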
paddle/fluid/lite/api/light_api_test.cc

@@ -25,8 +25,10 @@ namespace paddle {
 namespace lite {

 TEST(LightAPI, load) {
-  LightPredictor predictor;
-  predictor.Build(FLAGS_optimized_model);
+  if (FLAGS_optimized_model.empty()) {
+    FLAGS_optimized_model = "lite_naive_model";
+  }
+  LightPredictor predictor(FLAGS_optimized_model);

   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize(DDim(std::vector<int64_t>({100, 100})));
paddle/fluid/lite/api/lite_api_test_helper.cc

@@ -22,7 +22,7 @@ namespace paddle {
 namespace lite {

 const lite::Tensor* RunHvyModel() {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
paddle/fluid/lite/api/mobilenetv1_test.cc

@@ -16,21 +16,20 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
+#include "paddle/fluid/lite/api/test_helper.h"
 #include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/kernels/use_kernels.h"
 #include "paddle/fluid/lite/operators/use_ops.h"

-// for eval
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {

 #ifdef LITE_WITH_ARM
 TEST(MobileNetV1, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});

@@ -44,7 +43,20 @@ TEST(MobileNetV1, test) {
     data[i] = 1;
   }

-  predictor.Run();
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

   auto* out = predictor.GetOutput(0);
   std::vector<float> results({1.91308980e-04, 5.92055148e-04, 1.12303176e-04,
paddle/fluid/lite/api/mobilenetv2_test.cc

@@ -16,21 +16,20 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
+#include "paddle/fluid/lite/api/test_helper.h"
 #include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/kernels/use_kernels.h"
 #include "paddle/fluid/lite/operators/use_ops.h"

-// for eval
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {

 #ifdef LITE_WITH_ARM
 TEST(MobileNetV2, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});

@@ -44,7 +43,20 @@ TEST(MobileNetV2, test) {
     data[i] = 1;
   }

-  predictor.Run();
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

   auto* out = predictor.GetOutput(0);
   std::vector<float> results({0.00097802, 0.00099822, 0.00103093, 0.00100121,
paddle/fluid/lite/api/resnet50_test.cc

@@ -16,21 +16,20 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include "paddle/fluid/lite/api/cxx_api.h"
+#include "paddle/fluid/lite/api/test_helper.h"
 #include "paddle/fluid/lite/core/mir/use_passes.h"
 #include "paddle/fluid/lite/core/op_registry.h"
 #include "paddle/fluid/lite/kernels/use_kernels.h"
 #include "paddle/fluid/lite/operators/use_ops.h"

-// for eval
-DEFINE_string(model_dir, "", "");
-
 namespace paddle {
 namespace lite {

 #ifdef LITE_WITH_ARM
 TEST(ResNet50, test) {
   DeviceInfo::Init();
-  lite::ExecutorLite predictor;
+  DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kARM), PRECISION(kFloat)}});

@@ -44,7 +43,20 @@ TEST(ResNet50, test) {
     data[i] = 1;
   }

-  predictor.Run();
+  for (int i = 0; i < FLAGS_warmup; ++i) {
+    predictor.Run();
+  }
+
+  auto start = GetCurrentUS();
+  for (int i = 0; i < FLAGS_repeats; ++i) {
+    predictor.Run();
+  }
+
+  LOG(INFO) << "================== Speed Report ===================";
+  LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
+            << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
+            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
+            << " ms in average.";

   auto* out = predictor.GetOutput(0);
   std::vector<float> results({2.41399175e-04, 4.13724629e-04, 2.64324830e-04,
paddle/fluid/lite/kernels/arm/use_kernels.h → paddle/fluid/lite/api/test_helper.h (renamed)

@@ -13,13 +13,24 @@
 // limitations under the License.

 #pragma once

-#include "paddle/fluid/lite/core/op_registry.h"
-
-USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
-USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
+#include <gflags/gflags.h>
+#include <time.h>
+
+// for eval
+DEFINE_string(model_dir, "", "model dir");
+DEFINE_int32(warmup, 0, "warmup times");
+DEFINE_int32(repeats, 1, "repeats times");
+DEFINE_int32(threads, 1, "threads num");
+
+namespace paddle {
+namespace lite {
+
+inline double GetCurrentUS() {
+  struct timeval time;
+  gettimeofday(&time, NULL);
+  return 1e+6 * time.tv_sec + time.tv_usec;
+}
+
+}  // namespace lite
+}  // namespace paddle
paddle/fluid/lite/arm/math/CMakeLists.txt

@@ -35,6 +35,8 @@ cc_library(math_arm SRCS
     split.cc
     activation.cc
    dropout.cc
+    gemm_prepacked_int8.cc
+    gemv_arm_int8.cc
     DEPS ${lite_kernel_deps} eigen3 framework_proto_lite)
 # TODO(TJ): fix me do not deps proto
paddle/fluid/lite/core/CMakeLists.txt

@@ -25,7 +25,7 @@ cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
 cc_library(scope_lite SRCS scope.cc DEPS ${tensor_lite})
 cc_library(cpu_info_lite SRCS cpu_info.cc)
 lite_cc_library(context_lite SRCS context.cc DEPS ${tensor_lite} any_lite cpu_info_lite eigen3)
-cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
+cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite kernel_lite
    cpp_op_desc_lite ${tensor_lite})
 cc_library(types_lite SRCS types.cc)
 cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)
paddle/fluid/lite/core/kernel.cc

@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/fluid/lite/core/kernel.h"
+#include <cstdlib>

 namespace paddle {
 namespace lite {

@@ -49,6 +50,36 @@ std::string KernelBase::GenParamTypeKey() const {
   return ss.str();
 }

+void KernelBase::ParseKernelType(const std::string& kernel_type,
+                                 std::string* op_type, std::string* alias,
+                                 Place* place) {
+  std::stringstream ss(kernel_type);
+  std::getline(ss, *op_type, '/');
+  std::getline(ss, *alias, '/');
+  std::string target, precision, layout;
+  std::getline(ss, target, '/');
+  std::getline(ss, precision, '/');
+  std::getline(ss, layout, '/');
+  place->target = static_cast<TargetType>(std::atoi(target.c_str()));
+  place->precision = static_cast<PrecisionType>(std::atoi(precision.c_str()));
+  place->layout = static_cast<DataLayoutType>(std::atoi(layout.c_str()));
+}
+
+std::string KernelBase::SerializeKernelType(const std::string& op_type,
+                                            const std::string& alias,
+                                            const Place& place) {
+  std::stringstream ss;
+  ss << op_type << "/";
+  ss << alias << "/";
+  // We serialize the place value not the string representation here for
+  // easier deserialization.
+  ss << static_cast<int>(place.target) << "/";
+  ss << static_cast<int>(place.precision) << "/";
+  ss << static_cast<int>(place.layout);
+  return ss.str();
+}
+
 bool ParamTypeRegistry::KeyCmp::operator()(
     const ParamTypeRegistry::key_t& a,
     const ParamTypeRegistry::key_t& b) const {
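The serialized form is simply the op type, the alias, and the three Place enums as '/'-joined integers, which is why deserialization is a sequence of std::getline calls. These implementations move out of the header (see kernel.h below, which also switched std::stoi to std::atoi in the process). A standalone sketch of the same round trip — the enum integer values used here are placeholders, not the real TargetType/PrecisionType numbering:

#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

// Mirrors KernelBase::SerializeKernelType: "op/alias/target/precision/layout".
std::string Serialize(const std::string& op, const std::string& alias,
                      int target, int precision, int layout) {
  std::stringstream ss;
  ss << op << "/" << alias << "/" << target << "/" << precision << "/"
     << layout;
  return ss.str();
}

int main() {
  const std::string key = Serialize("fc", "def", 3, 1, 1);
  std::cout << key << "\n";  // prints: fc/def/3/1/1

  // Mirrors KernelBase::ParseKernelType: split on '/' and atoi the tail.
  std::stringstream ss(key);
  std::string op, alias, target, precision, layout;
  std::getline(ss, op, '/');
  std::getline(ss, alias, '/');
  std::getline(ss, target, '/');
  std::getline(ss, precision, '/');
  std::getline(ss, layout, '/');
  std::cout << op << " " << alias << " " << std::atoi(target.c_str()) << " "
            << std::atoi(precision.c_str()) << " " << std::atoi(layout.c_str())
            << "\n";
  return 0;
}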
paddle/fluid/lite/core/kernel.h

@@ -118,33 +118,11 @@ class KernelBase {
   static std::string SerializeKernelType(const std::string& op_type,
                                          const std::string& alias,
-                                         const Place& place) {
-    std::stringstream ss;
-    ss << op_type << "/";
-    ss << alias << "/";
-    // We serialize the place value not the string representation here for
-    // easier deserialization.
-    ss << static_cast<int>(place.target) << "/";
-    ss << static_cast<int>(place.precision) << "/";
-    ss << static_cast<int>(place.layout);
-    return ss.str();
-  }
+                                         const Place& place);

   static void ParseKernelType(const std::string& kernel_type,
                               std::string* op_type, std::string* alias,
-                              Place* place) {
-    std::stringstream ss(kernel_type);
-    std::getline(ss, *op_type, '/');
-    std::getline(ss, *alias, '/');
-    std::string target, precision, layout;
-    std::getline(ss, target, '/');
-    std::getline(ss, precision, '/');
-    std::getline(ss, layout, '/');
-    place->target = static_cast<TargetType>(std::stoi(target));
-    place->precision = static_cast<PrecisionType>(std::stoi(precision));
-    place->layout = static_cast<DataLayoutType>(std::stoi(layout));
-  }
+                              Place* place);

   virtual ~KernelBase() = default;
   void Torch() {}
paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc

@@ -28,7 +28,7 @@ namespace lite {
 namespace mir {

 TEST(fc_fuse_pass, fuse_test) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
 #ifndef LITE_WITH_CUDA
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});

@@ -69,7 +69,7 @@ TEST(fc_fuse_pass, fuse_test) {
 #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 TEST(fc_fuse_pass, save_model_test) {
-  lite::ExecutorLite predictor;
+  lite::Predictor predictor;
   std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
                                    Place{TARGET(kX86), PRECISION(kFloat)}});
   predictor.Build(FLAGS_model_dir, Place{TARGET(kX86), PRECISION(kFloat)},
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h

@@ -25,7 +25,7 @@ namespace fusion {
 /* The model trained by fluid quantization is a simulation of real int8.
  * The quantized Ops(conv2d, mul, depthwise conv2d etc) have fake_quantop
- * in front
- and fake_dequantop behind.
+ * in front and fake_dequantop behind.
  *
  * When in int8 mode, the pattern like "fake_quant + quantized_op +
  * fake_dequant"
paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc

@@ -41,7 +41,7 @@ void FuseBase::DeleteInterNodes(SSAGraph *graph) {
     }
   }

-  LOG(INFO) << "keys: " << key2nodes_.size();
+  VLOG(4) << "keys: " << key2nodes_.size();
   std::unordered_set<const Node *> nodes2rm;
   for (auto &matched : key2nodes_) {
     for (const auto &key : keys) {
paddle/fluid/lite/core/op_registry.h

@@ -80,6 +80,8 @@ class KernelRegistry final {
       KernelRegistryForTarget<TARGET(kARM), PRECISION(kAny),
                               DATALAYOUT(kAny)> *,  //
       KernelRegistryForTarget<TARGET(kARM), PRECISION(kFloat),
                               DATALAYOUT(kNCHW)> *,  //
+      KernelRegistryForTarget<TARGET(kARM), PRECISION(kInt8),
+                              DATALAYOUT(kNCHW)> *  //
       >;
paddle/fluid/lite/core/optimizer.h

@@ -58,7 +58,6 @@ class Optimizer {
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
           "lite_elementwise_add_activation_fuse_pass",  //
 #endif
           "lite_fc_fuse_pass",              //
           "static_kernel_pick_pass",        //
           "variable_place_inference_pass",  //
           "argument_type_display_pass",     //
paddle/fluid/lite/core/target_wrapper.h

@@ -38,6 +38,7 @@ enum class PrecisionType : int {
   kUnk = 0,
   kFloat,
   kInt8,
+  kInt32,
   kAny,  // any precision
   NUM,   // number of fields.
 };

@@ -48,6 +49,19 @@ enum class DataLayoutType : int {
   NUM,  // number of fields.
 };

+static size_t PrecisionTypeLength(PrecisionType type) {
+  switch (type) {
+    case PrecisionType::kFloat:
+      return 4;
+    case PrecisionType::kInt8:
+      return 1;
+    case PrecisionType::kInt32:
+      return 4;
+    default:
+      return 4;
+  }
+}
+
 // Some helper macro to get a specific TargetType.
 #define TARGET(item__) paddle::lite::TargetType::item__
 // Some helper macro to get a specific PrecisionType.

@@ -87,7 +101,7 @@ static const std::string& TargetRepr(TargetType target) {
 static const std::string& PrecisionRepr(PrecisionType precision) {
   static const std::string precision2string[] = {"kUnk", "kFloat", "kInt8",
-                                                 "kAny"};
+                                                 "kInt32", "kAny"};
   auto x = static_cast<int>(precision);
   CHECK_LT(x, static_cast<int>(PRECISION(NUM)));
   return precision2string[x];
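The new PrecisionTypeLength helper gives the element width in bytes per precision (1 byte for kInt8, 4 for kFloat and kInt32), which is what buffer sizing for the new int8 path needs. A self-contained sketch of that use; the enum is re-declared locally so the snippet stands alone:

#include <cstddef>
#include <iostream>

// Local stand-in for paddle::lite::PrecisionType from target_wrapper.h.
enum class PrecisionType : int { kUnk = 0, kFloat, kInt8, kInt32, kAny, NUM };

// Mirrors the PrecisionTypeLength helper added in this commit.
static size_t PrecisionTypeLength(PrecisionType type) {
  switch (type) {
    case PrecisionType::kFloat:
      return 4;
    case PrecisionType::kInt8:
      return 1;
    case PrecisionType::kInt32:
      return 4;
    default:
      return 4;
  }
}

int main() {
  // e.g. a 1x3x224x224 NCHW activation tensor
  const size_t num_elements = 1 * 3 * 224 * 224;
  std::cout << "float buffer: "
            << num_elements * PrecisionTypeLength(PrecisionType::kFloat)
            << " bytes\n";
  std::cout << "int8 buffer:  "
            << num_elements * PrecisionTypeLength(PrecisionType::kInt8)
            << " bytes\n";
  return 0;
}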
paddle/fluid/lite/kernels/arm/CMakeLists.txt

@@ -51,5 +51,3 @@ set(arm_kernels
     )

 set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")
paddle/fluid/lite/kernels/arm/conv_compute.cc

@@ -92,6 +92,9 @@ void ConvCompute::Run() {
   // }
 }

+void ConvComputeInt8::PrepareForRun() {}
+
+void ConvComputeInt8::Run() {}
+
 }  // namespace arm
 }  // namespace kernels
 }  // namespace lite

@@ -112,3 +115,23 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
     .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Output", {LiteType::GetTensorTy(TARGET(kARM))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8, def)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();
paddle/fluid/lite/kernels/arm/conv_compute.h

@@ -41,6 +41,25 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
       nullptr};
 };

+class ConvComputeInt8 : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::ConvParam;
+
+  void PrepareForRun() override;
+
+  void Run() override;
+
+  ~ConvComputeInt8() {
+    if (impl_ != nullptr) {
+      delete impl_;
+    }
+  }
+
+ private:
+  lite::arm::math::ImplBase<TARGET(kARM), PRECISION(kInt8), param_t>* impl_{
+      nullptr};
+};
+
 }  // namespace arm
 }  // namespace kernels
 }  // namespace lite
paddle/fluid/lite/kernels/use_kernels.h

@@ -12,14 +12,33 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#pragma once
-
 /*
  * ATTENTION this header file can only include in .cc file.
  */

+#pragma once
+
 #include "paddle/fluid/lite/core/op_registry.h"

 USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
 USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);

+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(split, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def);
+#endif
+
 #ifdef LITE_WITH_X86
 USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
 USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);

@@ -36,21 +55,6 @@ USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
 USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
 #endif

-#ifdef LITE_WITH_ARM
-USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def);
-#endif
-
 #ifdef LITE_WITH_CUDA
 USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
 USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
paddle/fluid/lite/kernels/x86/CMakeLists.txt

@@ -44,10 +44,9 @@ set(x86_kernels
     softmax_compute_x86
     dropout_compute_x86
     concat_compute_x86
     conv_compute_x86
     pool_compute_x86
     batch_norm_compute_x86
     )

 set(x86_kernels "${x86_kernels}" CACHE INTERNAL "x86 kernels")
paddle/fluid/lite/opencl/CMakeLists.txt (new file, mode 100644)

if (NOT LITE_WITH_OPENCL)
  return()
endif()

if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
  cc_library(cl_wrapper SRCS cl_wrapper.cc)
  cc_library(cl_tool SRCS cl_tool.cc)
  target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
  cc_library(cl_half SRCS cl_half.cc)
  target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
  cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
  cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
  cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
  cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
  cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
  cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
  lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
  add_dependencies(cl_tool opencl_clhpp)
endif()
paddle/fluid/lite/opencl/cl2_header.h (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#include <CL/cl2.hpp>
paddle/fluid/lite/opencl/cl_caller.cc (new file, mode 100644)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
   Licensed under the Apache License, Version 2.0 (the "License"). */

#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"

namespace paddle {
namespace lite {

static void CopyImageData(const CLImage& cl_image, float* out) {
  int width = cl_image.image_dims()[0];
  int height = cl_image.image_dims()[1];

  half_t* image_data = new half_t[height * width * 4];
  cl::Image* image = cl_image.cl_image();
  const std::array<size_t, 3> origin{0, 0, 0};
  const std::array<size_t, 3> region{static_cast<size_t>(width),
                                     static_cast<size_t>(height), 1};
  cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
      *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
  CL_CHECK_ERRORS(err);

  auto* converter = cl_image.image_converter();
  converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
                         cl_image.tensor_dims());

  delete[] image_data;
}

bool InitOpenCLEngine(std::string cl_path) {
  auto* engine = CLEngine::Global();
  engine->set_cl_path(cl_path);
  return engine->IsInitSuccess();
}

void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim) {
  CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  auto kernel = helper.KernelAt(0);

  CLImage in_image;
  in_image.set_tensor_data(in, in_dim);
  in_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Inpu image: " << in_image << " --- ";
  CLImage bias_image;
  bias_image.set_tensor_data(bias, bias_dim);
  bias_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Bias image: " << bias_image << " --- ";
  CLImage out_image;
  out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr,
      nullptr);
  CL_CHECK_ERRORS(status);

  VLOG(3) << " --- Out image: " << out_image << " --- ";

  CopyImageData(out_image, out);
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_caller.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace paddle {
namespace lite {

bool InitOpenCLEngine(std::string cl_path);

void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim);

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_context.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {

cl::CommandQueue& CLContext::GetCommandQueue() {
  return CLEngine::Global()->command_queue();
}

cl::Context& CLContext::GetContext() { return CLEngine::Global()->context(); }

cl::Program& CLContext::GetProgram(const std::string& file_name,
                                   const std::string& options) {
  std::string program_key = file_name;
  if (!options.empty()) {
    program_key += options;
  }
  auto it = programs_.find(program_key);
  if (it != programs_.end()) {
    VLOG(3) << " --- program -> " << program_key << " has been built --- ";
    return *(it->second);
  }

  auto program = CLEngine::Global()->CreateProgram(
      GetContext(), CLEngine::Global()->cl_path() + "/cl_kernel/" + file_name);
  VLOG(3) << " --- begin build program -> " << program_key << " --- ";
  CLEngine::Global()->BuildProgram(program.get(), options);
  VLOG(3) << " --- end build program -> " << program_key << " --- ";

  programs_[program_key] = std::move(program);
  return *(programs_[program_key]);
}

std::unique_ptr<cl::Kernel> CLContext::GetKernel(const std::string& kernel_name,
                                                 const std::string& file_name,
                                                 const std::string& options) {
  cl_int status{CL_SUCCESS};
  VLOG(3) << " --- to get program " << file_name << " --- ";
  auto program = GetProgram(file_name, options);
  VLOG(3) << " --- end get program --- ";
  VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
  std::unique_ptr<cl::Kernel> kernel(
      new cl::Kernel(program, kernel_name.c_str(), &status));
  CL_CHECK_ERRORS(status);
  VLOG(3) << " --- end create kernel --- ";
  return std::move(kernel);
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_context.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {

class CLContext {
 public:
  cl::CommandQueue& GetCommandQueue();

  cl::Context& GetContext();

  cl::Program& GetProgram(const std::string& file_name,
                          const std::string& options);

  std::unique_ptr<cl::Kernel> GetKernel(const std::string& kernel_name,
                                        const std::string& file_name,
                                        const std::string& options);

 private:
  std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
};

}  // namespace lite
}  // namespace paddle
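Worth noting: GetProgram caches built programs keyed by file name plus build options, so repeated GetKernel calls against the same source only compile once. A small sketch of the behavior, using the kernel and file names exercised by cl_test.cc below:

paddle::lite::CLContext context;
// The first call loads and builds elementwise_add_kernel.cl; the second call
// hits the programs_ cache and only constructs a new cl::Kernel object.
auto k1 = context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
auto k2 = context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");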
paddle/fluid/lite/opencl/cl_engine.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {

CLEngine* CLEngine::Global() {
  static CLEngine cl_engine_;
  cl_engine_.Init();
  return &cl_engine_;
}

CLEngine::~CLEngine() {
  if (command_queue_ != nullptr) {
    command_queue_->finish();
  }
  // For controlling the destruction order:
  command_queue_.reset();
  context_.reset();
  device_.reset();
  platform_.reset();
}

bool CLEngine::Init() {
  if (initialized_) {
    return true;
  }
  bool is_platform_init = InitializePlatform();
  bool is_device_init = InitializeDevice();
  is_init_success_ = is_platform_init && is_device_init;
  initialized_ = true;
  return initialized_;
}

cl::Platform& CLEngine::platform() {
  CHECK(platform_ != nullptr) << "platform_ is not initialized!";
  return *platform_;
}

cl::Context& CLEngine::context() {
  if (context_ == nullptr) {
    context_ = CreateContext();
  }
  return *context_;
}

cl::Device& CLEngine::device() {
  CHECK(device_ != nullptr) << "device_ is not initialized!";
  return *device_;
}

cl::CommandQueue& CLEngine::command_queue() {
  if (command_queue_ == nullptr) {
    command_queue_ = CreateCommandQueue(context());
  }
  return *command_queue_;
}

std::unique_ptr<cl::Program> CLEngine::CreateProgram(const cl::Context& context,
                                                     std::string file_name) {
  std::ifstream file{file_name, std::ios::binary | std::ios::ate};
  CHECK(file.is_open()) << "Can't open file from " << file_name;
  auto size = file.tellg();
  CHECK(size > 0) << "size is too small.";
  std::string content(size, '\0');
  file.seekg(0);
  file.read(&content[0], size);
  cl::Program::Sources sources;
  sources.push_back(content);
  auto prog =
      std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
  LOG(INFO) << "OpenCL kernel file name: " << file_name;
  LOG(INFO) << "Program source size: " << content.size();
  CL_CHECK_ERRORS(status_);
  return std::move(prog);
}

std::unique_ptr<cl::UserEvent> CLEngine::CreateEvent(
    const cl::Context& context) {
  auto event =
      std::unique_ptr<cl::UserEvent>(new cl::UserEvent(context, &status_));
  CL_CHECK_ERRORS(status_);
  return std::move(event);
}

bool CLEngine::BuildProgram(cl::Program* program, const std::string& options) {
  std::string build_option = options + " -cl-fast-relaxed-math -I " +
                             CLEngine::Global()->cl_path() + "/cl_kernel";
  status_ = program->build({*device_}, build_option.c_str());
  CL_CHECK_ERRORS(status_);
  if (status_ != CL_SUCCESS) {
    if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
        CL_BUILD_ERROR) {
      std::string log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
      LOG(INFO) << "Program build error: " << log;
    }
    return false;
  }
  return true;
}

bool CLEngine::InitializePlatform() {
  std::vector<cl::Platform> all_platforms;
  status_ = cl::Platform::get(&all_platforms);
  CL_CHECK_ERRORS(status_);
  if (all_platforms.empty()) {
    LOG(ERROR) << "No OpenCL platform found!";
    return false;
  }
  platform_ = std::make_shared<cl::Platform>();
  *platform_ = all_platforms[0];
  return true;
}

bool CLEngine::InitializeDevice() {
  std::vector<cl::Device> all_devices;
  status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
  CL_CHECK_ERRORS(status_);
  if (all_devices.empty()) {
    LOG(ERROR) << "No OpenCL GPU device found!";
    return false;
  }
  device_ = std::make_shared<cl::Device>();
  *device_ = all_devices[0];

  auto device_name = device_->getInfo<CL_DEVICE_NAME>();
  LOG(INFO) << "Using device: " << device_name;

  auto image_support = device_->getInfo<CL_DEVICE_IMAGE_SUPPORT>();
  if (image_support) {
    LOG(INFO) << "The chosen device supports image processing.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support image processing!";
    return false;
  }

  auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
  LOG(INFO) << "The extensions supported by this device: " << ext_data;
  if (ext_data.find("cl_khr_fp16") != std::string::npos) {
    LOG(INFO) << "The chosen device supports the half data type.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support the half data type!";
    return false;
  }

  auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  LOG(INFO) << "The chosen device has " << max_units << " compute units.";

  auto local_mem = device_->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
  LOG(INFO) << "The local memory size of the chosen device is "
            << static_cast<float>(local_mem) / 1024 << " KB.";
  return true;
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_engine.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {

class CLEngine {
 public:
  static CLEngine* Global();

  bool Init();

  cl::Platform& platform();

  cl::Context& context();

  cl::Device& device();

  cl::CommandQueue& command_queue();

  std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
                                             std::string file_name);

  std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);

  bool BuildProgram(cl::Program* program, const std::string& options = "");

  bool IsInitSuccess() { return is_init_success_; }

  std::string cl_path() { return cl_path_; }

  void set_cl_path(std::string cl_path) { cl_path_ = cl_path; }

 private:
  CLEngine() = default;

  ~CLEngine();

  bool InitializePlatform();

  bool InitializeDevice();

  std::shared_ptr<cl::Context> CreateContext() {
    auto context = std::make_shared<cl::Context>(
        std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr,
        &status_);
    CL_CHECK_ERRORS(status_);
    return context;
  }

  std::shared_ptr<cl::CommandQueue> CreateCommandQueue(
      const cl::Context& context) {
    auto queue =
        std::make_shared<cl::CommandQueue>(context, device(), 0, &status_);
    CL_CHECK_ERRORS(status_);
    return queue;
  }

  std::string cl_path_;
  std::shared_ptr<cl::Platform> platform_{nullptr};
  std::shared_ptr<cl::Context> context_{nullptr};
  std::shared_ptr<cl::Device> device_{nullptr};
  std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
  cl_int status_{CL_SUCCESS};
  bool initialized_{false};
  bool is_init_success_{false};
};

}  // namespace lite
}  // namespace paddle
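A minimal bootstrap sketch of the singleton; the path is a placeholder for wherever the cl_kernel/ sources are deployed on the device (compare engine_test in cl_test.cc below):

auto* engine = paddle::lite::CLEngine::Global();  // constructs and Init()s once
engine->set_cl_path("/data/local/tmp/opencl");
CHECK(engine->IsInitSuccess()) << "platform or GPU device init failed";
auto& queue = engine->command_queue();  // context and queue are created lazily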
paddle/fluid/lite/opencl/cl_half.cc
0 → 100644
(This diff is collapsed.)
paddle/fluid/lite/opencl/cl_half.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace paddle {
namespace lite {

typedef uint16_t half_t;

half_t Float2Half(float f);

float Half2Float(half_t h);

void FloatArray2HalfArray(float* f_array, half_t* h_array, int count);

void HalfArray2FloatArray(half_t* h_array, float* f_array, int count);

}  // namespace lite
}  // namespace paddle
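A quick round-trip sketch with these helpers; values only survive to fp16 precision (roughly three decimal digits), which is expected:

float src[4] = {0.f, 1.f, -2.5f, 3.14159f};
paddle::lite::half_t h[4];
float dst[4];
paddle::lite::FloatArray2HalfArray(src, h, 4);
paddle::lite::HalfArray2FloatArray(h, dst, 4);
// dst is approximately {0.f, 1.f, -2.5f, 3.1406f}.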
paddle/fluid/lite/opencl/cl_helper.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {

void CLHelper::set_context(CLContext* context) { context_ = context; }

void CLHelper::AddKernel(const std::string& kernel_name,
                         const std::string& file_name,
                         const std::string& options) {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  VLOG(3) << " --- begin to add kernel ---";
  auto kernel = context_->GetKernel(kernel_name, file_name, options);
  kernels.emplace_back(std::move(kernel));
  VLOG(3) << " --- end to add kernel --- ";
}

cl::Kernel& CLHelper::KernelAt(const int index) {
  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
  CHECK(static_cast<size_t>(index) < kernels.size())
      << "The index must be less than the size of kernels.";
  CHECK(kernels[index] != nullptr)
      << "The target kernel pointer cannot be null.";
  return *(kernels[index]);
}

cl::CommandQueue& CLHelper::OpenCLCommandQueue() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetCommandQueue();
}

cl::Context& CLHelper::OpenCLContext() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetContext();
}

cl::NDRange CLHelper::DefaultWorkSize(const CLImage& image) {
  // n c h w
  auto image_dim = image.tensor_dims();
  if (image_dim.size() == 4) {
    auto n = image_dim[0];
    auto h = image_dim[2];
    auto w = image_dim[3];
    auto image_width = image.ImageWidth();
    auto work_size_0 = image_width / w;
    auto work_size_1 = w;
    auto work_size_2 = n * h;
    return cl::NDRange{static_cast<size_t>(work_size_0),
                       static_cast<size_t>(work_size_1),
                       static_cast<size_t>(work_size_2)};
  } else if (image_dim.size() == 2) {
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(image.ImageHeight())};
  } else if (image_dim.size() == 1) {
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(1)};
  } else if (image_dim.size() == 3) {
    auto c = image_dim[0];
    auto h = image_dim[1];
    auto w = image_dim[2];
    return cl::NDRange{static_cast<size_t>((c + 3) / 4),
                       static_cast<size_t>(w), static_cast<size_t>(h)};
  } else {
    LOG(FATAL) << "Not support this dimension, need to be implemented!";
    return cl::NDRange{};
  }
}

}  // namespace lite
}  // namespace paddle
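To make the 4-D branch concrete: for a hypothetical tensor of dims [1, 8, 4, 4], the normal converter below produces an image of width W * ((C + 3) / 4) = 4 * 2 = 8, so DefaultWorkSize returns {8 / 4, 4, 1 * 4} = {2, 4, 4}, i.e. one work item per half4 texel along (channel block, width, batch * height).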
paddle/fluid/lite/opencl/cl_helper.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
namespace paddle {
namespace lite {

class CLHelper {
 public:
  CLHelper() = default;

  explicit CLHelper(CLContext* context) : context_(context) {}

  void set_context(CLContext* context);

  void AddKernel(const std::string& kernel_name, const std::string& file_name,
                 const std::string& options = "");

  cl::Kernel& KernelAt(const int index);

  cl::CommandQueue& OpenCLCommandQueue();

  cl::Context& OpenCLContext();

  cl::NDRange DefaultWorkSize(const CLImage& image);

 private:
  CLContext* context_{nullptr};
  std::vector<std::unique_ptr<cl::Kernel>> kernels;
};

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_image.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {

std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
  int width = cl_image.image_dims_[0];
  int height = cl_image.image_dims_[1];

  half_t* image_data = new half_t[height * width * 4];
  cl::Image* image = cl_image.cl_image();
  const std::array<size_t, 3> origin{0, 0, 0};
  const std::array<size_t, 3> region{static_cast<size_t>(width),
                                     static_cast<size_t>(height), 1};
  cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
      *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
  CL_CHECK_ERRORS(err);

  float* tensor_data = new float[cl_image.numel()];
  auto* converter = cl_image.image_converter();
  converter->ImageToNCHW(image_data, tensor_data, cl_image.image_dims_,
                         cl_image.tensor_dims_);
  int stride = cl_image.numel() / 20;
  stride = stride > 0 ? stride : 1;

  os << " dims: " << cl_image.tensor_dims_ << "\n";
  for (int i = 0; i < cl_image.numel(); i += stride) {
    os << tensor_data[i] << " ";
  }

  delete[] tensor_data;
  delete[] image_data;

  return os;
}

void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  auto numel = dim.product();
#else
  auto numel = dim.production();
#endif
  tensor_data_.reset(new float[numel]);
  memcpy(tensor_data_.get(), tensor_data, numel * sizeof(float));
  tensor_dims_ = dim;
}

void CLImage::InitCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterFolder);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitNormalCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterNormal);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitNImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterNWBlock);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitDWImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterDWBlock);
  InitCLImage(context, image_converter_.get());
}

void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
  CHECK(tensor_data_ == nullptr)
      << " Empty image tensor data shouldn't have value";
  tensor_dims_ = dim;
  image_converter_.reset(new CLImageConverterNormal);

  VLOG(3) << " to get image dims ";
  image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
  VLOG(3) << " end get image dims " << image_dims_;

  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}

void CLImage::InitEmptyWithImageDim(const cl::Context& context,
                                    const DDim& image_dims) {
  VLOG(3) << " to get image dims ";
  image_dims_ = image_dims;
  VLOG(3) << " end get image dims " << image_dims_;

  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);

  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image";
}

void CLImage::InitCLImage(const cl::Context& context,
                          CLImageConverterBase* converter) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  VLOG(3) << " begin init cl image ";
  image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  half_t* image_data = new half_t[image_dims_.product() * 4];
#else
  half_t* image_data = new half_t[image_dims_.production() * 4];
#endif
  VLOG(3) << " convert to image ";
  converter->NCHWToImage(tensor_data_.get(), image_data, tensor_dims_);
  VLOG(3) << " end convert to image ";

  InitCLImage(context, image_dims_[0], image_dims_[1], image_data);

  delete[] image_data;
  tensor_data_ = nullptr;
  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}

void CLImage::InitCLImage(const cl::Context& context, int width, int height,
                          void* data) {
  cl::ImageFormat img_format(CL_RGBA, CL_HALF_FLOAT);
  cl_int err;
  cl_image_.reset(new cl::Image2D(
      context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
      img_format, width, height, 0, data, &err));
  CL_CHECK_ERRORS(err);
  CHECK(err == CL_SUCCESS) << " Create image 2d error.";
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_image.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
namespace paddle {
namespace lite {

class CLImage {
  // For debug
  friend std::ostream& operator<<(std::ostream& os, const CLImage& image);

 public:
  CLImage() = default;

  /*
   * Will not hold input tensor data, memcpy in this method.
   * */
  void set_tensor_data(float* tensor_data, const DDim& dim);

  bool IsInit() { return initialized_; }

  /*
   * Need call set_tensor_data first.
   * Folder when one dim or two dim.
   * */
  void InitCLImage(const cl::Context& context);

  void InitNormalCLImage(const cl::Context& context);

  void InitNImage(const cl::Context& context);

  void InitDWImage(const cl::Context& context);

  void InitEmptyImage(const cl::Context& context, const DDim& dim);

  void InitEmptyWithImageDim(const cl::Context& context,
                             const DDim& image_dims);

  cl::Image* cl_image() const { return cl_image_.get(); }

  const DDim& image_dims() const { return image_dims_; }

  inline size_t ImageWidth() const { return image_dims_[0]; }

  inline size_t ImageHeight() const { return image_dims_[1]; }

  const DDim& tensor_dims() const { return tensor_dims_; }

  /*
   * Resize original tensor dim.
   * */
  inline CLImage& Resize(const DDim& dims) {
    tensor_dims_ = dims;
    return *this;
  }

  template <typename T>
  T* data() const {
    CHECK(!initialized_) << "CL image has initialized, tensor data has been "
                            "deleted, can't use tensor data!";
    return reinterpret_cast<T*>(tensor_data_.get());
  }

  /*
   * Numel of tensor dim
   * */
  inline int64_t numel() const {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
    return tensor_dims_.product();
#else
    return tensor_dims_.production();
#endif
  }

  /*
   * Original tensor dim
   * */
  cl::UserEvent& cl_event() const { return *cl_event_; }

  CLImageConverterBase* image_converter() const {
    return image_converter_.get();
  }

 private:
  void InitCLImage(const cl::Context& context,
                   CLImageConverterBase* converter);

  void InitCLImage(const cl::Context& context, int width, int height,
                   void* data);

  bool initialized_ = false;
  std::unique_ptr<cl::Image2D> cl_image_{nullptr};
  std::unique_ptr<cl::UserEvent> cl_event_{nullptr};
  DDim tensor_dims_;
  DDim image_dims_;
  // Array form so that delete[] is used for the buffer allocated with new[].
  std::unique_ptr<float[]> tensor_data_{nullptr};
  std::unique_ptr<CLImageConverterBase> image_converter_{nullptr};
};

}  // namespace lite
}  // namespace paddle
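A minimal lifecycle sketch; the dims and host data are illustrative, and helper and kernel stand in for an already set up CLHelper and cl::Kernel (the flow mirrors kernel_test in cl_test.cc below):

const paddle::lite::DDim dim =
    paddle::lite::DDim(std::vector<paddle::lite::DDim::value_type>{16, 16});
std::vector<float> host_nchw(16 * 16, 1.f);
paddle::lite::CLImage image;
image.set_tensor_data(host_nchw.data(), dim);     // copies the NCHW floats
image.InitNormalCLImage(helper.OpenCLContext());  // half conversion + upload;
                                                  // the host copy is released
kernel.setArg(0, *image.cl_image());              // bind as a kernel argument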
paddle/fluid/lite/opencl/cl_image_converter.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>
namespace paddle {
namespace lite {

DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  size_t width = W * ((C + 3) / 4);
  size_t height = H * N;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterDefault::NCHWToImage(float* nchw, half_t* image,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];

  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;

  float* p = nchw;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

void CLImageConverterDefault::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];

  size_t width = image_dim[0];
  float* p = tensor;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim& tensor_dim) {
  if (tensor_dim.size() <= 2) {
    size_t tdim[2] = {1, 1};
    if (tensor_dim.size() == 1) {
      tdim[1] = tensor_dim[0];
    } else {
      tdim[0] = tensor_dim[0];
      tdim[1] = tensor_dim[1];
    }
    size_t width = (tdim[1] + 3) / 4;
    size_t height = tdim[0];

    width_of_one_block_ = width;
    height_of_one_block_ = height;
    c_block_ = 1;

    return DDim(std::vector<DDim::value_type>(
        {static_cast<DDim::value_type>(width),
         static_cast<DDim::value_type>(height)}));
  } else {
    size_t new_dims[] = {1, 1, 1, 1};
    for (size_t j = 0; j < tensor_dim.size(); ++j) {
      new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
    }
    size_t N, C, H, W;
    N = new_dims[0];
    C = new_dims[1];
    H = new_dims[2];
    W = new_dims[3];
    size_t width = W * ((C + 3) / 4);
    size_t height = H * N;

    width_of_one_block_ = W;
    height_of_one_block_ = H;
    c_block_ = width / W;

    return DDim(std::vector<DDim::value_type>(
        {static_cast<DDim::value_type>(width),
         static_cast<DDim::value_type>(height)}));
  }
}

void CLImageConverterFolder::NCHWToImage(float* tensor, half_t* image,
                                         const DDim& tensor_dim) {
  CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
      << " Tensor dim is not support!";
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.NCHWToImage(tensor, image, tensor_dim);
  } else {
    size_t tdim[2] = {1, 1};
    if (tensor_dim.size() == 1) {
      tdim[1] = tensor_dim[0];
    } else {
      tdim[0] = tensor_dim[0];
      tdim[1] = tensor_dim[1];
    }

    DDim image_dim = InitImageDimInfoWith(tensor_dim);
    size_t width = image_dim[0];

    for (size_t h = 0; h < tdim[0]; h++) {
      for (size_t w = 0; w < tdim[1]; w++) {
        image[(h * width + w / 4) * 4 + (w % 4)] =
            Float2Half(tensor[h * tdim[1] + w]);
      }
    }
  }
}

void CLImageConverterFolder::ImageToNCHW(half_t* image, float* tensor,
                                         const DDim& image_dim,
                                         const DDim& tensor_dim) {
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
  } else {
    size_t width = image_dim[0];
    size_t H = 1, W = 1;
    if (tensor_dim.size() == 2) {
      H = tensor_dim[0];
      W = tensor_dim[1];
    } else if (tensor_dim.size() == 1) {
      W = tensor_dim[0];
    }

    float* p = tensor;
    for (size_t h = 0; h < H; h++) {
      for (size_t w = 0; w < W; w++) {
        p[h * W + w] = Half2Float(image[(h * width + w / 4) * 4 + (w % 4)]);
      }
    }
  }
}

DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C, H, W;
  N = tensor_dim[0];
  C = tensor_dim[1];
  H = tensor_dim[2];
  W = tensor_dim[3];
  size_t width = W * ((N + 3) / 4);
  size_t height = C * H;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterNWBlock::NCHWToImage(float* tensor, half_t* image,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  auto image_dim = InitImageDimInfoWith(tensor_dim);
  float* p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  size_t block = image_dim[0] / tensor_dim[3];

  for (size_t n = 0; n < block * 4; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          size_t index = 4 * c * (width * H) + 4 * h * width +
                         4 * W * (n / 4) + w * 4 + n % 4;
          if (n < N) {
            image[index] = Float2Half(*p);
            p++;
          } else {
            image[index] = 0.0;
          }
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}

void CLImageConverterNWBlock::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float* p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];

  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          size_t index = 4 * c * (width * H) + 4 * h * width +
                         4 * W * (n / 4) + w * 4 + n % 4;
          *p = Half2Float(image[index]);
          p++;
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}

DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C, H, W;
  N = tensor_dim[0];
  C = tensor_dim[1];
  H = tensor_dim[2];
  W = tensor_dim[3];
  size_t width = W * ((N + 3) / 4);
  size_t height = C * H;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterDWBlock::NCHWToImage(float* tensor, half_t* image,
                                          const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[1];
  C = new_dims[0];
  H = new_dims[2];
  W = new_dims[3];

  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;

  float* p = tensor;
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

void CLImageConverterDWBlock::ImageToNCHW(half_t* image, float* tensor,
                                          const DDim& image_dim,
                                          const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float* p = tensor;
  size_t N = tensor_dim[1];
  size_t C = tensor_dim[0];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];

  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}

DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim& tensor_dim) {
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  size_t width = W * ((C + 3) / 4);
  size_t height = H * N;

  width_of_one_block_ = W;
  height_of_one_block_ = H;
  c_block_ = width / W;

  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterNormal::NCHWToImage(float* tensor, half_t* image,
                                         const DDim& tensor_dim) {
  CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
      << " Tensor dim is not support!";
  CLImageConverterDefault default_converter;
  default_converter.NCHWToImage(tensor, image, tensor_dim);
}

void CLImageConverterNormal::ImageToNCHW(half_t* image, float* tensor,
                                         const DDim& image_dim,
                                         const DDim& tensor_dim) {
  CLImageConverterDefault default_converter;
  default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}

DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
    const DDim& tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  size_t N, C;
  N = tensor_dim[0];
  C = tensor_dim[1];
  size_t width = (C + 3) / 4;
  size_t height = N * 16;  // N * (wino_blk_size + 2) * (wino_blk_size + 2)
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}

void CLImageConverterWinoTransWeight::NCHWToImage(float* tensor, half_t* image,
                                                  const DDim& tensor_dim) {}

void CLImageConverterWinoTransWeight::ImageToNCHW(half_t* image, float* tensor,
                                                  const DDim& image_dim,
                                                  const DDim& tensor_dim) {}

}  // namespace lite
}  // namespace paddle
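For intuition about the packing used by the default converter: a hypothetical NCHW tensor of dims [1, 8, 4, 4] stores four consecutive channels in the RGBA lanes of each texel, so InitImageDimInfoWith yields width = W * ((C + 3) / 4) = 4 * 2 = 8 and height = H * N = 4 * 1 = 4, i.e. an 8x4 image of half4 texels holding exactly 1 * 8 * 4 * 4 = 128 values.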
paddle/fluid/lite/opencl/cl_image_converter.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace paddle {
namespace lite {

class CLImageConverterBase {
 public:
  virtual ~CLImageConverterBase() {}

  virtual void NCHWToImage(float* nchw, half_t* image,
                           const DDim& tensor_dim) = 0;

  virtual void ImageToNCHW(half_t* image, float* nchw, const DDim& image_dim,
                           const DDim& tensor_dim) = 0;

  virtual DDim InitImageDimInfoWith(const DDim& tensor_dim) = 0;
};

class CLImageConverterDefault : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* nchw, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);
};

class CLImageConverterFolder : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* tensor, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);

  /*
   * width of original tensor
   * */
  inline size_t WidthOfOneBlock() const { return width_of_one_block_; }

  /*
   * height of original tensor
   * */
  inline size_t HeightOfOneBlock() const { return height_of_one_block_; }

  int GetCBlock() const { return c_block_; }

 private:
  int c_block_;
  int width_of_one_block_;
  int height_of_one_block_;
};

class CLImageConverterNormal : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* tensor, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);

  /*
   * width of original tensor
   * */
  inline size_t WidthOfOneBlock() const { return width_of_one_block_; }

  /*
   * height of original tensor
   * */
  inline size_t HeightOfOneBlock() const { return height_of_one_block_; }

  int GetCBlock() const { return c_block_; }

 private:
  int c_block_;
  int width_of_one_block_;
  int height_of_one_block_;
};

class CLImageConverterNWBlock : public CLImageConverterBase {
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* tensor, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);
};

class CLImageConverterDWBlock : public CLImageConverterBase {
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* tensor, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);
};

class CLImageConverterWinoTransWeight : public CLImageConverterBase {
 public:
  DDim InitImageDimInfoWith(const DDim& tensor_dim);
  void NCHWToImage(float* tensor, half_t* image, const DDim& tensor_dim);
  void ImageToNCHW(half_t* image, float* tensor, const DDim& image_dim,
                   const DDim& tensor_dim);
};

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline half4 activation(half4 in
#ifdef PRELU
                        ,
                        half4 prelu_alpha
#endif
                        ) {
  half4 output;
#ifdef PRELU
  output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif

#ifdef RELU
  output = fmax(in, (half4)(0.0f));
#endif
  return output;
}
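The PRELU/RELU branches are compile-time switches; presumably they are selected by injecting the macro through the build options that CLContext::GetProgram and CLEngine::BuildProgram already thread through to clBuildProgram, along these lines (the kernel and file names here are hypothetical):

// "-DRELU" ends up in the program build options, enabling the fmax branch.
helper.AddKernel("conv_relu", "conv_kernel.cl", "-DRELU");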
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

__kernel void elementwise_add(__global image2d_t input,
                              __global image2d_t bias,
                              __write_only image2d_t outputImage) {
  int x = get_global_id(0);
  int y = get_global_id(1);

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int2 coords;
  coords.x = x;
  coords.y = y;

  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords);
  half4 output = in + biase;
  write_imageh(outputImage, coords, output);
}
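Each work item here covers one half4 texel, i.e. four adjacent channels at one spatial position, which is why the host side in cl_caller.cc above launches a 2-D range of exactly {ImageWidth(), ImageHeight()} items. In sketch form (in_image, kernel, and queue as set up in cl_caller.cc):

auto global_work_size = cl::NDRange{in_image.ImageWidth(),
                                    in_image.ImageHeight()};
queue.enqueueNDRangeKernel(kernel, cl::NullRange, global_work_size,
                           cl::NullRange, nullptr, nullptr);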
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

#define MIN_VALUE -FLT_MAX

__kernel void pool_max(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int start_h = out_h * stride_h - pad_top;
  int end_h = min(start_h + ksize_h, in_height);
  start_h = max(start_h, 0);

  int start_w = out_w * stride_w - pad_left;
  int end_w = min(start_w + ksize_w, in_width);
  start_w = max(start_w, 0);

  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 max_value = (half4)(MIN_VALUE);
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      half4 tmp = read_imageh(input, sampler,
                              (int2)(pos_in_x + x, pos_in_y + y));
      max_value = max(max_value, tmp);
    }
  }

  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}

__kernel void pool_avg(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int start_h = max(out_h * stride_h - pad_top, 0);
  int end_h = min(start_h + ksize_h, in_height);

  int start_w = max(out_w * stride_w - pad_left, 0);
  int end_w = min(start_w + ksize_w, in_width);

  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 sum = (half4)(0.0f);
  int num = 0;
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      num++;
    }
  }
  half4 avg = sum / num;
  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
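The pooling kernels use a 3-D range whose axes line up with CLHelper::DefaultWorkSize for 4-D images: axis 0 is the channel block (out_c), axis 1 the output width (out_w), and axis 2 the fused batch-by-height index (out_nh), which the kernel splits back apart with out_nh / out_height and out_nh % out_height. A hedged host-side sketch (out_image, helper, queue, and pool_max_kernel stand in for objects set up as in cl_test.cc below):

// DefaultWorkSize(out_image) returns {c_blocks, w, n * h} for a 4-D tensor.
auto global = helper.DefaultWorkSize(out_image);
queue.enqueueNDRangeKernel(pool_max_kernel, cl::NullRange, global,
                           cl::NullRange, nullptr, nullptr);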
paddle/fluid/lite/opencl/cl_test.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string
(
cl_path
,
"/data/local/tmp/opencl"
,
"The OpenCL kernels path."
);
namespace
paddle
{
namespace
lite
{
TEST
(
cl_test
,
engine_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
FLAGS_cl_path
);
engine
->
platform
();
engine
->
device
();
engine
->
command_queue
();
auto
&
context
=
engine
->
context
();
auto
program
=
engine
->
CreateProgram
(
context
,
engine
->
cl_path
()
+
"/cl_kernel/"
+
"elementwise_add_kernel.cl"
);
auto
event
=
engine
->
CreateEvent
(
context
);
CHECK
(
engine
->
BuildProgram
(
program
.
get
()));
}
TEST
(
cl_test
,
context_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
FLAGS_cl_path
);
CLContext
context
;
context
.
GetKernel
(
"pool_max"
,
"pool_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
}
TEST
(
cl_test
,
kernel_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
FLAGS_cl_path
);
std
::
unique_ptr
<
CLContext
>
context
(
new
CLContext
);
// std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
std
::
unique_ptr
<
CLHelper
>
helper
(
new
CLHelper
);
helper
->
set_context
(
context
.
get
());
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
->
KernelAt
(
2
);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
in_data
[
i
]
=
1.
f
;
}
const
DDim
in_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
CLImage
in_image
;
in_image
.
set_tensor_data
(
in_data
.
get
(),
in_dim
);
in_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
in_image
;
std
::
unique_ptr
<
float
[]
>
bias_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
bias_data
[
i
]
=
2.
f
;
}
const
DDim
bias_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
CLImage
bias_image
;
bias_image
.
set_tensor_data
(
bias_data
.
get
(),
bias_dim
);
bias_image
.
InitNormalCLImage
(
helper
->
OpenCLContext
());
LOG
(
INFO
)
<<
bias_image
;
CLImage
out_image
;
const
DDim
out_dim
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
{
1024
,
512
});
out_image
.
InitEmptyImage
(
helper
->
OpenCLContext
(),
out_dim
);
LOG
(
INFO
)
<<
out_image
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
in_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
1
,
*
bias_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_image
.
cl_image
());
CL_CHECK_ERRORS
(
status
);
// auto global_work_size = helper->DefaultWorkSize(out_image);
size_t
width
=
in_image
.
ImageWidth
();
size_t
height
=
in_image
.
ImageHeight
();
auto
global_work_size
=
cl
::
NDRange
{
width
,
height
};
cl
::
Event
event
;
status
=
helper
->
OpenCLCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
nullptr
,
&
event
);
CL_CHECK_ERRORS
(
status
);
double
start_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_START
>
();
double
stop_nanos
=
event
.
getProfilingInfo
<
CL_PROFILING_COMMAND_END
>
();
double
elapsed_micros
=
(
stop_nanos
-
start_nanos
)
/
1000.0
;
LOG
(
INFO
)
<<
"Kernel Run Cost Time: "
<<
elapsed_micros
<<
" us."
;
LOG
(
INFO
)
<<
out_image
;
}
TEST(cl_test, elementwise_add_test) {
  std::default_random_engine engine;
  std::uniform_real_distribution<float> dist(-5, 5);

  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = dist(engine);
  }

  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = dist(engine);
  }

  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> out(new float[1024 * 512]);

  bool status = InitOpenCLEngine(FLAGS_cl_path);
  CHECK(status) << "Fail to initialize OpenCL engine.";
  CLContext context;
  elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
                  out.get(), out_dim);

  int stride = 1024 * 512 / 20;
  for (int i = 0; i < 1024 * 512; i += stride) {
    std::cout << out[i] << " ";
  }
  std::cout << std::endl;
}

}  // namespace lite
}  // namespace paddle
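Reviewer note: these tests locate the .cl sources through the cl_path gflag defined at the top of this file, so the kernel directory must be pushed to the device beforehand. An illustrative invocation (the binary name is hypothetical) would be

  ./test_cl_runtime --cl_path=/data/local/tmp/opencl

where the given directory must contain the cl_kernel/ subdirectory referenced by CreateProgram.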
paddle/fluid/lite/opencl/cl_tool.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {

const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) \
  case NAME:                   \
    return #NAME;
  // Suppose that no combinations are possible.
  switch (error) {
    CASE_CL_CONSTANT(CL_SUCCESS)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
    CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
    CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
    CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
    CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
    CASE_CL_CONSTANT(CL_MAP_FAILURE)
    CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
    CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
    CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE)
    CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
    CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
    CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
    CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
    CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
    CASE_CL_CONSTANT(CL_INVALID_BINARY)
    CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL)
    CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
    CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
    CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
    CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
    CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_EVENT)
    CASE_CL_CONSTANT(CL_INVALID_OPERATION)
    CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
    default:
      return "UNKNOWN ERROR CODE";
  }
#undef CASE_CL_CONSTANT
}

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_tool.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {

const char *opencl_error_to_str(cl_int error);

#define CL_CHECK_ERRORS(ERR)                                          \
  if (ERR != CL_SUCCESS) {                                            \
    printf(                                                           \
        "OpenCL error with code %s happened in file %s at line %d. "  \
        "Exiting.\n",                                                 \
        opencl_error_to_str(ERR), __FILE__, __LINE__);                \
  }

}  // namespace lite
}  // namespace paddle
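Reviewer note: a minimal usage sketch of the macro, mirroring the call sites in the tests above. Despite the "Exiting." message, the macro as written only logs and continues:

  cl_int status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);  // prints the symbolic error name on failure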
paddle/fluid/lite/opencl/cl_wrapper.cc
0 → 100644
This diff is collapsed.
paddle/fluid/lite/operators/use_ops.h
...
@@ -13,9 +13,10 @@
 // limitations under the License.
 #pragma once
-/*
- * ATTENTION this header file can only include in .cc file.
- */
+// ATTENTION This can only be included in a .cc file.
 #include "paddle/fluid/lite/core/op_registry.h"
 USE_LITE_OP(mul);
 USE_LITE_OP(fc);
...
paddle/fluid/lite/tools/build.sh
...
@@ -25,6 +25,23 @@ function cmake_x86 {
     cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
 }

+function cmake_opencl {
+    # $1: ARM_TARGET_OS in "android", "armlinux"
+    # $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a", "armeabi-v7a-hf"
+    cmake .. \
+        -DLITE_WITH_OPENCL=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_MKL=OFF \
+        -DWITH_LITE=ON \
+        -DLITE_WITH_CUDA=OFF \
+        -DLITE_WITH_X86=OFF \
+        -DLITE_WITH_ARM=ON \
+        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
+        -DWITH_TESTING=ON \
+        -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
+}
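Reviewer note: as the new case in the main dispatcher further down shows, this function is reached via the script's cmake_opencl subcommand with ARM_OS and ARM_ABI supplied by the script's argument parsing (defined elsewhere in build.sh), e.g. ./paddle/fluid/lite/tools/build.sh cmake_opencl run from the build directory once those variables are set.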
# This method is only called in CI.
function cmake_x86_for_CI {
    prepare_for_codegen
    # fake an empty __generated_code__.cc to pass cmake.
...
@@ -85,8 +102,8 @@ function build_test_server {
 # test_arm_android <some_test_name> <adb_port_number>
 function test_arm_android {
-    test_name=$1
-    port=$2
+    local test_name=$1
+    local port=$2
     if [[ "${test_name}x" == "x" ]]; then
         echo "test_name can not be empty"
         exit 1
...
@@ -99,12 +116,18 @@ function test_arm_android {
     echo "test name: ${test_name}"
     adb_work_dir="/data/local/tmp"
-    skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite")
+    skip_list=("test_model_parser_lite" "test_mobilenetv1_lite" "test_mobilenetv2_lite" "test_resnet50_lite" "test_inceptionv4_lite" "test_light_api_lite" "test_apis_lite")
     for skip_name in ${skip_list[@]}; do
         [[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return
     done

-    testpath=$(find ./paddle/fluid -name ${test_name})
+    local testpath=$(find ./paddle/fluid -name ${test_name})

+    # if [[ "$test_name" == "test_light_api" ]]; then
+    #     local model_path=$(find . -name "lite_naive_model")
+    #     arm_push_necessary_file $port $model_path $adb_work_dir
+    # fi

     adb -s emulator-${port} push ${testpath} ${adb_work_dir}
     adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
     adb -s emulator-${port} shell "./${adb_work_dir}/${test_name}"
...
@@ -204,6 +227,7 @@ function test_arm {
     abi=$2
     lang=$3
+    port=$4
     if [[ ${os} == "armlinux" ]]; then
         # TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf
         echo "Skip test arm linux yet. armlinux must in another docker"
...
@@ -214,13 +238,14 @@ function test_arm {
         echo "android do not need armv7hf"
         return 0
     fi
+    # TODO(yuanshuai): enable armv7 on android
     if [[ ${abi} == "armv7" ]]; then
         echo "skip android v7 test yet"
         return 0
     fi

     echo "test file: ${TESTS_FILE}"
     for _test in $(cat $TESTS_FILE); do
         test_arm_android $_test $port
...
@@ -235,13 +260,21 @@ function prepare_emulator {
     adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done

     # start android armv8 and armv7 emulators first
     echo n | avdmanager create avd -f -n paddle-armv8 -k "system-images;android-24;google_apis;arm64-v8a"
-    echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv8 -noaudio -no-window -gpu off -verbose -port ${port_armv8} &
+    echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv8 -noaudio -no-window -gpu off -port ${port_armv8} &
     sleep 1m
     echo n | avdmanager create avd -f -n paddle-armv7 -k "system-images;android-24;google_apis;armeabi-v7a"
-    echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv7 -noaudio -no-window -gpu off -verbose -port ${port_armv7} &
+    echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv7 -noaudio -no-window -gpu off -port ${port_armv7} &
     sleep 1m
 }
+function arm_push_necessary_file {
+    local port=$1
+    local testpath=$2
+    local adb_work_dir=$3
+
+    adb -s emulator-${port} push ${testpath} ${adb_work_dir}
+}

 # We split the arm unittests into several sub-tasks to parallelize them and reduce the overall CI time.
 # sub-task1
...
@@ -286,20 +319,22 @@ function build_test_arm_subtask_armlinux {
+    prepare_emulator $port_armv8 $port_armv7
+    cur=$PWD

     # job 5
-    build_arm "armlinux" "armv8"
-    test_arm "armlinux" "armv8"
-    cd -
+    build_arm "armlinux" "armv8" "gcc" $port_armv8
+    test_arm "armlinux" "armv8" "gcc" $port_armv8
+    cd $cur

     # job 6
-    build_arm "armlinux" "armv7"
-    test_arm "armlinux" "armv7"
-    cd -
+    build_arm "armlinux" "armv7" "gcc" $port_armv8
+    test_arm "armlinux" "armv7" "gcc" $port_armv8
+    cd $cur

     # job 7
-    build_arm "armlinux" "armv7hf"
-    test_arm "armlinux" "armv7hf"
-    cd -
+    build_arm "armlinux" "armv7hf" "gcc" $port_armv8
+    test_arm "armlinux" "armv7hf" "gcc" $port_armv8
+    cd $cur

     adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done
     echo "Done"
...
@@ -333,6 +368,22 @@ function build_test_arm_subtask_model {
     echo "Done"
 }

+# This test loads a model, optimizes it, and checks the prediction results of both the cxx and light APIs.
+function test_arm_predict_apis {
+    local port=$1
+    local workspace=$2
+    local naive_model_path=$3
+    local api_test_path=$(find . -name "test_apis_lite")
+    # the model is pushed to ./lite_naive_model
+    adb -s emulator-${port} push ${naive_model_path} ${workspace}
+    adb -s emulator-${port} push $api_test_path ${workspace}
+
+    # test cxx_api first to store the optimized model.
+    adb -s emulator-${port} shell ./test_apis_lite --model_dir ./lite_naive_model --optimized_model ./lite_naive_model_opt
+}
# Build the code and run lite arm tests. This is executed in the CI system.
function build_test_arm {
    ########################################################################
...
@@ -404,6 +455,10 @@ function main {
         cmake_x86)
             cmake_x86
             shift
             ;;
+        cmake_opencl)
+            cmake_opencl $ARM_OS $ARM_ABI
+            shift
+            ;;
         cmake_cuda)
             cmake_cuda
             shift
...
paddle/fluid/lite/utils/io.h
...
@@ -18,11 +18,12 @@
 #include <fstream>
 #include <string>
 #include "paddle/fluid/lite/utils/cp_logging.h"
+#include "paddle/fluid/lite/utils/string.h"

 namespace paddle {
 namespace lite {

 static bool IsFileExists(const std::string &path) {
   std::ifstream file(path);
   bool res = file.is_open();
   if (res) {
...
@@ -31,5 +32,15 @@ static bool IsFileExists(const std::string &path) {
   return res;
 }

+// ARM mobile does not support system("mkdir -p") in C++, so call mkdir(2) directly there.
+static void MkDirRecur(const std::string &path) {
+#ifndef LITE_WITH_ARM
+  CHECK_EQ(system(string_format("mkdir -p %s", path.c_str()).c_str()), 0)
+      << "Can't mkdir " << path;
+#else   // On ARM
+  CHECK_NE(mkdir(path.c_str(), S_IRWXU), -1) << "Can't mkdir " << path;
+#endif
+}

 }  // namespace lite
 }  // namespace paddle
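Reviewer note: on the ARM branch above, mkdir(2) creates only the final path component, so MkDirRecur is not actually recursive there and will fail for nested paths such as a/b/c. A minimal sketch of a genuinely recursive variant, assuming POSIX <sys/stat.h> (the helper name is hypothetical, not part of this patch):

  #include <string>
  #include <sys/stat.h>

  // Create each component of `path` in turn so nested directories also
  // work on ARM; errors such as EEXIST are deliberately ignored here.
  static void MkDirRecurPosix(const std::string &path) {
    for (size_t pos = 0; pos != std::string::npos;) {
      pos = path.find('/', pos + 1);
      mkdir(path.substr(0, pos).c_str(), S_IRWXU);
    }
  }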
paddle/fluid/lite/utils/string.h
...
@@ -74,5 +74,15 @@ static std::string Repr(const std::vector<std::string>& v) {
   return "{" + Join(tmp, ",") + "}";
 }

+static std::vector<std::string> Split(const std::string& s, char delim) {
+  std::stringstream ss(s);
+  std::string line;
+  std::vector<std::string> res;
+  while (std::getline(ss, line, delim)) {
+    res.push_back(line);
+  }
+  return res;
+}

 }  // namespace lite
 }  // namespace paddle
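Reviewer note: a quick usage sketch for the new Split helper (illustrative only):

  // Split("conv,pool,fc", ',') yields {"conv", "pool", "fc"};
  // an empty input string yields an empty vector.
  std::vector<std::string> pieces = Split("conv,pool,fc", ',');

Since Split relies on std::stringstream, <sstream> needs to be among string.h's includes.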