Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a95dcea5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a95dcea5
编写于
6月 23, 2019
作者:
H
hong19860320
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'incubate/lite' of
http://10.87.145.36/inference/paddlelite
into hongming/fix_pooling
上级
54f421c6
c8deaaa9
变更
63
隐藏空白更改
内联
并排
Showing
63 changed file
with
4083 addition
and
296 deletion
+4083
-296
.gitlab-ci.yml
.gitlab-ci.yml
+11
-34
CMakeLists.txt
CMakeLists.txt
+7
-0
cmake/configure.cmake
cmake/configure.cmake
+4
-0
cmake/external/opencl-clhpp.cmake
cmake/external/opencl-clhpp.cmake
+36
-0
cmake/external/opencl-headers.cmake
cmake/external/opencl-headers.cmake
+33
-0
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+4
-2
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+26
-27
paddle/fluid/lite/api/apis_test.cc
paddle/fluid/lite/api/apis_test.cc
+25
-10
paddle/fluid/lite/api/cxx_api.cc
paddle/fluid/lite/api/cxx_api.cc
+54
-7
paddle/fluid/lite/api/cxx_api.h
paddle/fluid/lite/api/cxx_api.h
+30
-57
paddle/fluid/lite/api/cxx_api_bin.cc
paddle/fluid/lite/api/cxx_api_bin.cc
+1
-1
paddle/fluid/lite/api/cxx_api_test.cc
paddle/fluid/lite/api/cxx_api_test.cc
+1
-1
paddle/fluid/lite/api/inceptionv4_test.cc
paddle/fluid/lite/api/inceptionv4_test.cc
+17
-5
paddle/fluid/lite/api/light_api.cc
paddle/fluid/lite/api/light_api.cc
+64
-0
paddle/fluid/lite/api/light_api.h
paddle/fluid/lite/api/light_api.h
+11
-52
paddle/fluid/lite/api/light_api_test.cc
paddle/fluid/lite/api/light_api_test.cc
+4
-2
paddle/fluid/lite/api/lite_api_test_helper.cc
paddle/fluid/lite/api/lite_api_test_helper.cc
+1
-1
paddle/fluid/lite/api/mobilenetv1_test.cc
paddle/fluid/lite/api/mobilenetv1_test.cc
+17
-5
paddle/fluid/lite/api/mobilenetv2_test.cc
paddle/fluid/lite/api/mobilenetv2_test.cc
+17
-5
paddle/fluid/lite/api/resnet50_test.cc
paddle/fluid/lite/api/resnet50_test.cc
+17
-5
paddle/fluid/lite/api/test_helper.h
paddle/fluid/lite/api/test_helper.h
+20
-9
paddle/fluid/lite/arm/math/CMakeLists.txt
paddle/fluid/lite/arm/math/CMakeLists.txt
+2
-0
paddle/fluid/lite/core/CMakeLists.txt
paddle/fluid/lite/core/CMakeLists.txt
+1
-1
paddle/fluid/lite/core/kernel.cc
paddle/fluid/lite/core/kernel.cc
+31
-0
paddle/fluid/lite/core/kernel.h
paddle/fluid/lite/core/kernel.h
+2
-24
paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
+2
-2
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
+1
-1
paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
+1
-1
paddle/fluid/lite/core/op_registry.h
paddle/fluid/lite/core/op_registry.h
+2
-0
paddle/fluid/lite/core/optimizer.h
paddle/fluid/lite/core/optimizer.h
+0
-1
paddle/fluid/lite/core/target_wrapper.h
paddle/fluid/lite/core/target_wrapper.h
+15
-1
paddle/fluid/lite/kernels/arm/CMakeLists.txt
paddle/fluid/lite/kernels/arm/CMakeLists.txt
+0
-2
paddle/fluid/lite/kernels/arm/conv_compute.cc
paddle/fluid/lite/kernels/arm/conv_compute.cc
+23
-0
paddle/fluid/lite/kernels/arm/conv_compute.h
paddle/fluid/lite/kernels/arm/conv_compute.h
+19
-0
paddle/fluid/lite/kernels/use_kernels.h
paddle/fluid/lite/kernels/use_kernels.h
+20
-16
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+3
-4
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+19
-0
paddle/fluid/lite/opencl/cl2_header.h
paddle/fluid/lite/opencl/cl2_header.h
+21
-0
paddle/fluid/lite/opencl/cl_caller.cc
paddle/fluid/lite/opencl/cl_caller.cc
+88
-0
paddle/fluid/lite/opencl/cl_caller.h
paddle/fluid/lite/opencl/cl_caller.h
+30
-0
paddle/fluid/lite/opencl/cl_context.cc
paddle/fluid/lite/opencl/cl_context.cc
+73
-0
paddle/fluid/lite/opencl/cl_context.h
paddle/fluid/lite/opencl/cl_context.h
+43
-0
paddle/fluid/lite/opencl/cl_engine.cc
paddle/fluid/lite/opencl/cl_engine.cc
+171
-0
paddle/fluid/lite/opencl/cl_engine.h
paddle/fluid/lite/opencl/cl_engine.h
+96
-0
paddle/fluid/lite/opencl/cl_half.cc
paddle/fluid/lite/opencl/cl_half.cc
+518
-0
paddle/fluid/lite/opencl/cl_half.h
paddle/fluid/lite/opencl/cl_half.h
+32
-0
paddle/fluid/lite/opencl/cl_helper.cc
paddle/fluid/lite/opencl/cl_helper.cc
+90
-0
paddle/fluid/lite/opencl/cl_helper.h
paddle/fluid/lite/opencl/cl_helper.h
+52
-0
paddle/fluid/lite/opencl/cl_image.cc
paddle/fluid/lite/opencl/cl_image.cc
+164
-0
paddle/fluid/lite/opencl/cl_image.h
paddle/fluid/lite/opencl/cl_image.h
+118
-0
paddle/fluid/lite/opencl/cl_image_converter.cc
paddle/fluid/lite/opencl/cl_image_converter.cc
+450
-0
paddle/fluid/lite/opencl/cl_image_converter.h
paddle/fluid/lite/opencl/cl_image_converter.h
+115
-0
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
+34
-0
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
+27
-0
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
+91
-0
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+154
-0
paddle/fluid/lite/opencl/cl_tool.cc
paddle/fluid/lite/opencl/cl_tool.cc
+84
-0
paddle/fluid/lite/opencl/cl_tool.h
paddle/fluid/lite/opencl/cl_tool.h
+32
-0
paddle/fluid/lite/opencl/cl_wrapper.cc
paddle/fluid/lite/opencl/cl_wrapper.cc
+962
-0
paddle/fluid/lite/operators/use_ops.h
paddle/fluid/lite/operators/use_ops.h
+4
-3
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+71
-16
paddle/fluid/lite/utils/io.h
paddle/fluid/lite/utils/io.h
+12
-1
paddle/fluid/lite/utils/string.h
paddle/fluid/lite/utils/string.h
+10
-0
未找到文件。
.gitlab-ci.yml
浏览文件 @
a95dcea5
...
...
@@ -76,8 +76,8 @@ build:mobile_android:
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
build.lite.android.armv8.gcc/third_party
-
build.lite.android.armv7.gcc/third_party
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_ccache
script
:
...
...
@@ -96,8 +96,9 @@ build:mobile_armlinux:
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
build.lite.armlinux.armv8.gcc
-
build.lite.armlinux.armv7.gcc
-
build.lite.armlinux.armv7hf.gcc
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_ccache2
script
:
...
...
@@ -107,24 +108,13 @@ build:mobile_armlinux:
dependencies
:
-
build:server
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
build:mobile_model_mobilenetv1:
tags
:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
script
:
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv1
-
./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv1
...
...
@@ -135,8 +125,7 @@ build:mobile_model_mobilenetv1:
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
build.lite.android.armv8.gcc
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_model_mobilenetv1
...
...
@@ -145,12 +134,7 @@ build:mobile_model_mobilenetv2:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
script
:
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_mobilenetv2
-
./paddle/fluid/lite/tools/build.sh build_test_arm_model_mobilenetv2
...
...
@@ -161,8 +145,7 @@ build:mobile_model_mobilenetv2:
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
build.lite.android.armv8.gcc
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_model_mobilenetv2
...
...
@@ -171,12 +154,7 @@ build:mobile_model_resnet50:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
~/.ccache
script
:
-
export CCACHE_DIR=$CI_PROJECT_DIR/build_mobile_model_resnet50
-
./paddle/fluid/lite/tools/build.sh build_test_arm_model_resnet50
...
...
@@ -187,8 +165,7 @@ build:mobile_model_resnet50:
cache
:
key
:
mobile_thirdparty
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
build.lite.android.armv8.gcc
-
~/.ccache
-
$CI_PROJECT_DIR/build_mobile_model_resnet50
...
...
CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
option
(
LITE_WITH_CUDA
"Enable CUDA in lite mode"
OFF
)
option
(
LITE_WITH_X86
"Enable X86 in lite mode"
ON
)
option
(
LITE_WITH_ARM
"Enable ARM in lite mode"
OFF
)
option
(
LITE_WITH_OPENCL
"Enable OpenCL support in lite"
OFF
)
option
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
"Enable light-weight framework"
OFF
)
option
(
LITE_WITH_PROFILE
"Enable profile mode in lite framework"
OFF
)
...
...
@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include
(
external/eigen
)
# download eigen3
include
(
ccache
)
# set ccache for compilation
# for opencl
if
(
LITE_WITH_OPENCL
)
include
(
external/opencl-headers
)
include
(
external/opencl-clhpp
)
endif
()
include
(
generic
)
# simplify cmake module
include
(
configure
)
# add paddle env configuration
...
...
cmake/configure.cmake
浏览文件 @
a95dcea5
...
...
@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
add_definitions
(
"-DLITE_WITH_ARM"
)
endif
()
if
(
LITE_WITH_OPENCL
)
add_definitions
(
"-DLITE_WITH_OPENCL"
)
endif
()
if
(
LITE_WITH_PROFILE
)
add_definitions
(
"-DLITE_WITH_PROFILE"
)
endif
()
...
...
cmake/external/opencl-clhpp.cmake
0 → 100644
浏览文件 @
a95dcea5
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE
(
ExternalProject
)
SET
(
OPENCL_CLHPP_SRCS_DIR
${
THIRD_PARTY_PATH
}
/opencl-clhpp
)
SET
(
OPENCL_CLHPP_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/opencl-clhpp
)
SET
(
OPENCL_CLHPP_INCLUDE_DIR
"
${
OPENCL_CLHPP_INSTALL_DIR
}
"
CACHE PATH
"opencl-clhpp include directory."
FORCE
)
INCLUDE_DIRECTORIES
(
${
OPENCL_CLHPP_INCLUDE_DIR
}
)
ExternalProject_Add
(
opencl_clhpp
GIT_REPOSITORY
"https://github.com/KhronosGroup/OpenCL-CLHPP.git"
GIT_TAG
"v2.0.10"
PREFIX
"
${
OPENCL_CLHPP_SRCS_DIR
}
"
CMAKE_ARGS -DBUILD_DOCS=OFF
-DBUILD_EXAMPLES=OFF
-DBUILD_TESTS=OFF
-DCMAKE_INSTALL_PREFIX=
${
OPENCL_CLHPP_INSTALL_DIR
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
OPENCL_CLHPP_INSTALL_DIR
}
-DCMAKE_BUILD_TYPE:STRING=
${
THIRD_PARTY_BUILD_TYPE
}
)
ADD_DEPENDENCIES
(
opencl_clhpp opencl_headers
)
cmake/external/opencl-headers.cmake
0 → 100644
浏览文件 @
a95dcea5
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE
(
ExternalProject
)
SET
(
OPENCL_HEADERS_SRCS_DIR
${
THIRD_PARTY_PATH
}
/opencl-headers
)
SET
(
OPENCL_HEADERS_INCLUDE_DIR
"
${
OPENCL_HEADERS_SRCS_DIR
}
/src/opencl_headers"
CACHE PATH
"opencl-headers include directory."
FORCE
)
INCLUDE_DIRECTORIES
(
${
OPENCL_HEADERS_INCLUDE_DIR
}
)
ExternalProject_Add
(
opencl_headers
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/KhronosGroup/OpenCL-Headers.git"
GIT_TAG
"c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
PREFIX
${
OPENCL_HEADERS_SRCS_DIR
}
DOWNLOAD_NAME
"OpenCL-Headers"
CONFIGURE_COMMAND
""
BUILD_COMMAND
""
INSTALL_COMMAND
""
TEST_COMMAND
""
)
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -24,8 +24,7 @@ function(lite_download_and_uncompress INSTALL_DIR URL FILENAME)
${
EXTERNAL_PROJECT_NAME
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
PREFIX
${
INSTALL_DIR
}
DOWNLOAD_COMMAND wget --no-check-certificate -q -O
${
INSTALL_DIR
}
/
${
FILENAME
}
${
URL
}
/
${
FILENAME
}
&&
${
CMAKE_COMMAND
}
-E tar xzf
${
INSTALL_DIR
}
/
${
FILENAME
}
DOWNLOAD_COMMAND wget --no-check-certificate -q -O
${
INSTALL_DIR
}
/
${
FILENAME
}
${
URL
}
/
${
FILENAME
}
&&
${
CMAKE_COMMAND
}
-E tar xzf
${
INSTALL_DIR
}
/
${
FILENAME
}
DOWNLOAD_DIR
${
INSTALL_DIR
}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND
""
...
...
@@ -143,6 +142,8 @@ function(lite_cc_binary TARGET)
HVY_DEPS
${
args_HVY_DEPS
}
)
cc_binary
(
${
TARGET
}
SRCS
${
args_SRCS
}
DEPS
${
deps
}
${
args_DEPS
}
)
# collect targets need to compile for lite
add_dependencies
(
lite_compile_deps
${
TARGET
}
)
endfunction
()
# Add a unit-test name to file for latter offline manual test.
...
...
@@ -181,6 +182,7 @@ add_subdirectory(x86)
add_subdirectory
(
arm
)
add_subdirectory
(
host
)
add_subdirectory
(
cuda
)
add_subdirectory
(
opencl
)
add_subdirectory
(
model_parser
)
add_subdirectory
(
utils
)
add_subdirectory
(
api
)
...
...
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -12,7 +12,6 @@ lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
CUDA_DEPS kernels_cuda
X86_DEPS
${
x86_kernels
}
)
lite_cc_library
(
cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper
)
set
(
light_api_deps
scope_lite target_wrapper_host model_parser_lite program_lite
)
...
...
@@ -21,27 +20,34 @@ if(LITE_WITH_CUDA)
set
(
light_api_deps
${
light_api_deps
}
target_wrapper_cuda
)
endif
()
lite_cc_library
(
light_api_lite SRCS light_api.cc
DEPS
${
light_api_deps
}
${
ops_lite
}
${
host_kernels
}
)
message
(
STATUS
"get ops
${
ops_lite
}
"
)
message
(
STATUS
"get Host kernels
${
host_kernels
}
"
)
message
(
STATUS
"get ARM kernels
${
arm_kernels
}
"
)
lite_cc_library
(
cxx_api_lite SRCS cxx_api.cc DEPS
${
cxx_api_lite_deps
}
${
ops_lite
}
${
host_kernels
}
program_lite
)
lite_cc_library
(
light_api_lite SRCS light_api.cc
DEPS scope_lite target_wrapper_host model_parser_lite
${
light_api_deps
}
${
ops_lite
}
${
host_kernels
}
program_lite
CUDA_DEPS target_wrapper_cuda
X86_DEPS
${
x86_kernels
}
operator
ARM_DEPS
${
arm_kernels
}
)
include
(
ExternalProject
)
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
"A path setting inference demo download directories."
)
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING
)
lite_cc_test
(
test_cxx_api_lite SRCS cxx_api_test.cc
DEPS cxx_api_lite mir_passes
DEPS cxx_api_lite mir_passes
lite_api_test_helper
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
endif
()
if
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING
)
set
(
lite_model_test_DEPS cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
arm_kernels
}
)
...
...
@@ -68,25 +74,18 @@ endif()
# These tests needs CLI arguments, and is not supported in ARM CI.
# TODO(Superjomn) support latter.
if
(
NOT LITE_ON_MOBILE
)
lite_cc_test
(
test_light_api SRCS light_api_test.cc
DEPS light_api_lite mir_passes
X86_DEPS
${
x86_kernels
}
ARGS --optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt
SERIAL
)
lite_cc_test
(
test_apis_lite SRCS apis_test.cc
DEPS cxx_api_lite light_api_lite
${
ops_lite
}
mir_passes
X86_DEPS
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
endif
()
lite_cc_test
(
test_light_api_lite SRCS light_api_test.cc
DEPS light_api_lite program_lite mir_passes
ARGS --optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt
SERIAL
)
lite_cc_test
(
test_apis_lite SRCS apis_test.cc
DEPS cxx_api_lite light_api_lite
${
ops_lite
}
X86_DEPS
${
x86_kernels
}
operator
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
lite_cc_binary
(
cxx_api_lite_bin SRCS cxx_api_bin.cc
DEPS
cxx_api_lite
model_parser_lite
target_wrapper_host
mir_passes
${
ops_lite
}
${
host_kernels
}
ARM_DEPS
${
arm_kernels
}
)
#lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
#X86_DEPS operator
#DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
#ARM_DEPS ${arm_kernels})
paddle/fluid/lite/api/apis_test.cc
浏览文件 @
a95dcea5
...
...
@@ -39,23 +39,41 @@ void SetConstInput(lite::Tensor* x) {
}
}
bool
CompareTensors
(
const
std
::
string
&
name
,
const
ExecutorLite
&
cxx_api
,
bool
CompareTensors
(
const
std
::
string
&
name
,
const
Predictor
&
cxx_api
,
const
LightPredictor
&
light_api
)
{
const
auto
*
a
=
cxx_api
.
GetTensor
(
name
);
const
auto
*
b
=
light_api
.
GetTensor
(
name
);
return
TensorCompareWith
(
*
a
,
*
b
);
}
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi_LightApi
,
optim_model
)
{
lite
::
Predictor
cxx_api
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
// Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target not works, but it can still
// select ARM kernels.
cxx_api
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
valid_places
);
cxx_api
.
SaveModel
(
FLAGS_optimized_model
);
}
TEST
(
CXXApi_LightApi
,
save_and_load_model
)
{
lite
::
ExecutorLite
cxx_api
;
lite
::
LightPredictor
light_api
;
lite
::
Predictor
cxx_api
;
lite
::
LightPredictor
light_api
(
FLAGS_optimized_model
)
;
// CXXAPi
{
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
cxx_api
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
// Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target not works, but it can still
// select ARM kernels.
cxx_api
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
valid_places
);
auto
*
x
=
cxx_api
.
GetInput
(
0
);
...
...
@@ -69,8 +87,6 @@ TEST(CXXApi_LightApi, save_and_load_model) {
// LightApi
{
light_api
.
Build
(
FLAGS_optimized_model
);
auto
*
x
=
light_api
.
GetInput
(
0
);
SetConstInput
(
x
);
...
...
@@ -89,7 +105,6 @@ TEST(CXXApi_LightApi, save_and_load_model) {
ASSERT_TRUE
(
CompareTensors
(
tensor_name
,
cxx_api
,
light_api
));
}
}
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/cxx_api.cc
浏览文件 @
a95dcea5
...
...
@@ -17,19 +17,66 @@
#include <string>
#include <utility>
#include <vector>
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include "paddle/fluid/platform/port.h"
#endif
#include "paddle/fluid/lite/utils/io.h"
namespace
paddle
{
namespace
lite
{
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
void
ExecutorLite
::
SaveModel
(
const
std
::
string
&
dir
)
{
MkDirRecursively
(
dir
.
c_str
());
void
Predictor
::
SaveModel
(
const
std
::
string
&
dir
)
{
#ifndef LITE_WITH_ARM
MkDirRecur
(
dir
);
#else
#endif
program_
->
PersistModel
(
dir
,
program_desc_
);
LOG
(
INFO
)
<<
"Save model to "
<<
dir
;
}
lite
::
Tensor
*
Predictor
::
GetInput
(
size_t
offset
)
{
auto
*
_feed_list
=
program_
->
exec_scope
()
->
FindVar
(
"feed"
);
CHECK
(
_feed_list
)
<<
"no feed variable in exec_scope"
;
auto
*
feed_list
=
_feed_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
if
(
offset
>=
feed_list
->
size
())
{
feed_list
->
resize
(
offset
+
1
);
}
return
&
feed_list
->
at
(
offset
);
}
const
lite
::
Tensor
*
Predictor
::
GetOutput
(
size_t
offset
)
{
auto
*
_fetch_list
=
program_
->
exec_scope
()
->
FindVar
(
"fetch"
);
CHECK
(
_fetch_list
)
<<
"no fatch variable in exec_scope"
;
auto
&
fetch_list
=
*
_fetch_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
CHECK_LT
(
offset
,
fetch_list
.
size
())
<<
"offset "
<<
offset
<<
" overflow"
;
return
&
fetch_list
.
at
(
offset
);
}
void
Predictor
::
Build
(
const
std
::
string
&
model_path
,
const
Place
&
prefer_place
,
const
std
::
vector
<
Place
>
&
valid_places
)
{
LoadModel
(
model_path
,
scope_
.
get
(),
&
program_desc_
);
Build
(
program_desc_
,
prefer_place
,
valid_places
);
}
const
framework
::
proto
::
ProgramDesc
&
Predictor
::
program_desc
()
const
{
return
program_desc_
;
}
void
Predictor
::
Build
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
const
Place
&
prefer_place
,
const
std
::
vector
<
Place
>
&
valid_places
)
{
program_desc_
=
desc
;
Program
program
(
desc
,
scope_
,
valid_places
);
optimizer_
.
KernelPickPreferPlace
(
prefer_place
);
core
::
KernelPickFactor
factor
;
factor
.
ConsiderTarget
();
factor
.
ConsiderPrecision
();
optimizer_
.
Run
(
std
::
move
(
program
),
valid_places
,
factor
);
program_
=
optimizer_
.
GenRuntimeProgram
();
}
const
lite
::
Tensor
*
Predictor
::
GetTensor
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
program_
->
exec_scope
()
->
FindVar
(
name
);
return
&
var
->
Get
<
lite
::
Tensor
>
();
}
#endif
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/cxx_api.h
浏览文件 @
a95dcea5
...
...
@@ -26,68 +26,39 @@
namespace
paddle
{
namespace
lite
{
struct
Config
{};
class
ExecutorLite
{
/*
* Predictor for inference, input a model, it will optimize and execute it.
*/
class
Predictor
{
public:
ExecutorLite
()
{
scope_
=
std
::
make_shared
<
Scope
>
();
}
explicit
ExecutorLite
(
const
std
::
shared_ptr
<
lite
::
Scope
>&
root_scope
)
{
scope_
=
root_scope
;
}
// Create an empty predictor.
Predictor
()
{
scope_
=
std
::
make_shared
<
Scope
>
();
}
// Create a predictor with the weight variable scope set.
explicit
Predictor
(
const
std
::
shared_ptr
<
lite
::
Scope
>&
root_scope
)
:
scope_
(
root_scope
)
{}
// Build from a model, with places set for hardware config.
void
Build
(
const
std
::
string
&
model_path
,
const
Place
&
prefer_place
,
const
std
::
vector
<
Place
>&
valid_places
)
{
LoadModel
(
model_path
,
scope_
.
get
(),
&
program_desc_
);
Build
(
program_desc_
,
prefer_place
,
valid_places
);
}
const
std
::
vector
<
Place
>&
valid_places
);
void
Build
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
const
Place
&
prefer_place
,
const
std
::
vector
<
Place
>&
valid_places
)
{
program_desc_
=
desc
;
Program
program
(
desc
,
scope_
,
valid_places
);
optimizer_
.
KernelPickPreferPlace
(
prefer_place
);
core
::
KernelPickFactor
factor
;
factor
.
ConsiderTarget
();
optimizer_
.
Run
(
std
::
move
(
program
),
valid_places
,
factor
);
program_
=
optimizer_
.
GenRuntimeProgram
();
}
// This method is disabled in mobile, or unnecessary dependencies required.
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
void
SaveModel
(
const
std
::
string
&
dir
);
#endif
const
Place
&
prefer_place
,
const
std
::
vector
<
Place
>&
valid_places
);
// Get offset-th col of feed.
lite
::
Tensor
*
GetInput
(
size_t
offset
)
{
auto
*
_feed_list
=
program_
->
exec_scope
()
->
FindVar
(
"feed"
);
CHECK
(
_feed_list
)
<<
"no feed variable in exec_scope"
;
auto
*
feed_list
=
_feed_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
if
(
offset
>=
feed_list
->
size
())
{
feed_list
->
resize
(
offset
+
1
);
}
return
&
feed_list
->
at
(
offset
);
}
// Run the predictor for a single batch of data.
void
Run
()
{
program_
->
Run
();
}
const
lite
::
Tensor
*
GetOutput
(
size_t
offset
)
{
auto
*
_fetch_list
=
program_
->
exec_scope
()
->
FindVar
(
"fetch"
);
CHECK
(
_fetch_list
)
<<
"no fatch variable in exec_scope"
;
auto
&
fetch_list
=
*
_fetch_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
CHECK_LT
(
offset
,
fetch_list
.
size
())
<<
"offset "
<<
offset
<<
" overflow"
;
return
&
fetch_list
.
at
(
offset
);
}
// Get offset-th col of feed inputs.
lite
::
Tensor
*
GetInput
(
size_t
offset
);
const
lite
::
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
program_
->
exec_scope
()
->
FindVar
(
name
);
return
&
var
->
Get
<
lite
::
Tensor
>
();
}
// Get offset-th col of fetch results.
const
lite
::
Tensor
*
GetOutput
(
size_t
offset
);
void
Run
()
{
program_
->
Run
();
}
// Return the program desc for debug.
const
framework
::
proto
::
ProgramDesc
&
program_desc
()
const
;
const
lite
::
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
;
const
framework
::
proto
::
ProgramDesc
&
program_desc
()
const
{
return
program_desc_
;
}
// This method is disabled in mobile, for unnecessary dependencies required.
void
SaveModel
(
const
std
::
string
&
dir
);
private:
Optimizer
optimizer_
;
...
...
@@ -96,6 +67,7 @@ class ExecutorLite {
std
::
unique_ptr
<
RuntimeProgram
>
program_
;
};
#ifdef LITE_WITH_X86
/*
* An executor for training.
*
...
...
@@ -119,13 +91,13 @@ class CXXTrainer {
:
scope_
(
root_scope
),
preferred_place_
(
preferred_place
),
valid_places_
(
valid_places
),
main_program_executor_
(
ExecutorLite
(
scope_
))
{}
main_program_executor_
(
Predictor
(
scope_
))
{}
// Build the RuntimeProgram cache for the main program. The cache will run
// multiple times for the epoches.
// NOTE Just support to execute the 0-th block currently.
ExecutorLite
&
BuildMainProgramExecutor
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
int
block_id
=
0
)
{
Predictor
&
BuildMainProgramExecutor
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
int
block_id
=
0
)
{
main_program_executor_
.
Build
(
desc
,
preferred_place_
,
valid_places_
);
return
main_program_executor_
;
}
...
...
@@ -133,7 +105,7 @@ class CXXTrainer {
// Run the startup program. It just executes once, no cache needed.
void
RunStartupProgram
(
const
framework
::
proto
::
ProgramDesc
&
desc
,
int
block_id
=
0
)
{
ExecutorLite
exe
(
scope_
);
Predictor
exe
(
scope_
);
exe
.
Build
(
desc
,
preferred_place_
,
valid_places_
);
exe
.
Run
();
}
...
...
@@ -145,8 +117,9 @@ class CXXTrainer {
std
::
vector
<
Place
>
valid_places_
;
// The training program.
ExecutorLite
main_program_executor_
;
Predictor
main_program_executor_
;
};
#endif
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/cxx_api_bin.cc
浏览文件 @
a95dcea5
...
...
@@ -34,7 +34,7 @@ void Run(const char* model_dir, int repeat, int thread_num) {
DeviceInfo
::
Init
();
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
thread_num
);
#endif
lite
::
ExecutorLite
predictor
;
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
...
...
paddle/fluid/lite/api/cxx_api_test.cc
浏览文件 @
a95dcea5
...
...
@@ -42,7 +42,7 @@ TEST(CXXApi, test) {
}
TEST
(
CXXApi
,
save_model
)
{
lite
::
ExecutorLite
predictor
;
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
...
...
paddle/fluid/lite/api/inceptionv4_test.cc
浏览文件 @
a95dcea5
...
...
@@ -16,21 +16,20 @@
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/test_helper.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
// for eval
DEFINE_string
(
model_dir
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
TEST
(
InceptionV4
,
test
)
{
DeviceInfo
::
Init
();
lite
::
ExecutorLite
predictor
;
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
...
...
@@ -44,7 +43,20 @@ TEST(InceptionV4, test) {
data
[
i
]
=
1
;
}
predictor
.
Run
();
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
}
auto
start
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
predictor
.
Run
();
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
" ms in average."
;
auto
*
out
=
predictor
.
GetOutput
(
0
);
std
::
vector
<
float
>
results
({
0.00078033
,
0.00083865
,
0.00060029
,
0.00057083
,
...
...
paddle/fluid/lite/api/light_api.cc
浏览文件 @
a95dcea5
...
...
@@ -13,3 +13,67 @@
// limitations under the License.
#include "paddle/fluid/lite/api/light_api.h"
namespace
paddle
{
namespace
lite
{
void
LightPredictor
::
Build
(
const
std
::
string
&
model_dir
)
{
framework
::
proto
::
ProgramDesc
desc
;
LoadModel
(
model_dir
,
scope_
.
get
(),
&
desc
);
BuildRuntimeProgram
(
desc
);
}
Tensor
*
LightPredictor
::
GetInput
(
size_t
offset
)
{
auto
*
_feed_list
=
program_
->
exec_scope
()
->
FindVar
(
"feed"
);
CHECK
(
_feed_list
)
<<
"no feed variable in exec_scope"
;
auto
*
feed_list
=
_feed_list
->
GetMutable
<
std
::
vector
<
Tensor
>>
();
if
(
offset
>=
feed_list
->
size
())
{
feed_list
->
resize
(
offset
+
1
);
}
return
&
feed_list
->
at
(
offset
);
}
const
Tensor
*
LightPredictor
::
GetOutput
(
size_t
offset
)
{
auto
*
_fetch_list
=
program_
->
exec_scope
()
->
FindVar
(
"fetch"
);
CHECK
(
_fetch_list
)
<<
"no fatch variable in exec_scope"
;
auto
&
fetch_list
=
*
_fetch_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
CHECK_LT
(
offset
,
fetch_list
.
size
())
<<
"offset "
<<
offset
<<
" overflow"
;
return
&
fetch_list
.
at
(
offset
);
}
void
LightPredictor
::
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
)
{
std
::
vector
<
Instruction
>
insts
;
// 1. Create op first
Program
program
(
prog
,
scope_
,
{});
// 2. Create Instructs
// Create the kernels of the target places, and filter out the specific
// kernel with the target alias.
for
(
auto
&
op
:
program
.
ops
())
{
auto
kernel_type
=
op
->
op_info
()
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
std
::
string
op_type
,
alias
;
Place
place
;
KernelBase
::
ParseKernelType
(
kernel_type
,
&
op_type
,
&
alias
,
&
place
);
auto
kernels
=
op
->
CreateKernels
({
place
});
// filter out a kernel
auto
it
=
std
::
find_if
(
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>&
it
)
{
return
it
->
alias
()
==
alias
;
});
CHECK
(
it
!=
kernels
.
end
());
(
*
it
)
->
SetContext
(
ContextScheduler
::
Global
().
NewContext
((
*
it
)
->
target
()));
insts
.
emplace_back
(
op
,
std
::
move
(
*
it
));
}
program_
.
reset
(
new
RuntimeProgram
(
std
::
move
(
insts
)));
CHECK
(
program
.
exec_scope
());
program_
->
set_exec_scope
(
program
.
exec_scope
());
}
LightPredictor
::
LightPredictor
(
const
std
::
string
&
model_dir
)
{
scope_
=
std
::
make_shared
<
Scope
>
();
Build
(
model_dir
);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/api/light_api.h
浏览文件 @
a95dcea5
...
...
@@ -32,36 +32,21 @@
namespace
paddle
{
namespace
lite
{
/*
* The light weight predictor, mainly for mobile. It loads an optimized model,
* and will not depend on the MIR or perform latter optimization.
*/
class
LightPredictor
{
public:
LightPredictor
()
{
scope_
=
std
::
make_shared
<
Scope
>
();
}
void
Build
(
const
std
::
string
&
model_dir
)
{
framework
::
proto
::
ProgramDesc
desc
;
LoadModel
(
model_dir
,
scope_
.
get
(),
&
desc
);
BuildRuntimeProgram
(
desc
);
}
explicit
LightPredictor
(
const
std
::
string
&
model_dir
);
void
Run
()
{
program_
->
Run
();
}
// Get offset-th col of feed.
Tensor
*
GetInput
(
size_t
offset
)
{
auto
*
_feed_list
=
program_
->
exec_scope
()
->
FindVar
(
"feed"
);
CHECK
(
_feed_list
)
<<
"no feed variable in exec_scope"
;
auto
*
feed_list
=
_feed_list
->
GetMutable
<
std
::
vector
<
Tensor
>>
();
if
(
offset
>=
feed_list
->
size
())
{
feed_list
->
resize
(
offset
+
1
);
}
return
&
feed_list
->
at
(
offset
);
}
// Get offset-th col of feed inputs.
Tensor
*
GetInput
(
size_t
offset
);
const
Tensor
*
GetOutput
(
size_t
offset
)
{
auto
*
_fetch_list
=
program_
->
exec_scope
()
->
FindVar
(
"fetch"
);
CHECK
(
_fetch_list
)
<<
"no fatch variable in exec_scope"
;
auto
&
fetch_list
=
*
_fetch_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
CHECK_LT
(
offset
,
fetch_list
.
size
())
<<
"offset "
<<
offset
<<
" overflow"
;
return
&
fetch_list
.
at
(
offset
);
}
// Get offset-th col of fetch outputs.
const
Tensor
*
GetOutput
(
size_t
offset
);
const
lite
::
Tensor
*
GetTensor
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
program_
->
exec_scope
()
->
FindVar
(
name
);
...
...
@@ -69,34 +54,8 @@ class LightPredictor {
}
private:
void
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
)
{
std
::
vector
<
Instruction
>
insts
;
// 1. Create op first
Program
program
(
prog
,
scope_
,
{});
// 2. Create Instructs
// Create the kernels of the target places, and filter out the specific
// kernel with the target alias.
for
(
auto
&
op
:
program
.
ops
())
{
auto
kernel_type
=
op
->
op_info
()
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
std
::
string
op_type
,
alias
;
Place
place
;
KernelBase
::
ParseKernelType
(
kernel_type
,
&
op_type
,
&
alias
,
&
place
);
auto
kernels
=
op
->
CreateKernels
({
place
});
// filter out a kernel
auto
it
=
std
::
find_if
(
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>&
it
)
{
return
it
->
alias
()
==
alias
;
});
CHECK
(
it
!=
kernels
.
end
());
(
*
it
)
->
SetContext
(
ContextScheduler
::
Global
().
NewContext
((
*
it
)
->
target
()));
insts
.
emplace_back
(
op
,
std
::
move
(
*
it
));
}
program_
.
reset
(
new
RuntimeProgram
(
std
::
move
(
insts
)));
CHECK
(
program
.
exec_scope
());
program_
->
set_exec_scope
(
program
.
exec_scope
());
}
void
Build
(
const
std
::
string
&
model_dir
);
void
BuildRuntimeProgram
(
const
framework
::
proto
::
ProgramDesc
&
prog
);
private:
std
::
shared_ptr
<
Scope
>
scope_
;
...
...
paddle/fluid/lite/api/light_api_test.cc
浏览文件 @
a95dcea5
...
...
@@ -25,8 +25,10 @@ namespace paddle {
namespace
lite
{
TEST
(
LightAPI
,
load
)
{
LightPredictor
predictor
;
predictor
.
Build
(
FLAGS_optimized_model
);
if
(
FLAGS_optimized_model
.
empty
())
{
FLAGS_optimized_model
=
"lite_naive_model"
;
}
LightPredictor
predictor
(
FLAGS_optimized_model
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
int64_t
>
({
100
,
100
})));
...
...
paddle/fluid/lite/api/lite_api_test_helper.cc
浏览文件 @
a95dcea5
...
...
@@ -22,7 +22,7 @@ namespace paddle {
namespace
lite
{
const
lite
::
Tensor
*
RunHvyModel
()
{
lite
::
ExecutorLite
predictor
;
lite
::
Predictor
predictor
;
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
...
...
paddle/fluid/lite/api/mobilenetv1_test.cc
浏览文件 @
a95dcea5
...
...
@@ -16,21 +16,20 @@
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/test_helper.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
// for eval
DEFINE_string
(
model_dir
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
TEST
(
MobileNetV1
,
test
)
{
DeviceInfo
::
Init
();
lite
::
ExecutorLite
predictor
;
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
...
...
@@ -44,7 +43,20 @@ TEST(MobileNetV1, test) {
data
[
i
]
=
1
;
}
predictor
.
Run
();
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
}
auto
start
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
predictor
.
Run
();
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
" ms in average."
;
auto
*
out
=
predictor
.
GetOutput
(
0
);
std
::
vector
<
float
>
results
({
1.91308980e-04
,
5.92055148e-04
,
1.12303176e-04
,
...
...
paddle/fluid/lite/api/mobilenetv2_test.cc
浏览文件 @
a95dcea5
...
...
@@ -16,21 +16,20 @@
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/test_helper.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
// for eval
DEFINE_string
(
model_dir
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
TEST
(
MobileNetV2
,
test
)
{
DeviceInfo
::
Init
();
lite
::
ExecutorLite
predictor
;
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
...
...
@@ -44,7 +43,20 @@ TEST(MobileNetV2, test) {
data
[
i
]
=
1
;
}
predictor
.
Run
();
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
}
auto
start
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
predictor
.
Run
();
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
" ms in average."
;
auto
*
out
=
predictor
.
GetOutput
(
0
);
std
::
vector
<
float
>
results
({
0.00097802
,
0.00099822
,
0.00103093
,
0.00100121
,
...
...
paddle/fluid/lite/api/resnet50_test.cc
浏览文件 @
a95dcea5
...
...
@@ -16,21 +16,20 @@
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/api/cxx_api.h"
#include "paddle/fluid/lite/api/test_helper.h"
#include "paddle/fluid/lite/core/mir/use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/use_kernels.h"
#include "paddle/fluid/lite/operators/use_ops.h"
// for eval
DEFINE_string
(
model_dir
,
""
,
""
);
namespace
paddle
{
namespace
lite
{
#ifdef LITE_WITH_ARM
TEST
(
ResNet50
,
test
)
{
DeviceInfo
::
Init
();
lite
::
ExecutorLite
predictor
;
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
...
...
@@ -44,7 +43,20 @@ TEST(ResNet50, test) {
data
[
i
]
=
1
;
}
predictor
.
Run
();
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
}
auto
start
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
predictor
.
Run
();
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
" ms in average."
;
auto
*
out
=
predictor
.
GetOutput
(
0
);
std
::
vector
<
float
>
results
({
2.41399175e-04
,
4.13724629e-04
,
2.64324830e-04
,
...
...
paddle/fluid/lite/
kernels/arm/use_kernels
.h
→
paddle/fluid/lite/
api/test_helper
.h
浏览文件 @
a95dcea5
...
...
@@ -13,13 +13,24 @@
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/op_registry.h"
USE_LITE_KERNEL
(
fc
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
feed
,
kARM
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kARM
,
kAny
,
kAny
,
def
);
#include <gflags/gflags.h>
#include <time.h>
// for eval
DEFINE_string
(
model_dir
,
""
,
"model dir"
);
DEFINE_int32
(
warmup
,
0
,
"warmup times"
);
DEFINE_int32
(
repeats
,
1
,
"repeats times"
);
DEFINE_int32
(
threads
,
1
,
"threads num"
);
namespace
paddle
{
namespace
lite
{
inline
double
GetCurrentUS
()
{
struct
timeval
time
;
gettimeofday
(
&
time
,
NULL
);
return
1e+6
*
time
.
tv_sec
+
time
.
tv_usec
;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/arm/math/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -35,6 +35,8 @@ cc_library(math_arm SRCS
split.cc
activation.cc
dropout.cc
gemm_prepacked_int8.cc
gemv_arm_int8.cc
DEPS
${
lite_kernel_deps
}
eigen3 framework_proto_lite
)
# TODO(TJ): fix me do not deps proto
...
...
paddle/fluid/lite/core/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -25,7 +25,7 @@ cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
cc_library
(
scope_lite SRCS scope.cc DEPS
${
tensor_lite
}
)
cc_library
(
cpu_info_lite SRCS cpu_info.cc
)
lite_cc_library
(
context_lite SRCS context.cc DEPS
${
tensor_lite
}
any_lite cpu_info_lite eigen3
)
cc_library
(
op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
cc_library
(
op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite target_wrapper_lite
kernel_lite
cpp_op_desc_lite
${
tensor_lite
}
)
cc_library
(
types_lite SRCS types.cc
)
cc_library
(
type_system SRCS type_system.cc DEPS
${
tensor_lite
}
target_wrapper_lite
)
...
...
paddle/fluid/lite/core/kernel.cc
浏览文件 @
a95dcea5
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/lite/core/kernel.h"
#include <cstdlib>
namespace
paddle
{
namespace
lite
{
...
...
@@ -49,6 +50,36 @@ std::string KernelBase::GenParamTypeKey() const {
return
ss
.
str
();
}
// Splits a string produced by SerializeKernelType --
// "op_type/alias/target/precision/layout", where the last three fields are
// the integer values of the corresponding enums -- back into its parts.
void KernelBase::ParseKernelType(const std::string& kernel_type,
                                 std::string* op_type, std::string* alias,
                                 Place* place) {
  std::stringstream ss(kernel_type);
  std::getline(ss, *op_type, '/');
  std::getline(ss, *alias, '/');
  std::string target, precision, layout;
  std::getline(ss, target, '/');
  std::getline(ss, precision, '/');
  std::getline(ss, layout, '/');
  // NOTE(review): atoi yields 0 on malformed input, silently mapping a bad
  // field to the 0-valued enum entry -- assumes the input string always
  // comes from SerializeKernelType; confirm callers never pass user data.
  place->target = static_cast<TargetType>(std::atoi(target.c_str()));
  place->precision = static_cast<PrecisionType>(std::atoi(precision.c_str()));
  place->layout = static_cast<DataLayoutType>(std::atoi(layout.c_str()));
}
// Encodes a kernel identity as "op_type/alias/target/precision/layout".
// The place fields are serialized as their raw integer enum values (not the
// human-readable names) so ParseKernelType can decode them trivially.
std::string KernelBase::SerializeKernelType(const std::string& op_type,
                                            const std::string& alias,
                                            const Place& place) {
  std::string key = op_type;
  key += "/";
  key += alias;
  key += "/";
  // We serialize the place value not the string representation here for
  // easier deserialization.
  key += std::to_string(static_cast<int>(place.target));
  key += "/";
  key += std::to_string(static_cast<int>(place.precision));
  key += "/";
  key += std::to_string(static_cast<int>(place.layout));
  return key;
}
bool
ParamTypeRegistry
::
KeyCmp
::
operator
()(
const
ParamTypeRegistry
::
key_t
&
a
,
const
ParamTypeRegistry
::
key_t
&
b
)
const
{
...
...
paddle/fluid/lite/core/kernel.h
浏览文件 @
a95dcea5
...
...
@@ -118,33 +118,11 @@ class KernelBase {
static
std
::
string
SerializeKernelType
(
const
std
::
string
&
op_type
,
const
std
::
string
&
alias
,
const
Place
&
place
)
{
std
::
stringstream
ss
;
ss
<<
op_type
<<
"/"
;
ss
<<
alias
<<
"/"
;
// We serialize the place value not the string representation here for
// easier deserialization.
ss
<<
static_cast
<
int
>
(
place
.
target
)
<<
"/"
;
ss
<<
static_cast
<
int
>
(
place
.
precision
)
<<
"/"
;
ss
<<
static_cast
<
int
>
(
place
.
layout
);
return
ss
.
str
();
}
const
Place
&
place
);
static
void
ParseKernelType
(
const
std
::
string
&
kernel_type
,
std
::
string
*
op_type
,
std
::
string
*
alias
,
Place
*
place
)
{
std
::
stringstream
ss
(
kernel_type
);
std
::
getline
(
ss
,
*
op_type
,
'/'
);
std
::
getline
(
ss
,
*
alias
,
'/'
);
std
::
string
target
,
precision
,
layout
;
std
::
getline
(
ss
,
target
,
'/'
);
std
::
getline
(
ss
,
precision
,
'/'
);
std
::
getline
(
ss
,
layout
,
'/'
);
place
->
target
=
static_cast
<
TargetType
>
(
std
::
stoi
(
target
));
place
->
precision
=
static_cast
<
PrecisionType
>
(
std
::
stoi
(
precision
));
place
->
layout
=
static_cast
<
DataLayoutType
>
(
std
::
stoi
(
layout
));
}
Place
*
place
);
virtual
~
KernelBase
()
=
default
;
void
Torch
()
{}
...
...
paddle/fluid/lite/core/mir/fusion/fc_fuse_pass_test.cc
浏览文件 @
a95dcea5
...
...
@@ -28,7 +28,7 @@ namespace lite {
namespace
mir
{
TEST
(
fc_fuse_pass
,
fuse_test
)
{
lite
::
ExecutorLite
predictor
;
lite
::
Predictor
predictor
;
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
...
...
@@ -69,7 +69,7 @@ TEST(fc_fuse_pass, fuse_test) {
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
fc_fuse_pass
,
save_model_test
)
{
lite
::
ExecutorLite
predictor
;
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
...
...
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
浏览文件 @
a95dcea5
...
...
@@ -25,7 +25,7 @@ namespace fusion {
/* The model trained by fluid quantization is a simulation of real int8.
* The quantized Ops(conv2d, mul, depthwise conv2d etc) have fake_quantop
* in front
and fake_dequantop behind.
* in front and fake_dequantop behind.
*
* When in int8 mode, the pattern like "fake_quant + quantized_op +
* fake_dequant"
...
...
paddle/fluid/lite/core/mir/pattern_matcher_high_api.cc
浏览文件 @
a95dcea5
...
...
@@ -41,7 +41,7 @@ void FuseBase::DeleteInterNodes(SSAGraph *graph) {
}
}
LOG
(
INFO
)
<<
"keys: "
<<
key2nodes_
.
size
();
VLOG
(
4
)
<<
"keys: "
<<
key2nodes_
.
size
();
std
::
unordered_set
<
const
Node
*>
nodes2rm
;
for
(
auto
&
matched
:
key2nodes_
)
{
for
(
const
auto
&
key
:
keys
)
{
...
...
paddle/fluid/lite/core/op_registry.h
浏览文件 @
a95dcea5
...
...
@@ -80,6 +80,8 @@ class KernelRegistry final {
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)
>
*
,
//
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)
>
*
,
//
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNCHW
)
>
*
//
>
;
...
...
paddle/fluid/lite/core/optimizer.h
浏览文件 @
a95dcea5
...
...
@@ -58,7 +58,6 @@ class Optimizer {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
"lite_elementwise_add_activation_fuse_pass"
,
//
#endif
"lite_fc_fuse_pass"
,
//
"static_kernel_pick_pass"
,
//
"variable_place_inference_pass"
,
//
"argument_type_display_pass"
,
//
...
...
paddle/fluid/lite/core/target_wrapper.h
浏览文件 @
a95dcea5
...
...
@@ -38,6 +38,7 @@ enum class PrecisionType : int {
kUnk
=
0
,
kFloat
,
kInt8
,
kInt32
,
kAny
,
// any precision
NUM
,
// number of fields.
};
...
...
@@ -48,6 +49,19 @@ enum class DataLayoutType : int {
NUM
,
// number of fields.
};
// Returns the size in bytes of one element of the given precision.
// Unlisted precisions (kUnk, kAny) conservatively report 4 bytes.
static size_t PrecisionTypeLength(PrecisionType type) {
  switch (type) {
    case PrecisionType::kFloat:
      return 4;
    case PrecisionType::kInt8:
      return 1;
    case PrecisionType::kInt32:
      return 4;
    default:
      return 4;
  }
}
// Some helper macro to get a specific TargetType.
#define TARGET(item__) paddle::lite::TargetType::item__
// Some helper macro to get a specific PrecisionType.
...
...
@@ -87,7 +101,7 @@ static const std::string& TargetRepr(TargetType target) {
static
const
std
::
string
&
PrecisionRepr
(
PrecisionType
precision
)
{
static
const
std
::
string
precision2string
[]
=
{
"kUnk"
,
"kFloat"
,
"kInt8"
,
"kAny"
};
"k
Int32"
,
"k
Any"
};
auto
x
=
static_cast
<
int
>
(
precision
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
PRECISION
(
NUM
)));
return
precision2string
[
x
];
...
...
paddle/fluid/lite/kernels/arm/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -51,5 +51,3 @@ set(arm_kernels
)
set
(
arm_kernels
"
${
arm_kernels
}
"
CACHE INTERNAL
"arm kernels"
)
paddle/fluid/lite/kernels/arm/conv_compute.cc
浏览文件 @
a95dcea5
...
...
@@ -92,6 +92,9 @@ void ConvCompute::Run() {
// }
}
void
ConvComputeInt8
::
PrepareForRun
()
{}
void
ConvComputeInt8
::
Run
()
{}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
...
...
@@ -112,3 +115,23 @@ REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW,
.
BindInput
(
"Filter"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
ConvComputeInt8
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Filter"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
BindOutput
(
"Output"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt8
))})
.
Finalize
();
paddle/fluid/lite/kernels/arm/conv_compute.h
浏览文件 @
a95dcea5
...
...
@@ -41,6 +41,25 @@ class ConvCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
nullptr
};
};
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt8
)
>
{
public:
using
param_t
=
operators
::
ConvParam
;
void
PrepareForRun
()
override
;
void
Run
()
override
;
~
ConvComputeInt8
()
{
if
(
impl_
!=
nullptr
)
{
delete
impl_
;
}
}
private:
lite
::
arm
::
math
::
ImplBase
<
TARGET
(
kARM
),
PRECISION
(
kInt8
),
param_t
>*
impl_
{
nullptr
};
};
}
// namespace arm
}
// namespace kernels
}
// namespace lite
...
...
paddle/fluid/lite/kernels/use_kernels.h
浏览文件 @
a95dcea5
...
...
@@ -12,14 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/*
* ATTENTION this header file can only include in .cc file.
*/
#pragma once
#include "paddle/fluid/lite/core/op_registry.h"
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL
(
fc
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
split
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
relu
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
transpose
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
transpose2
,
kARM
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
...
...
@@ -36,21 +55,6 @@ USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL
(
pool2d
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL
(
fc
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
batch_norm
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
relu
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
depthwise_conv2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
pool2d
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
concat
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kARM
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
...
...
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
a95dcea5
...
...
@@ -44,10 +44,9 @@ set(x86_kernels
softmax_compute_x86
dropout_compute_x86
concat_compute_x86
conv_compute_x86
pool_compute_x86
batch_norm_compute_x86
conv_compute_x86
pool_compute_x86
batch_norm_compute_x86
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
paddle/fluid/lite/opencl/CMakeLists.txt
0 → 100644
浏览文件 @
a95dcea5
if
(
NOT LITE_WITH_OPENCL
)
return
()
endif
()
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
cc_library
(
cl_wrapper SRCS cl_wrapper.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
cc_library
(
cl_half SRCS cl_half.cc
)
target_compile_options
(
cl_half BEFORE PUBLIC -fno-strict-aliasing
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
)
add_dependencies
(
cl_tool opencl_clhpp
)
endif
()
paddle/fluid/lite/opencl/cl2_header.h
0 → 100644
浏览文件 @
a95dcea5
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#include <CL/cl2.hpp>
paddle/fluid/lite/opencl/cl_caller.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
// Reads an OpenCL image back to host memory and converts it from the
// device image layout (4 half values per texel) into an NCHW float buffer.
// `out` must be large enough for cl_image.tensor_dims() -- TODO confirm the
// exact required size against CLImageConverter::ImageToNCHW.
static void CopyImageData(const CLImage& cl_image, float* out) {
  int width = cl_image.image_dims()[0];
  int height = cl_image.image_dims()[1];

  // Host staging buffer for the raw texels.
  // NOTE(review): raw new[]/delete[] leaks if enqueueReadImage throws; a
  // std::vector<half_t> or unique_ptr<half_t[]> would be exception-safe.
  half_t* image_data = new half_t[height * width * 4];
  cl::Image* image = cl_image.cl_image();
  const std::array<size_t, 3> origin{0, 0, 0};
  const std::array<size_t, 3> region{static_cast<size_t>(width),
                                     static_cast<size_t>(height), 1};
  // Blocking read (CL_TRUE): returns only after the copy has completed.
  cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
      *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
  CL_CHECK_ERRORS(err);

  auto* converter = cl_image.image_converter();
  converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
                         cl_image.tensor_dims());

  delete[] image_data;
}
bool
InitOpenCLEngine
(
std
::
string
cl_path
)
{
auto
*
engine
=
CLEngine
::
Global
();
engine
->
set_cl_path
(
cl_path
);
return
engine
->
IsInitSuccess
();
}
// Runs the "elementwise_add" OpenCL kernel: out = in + bias, where all
// three are host NCHW float buffers. The inputs are uploaded as CL images,
// the kernel is launched over the input image extent, and the result is
// read back (blocking) into `out`.
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim) {
  CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  auto kernel = helper.KernelAt(0);

  CLImage in_image;
  in_image.set_tensor_data(in, in_dim);
  in_image.InitNormalCLImage(helper.OpenCLContext());
  // Fixed log-message typo: "Inpu" -> "Input".
  VLOG(3) << " --- Input image: " << in_image << " --- ";

  CLImage bias_image;
  bias_image.set_tensor_data(bias, bias_dim);
  bias_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Bias image: " << bias_image << " --- ";

  // Output image is allocated on device only; no host data to upload.
  CLImage out_image;
  out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  // One work item per texel of the 2D input image.
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr,
      nullptr);
  CL_CHECK_ERRORS(status);
  VLOG(3) << " --- Out image: " << out_image << " --- ";

  // Blocking read-back of the result into the host buffer.
  CopyImageData(out_image, out);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_caller.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace
paddle
{
namespace
lite
{
bool
InitOpenCLEngine
(
std
::
string
cl_path
);
void
elementwise_add
(
CLContext
*
context
,
float
*
in
,
const
DDim
&
in_dim
,
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
);
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_context.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
// Returns the process-wide command queue owned by the global CLEngine.
cl::CommandQueue& CLContext::GetCommandQueue() {
  return CLEngine::Global()->command_queue();
}
// Returns the process-wide cl::Context owned by the global CLEngine.
cl::Context& CLContext::GetContext() {
  return CLEngine::Global()->context();
}
// Returns the compiled cl::Program for `file_name` built with `options`,
// compiling and caching it on first use. The cache key is file_name plus
// options, so the same source built with different options coexists.
// Not synchronized -- assumes single-threaded use per CLContext; confirm.
cl::Program& CLContext::GetProgram(const std::string& file_name,
                                   const std::string& options) {
  std::string program_key = file_name;
  if (!options.empty()) {
    program_key += options;
  }
  auto it = programs_.find(program_key);
  if (it != programs_.end()) {
    VLOG(3) << " --- program -> " << program_key << " has been built --- ";
    return *(it->second);
  }

  // Kernel sources are loaded from <cl_path>/cl_kernel/<file_name>.
  auto program = CLEngine::Global()->CreateProgram(
      GetContext(), CLEngine::Global()->cl_path() + "/cl_kernel/" + file_name);

  VLOG(3) << " --- begin build program -> " << program_key << " --- ";
  CLEngine::Global()->BuildProgram(program.get(), options);
  VLOG(3) << " --- end build program -> " << program_key << " --- ";

  // Transfer ownership into the cache, then hand out a reference that stays
  // valid for the lifetime of this CLContext.
  programs_[program_key] = std::move(program);

  return *(programs_[program_key]);
}
// Creates a new cl::Kernel named `kernel_name` from the (cached) program
// compiled from `file_name` with `options`. The caller owns the returned
// kernel; a fresh kernel object is created on every call.
std::unique_ptr<cl::Kernel> CLContext::GetKernel(const std::string& kernel_name,
                                                 const std::string& file_name,
                                                 const std::string& options) {
  cl_int status{CL_SUCCESS};
  VLOG(3) << " --- to get program " << file_name << " --- ";
  // Bind by reference: GetProgram returns cl::Program&, and `auto` would
  // copy the program handle (an extra OpenCL retain/release) for no reason.
  auto& program = GetProgram(file_name, options);
  VLOG(3) << " --- end get program --- ";
  VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
  std::unique_ptr<cl::Kernel> kernel(
      new cl::Kernel(program, kernel_name.c_str(), &status));
  CL_CHECK_ERRORS(status);
  VLOG(3) << " --- end create kernel --- ";
  // Plain return: `return std::move(local)` is a pessimizing move that
  // defeats copy elision; the unique_ptr is moved implicitly anyway.
  return kernel;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_context.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace
paddle
{
namespace
lite
{
// Per-context cache of built OpenCL programs plus a kernel factory.
// Programs are stored in programs_ keyed by a program-key string
// (presumably derived from the file name and options -- confirm in the
// corresponding .cc, where the key is built).
class CLContext {
 public:
  cl::CommandQueue &GetCommandQueue();

  cl::Context &GetContext();

  // Returns a built program for the kernel source `file_name` compiled with
  // `options`. Cached: subsequent calls with the same key reuse the program.
  cl::Program &GetProgram(const std::string &file_name,
                          const std::string &options);

  // Creates a new kernel named `kernel_name` from the (possibly cached)
  // program built from `file_name` with `options`. Caller owns the result.
  std::unique_ptr<cl::Kernel> GetKernel(const std::string &kernel_name,
                                        const std::string &file_name,
                                        const std::string &options);

 private:
  // Built-program cache; owns the cl::Program objects for this context.
  std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_engine.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
lite
{
// Accessor for the process-wide engine instance (function-local static, so
// construction is thread-safe under C++11). Init() is invoked on every
// access but is internally guarded and only does work once.
CLEngine *CLEngine::Global() {
  static CLEngine engine;
  engine.Init();
  return &engine;
}
// Drains any work still queued on the command queue, then releases the
// OpenCL objects explicitly in reverse dependency order
// (queue -> context -> device -> platform) rather than relying on member
// destruction order.
CLEngine::~CLEngine() {
  if (command_queue_ != nullptr) {
    command_queue_->finish();  // wait for all enqueued commands to complete
  }
  // For controlling the destruction order:
  command_queue_.reset();
  context_.reset();
  device_.reset();
  platform_.reset();
}
// One-shot initialization: selects the OpenCL platform and GPU device.
// Safe to call repeatedly; only the first call does work. Returns whether
// initialization has been attempted (query IsInitSuccess() for the outcome).
bool CLEngine::Init() {
  if (initialized_) {
    return true;
  }
  bool is_platform_init = InitializePlatform();
  // BUGFIX: InitializeDevice() dereferences platform_, which is only set by a
  // successful InitializePlatform(). Short-circuit so we never touch a null
  // platform_ when no OpenCL platform was found.
  bool is_device_init = is_platform_init && InitializeDevice();
  is_init_success_ = is_platform_init && is_device_init;
  initialized_ = true;
  return initialized_;
}
// Returns the selected OpenCL platform. InitializePlatform() must have
// succeeded beforehand; otherwise this aborts via CHECK.
cl::Platform &CLEngine::platform() {
  auto *plat = platform_.get();
  CHECK(plat != nullptr) << "platform_ is not initialized!";
  return *plat;
}
// Lazily creates the OpenCL context on first access and returns it.
cl::Context &CLEngine::context() {
  if (!context_) {
    context_ = CreateContext();
  }
  return *context_;
}
// Returns the selected GPU device. InitializeDevice() must have succeeded
// beforehand; otherwise this aborts via CHECK.
cl::Device &CLEngine::device() {
  auto *dev = device_.get();
  CHECK(dev != nullptr) << "device_ is not initialized!";
  return *dev;
}
// Lazily creates the command queue (which in turn may lazily create the
// context) on first access and returns it.
cl::CommandQueue &CLEngine::command_queue() {
  if (!command_queue_) {
    command_queue_ = CreateCommandQueue(context());
  }
  return *command_queue_;
}
// Reads the OpenCL kernel source at `file_name` and wraps it in an (unbuilt)
// cl::Program for `context`. The creation status is recorded in status_ and
// checked via CL_CHECK_ERRORS; call BuildProgram() afterwards to compile.
std::unique_ptr<cl::Program> CLEngine::CreateProgram(const cl::Context &context,
                                                     std::string file_name) {
  // Open at end (ios::ate) so tellg() immediately yields the file size.
  std::ifstream file{file_name, std::ios::binary | std::ios::ate};
  CHECK(file.is_open()) << "Can't open file from " << file_name;
  auto size = file.tellg();
  CHECK(size > 0) << "size is too small.";
  std::string content(size, '\0');
  file.seekg(0);
  file.read(&content[0], size);
  LOG(INFO) << "OpenCL kernel file name: " << file_name;
  LOG(INFO) << "Program source size: " << content.size();
  cl::Program::Sources sources;
  // Move the source text instead of copying it into the Sources container.
  sources.push_back(std::move(content));
  auto prog = std::unique_ptr<cl::Program>(
      new cl::Program(context, sources, &status_));
  CL_CHECK_ERRORS(status_);
  // No std::move on return: it would inhibit NRVO.
  return prog;
}
// Creates a user event tied to `context`; status_ records the OpenCL error
// code and is checked via CL_CHECK_ERRORS. Caller owns the event.
std::unique_ptr<cl::UserEvent> CLEngine::CreateEvent(
    const cl::Context &context) {
  auto event =
      std::unique_ptr<cl::UserEvent>(new cl::UserEvent(context, &status_));
  CL_CHECK_ERRORS(status_);
  // No std::move on return: it would inhibit NRVO.
  return event;
}
// Builds `program` for the selected device. "-cl-fast-relaxed-math" and an
// include path pointing at <cl_path()>/cl_kernel are always appended to
// `options`. On a build error the compiler log is printed and false is
// returned.
bool CLEngine::BuildProgram(cl::Program *program, const std::string &options) {
  // Use the member accessor directly instead of CLEngine::Global()->cl_path():
  // we are already inside the engine, and Global() would re-run the guarded
  // Init() needlessly.
  std::string build_option =
      options + " -cl-fast-relaxed-math -I " + cl_path() + "/cl_kernel";
  status_ = program->build({*device_}, build_option.c_str());
  CL_CHECK_ERRORS(status_);
  if (status_ != CL_SUCCESS) {
    if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
        CL_BUILD_ERROR) {
      std::string log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
      LOG(INFO) << "Program build error: " << log;
    }
    return false;
  }
  return true;
}
bool
CLEngine
::
InitializePlatform
()
{
std
::
vector
<
cl
::
Platform
>
all_platforms
;
status_
=
cl
::
Platform
::
get
(
&
all_platforms
);
CL_CHECK_ERRORS
(
status_
);
if
(
all_platforms
.
empty
())
{
LOG
(
ERROR
)
<<
"No OpenCL platform found!"
;
return
false
;
}
platform_
=
std
::
make_shared
<
cl
::
Platform
>
();
*
platform_
=
all_platforms
[
0
];
return
true
;
}
bool
CLEngine
::
InitializeDevice
()
{
std
::
vector
<
cl
::
Device
>
all_devices
;
status_
=
platform_
->
getDevices
(
CL_DEVICE_TYPE_GPU
,
&
all_devices
);
CL_CHECK_ERRORS
(
status_
);
if
(
all_devices
.
empty
())
{
LOG
(
ERROR
)
<<
"No OpenCL GPU device found!"
;
return
false
;
}
device_
=
std
::
make_shared
<
cl
::
Device
>
();
*
device_
=
all_devices
[
0
];
auto
device_name
=
device_
->
getInfo
<
CL_DEVICE_NAME
>
();
LOG
(
INFO
)
<<
"Using device: "
<<
device_name
;
auto
image_support
=
device_
->
getInfo
<
CL_DEVICE_IMAGE_SUPPORT
>
();
if
(
image_support
)
{
LOG
(
INFO
)
<<
"The chosen device supports image processing."
;
}
else
{
LOG
(
ERROR
)
<<
"The chosen device doesn't support image processing!"
;
return
false
;
}
auto
ext_data
=
device_
->
getInfo
<
CL_DEVICE_EXTENSIONS
>
();
LOG
(
INFO
)
<<
"The extensions supported by this device: "
<<
ext_data
;
if
(
ext_data
.
find
(
"cl_khr_fp16"
)
!=
std
::
string
::
npos
)
{
LOG
(
INFO
)
<<
"The chosen device supports the half data type."
;
}
else
{
LOG
(
ERROR
)
<<
"The chosen device doesn't support the half data type!"
;
return
false
;
}
auto
max_units
=
device_
->
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
LOG
(
INFO
)
<<
"The chosen device has "
<<
max_units
<<
" compute units."
;
auto
local_mem
=
device_
->
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
();
LOG
(
INFO
)
<<
"The local memory size of the chosen device is "
<<
static_cast
<
float
>
(
local_mem
)
/
1024
<<
" KB."
;
return
true
;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_engine.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
// Singleton owning the process-wide OpenCL objects (platform, device,
// context, command queue) plus factory helpers for programs and events.
class CLEngine {
 public:
  // Returns the global engine; also calls the (internally guarded) Init().
  static CLEngine *Global();

  // Selects the first platform and the first GPU device. Idempotent.
  bool Init();

  cl::Platform &platform();

  // Created lazily on first access.
  cl::Context &context();

  cl::Device &device();

  // Created lazily on first access (may also create the context).
  cl::CommandQueue &command_queue();

  // Reads the kernel source at `file_name` into a cl::Program for `context`.
  // The program is not yet built; see BuildProgram().
  std::unique_ptr<cl::Program> CreateProgram(const cl::Context &context,
                                             std::string file_name);

  std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context &context);

  // Builds `program` for the chosen device; "-cl-fast-relaxed-math" and an
  // include path under cl_path()/cl_kernel are appended to `options`.
  bool BuildProgram(cl::Program *program, const std::string &options = "");

  // Whether both platform and device initialization succeeded.
  bool IsInitSuccess() { return is_init_success_; }

  // Root directory that contains the "cl_kernel" source folder.
  std::string cl_path() { return cl_path_; }

  void set_cl_path(std::string cl_path) { cl_path_ = cl_path; }

 private:
  CLEngine() = default;

  // Finishes the queue, then releases members in reverse dependency order.
  ~CLEngine();

  bool InitializePlatform();

  bool InitializeDevice();

  // Builds a context over the single selected device.
  std::shared_ptr<cl::Context> CreateContext() {
    auto context = std::make_shared<cl::Context>(
        std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr,
        &status_);
    CL_CHECK_ERRORS(status_);
    return context;
  }

  // Builds an in-order command queue (properties = 0) on the device.
  std::shared_ptr<cl::CommandQueue> CreateCommandQueue(
      const cl::Context &context) {
    auto queue =
        std::make_shared<cl::CommandQueue>(context, device(), 0, &status_);
    CL_CHECK_ERRORS(status_);
    return queue;
  }

  std::string cl_path_;
  std::shared_ptr<cl::Platform> platform_{nullptr};
  std::shared_ptr<cl::Context> context_{nullptr};
  std::shared_ptr<cl::Device> device_{nullptr};
  std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
  // Last OpenCL status code produced by a creation call in this class.
  cl_int status_{CL_SUCCESS};
  // True once Init() has run (regardless of outcome).
  bool initialized_{false};
  // Outcome of platform + device initialization.
  bool is_init_success_{false};
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_half.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace
paddle
{
namespace
lite
{
static
const
uint32_t
mantissatable
[
2048
]
=
{
0x00000000
,
0x33800000
,
0x34000000
,
0x34400000
,
0x34800000
,
0x34a00000
,
0x34c00000
,
0x34e00000
,
0x35000000
,
0x35100000
,
0x35200000
,
0x35300000
,
0x35400000
,
0x35500000
,
0x35600000
,
0x35700000
,
0x35800000
,
0x35880000
,
0x35900000
,
0x35980000
,
0x35a00000
,
0x35a80000
,
0x35b00000
,
0x35b80000
,
0x35c00000
,
0x35c80000
,
0x35d00000
,
0x35d80000
,
0x35e00000
,
0x35e80000
,
0x35f00000
,
0x35f80000
,
0x36000000
,
0x36040000
,
0x36080000
,
0x360c0000
,
0x36100000
,
0x36140000
,
0x36180000
,
0x361c0000
,
0x36200000
,
0x36240000
,
0x36280000
,
0x362c0000
,
0x36300000
,
0x36340000
,
0x36380000
,
0x363c0000
,
0x36400000
,
0x36440000
,
0x36480000
,
0x364c0000
,
0x36500000
,
0x36540000
,
0x36580000
,
0x365c0000
,
0x36600000
,
0x36640000
,
0x36680000
,
0x366c0000
,
0x36700000
,
0x36740000
,
0x36780000
,
0x367c0000
,
0x36800000
,
0x36820000
,
0x36840000
,
0x36860000
,
0x36880000
,
0x368a0000
,
0x368c0000
,
0x368e0000
,
0x36900000
,
0x36920000
,
0x36940000
,
0x36960000
,
0x36980000
,
0x369a0000
,
0x369c0000
,
0x369e0000
,
0x36a00000
,
0x36a20000
,
0x36a40000
,
0x36a60000
,
0x36a80000
,
0x36aa0000
,
0x36ac0000
,
0x36ae0000
,
0x36b00000
,
0x36b20000
,
0x36b40000
,
0x36b60000
,
0x36b80000
,
0x36ba0000
,
0x36bc0000
,
0x36be0000
,
0x36c00000
,
0x36c20000
,
0x36c40000
,
0x36c60000
,
0x36c80000
,
0x36ca0000
,
0x36cc0000
,
0x36ce0000
,
0x36d00000
,
0x36d20000
,
0x36d40000
,
0x36d60000
,
0x36d80000
,
0x36da0000
,
0x36dc0000
,
0x36de0000
,
0x36e00000
,
0x36e20000
,
0x36e40000
,
0x36e60000
,
0x36e80000
,
0x36ea0000
,
0x36ec0000
,
0x36ee0000
,
0x36f00000
,
0x36f20000
,
0x36f40000
,
0x36f60000
,
0x36f80000
,
0x36fa0000
,
0x36fc0000
,
0x36fe0000
,
0x37000000
,
0x37010000
,
0x37020000
,
0x37030000
,
0x37040000
,
0x37050000
,
0x37060000
,
0x37070000
,
0x37080000
,
0x37090000
,
0x370a0000
,
0x370b0000
,
0x370c0000
,
0x370d0000
,
0x370e0000
,
0x370f0000
,
0x37100000
,
0x37110000
,
0x37120000
,
0x37130000
,
0x37140000
,
0x37150000
,
0x37160000
,
0x37170000
,
0x37180000
,
0x37190000
,
0x371a0000
,
0x371b0000
,
0x371c0000
,
0x371d0000
,
0x371e0000
,
0x371f0000
,
0x37200000
,
0x37210000
,
0x37220000
,
0x37230000
,
0x37240000
,
0x37250000
,
0x37260000
,
0x37270000
,
0x37280000
,
0x37290000
,
0x372a0000
,
0x372b0000
,
0x372c0000
,
0x372d0000
,
0x372e0000
,
0x372f0000
,
0x37300000
,
0x37310000
,
0x37320000
,
0x37330000
,
0x37340000
,
0x37350000
,
0x37360000
,
0x37370000
,
0x37380000
,
0x37390000
,
0x373a0000
,
0x373b0000
,
0x373c0000
,
0x373d0000
,
0x373e0000
,
0x373f0000
,
0x37400000
,
0x37410000
,
0x37420000
,
0x37430000
,
0x37440000
,
0x37450000
,
0x37460000
,
0x37470000
,
0x37480000
,
0x37490000
,
0x374a0000
,
0x374b0000
,
0x374c0000
,
0x374d0000
,
0x374e0000
,
0x374f0000
,
0x37500000
,
0x37510000
,
0x37520000
,
0x37530000
,
0x37540000
,
0x37550000
,
0x37560000
,
0x37570000
,
0x37580000
,
0x37590000
,
0x375a0000
,
0x375b0000
,
0x375c0000
,
0x375d0000
,
0x375e0000
,
0x375f0000
,
0x37600000
,
0x37610000
,
0x37620000
,
0x37630000
,
0x37640000
,
0x37650000
,
0x37660000
,
0x37670000
,
0x37680000
,
0x37690000
,
0x376a0000
,
0x376b0000
,
0x376c0000
,
0x376d0000
,
0x376e0000
,
0x376f0000
,
0x37700000
,
0x37710000
,
0x37720000
,
0x37730000
,
0x37740000
,
0x37750000
,
0x37760000
,
0x37770000
,
0x37780000
,
0x37790000
,
0x377a0000
,
0x377b0000
,
0x377c0000
,
0x377d0000
,
0x377e0000
,
0x377f0000
,
0x37800000
,
0x37808000
,
0x37810000
,
0x37818000
,
0x37820000
,
0x37828000
,
0x37830000
,
0x37838000
,
0x37840000
,
0x37848000
,
0x37850000
,
0x37858000
,
0x37860000
,
0x37868000
,
0x37870000
,
0x37878000
,
0x37880000
,
0x37888000
,
0x37890000
,
0x37898000
,
0x378a0000
,
0x378a8000
,
0x378b0000
,
0x378b8000
,
0x378c0000
,
0x378c8000
,
0x378d0000
,
0x378d8000
,
0x378e0000
,
0x378e8000
,
0x378f0000
,
0x378f8000
,
0x37900000
,
0x37908000
,
0x37910000
,
0x37918000
,
0x37920000
,
0x37928000
,
0x37930000
,
0x37938000
,
0x37940000
,
0x37948000
,
0x37950000
,
0x37958000
,
0x37960000
,
0x37968000
,
0x37970000
,
0x37978000
,
0x37980000
,
0x37988000
,
0x37990000
,
0x37998000
,
0x379a0000
,
0x379a8000
,
0x379b0000
,
0x379b8000
,
0x379c0000
,
0x379c8000
,
0x379d0000
,
0x379d8000
,
0x379e0000
,
0x379e8000
,
0x379f0000
,
0x379f8000
,
0x37a00000
,
0x37a08000
,
0x37a10000
,
0x37a18000
,
0x37a20000
,
0x37a28000
,
0x37a30000
,
0x37a38000
,
0x37a40000
,
0x37a48000
,
0x37a50000
,
0x37a58000
,
0x37a60000
,
0x37a68000
,
0x37a70000
,
0x37a78000
,
0x37a80000
,
0x37a88000
,
0x37a90000
,
0x37a98000
,
0x37aa0000
,
0x37aa8000
,
0x37ab0000
,
0x37ab8000
,
0x37ac0000
,
0x37ac8000
,
0x37ad0000
,
0x37ad8000
,
0x37ae0000
,
0x37ae8000
,
0x37af0000
,
0x37af8000
,
0x37b00000
,
0x37b08000
,
0x37b10000
,
0x37b18000
,
0x37b20000
,
0x37b28000
,
0x37b30000
,
0x37b38000
,
0x37b40000
,
0x37b48000
,
0x37b50000
,
0x37b58000
,
0x37b60000
,
0x37b68000
,
0x37b70000
,
0x37b78000
,
0x37b80000
,
0x37b88000
,
0x37b90000
,
0x37b98000
,
0x37ba0000
,
0x37ba8000
,
0x37bb0000
,
0x37bb8000
,
0x37bc0000
,
0x37bc8000
,
0x37bd0000
,
0x37bd8000
,
0x37be0000
,
0x37be8000
,
0x37bf0000
,
0x37bf8000
,
0x37c00000
,
0x37c08000
,
0x37c10000
,
0x37c18000
,
0x37c20000
,
0x37c28000
,
0x37c30000
,
0x37c38000
,
0x37c40000
,
0x37c48000
,
0x37c50000
,
0x37c58000
,
0x37c60000
,
0x37c68000
,
0x37c70000
,
0x37c78000
,
0x37c80000
,
0x37c88000
,
0x37c90000
,
0x37c98000
,
0x37ca0000
,
0x37ca8000
,
0x37cb0000
,
0x37cb8000
,
0x37cc0000
,
0x37cc8000
,
0x37cd0000
,
0x37cd8000
,
0x37ce0000
,
0x37ce8000
,
0x37cf0000
,
0x37cf8000
,
0x37d00000
,
0x37d08000
,
0x37d10000
,
0x37d18000
,
0x37d20000
,
0x37d28000
,
0x37d30000
,
0x37d38000
,
0x37d40000
,
0x37d48000
,
0x37d50000
,
0x37d58000
,
0x37d60000
,
0x37d68000
,
0x37d70000
,
0x37d78000
,
0x37d80000
,
0x37d88000
,
0x37d90000
,
0x37d98000
,
0x37da0000
,
0x37da8000
,
0x37db0000
,
0x37db8000
,
0x37dc0000
,
0x37dc8000
,
0x37dd0000
,
0x37dd8000
,
0x37de0000
,
0x37de8000
,
0x37df0000
,
0x37df8000
,
0x37e00000
,
0x37e08000
,
0x37e10000
,
0x37e18000
,
0x37e20000
,
0x37e28000
,
0x37e30000
,
0x37e38000
,
0x37e40000
,
0x37e48000
,
0x37e50000
,
0x37e58000
,
0x37e60000
,
0x37e68000
,
0x37e70000
,
0x37e78000
,
0x37e80000
,
0x37e88000
,
0x37e90000
,
0x37e98000
,
0x37ea0000
,
0x37ea8000
,
0x37eb0000
,
0x37eb8000
,
0x37ec0000
,
0x37ec8000
,
0x37ed0000
,
0x37ed8000
,
0x37ee0000
,
0x37ee8000
,
0x37ef0000
,
0x37ef8000
,
0x37f00000
,
0x37f08000
,
0x37f10000
,
0x37f18000
,
0x37f20000
,
0x37f28000
,
0x37f30000
,
0x37f38000
,
0x37f40000
,
0x37f48000
,
0x37f50000
,
0x37f58000
,
0x37f60000
,
0x37f68000
,
0x37f70000
,
0x37f78000
,
0x37f80000
,
0x37f88000
,
0x37f90000
,
0x37f98000
,
0x37fa0000
,
0x37fa8000
,
0x37fb0000
,
0x37fb8000
,
0x37fc0000
,
0x37fc8000
,
0x37fd0000
,
0x37fd8000
,
0x37fe0000
,
0x37fe8000
,
0x37ff0000
,
0x37ff8000
,
0x38000000
,
0x38004000
,
0x38008000
,
0x3800c000
,
0x38010000
,
0x38014000
,
0x38018000
,
0x3801c000
,
0x38020000
,
0x38024000
,
0x38028000
,
0x3802c000
,
0x38030000
,
0x38034000
,
0x38038000
,
0x3803c000
,
0x38040000
,
0x38044000
,
0x38048000
,
0x3804c000
,
0x38050000
,
0x38054000
,
0x38058000
,
0x3805c000
,
0x38060000
,
0x38064000
,
0x38068000
,
0x3806c000
,
0x38070000
,
0x38074000
,
0x38078000
,
0x3807c000
,
0x38080000
,
0x38084000
,
0x38088000
,
0x3808c000
,
0x38090000
,
0x38094000
,
0x38098000
,
0x3809c000
,
0x380a0000
,
0x380a4000
,
0x380a8000
,
0x380ac000
,
0x380b0000
,
0x380b4000
,
0x380b8000
,
0x380bc000
,
0x380c0000
,
0x380c4000
,
0x380c8000
,
0x380cc000
,
0x380d0000
,
0x380d4000
,
0x380d8000
,
0x380dc000
,
0x380e0000
,
0x380e4000
,
0x380e8000
,
0x380ec000
,
0x380f0000
,
0x380f4000
,
0x380f8000
,
0x380fc000
,
0x38100000
,
0x38104000
,
0x38108000
,
0x3810c000
,
0x38110000
,
0x38114000
,
0x38118000
,
0x3811c000
,
0x38120000
,
0x38124000
,
0x38128000
,
0x3812c000
,
0x38130000
,
0x38134000
,
0x38138000
,
0x3813c000
,
0x38140000
,
0x38144000
,
0x38148000
,
0x3814c000
,
0x38150000
,
0x38154000
,
0x38158000
,
0x3815c000
,
0x38160000
,
0x38164000
,
0x38168000
,
0x3816c000
,
0x38170000
,
0x38174000
,
0x38178000
,
0x3817c000
,
0x38180000
,
0x38184000
,
0x38188000
,
0x3818c000
,
0x38190000
,
0x38194000
,
0x38198000
,
0x3819c000
,
0x381a0000
,
0x381a4000
,
0x381a8000
,
0x381ac000
,
0x381b0000
,
0x381b4000
,
0x381b8000
,
0x381bc000
,
0x381c0000
,
0x381c4000
,
0x381c8000
,
0x381cc000
,
0x381d0000
,
0x381d4000
,
0x381d8000
,
0x381dc000
,
0x381e0000
,
0x381e4000
,
0x381e8000
,
0x381ec000
,
0x381f0000
,
0x381f4000
,
0x381f8000
,
0x381fc000
,
0x38200000
,
0x38204000
,
0x38208000
,
0x3820c000
,
0x38210000
,
0x38214000
,
0x38218000
,
0x3821c000
,
0x38220000
,
0x38224000
,
0x38228000
,
0x3822c000
,
0x38230000
,
0x38234000
,
0x38238000
,
0x3823c000
,
0x38240000
,
0x38244000
,
0x38248000
,
0x3824c000
,
0x38250000
,
0x38254000
,
0x38258000
,
0x3825c000
,
0x38260000
,
0x38264000
,
0x38268000
,
0x3826c000
,
0x38270000
,
0x38274000
,
0x38278000
,
0x3827c000
,
0x38280000
,
0x38284000
,
0x38288000
,
0x3828c000
,
0x38290000
,
0x38294000
,
0x38298000
,
0x3829c000
,
0x382a0000
,
0x382a4000
,
0x382a8000
,
0x382ac000
,
0x382b0000
,
0x382b4000
,
0x382b8000
,
0x382bc000
,
0x382c0000
,
0x382c4000
,
0x382c8000
,
0x382cc000
,
0x382d0000
,
0x382d4000
,
0x382d8000
,
0x382dc000
,
0x382e0000
,
0x382e4000
,
0x382e8000
,
0x382ec000
,
0x382f0000
,
0x382f4000
,
0x382f8000
,
0x382fc000
,
0x38300000
,
0x38304000
,
0x38308000
,
0x3830c000
,
0x38310000
,
0x38314000
,
0x38318000
,
0x3831c000
,
0x38320000
,
0x38324000
,
0x38328000
,
0x3832c000
,
0x38330000
,
0x38334000
,
0x38338000
,
0x3833c000
,
0x38340000
,
0x38344000
,
0x38348000
,
0x3834c000
,
0x38350000
,
0x38354000
,
0x38358000
,
0x3835c000
,
0x38360000
,
0x38364000
,
0x38368000
,
0x3836c000
,
0x38370000
,
0x38374000
,
0x38378000
,
0x3837c000
,
0x38380000
,
0x38384000
,
0x38388000
,
0x3838c000
,
0x38390000
,
0x38394000
,
0x38398000
,
0x3839c000
,
0x383a0000
,
0x383a4000
,
0x383a8000
,
0x383ac000
,
0x383b0000
,
0x383b4000
,
0x383b8000
,
0x383bc000
,
0x383c0000
,
0x383c4000
,
0x383c8000
,
0x383cc000
,
0x383d0000
,
0x383d4000
,
0x383d8000
,
0x383dc000
,
0x383e0000
,
0x383e4000
,
0x383e8000
,
0x383ec000
,
0x383f0000
,
0x383f4000
,
0x383f8000
,
0x383fc000
,
0x38400000
,
0x38404000
,
0x38408000
,
0x3840c000
,
0x38410000
,
0x38414000
,
0x38418000
,
0x3841c000
,
0x38420000
,
0x38424000
,
0x38428000
,
0x3842c000
,
0x38430000
,
0x38434000
,
0x38438000
,
0x3843c000
,
0x38440000
,
0x38444000
,
0x38448000
,
0x3844c000
,
0x38450000
,
0x38454000
,
0x38458000
,
0x3845c000
,
0x38460000
,
0x38464000
,
0x38468000
,
0x3846c000
,
0x38470000
,
0x38474000
,
0x38478000
,
0x3847c000
,
0x38480000
,
0x38484000
,
0x38488000
,
0x3848c000
,
0x38490000
,
0x38494000
,
0x38498000
,
0x3849c000
,
0x384a0000
,
0x384a4000
,
0x384a8000
,
0x384ac000
,
0x384b0000
,
0x384b4000
,
0x384b8000
,
0x384bc000
,
0x384c0000
,
0x384c4000
,
0x384c8000
,
0x384cc000
,
0x384d0000
,
0x384d4000
,
0x384d8000
,
0x384dc000
,
0x384e0000
,
0x384e4000
,
0x384e8000
,
0x384ec000
,
0x384f0000
,
0x384f4000
,
0x384f8000
,
0x384fc000
,
0x38500000
,
0x38504000
,
0x38508000
,
0x3850c000
,
0x38510000
,
0x38514000
,
0x38518000
,
0x3851c000
,
0x38520000
,
0x38524000
,
0x38528000
,
0x3852c000
,
0x38530000
,
0x38534000
,
0x38538000
,
0x3853c000
,
0x38540000
,
0x38544000
,
0x38548000
,
0x3854c000
,
0x38550000
,
0x38554000
,
0x38558000
,
0x3855c000
,
0x38560000
,
0x38564000
,
0x38568000
,
0x3856c000
,
0x38570000
,
0x38574000
,
0x38578000
,
0x3857c000
,
0x38580000
,
0x38584000
,
0x38588000
,
0x3858c000
,
0x38590000
,
0x38594000
,
0x38598000
,
0x3859c000
,
0x385a0000
,
0x385a4000
,
0x385a8000
,
0x385ac000
,
0x385b0000
,
0x385b4000
,
0x385b8000
,
0x385bc000
,
0x385c0000
,
0x385c4000
,
0x385c8000
,
0x385cc000
,
0x385d0000
,
0x385d4000
,
0x385d8000
,
0x385dc000
,
0x385e0000
,
0x385e4000
,
0x385e8000
,
0x385ec000
,
0x385f0000
,
0x385f4000
,
0x385f8000
,
0x385fc000
,
0x38600000
,
0x38604000
,
0x38608000
,
0x3860c000
,
0x38610000
,
0x38614000
,
0x38618000
,
0x3861c000
,
0x38620000
,
0x38624000
,
0x38628000
,
0x3862c000
,
0x38630000
,
0x38634000
,
0x38638000
,
0x3863c000
,
0x38640000
,
0x38644000
,
0x38648000
,
0x3864c000
,
0x38650000
,
0x38654000
,
0x38658000
,
0x3865c000
,
0x38660000
,
0x38664000
,
0x38668000
,
0x3866c000
,
0x38670000
,
0x38674000
,
0x38678000
,
0x3867c000
,
0x38680000
,
0x38684000
,
0x38688000
,
0x3868c000
,
0x38690000
,
0x38694000
,
0x38698000
,
0x3869c000
,
0x386a0000
,
0x386a4000
,
0x386a8000
,
0x386ac000
,
0x386b0000
,
0x386b4000
,
0x386b8000
,
0x386bc000
,
0x386c0000
,
0x386c4000
,
0x386c8000
,
0x386cc000
,
0x386d0000
,
0x386d4000
,
0x386d8000
,
0x386dc000
,
0x386e0000
,
0x386e4000
,
0x386e8000
,
0x386ec000
,
0x386f0000
,
0x386f4000
,
0x386f8000
,
0x386fc000
,
0x38700000
,
0x38704000
,
0x38708000
,
0x3870c000
,
0x38710000
,
0x38714000
,
0x38718000
,
0x3871c000
,
0x38720000
,
0x38724000
,
0x38728000
,
0x3872c000
,
0x38730000
,
0x38734000
,
0x38738000
,
0x3873c000
,
0x38740000
,
0x38744000
,
0x38748000
,
0x3874c000
,
0x38750000
,
0x38754000
,
0x38758000
,
0x3875c000
,
0x38760000
,
0x38764000
,
0x38768000
,
0x3876c000
,
0x38770000
,
0x38774000
,
0x38778000
,
0x3877c000
,
0x38780000
,
0x38784000
,
0x38788000
,
0x3878c000
,
0x38790000
,
0x38794000
,
0x38798000
,
0x3879c000
,
0x387a0000
,
0x387a4000
,
0x387a8000
,
0x387ac000
,
0x387b0000
,
0x387b4000
,
0x387b8000
,
0x387bc000
,
0x387c0000
,
0x387c4000
,
0x387c8000
,
0x387cc000
,
0x387d0000
,
0x387d4000
,
0x387d8000
,
0x387dc000
,
0x387e0000
,
0x387e4000
,
0x387e8000
,
0x387ec000
,
0x387f0000
,
0x387f4000
,
0x387f8000
,
0x387fc000
,
0x38000000
,
0x38002000
,
0x38004000
,
0x38006000
,
0x38008000
,
0x3800a000
,
0x3800c000
,
0x3800e000
,
0x38010000
,
0x38012000
,
0x38014000
,
0x38016000
,
0x38018000
,
0x3801a000
,
0x3801c000
,
0x3801e000
,
0x38020000
,
0x38022000
,
0x38024000
,
0x38026000
,
0x38028000
,
0x3802a000
,
0x3802c000
,
0x3802e000
,
0x38030000
,
0x38032000
,
0x38034000
,
0x38036000
,
0x38038000
,
0x3803a000
,
0x3803c000
,
0x3803e000
,
0x38040000
,
0x38042000
,
0x38044000
,
0x38046000
,
0x38048000
,
0x3804a000
,
0x3804c000
,
0x3804e000
,
0x38050000
,
0x38052000
,
0x38054000
,
0x38056000
,
0x38058000
,
0x3805a000
,
0x3805c000
,
0x3805e000
,
0x38060000
,
0x38062000
,
0x38064000
,
0x38066000
,
0x38068000
,
0x3806a000
,
0x3806c000
,
0x3806e000
,
0x38070000
,
0x38072000
,
0x38074000
,
0x38076000
,
0x38078000
,
0x3807a000
,
0x3807c000
,
0x3807e000
,
0x38080000
,
0x38082000
,
0x38084000
,
0x38086000
,
0x38088000
,
0x3808a000
,
0x3808c000
,
0x3808e000
,
0x38090000
,
0x38092000
,
0x38094000
,
0x38096000
,
0x38098000
,
0x3809a000
,
0x3809c000
,
0x3809e000
,
0x380a0000
,
0x380a2000
,
0x380a4000
,
0x380a6000
,
0x380a8000
,
0x380aa000
,
0x380ac000
,
0x380ae000
,
0x380b0000
,
0x380b2000
,
0x380b4000
,
0x380b6000
,
0x380b8000
,
0x380ba000
,
0x380bc000
,
0x380be000
,
0x380c0000
,
0x380c2000
,
0x380c4000
,
0x380c6000
,
0x380c8000
,
0x380ca000
,
0x380cc000
,
0x380ce000
,
0x380d0000
,
0x380d2000
,
0x380d4000
,
0x380d6000
,
0x380d8000
,
0x380da000
,
0x380dc000
,
0x380de000
,
0x380e0000
,
0x380e2000
,
0x380e4000
,
0x380e6000
,
0x380e8000
,
0x380ea000
,
0x380ec000
,
0x380ee000
,
0x380f0000
,
0x380f2000
,
0x380f4000
,
0x380f6000
,
0x380f8000
,
0x380fa000
,
0x380fc000
,
0x380fe000
,
0x38100000
,
0x38102000
,
0x38104000
,
0x38106000
,
0x38108000
,
0x3810a000
,
0x3810c000
,
0x3810e000
,
0x38110000
,
0x38112000
,
0x38114000
,
0x38116000
,
0x38118000
,
0x3811a000
,
0x3811c000
,
0x3811e000
,
0x38120000
,
0x38122000
,
0x38124000
,
0x38126000
,
0x38128000
,
0x3812a000
,
0x3812c000
,
0x3812e000
,
0x38130000
,
0x38132000
,
0x38134000
,
0x38136000
,
0x38138000
,
0x3813a000
,
0x3813c000
,
0x3813e000
,
0x38140000
,
0x38142000
,
0x38144000
,
0x38146000
,
0x38148000
,
0x3814a000
,
0x3814c000
,
0x3814e000
,
0x38150000
,
0x38152000
,
0x38154000
,
0x38156000
,
0x38158000
,
0x3815a000
,
0x3815c000
,
0x3815e000
,
0x38160000
,
0x38162000
,
0x38164000
,
0x38166000
,
0x38168000
,
0x3816a000
,
0x3816c000
,
0x3816e000
,
0x38170000
,
0x38172000
,
0x38174000
,
0x38176000
,
0x38178000
,
0x3817a000
,
0x3817c000
,
0x3817e000
,
0x38180000
,
0x38182000
,
0x38184000
,
0x38186000
,
0x38188000
,
0x3818a000
,
0x3818c000
,
0x3818e000
,
0x38190000
,
0x38192000
,
0x38194000
,
0x38196000
,
0x38198000
,
0x3819a000
,
0x3819c000
,
0x3819e000
,
0x381a0000
,
0x381a2000
,
0x381a4000
,
0x381a6000
,
0x381a8000
,
0x381aa000
,
0x381ac000
,
0x381ae000
,
0x381b0000
,
0x381b2000
,
0x381b4000
,
0x381b6000
,
0x381b8000
,
0x381ba000
,
0x381bc000
,
0x381be000
,
0x381c0000
,
0x381c2000
,
0x381c4000
,
0x381c6000
,
0x381c8000
,
0x381ca000
,
0x381cc000
,
0x381ce000
,
0x381d0000
,
0x381d2000
,
0x381d4000
,
0x381d6000
,
0x381d8000
,
0x381da000
,
0x381dc000
,
0x381de000
,
0x381e0000
,
0x381e2000
,
0x381e4000
,
0x381e6000
,
0x381e8000
,
0x381ea000
,
0x381ec000
,
0x381ee000
,
0x381f0000
,
0x381f2000
,
0x381f4000
,
0x381f6000
,
0x381f8000
,
0x381fa000
,
0x381fc000
,
0x381fe000
,
0x38200000
,
0x38202000
,
0x38204000
,
0x38206000
,
0x38208000
,
0x3820a000
,
0x3820c000
,
0x3820e000
,
0x38210000
,
0x38212000
,
0x38214000
,
0x38216000
,
0x38218000
,
0x3821a000
,
0x3821c000
,
0x3821e000
,
0x38220000
,
0x38222000
,
0x38224000
,
0x38226000
,
0x38228000
,
0x3822a000
,
0x3822c000
,
0x3822e000
,
0x38230000
,
0x38232000
,
0x38234000
,
0x38236000
,
0x38238000
,
0x3823a000
,
0x3823c000
,
0x3823e000
,
0x38240000
,
0x38242000
,
0x38244000
,
0x38246000
,
0x38248000
,
0x3824a000
,
0x3824c000
,
0x3824e000
,
0x38250000
,
0x38252000
,
0x38254000
,
0x38256000
,
0x38258000
,
0x3825a000
,
0x3825c000
,
0x3825e000
,
0x38260000
,
0x38262000
,
0x38264000
,
0x38266000
,
0x38268000
,
0x3826a000
,
0x3826c000
,
0x3826e000
,
0x38270000
,
0x38272000
,
0x38274000
,
0x38276000
,
0x38278000
,
0x3827a000
,
0x3827c000
,
0x3827e000
,
0x38280000
,
0x38282000
,
0x38284000
,
0x38286000
,
0x38288000
,
0x3828a000
,
0x3828c000
,
0x3828e000
,
0x38290000
,
0x38292000
,
0x38294000
,
0x38296000
,
0x38298000
,
0x3829a000
,
0x3829c000
,
0x3829e000
,
0x382a0000
,
0x382a2000
,
0x382a4000
,
0x382a6000
,
0x382a8000
,
0x382aa000
,
0x382ac000
,
0x382ae000
,
0x382b0000
,
0x382b2000
,
0x382b4000
,
0x382b6000
,
0x382b8000
,
0x382ba000
,
0x382bc000
,
0x382be000
,
0x382c0000
,
0x382c2000
,
0x382c4000
,
0x382c6000
,
0x382c8000
,
0x382ca000
,
0x382cc000
,
0x382ce000
,
0x382d0000
,
0x382d2000
,
0x382d4000
,
0x382d6000
,
0x382d8000
,
0x382da000
,
0x382dc000
,
0x382de000
,
0x382e0000
,
0x382e2000
,
0x382e4000
,
0x382e6000
,
0x382e8000
,
0x382ea000
,
0x382ec000
,
0x382ee000
,
0x382f0000
,
0x382f2000
,
0x382f4000
,
0x382f6000
,
0x382f8000
,
0x382fa000
,
0x382fc000
,
0x382fe000
,
0x38300000
,
0x38302000
,
0x38304000
,
0x38306000
,
0x38308000
,
0x3830a000
,
0x3830c000
,
0x3830e000
,
0x38310000
,
0x38312000
,
0x38314000
,
0x38316000
,
0x38318000
,
0x3831a000
,
0x3831c000
,
0x3831e000
,
0x38320000
,
0x38322000
,
0x38324000
,
0x38326000
,
0x38328000
,
0x3832a000
,
0x3832c000
,
0x3832e000
,
0x38330000
,
0x38332000
,
0x38334000
,
0x38336000
,
0x38338000
,
0x3833a000
,
0x3833c000
,
0x3833e000
,
0x38340000
,
0x38342000
,
0x38344000
,
0x38346000
,
0x38348000
,
0x3834a000
,
0x3834c000
,
0x3834e000
,
0x38350000
,
0x38352000
,
0x38354000
,
0x38356000
,
0x38358000
,
0x3835a000
,
0x3835c000
,
0x3835e000
,
0x38360000
,
0x38362000
,
0x38364000
,
0x38366000
,
0x38368000
,
0x3836a000
,
0x3836c000
,
0x3836e000
,
0x38370000
,
0x38372000
,
0x38374000
,
0x38376000
,
0x38378000
,
0x3837a000
,
0x3837c000
,
0x3837e000
,
0x38380000
,
0x38382000
,
0x38384000
,
0x38386000
,
0x38388000
,
0x3838a000
,
0x3838c000
,
0x3838e000
,
0x38390000
,
0x38392000
,
0x38394000
,
0x38396000
,
0x38398000
,
0x3839a000
,
0x3839c000
,
0x3839e000
,
0x383a0000
,
0x383a2000
,
0x383a4000
,
0x383a6000
,
0x383a8000
,
0x383aa000
,
0x383ac000
,
0x383ae000
,
0x383b0000
,
0x383b2000
,
0x383b4000
,
0x383b6000
,
0x383b8000
,
0x383ba000
,
0x383bc000
,
0x383be000
,
0x383c0000
,
0x383c2000
,
0x383c4000
,
0x383c6000
,
0x383c8000
,
0x383ca000
,
0x383cc000
,
0x383ce000
,
0x383d0000
,
0x383d2000
,
0x383d4000
,
0x383d6000
,
0x383d8000
,
0x383da000
,
0x383dc000
,
0x383de000
,
0x383e0000
,
0x383e2000
,
0x383e4000
,
0x383e6000
,
0x383e8000
,
0x383ea000
,
0x383ec000
,
0x383ee000
,
0x383f0000
,
0x383f2000
,
0x383f4000
,
0x383f6000
,
0x383f8000
,
0x383fa000
,
0x383fc000
,
0x383fe000
,
0x38400000
,
0x38402000
,
0x38404000
,
0x38406000
,
0x38408000
,
0x3840a000
,
0x3840c000
,
0x3840e000
,
0x38410000
,
0x38412000
,
0x38414000
,
0x38416000
,
0x38418000
,
0x3841a000
,
0x3841c000
,
0x3841e000
,
0x38420000
,
0x38422000
,
0x38424000
,
0x38426000
,
0x38428000
,
0x3842a000
,
0x3842c000
,
0x3842e000
,
0x38430000
,
0x38432000
,
0x38434000
,
0x38436000
,
0x38438000
,
0x3843a000
,
0x3843c000
,
0x3843e000
,
0x38440000
,
0x38442000
,
0x38444000
,
0x38446000
,
0x38448000
,
0x3844a000
,
0x3844c000
,
0x3844e000
,
0x38450000
,
0x38452000
,
0x38454000
,
0x38456000
,
0x38458000
,
0x3845a000
,
0x3845c000
,
0x3845e000
,
0x38460000
,
0x38462000
,
0x38464000
,
0x38466000
,
0x38468000
,
0x3846a000
,
0x3846c000
,
0x3846e000
,
0x38470000
,
0x38472000
,
0x38474000
,
0x38476000
,
0x38478000
,
0x3847a000
,
0x3847c000
,
0x3847e000
,
0x38480000
,
0x38482000
,
0x38484000
,
0x38486000
,
0x38488000
,
0x3848a000
,
0x3848c000
,
0x3848e000
,
0x38490000
,
0x38492000
,
0x38494000
,
0x38496000
,
0x38498000
,
0x3849a000
,
0x3849c000
,
0x3849e000
,
0x384a0000
,
0x384a2000
,
0x384a4000
,
0x384a6000
,
0x384a8000
,
0x384aa000
,
0x384ac000
,
0x384ae000
,
0x384b0000
,
0x384b2000
,
0x384b4000
,
0x384b6000
,
0x384b8000
,
0x384ba000
,
0x384bc000
,
0x384be000
,
0x384c0000
,
0x384c2000
,
0x384c4000
,
0x384c6000
,
0x384c8000
,
0x384ca000
,
0x384cc000
,
0x384ce000
,
0x384d0000
,
0x384d2000
,
0x384d4000
,
0x384d6000
,
0x384d8000
,
0x384da000
,
0x384dc000
,
0x384de000
,
0x384e0000
,
0x384e2000
,
0x384e4000
,
0x384e6000
,
0x384e8000
,
0x384ea000
,
0x384ec000
,
0x384ee000
,
0x384f0000
,
0x384f2000
,
0x384f4000
,
0x384f6000
,
0x384f8000
,
0x384fa000
,
0x384fc000
,
0x384fe000
,
0x38500000
,
0x38502000
,
0x38504000
,
0x38506000
,
0x38508000
,
0x3850a000
,
0x3850c000
,
0x3850e000
,
0x38510000
,
0x38512000
,
0x38514000
,
0x38516000
,
0x38518000
,
0x3851a000
,
0x3851c000
,
0x3851e000
,
0x38520000
,
0x38522000
,
0x38524000
,
0x38526000
,
0x38528000
,
0x3852a000
,
0x3852c000
,
0x3852e000
,
0x38530000
,
0x38532000
,
0x38534000
,
0x38536000
,
0x38538000
,
0x3853a000
,
0x3853c000
,
0x3853e000
,
0x38540000
,
0x38542000
,
0x38544000
,
0x38546000
,
0x38548000
,
0x3854a000
,
0x3854c000
,
0x3854e000
,
0x38550000
,
0x38552000
,
0x38554000
,
0x38556000
,
0x38558000
,
0x3855a000
,
0x3855c000
,
0x3855e000
,
0x38560000
,
0x38562000
,
0x38564000
,
0x38566000
,
0x38568000
,
0x3856a000
,
0x3856c000
,
0x3856e000
,
0x38570000
,
0x38572000
,
0x38574000
,
0x38576000
,
0x38578000
,
0x3857a000
,
0x3857c000
,
0x3857e000
,
0x38580000
,
0x38582000
,
0x38584000
,
0x38586000
,
0x38588000
,
0x3858a000
,
0x3858c000
,
0x3858e000
,
0x38590000
,
0x38592000
,
0x38594000
,
0x38596000
,
0x38598000
,
0x3859a000
,
0x3859c000
,
0x3859e000
,
0x385a0000
,
0x385a2000
,
0x385a4000
,
0x385a6000
,
0x385a8000
,
0x385aa000
,
0x385ac000
,
0x385ae000
,
0x385b0000
,
0x385b2000
,
0x385b4000
,
0x385b6000
,
0x385b8000
,
0x385ba000
,
0x385bc000
,
0x385be000
,
0x385c0000
,
0x385c2000
,
0x385c4000
,
0x385c6000
,
0x385c8000
,
0x385ca000
,
0x385cc000
,
0x385ce000
,
0x385d0000
,
0x385d2000
,
0x385d4000
,
0x385d6000
,
0x385d8000
,
0x385da000
,
0x385dc000
,
0x385de000
,
0x385e0000
,
0x385e2000
,
0x385e4000
,
0x385e6000
,
0x385e8000
,
0x385ea000
,
0x385ec000
,
0x385ee000
,
0x385f0000
,
0x385f2000
,
0x385f4000
,
0x385f6000
,
0x385f8000
,
0x385fa000
,
0x385fc000
,
0x385fe000
,
0x38600000
,
0x38602000
,
0x38604000
,
0x38606000
,
0x38608000
,
0x3860a000
,
0x3860c000
,
0x3860e000
,
0x38610000
,
0x38612000
,
0x38614000
,
0x38616000
,
0x38618000
,
0x3861a000
,
0x3861c000
,
0x3861e000
,
0x38620000
,
0x38622000
,
0x38624000
,
0x38626000
,
0x38628000
,
0x3862a000
,
0x3862c000
,
0x3862e000
,
0x38630000
,
0x38632000
,
0x38634000
,
0x38636000
,
0x38638000
,
0x3863a000
,
0x3863c000
,
0x3863e000
,
0x38640000
,
0x38642000
,
0x38644000
,
0x38646000
,
0x38648000
,
0x3864a000
,
0x3864c000
,
0x3864e000
,
0x38650000
,
0x38652000
,
0x38654000
,
0x38656000
,
0x38658000
,
0x3865a000
,
0x3865c000
,
0x3865e000
,
0x38660000
,
0x38662000
,
0x38664000
,
0x38666000
,
0x38668000
,
0x3866a000
,
0x3866c000
,
0x3866e000
,
0x38670000
,
0x38672000
,
0x38674000
,
0x38676000
,
0x38678000
,
0x3867a000
,
0x3867c000
,
0x3867e000
,
0x38680000
,
0x38682000
,
0x38684000
,
0x38686000
,
0x38688000
,
0x3868a000
,
0x3868c000
,
0x3868e000
,
0x38690000
,
0x38692000
,
0x38694000
,
0x38696000
,
0x38698000
,
0x3869a000
,
0x3869c000
,
0x3869e000
,
0x386a0000
,
0x386a2000
,
0x386a4000
,
0x386a6000
,
0x386a8000
,
0x386aa000
,
0x386ac000
,
0x386ae000
,
0x386b0000
,
0x386b2000
,
0x386b4000
,
0x386b6000
,
0x386b8000
,
0x386ba000
,
0x386bc000
,
0x386be000
,
0x386c0000
,
0x386c2000
,
0x386c4000
,
0x386c6000
,
0x386c8000
,
0x386ca000
,
0x386cc000
,
0x386ce000
,
0x386d0000
,
0x386d2000
,
0x386d4000
,
0x386d6000
,
0x386d8000
,
0x386da000
,
0x386dc000
,
0x386de000
,
0x386e0000
,
0x386e2000
,
0x386e4000
,
0x386e6000
,
0x386e8000
,
0x386ea000
,
0x386ec000
,
0x386ee000
,
0x386f0000
,
0x386f2000
,
0x386f4000
,
0x386f6000
,
0x386f8000
,
0x386fa000
,
0x386fc000
,
0x386fe000
,
0x38700000
,
0x38702000
,
0x38704000
,
0x38706000
,
0x38708000
,
0x3870a000
,
0x3870c000
,
0x3870e000
,
0x38710000
,
0x38712000
,
0x38714000
,
0x38716000
,
0x38718000
,
0x3871a000
,
0x3871c000
,
0x3871e000
,
0x38720000
,
0x38722000
,
0x38724000
,
0x38726000
,
0x38728000
,
0x3872a000
,
0x3872c000
,
0x3872e000
,
0x38730000
,
0x38732000
,
0x38734000
,
0x38736000
,
0x38738000
,
0x3873a000
,
0x3873c000
,
0x3873e000
,
0x38740000
,
0x38742000
,
0x38744000
,
0x38746000
,
0x38748000
,
0x3874a000
,
0x3874c000
,
0x3874e000
,
0x38750000
,
0x38752000
,
0x38754000
,
0x38756000
,
0x38758000
,
0x3875a000
,
0x3875c000
,
0x3875e000
,
0x38760000
,
0x38762000
,
0x38764000
,
0x38766000
,
0x38768000
,
0x3876a000
,
0x3876c000
,
0x3876e000
,
0x38770000
,
0x38772000
,
0x38774000
,
0x38776000
,
0x38778000
,
0x3877a000
,
0x3877c000
,
0x3877e000
,
0x38780000
,
0x38782000
,
0x38784000
,
0x38786000
,
0x38788000
,
0x3878a000
,
0x3878c000
,
0x3878e000
,
0x38790000
,
0x38792000
,
0x38794000
,
0x38796000
,
0x38798000
,
0x3879a000
,
0x3879c000
,
0x3879e000
,
0x387a0000
,
0x387a2000
,
0x387a4000
,
0x387a6000
,
0x387a8000
,
0x387aa000
,
0x387ac000
,
0x387ae000
,
0x387b0000
,
0x387b2000
,
0x387b4000
,
0x387b6000
,
0x387b8000
,
0x387ba000
,
0x387bc000
,
0x387be000
,
0x387c0000
,
0x387c2000
,
0x387c4000
,
0x387c6000
,
0x387c8000
,
0x387ca000
,
0x387cc000
,
0x387ce000
,
0x387d0000
,
0x387d2000
,
0x387d4000
,
0x387d6000
,
0x387d8000
,
0x387da000
,
0x387dc000
,
0x387de000
,
0x387e0000
,
0x387e2000
,
0x387e4000
,
0x387e6000
,
0x387e8000
,
0x387ea000
,
0x387ec000
,
0x387ee000
,
0x387f0000
,
0x387f2000
,
0x387f4000
,
0x387f6000
,
0x387f8000
,
0x387fa000
,
0x387fc000
,
0x387fe000
};
// Offset into mantissatable selected by the half's 6-bit (sign+exponent)
// field: exponent field 0 (indices 0 and 32) uses the denormal section at
// offset 0; every other exponent uses the normalized section at 0x0400.
static const uint16_t offsettable[64] = {
    0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
    0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
// Rebased float exponent for each half (sign+exponent) field: entry i adds
// (i & 0x1f) * 0x00800000 (exponent step) plus the sign bit for i >= 32.
// Indices 31 and 63 map half Inf/NaN to float Inf/NaN (0x47800000 rebias).
static const uint32_t exponenttable[64] = {
    0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000,
    0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
    0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
    0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000, 0x0f000000, 0x47800000,
    0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000,
    0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
    0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000, 0x8b000000, 0x8b800000,
    0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000, 0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000};
// float -> half conversion: base half value indexed by the float's
// (sign + 8-bit exponent) field. Layout per 256-entry half (positive then
// negative, differing only in bit 15):
//   [0..102]    exponent < -24        -> signed zero          (0x0000)
//   [103..112]  denormal halves       -> leading mantissa bit (1 << (i-103))
//   [113..142]  normal halves         -> rebias: (i-112) << 10
//   [143..255]  overflow / Inf / NaN  -> signed infinity      (0x7c00)
static const uint16_t basetable[512] = {
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 0
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 8
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 16
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 24
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 32
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 40
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 48
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 56
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 64
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 72
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 80
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 88
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001,  // 96
    0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,  // 104
    0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00,  // 112
    0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,  // 120
    0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00,  // 128
    0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00,  // 136
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 144
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 152
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 160
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 168
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 176
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 184
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 192
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 200
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 208
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 216
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 224
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 232
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 240
    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,  // 248
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 256
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 264
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 272
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 280
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 288
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 296
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 304
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 312
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 320
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 328
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 336
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,  // 344
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,  // 352
    0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,  // 360
    0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,  // 368
    0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00,  // 376
    0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00,  // 384
    0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00,  // 392
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 400
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 408
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 416
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 424
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 432
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 440
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 448
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 456
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 464
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 472
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 480
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 488
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,  // 496
    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00}; // 504
// float -> half conversion: right-shift applied to the float's 23-bit
// mantissa, indexed by the (sign + 8-bit exponent) field. Per 256-entry
// half (positive/negative are identical):
//   [0..102]    underflow            -> 0x18 (shift everything out)
//   [103..112]  denormal halves      -> 0x17 down to 0x0e
//   [113..142]  normal halves        -> 0x0d (keep top 10 bits)
//   [143..254]  overflow -> Inf      -> 0x18
//   [255]       float Inf/NaN        -> 0x0d (preserve NaN payload bits)
static const uint8_t shifttable[512] = {
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 0
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 8
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 16
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 24
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 32
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 40
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 48
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 56
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 64
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 72
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 80
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 88
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,  // 96
    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,  // 104
    0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 112
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 120
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 128
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,  // 136
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 144
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 152
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 160
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 168
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 176
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 184
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 192
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 200
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 208
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 216
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 224
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 232
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 240
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d,  // 248
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 256
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 264
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 272
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 280
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 288
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 296
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 304
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 312
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 320
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 328
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 336
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 344
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,  // 352
    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,  // 360
    0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 368
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 376
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,  // 384
    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,  // 392
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 400
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 408
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 416
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 424
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 432
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 440
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 448
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 456
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 464
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 472
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 480
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 488
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,  // 496
    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d}; // 504
// Converts one IEEE-754 binary32 value to binary16 by table lookup
// (mantissa bits beyond half precision are truncated by the shift).
half_t Float2Half(float f) {
  uint32_t bits = *reinterpret_cast<uint32_t *>(&f);
  // Key on the sign bit plus the 8-bit exponent field.
  const uint32_t key = (bits >> 23) & 0x1ff;
  const uint32_t mantissa = bits & 0x007fffff;
  return basetable[key] + (mantissa >> shifttable[key]);
}
// Converts one binary16 value back to binary32 by table lookup: the
// mantissa table (offset by the exponent class) supplies the significand
// and the exponent table supplies the rebased sign/exponent bits.
float Half2Float(half_t h) {
  const uint32_t exp_field = h >> 10;  // sign + 5-bit exponent
  uint32_t v = mantissatable[offsettable[exp_field] + (h & 0x3ff)] +
               exponenttable[exp_field];
  return *reinterpret_cast<float *>(&v);
}
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
h_array
[
i
]
=
Float2Half
(
f_array
[
i
]);
}
}
// Element-wise Half2Float over `count` contiguous values.
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
  const half_t *src = h_array;
  float *dst = f_array;
  for (int remaining = count; remaining > 0; --remaining) {
    *dst++ = Half2Float(*src++);
  }
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_half.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace paddle {
namespace lite {

// Raw bit representation of an IEEE-754 binary16 (half precision) value.
// (Modernized from `typedef`; the alias is otherwise unchanged.)
using half_t = uint16_t;

// Converts one float to half (extra mantissa bits are truncated).
half_t Float2Half(float f);

// Converts one half back to float.
float Half2Float(half_t h);

// Converts `count` floats from f_array into h_array.
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);

// Converts `count` halves from h_array into f_array.
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);

}  // namespace lite
}  // namespace paddle
paddle/fluid/lite/opencl/cl_helper.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
lite
{
// Binds this helper to an externally owned CLContext; the helper does not
// take ownership, so the context must outlive it.
void CLHelper::set_context(CLContext *context) { context_ = context; }
// Builds a kernel `kernel_name` from the CL source file `file_name` (with
// optional compiler `options`) via the bound context and appends it to the
// kernel list; its index is the value later passed to KernelAt().
void CLHelper::AddKernel(const std::string &kernel_name,
                         const std::string &file_name,
                         const std::string &options) {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  VLOG(3) << " --- begin to add kernel ---";
  kernels.emplace_back(context_->GetKernel(kernel_name, file_name, options));
  VLOG(3) << " --- end to add kernel --- ";
}
// Returns the kernel registered at `index` by AddKernel().
// Aborts (CHECK) on an out-of-range index or a null kernel slot.
cl::Kernel &CLHelper::KernelAt(const int index) {
  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
  CHECK(static_cast<size_t>(index) < kernels.size())
      << "The index must be less than the size of kernels.";
  CHECK(kernels[index] != nullptr)
      << "The target kernel pointer cannot be null.";
  return *kernels[index];
}
// Accessor for the bound context's command queue; requires set_context().
cl::CommandQueue &CLHelper::OpenCLCommandQueue() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetCommandQueue();
}
// Accessor for the bound context's cl::Context; requires set_context().
cl::Context &CLHelper::OpenCLContext() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetContext();
}
// Derives a default 3-D NDRange for a kernel operating on `image`, based on
// the rank of the image's original tensor. Branches are checked in the
// order 4, 2, 1, 3; any other rank is fatal.
cl::NDRange CLHelper::DefaultWorkSize(const CLImage &image) {
  // n c h w
  auto image_dim = image.tensor_dims();
  if (image_dim.size() == 4) {
    auto n = image_dim[0];
    auto h = image_dim[2];
    auto w = image_dim[3];
    auto image_width = image.ImageWidth();
    // image_width == W * ceil(C/4) (see the image converters), so
    // work_size_0 is the number of 4-channel blocks.
    auto work_size_0 = image_width / w;
    auto work_size_1 = w;
    auto work_size_2 = n * h;
    return cl::NDRange{static_cast<size_t>(work_size_0),
                       static_cast<size_t>(work_size_1),
                       static_cast<size_t>(work_size_2)};
  } else if (image_dim.size() == 2) {
    // 2-D tensor: one work-item per image texel.
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(image.ImageHeight())};
  } else if (image_dim.size() == 1) {
    // 1-D tensor: a single row of work-items.
    return cl::NDRange{static_cast<size_t>(1),
                       static_cast<size_t>(image.ImageWidth()),
                       static_cast<size_t>(1)};
  } else if (image_dim.size() == 3) {
    auto c = image_dim[0];
    auto h = image_dim[1];
    auto w = image_dim[2];
    // (c + 3) / 4 == number of RGBA channel blocks.
    return cl::NDRange{static_cast<size_t>((c + 3) / 4),
                       static_cast<size_t>(w),
                       static_cast<size_t>(h)};
  } else {
    LOG(FATAL) << "Not support this dimension, need to be implemented!";
    return cl::NDRange{};
  }
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_helper.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
namespace
paddle
{
namespace
lite
{
// Convenience wrapper around a (non-owned) CLContext: builds and stores
// kernels and exposes the context's queue/context plus a default work-size
// heuristic. Not thread-safe; call set_context() (or use the explicit
// constructor) before any other member.
class CLHelper {
 public:
  CLHelper() = default;

  // Binds to an externally owned context; `context` must outlive *this.
  explicit CLHelper(CLContext *context) : context_(context) {}

  void set_context(CLContext *context);

  // Builds `kernel_name` from CL source `file_name` and appends it to the
  // internal list; retrieve it later with KernelAt(index).
  void AddKernel(const std::string &kernel_name, const std::string &file_name,
                 const std::string &options = "");

  // Returns the kernel added by the (index+1)-th AddKernel call.
  cl::Kernel &KernelAt(const int index);

  cl::CommandQueue &OpenCLCommandQueue();

  cl::Context &OpenCLContext();

  // Default 3-D NDRange derived from the image's tensor rank/dims.
  cl::NDRange DefaultWorkSize(const CLImage &image);

 private:
  CLContext *context_{nullptr};
  // Kernels in AddKernel order. NOTE(review): name lacks the trailing
  // underscore used by other members (`kernels_` would be consistent).
  std::vector<std::unique_ptr<cl::Kernel>> kernels;
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
CLImage
&
cl_image
)
{
int
width
=
cl_image
.
image_dims_
[
0
];
int
height
=
cl_image
.
image_dims_
[
1
];
half_t
*
image_data
=
new
half_t
[
height
*
width
*
4
];
cl
::
Image
*
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
static_cast
<
size_t
>
(
height
),
1
};
cl_int
err
=
CLEngine
::
Global
()
->
command_queue
().
enqueueReadImage
(
*
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
err
);
float
*
tensor_data
=
new
float
[
cl_image
.
numel
()];
auto
*
converter
=
cl_image
.
image_converter
();
converter
->
ImageToNCHW
(
image_data
,
tensor_data
,
cl_image
.
image_dims_
,
cl_image
.
tensor_dims_
);
int
stride
=
cl_image
.
numel
()
/
20
;
stride
=
stride
>
0
?
stride
:
1
;
os
<<
" dims: "
<<
cl_image
.
tensor_dims_
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
cl_image
.
numel
();
i
+=
stride
)
{
os
<<
tensor_data
[
i
]
<<
" "
;
}
delete
[]
tensor_data
;
delete
[]
image_data
;
return
os
;
}
// Copies `numel(dim)` floats from `tensor_data` into an internally owned
// buffer and records `dim`; the caller keeps ownership of its pointer.
void CLImage::set_tensor_data(float *tensor_data, const DDim &dim) {
// The two DDim implementations spell element-count differently.
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  auto numel = dim.product();
#else
  auto numel = dim.production();
#endif
  // NOTE(review): this stores `new float[numel]` in tensor_data_, which
  // cl_image.h declares as std::unique_ptr<float> — that deletes with
  // `delete`, not `delete[]` (UB). The member should be
  // std::unique_ptr<float[]>; confirm the header.
  tensor_data_.reset(new float[numel]);
  memcpy(tensor_data_.get(), tensor_data, numel * sizeof(float));
  tensor_dims_ = dim;
}
// Builds the device image from the previously set tensor data using the
// "folder" layout converter (folds 1-D/2-D tensors).
void CLImage::InitCLImage(const cl::Context &context) {
  // BUGFIX: the original CHECK message was garbled by an accidental paste
  // ("set_tensohelper->DefaultWorkSize(out_image)r_data first!"); restored
  // to the message used by the sibling Init*Image methods.
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterFolder);
  InitCLImage(context, image_converter_.get());
}
// Builds the device image from the set tensor data using the "normal"
// (plain NCHW) layout converter.
void CLImage::InitNormalCLImage(const cl::Context &context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterNormal);
  InitCLImage(context, image_converter_.get());
}
// Builds the device image with the N-blocked (NW block) converter.
// Requires a 4-D tensor.
void CLImage::InitNImage(const cl::Context &context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterNWBlock);
  InitCLImage(context, image_converter_.get());
}
// Builds the device image with the depthwise-block converter.
// Requires a 4-D tensor.
void CLImage::InitDWImage(const cl::Context &context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterDWBlock);
  InitCLImage(context, image_converter_.get());
}
// Allocates an uninitialized device image sized for tensor shape `dim`
// (normal layout). Must not be called after set_tensor_data.
void CLImage::InitEmptyImage(const cl::Context &context, const DDim &dim) {
  CHECK(tensor_data_ == nullptr)
      << " Empty image tensor data shouldn't have value";
  tensor_dims_ = dim;
  image_converter_.reset(new CLImageConverterNormal);
  VLOG(3) << " to get image dims ";
  image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
  VLOG(3) << " end get image dims " << image_dims_;
  // nullptr data -> allocate only, no host copy.
  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}
// Allocates an uninitialized device image directly from image (texel)
// dimensions, bypassing the tensor-shape -> image-shape conversion.
void CLImage::InitEmptyWithImageDim(const cl::Context &context,
                                    const DDim &image_dims) {
  VLOG(3) << " to get image dims ";
  image_dims_ = image_dims;
  VLOG(3) << " end get image dims " << image_dims_;
  // nullptr data -> allocate only, no host copy.
  InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image";
}
// Converts the staged host tensor to half-precision image layout with
// `converter`, uploads it, and releases the host copy.
void CLImage::InitCLImage(const cl::Context &context,
                          CLImageConverterBase *converter) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  VLOG(3) << " begin init cl image ";
  image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
// The two DDim implementations spell element-count differently.
// 4 halves per texel (RGBA channels).
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
  half_t *image_data = new half_t[image_dims_.product() * 4];
#else
  half_t *image_data = new half_t[image_dims_.production() * 4];
#endif
  VLOG(3) << " convert to image ";
  converter->NCHWToImage(tensor_data_.get(), image_data, tensor_dims_);
  VLOG(3) << " end convert to image ";
  // Uploads via CL_MEM_COPY_HOST_PTR, so the staging buffer can be freed
  // immediately afterwards.
  InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
  delete[] image_data;
  // Host tensor copy is no longer needed once the image owns the data.
  tensor_data_ = nullptr;
  cl_event_ = CLEngine::Global()->CreateEvent(context);
  initialized_ = true;
  VLOG(3) << " end init cl image ";
}
// Creates the underlying RGBA/half cl::Image2D of width x height texels.
// If `data` is non-null its contents are copied to the device
// (CL_MEM_COPY_HOST_PTR); otherwise the image is merely allocated.
void CLImage::InitCLImage(const cl::Context &context, int width, int height,
                          void *data) {
  cl::ImageFormat img_format(CL_RGBA, CL_HALF_FLOAT);
  cl_int err;
  cl_image_.reset(new cl::Image2D(
      context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
      img_format, width, height, 0, data, &err));
  CL_CHECK_ERRORS(err);
  CHECK(err == CL_SUCCESS) << " Create image 2d error.";
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
namespace
paddle
{
namespace
lite
{
class
CLImage
{
// For debug
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
CLImage
&
image
);
public:
CLImage
()
=
default
;
/*
* Will not hold input tensor data, memcpy in this method.
* */
void
set_tensor_data
(
float
*
tensor_data
,
const
DDim
&
dim
);
bool
IsInit
()
{
return
initialized_
;
}
/*
* Need call set_tensor_data first.
* Folder when one dim or two dim.
* */
void
InitCLImage
(
const
cl
::
Context
&
context
);
void
InitNormalCLImage
(
const
cl
::
Context
&
context
);
void
InitNImage
(
const
cl
::
Context
&
context
);
void
InitDWImage
(
const
cl
::
Context
&
context
);
void
InitEmptyImage
(
const
cl
::
Context
&
context
,
const
DDim
&
dim
);
void
InitEmptyWithImageDim
(
const
cl
::
Context
&
context
,
const
DDim
&
image_dims
);
cl
::
Image
*
cl_image
()
const
{
return
cl_image_
.
get
();
}
const
DDim
&
image_dims
()
const
{
return
image_dims_
;
}
inline
size_t
ImageWidth
()
const
{
return
image_dims_
[
0
];
}
inline
size_t
ImageHeight
()
const
{
return
image_dims_
[
1
];
}
const
DDim
&
tensor_dims
()
const
{
return
tensor_dims_
;
}
/*with_da
* Resize original tensor dim.
* */
inline
CLImage
&
Resize
(
const
DDim
&
dims
)
{
tensor_dims_
=
dims
;
return
*
this
;
}
template
<
typename
T
>
T
*
data
()
const
{
CHECK
(
!
initialized_
)
<<
"CL image has initialized, tensor data has been "
"deleted, can't use tensor data!"
;
return
reinterpret_cast
<
T
*>
(
tensor_data_
);
}
/*
* Numel of tensor dim
* */
inline
int64_t
numel
()
const
{
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
return
tensor_dims_
.
product
();
#else
return
tensor_dims_
.
production
();
#endif
}
/*
* Original tensor dim
* */
cl
::
UserEvent
&
cl_event
()
const
{
return
*
cl_event_
;
}
CLImageConverterBase
*
image_converter
()
const
{
return
image_converter_
.
get
();
}
private:
void
InitCLImage
(
const
cl
::
Context
&
context
,
CLImageConverterBase
*
converter
);
void
InitCLImage
(
const
cl
::
Context
&
context
,
int
width
,
int
height
,
void
*
data
);
bool
initialized_
=
false
;
std
::
unique_ptr
<
cl
::
Image2D
>
cl_image_
{
nullptr
};
std
::
unique_ptr
<
cl
::
UserEvent
>
cl_event_
{
nullptr
};
DDim
tensor_dims_
;
DDim
image_dims_
;
std
::
unique_ptr
<
float
>
tensor_data_
{
nullptr
};
std
::
unique_ptr
<
CLImageConverterBase
>
image_converter_
{
nullptr
};
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>
namespace
paddle
{
namespace
lite
{
// Computes the 2-D image (texel) size for an NCHW tensor: channels are
// packed four-per-RGBA-texel, so width = W * ceil(C/4) and height = H * N.
DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim &tensor_dim) {
  // Left-pad the shape with 1s up to rank 4 (NCHW).
  size_t padded[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    padded[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  const size_t N = padded[0];
  const size_t C = padded[1];
  const size_t H = padded[2];
  const size_t W = padded[3];
  const size_t width = W * ((C + 3) / 4);
  const size_t height = H * N;
  return DDim(std::vector<DDim::value_type>(
      {static_cast<DDim::value_type>(width),
       static_cast<DDim::value_type>(height)}));
}
// Packs an NCHW float tensor into the RGBA/half image layout produced by
// InitImageDimInfoWith: texel (x, y) holds channels [4k, 4k+4) at spatial
// position derived from x = k*W + w, y = n*H + h. Channel slots beyond C
// are zero-filled. `image` indices below are in half units (4 per texel).
void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
                                          const DDim &tensor_dim) {
  // Left-pad the shape with 1s up to rank 4 (NCHW).
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);

  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;  // number of 4-channel blocks == ceil(C/4)

  float *p = nchw;  // sequential read cursor over the NCHW source
  size_t i0 = 0;    // texel offset of the current batch image (n * width * H)
  for (size_t n = 0; n < N; n++) {
    // Iterate all channel slots incl. zero padding (w_block*4 >= C).
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;  // texel offset of this channel block row
      for (size_t h = 0; h < H; h++) {
        // (i1 << 2): texel index -> half index; + c % 4 selects the channel
        // within the RGBA texel.
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;  // next texel, same channel slot
            p++;
          } else {
            // Padding channel slot: fill with zero.
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;  // next image row
      }
    }
    i0 += width * H;  // next batch image
  }
}
// Inverse of NCHWToImage for the default layout: channel c of element
// (n, h, w) is read from lane c % 4 of pixel ((c / 4) * W + w, n * H + h).
// Padded lanes (c >= C) are skipped; `tensor` receives exactly
// N * C * H * W floats in NCHW order.
void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  // Right-align the tensor shape into N, C, H, W, padding with 1s.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  size_t width = image_dim[0];
  float *p = tensor;
  size_t i0 = 0;  // pixel index of the first pixel of batch n's row band
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {  // only real channels, no pad lanes
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;  // half index: 4 lanes per pixel
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;  // advance one pixel, same lane
          p++;
        }
        i1 += width;  // next image row
      }
    }
    i0 += width * H;  // next batch's band of H rows
  }
}
// Image shape for the "folder" layout. Rank <= 2 tensors are treated as a
// (rows, cols) matrix with 4 consecutive columns folded into one half4
// pixel; rank 3/4 tensors reuse the default NCHW layout. Also records the
// per-block width/height and channel-block count accessors rely on.
DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim &tensor_dim) {
  if (tensor_dim.size() <= 2) {
    // A 1-D tensor is a single row; a 2-D tensor maps directly.
    size_t rows = 1;
    size_t cols = 1;
    if (tensor_dim.size() == 1) {
      cols = tensor_dim[0];
    } else {
      rows = tensor_dim[0];
      cols = tensor_dim[1];
    }
    const size_t image_width = (cols + 3) / 4;  // 4 values folded per pixel
    const size_t image_height = rows;
    width_of_one_block_ = image_width;
    height_of_one_block_ = image_height;
    c_block_ = 1;
    std::vector<DDim::value_type> shape;
    shape.push_back(static_cast<DDim::value_type>(image_width));
    shape.push_back(static_cast<DDim::value_type>(image_height));
    return DDim(shape);
  }
  // Rank 3/4: identical geometry to CLImageConverterDefault.
  size_t nchw[4] = {1, 1, 1, 1};
  const size_t rank = tensor_dim.size();
  for (size_t axis = 0; axis < rank; ++axis) {
    nchw[4 - rank + axis] = tensor_dim[axis];
  }
  const size_t batch = nchw[0];
  const size_t channel = nchw[1];
  const size_t h = nchw[2];
  const size_t w = nchw[3];
  const size_t image_width = w * ((channel + 3) / 4);
  const size_t image_height = h * batch;
  width_of_one_block_ = w;
  height_of_one_block_ = h;
  c_block_ = image_width / w;  // ceil(channel / 4)
  std::vector<DDim::value_type> shape;
  shape.push_back(static_cast<DDim::value_type>(image_width));
  shape.push_back(static_cast<DDim::value_type>(image_height));
  return DDim(shape);
}
void
CLImageConverterFolder
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
if
(
tensor_dim
.
size
()
>
2
)
{
CLImageConverterDefault
default_converter
;
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
}
else
{
size_t
tdim
[
2
]
=
{
1
,
1
};
if
(
tensor_dim
.
size
()
==
1
)
{
tdim
[
1
]
=
tensor_dim
[
0
];
}
else
{
tdim
[
0
]
=
tensor_dim
[
0
];
tdim
[
1
]
=
tensor_dim
[
1
];
}
DDim
image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
size_t
width
=
image_dim
[
0
];
for
(
size_t
h
=
0
;
h
<
tdim
[
0
];
h
++
)
{
for
(
size_t
w
=
0
;
w
<
tdim
[
1
];
w
++
)
{
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]
=
Float2Half
(
tensor
[
h
*
tdim
[
1
]
+
w
]);
}
}
}
}
// Unpacks a folder-layout image back into a tensor. Rank 3/4 delegates to
// the default converter; rank 1/2 reads element (r, col) from lane col % 4
// of pixel (col / 4, r).
void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
                                         const DDim &image_dim,
                                         const DDim &tensor_dim) {
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
    return;
  }
  const size_t image_width = image_dim[0];  // in pixels
  size_t rows = 1;
  size_t cols = 1;
  if (tensor_dim.size() == 2) {
    rows = tensor_dim[0];
    cols = tensor_dim[1];
  } else if (tensor_dim.size() == 1) {
    cols = tensor_dim[0];
  }
  float *dst = tensor;
  for (size_t r = 0; r < rows; ++r) {
    for (size_t col = 0; col < cols; ++col) {
      dst[r * cols + col] =
          Half2Float(image[(r * image_width + col / 4) * 4 + (col % 4)]);
    }
  }
}
// Image shape for the N/W-blocked layout: 4 batches share one pixel's
// lanes, so width = W * ceil(N / 4) and height = C * H.
DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t batch = tensor_dim[0];
  const size_t channel = tensor_dim[1];
  const size_t h = tensor_dim[2];
  const size_t w = tensor_dim[3];
  const size_t image_width = w * ((batch + 3) / 4);
  const size_t image_height = channel * h;
  std::vector<DDim::value_type> shape;
  shape.push_back(static_cast<DDim::value_type>(image_width));
  shape.push_back(static_cast<DDim::value_type>(image_height));
  return DDim(shape);
}
// Packs an NCHW tensor into the N/W-blocked layout: element (n, c, h, w)
// goes to lane n % 4 of pixel (x = (n / 4) * W + w, y = c * H + h).
// Padded batch lanes (n >= N) are zero-filled.
void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  auto image_dim = InitImageDimInfoWith(tensor_dim);
  float *p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  size_t block = image_dim[0] / tensor_dim[3];  // 4-batch blocks = ceil(N / 4)
  // Iterate the padded batch count (block * 4 >= N) so pad lanes get zeroed.
  for (size_t n = 0; n < block * 4; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          // Half index of lane n % 4 in pixel ((n / 4) * W + w, c * H + h).
          size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                         w * 4 + n % 4;
          if (n < N) {
            image[index] = Float2Half(*p);
            p++;
          } else {
            image[index] = 0.0;  // zero-fill padded batch lane
          }
          // NOTE(review): this bounds check runs AFTER the store above, so an
          // out-of-range index has already written past the buffer — and it
          // only logs at INFO level. Confirm whether it should abort instead.
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}
// Inverse of the N/W-blocked packing: element (n, c, h, w) is read from
// lane n % 4 of pixel (x = (n / 4) * W + w, y = c * H + h). Only real
// batches (n < N) are visited, so padded lanes are skipped.
void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float *p = tensor;
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          // Half index of lane n % 4 in pixel ((n / 4) * W + w, c * H + h).
          size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                         w * 4 + n % 4;
          *p = Half2Float(image[index]);
          p++;
          // NOTE(review): check happens after the read and only logs at INFO;
          // confirm intended severity.
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}
// Image shape for the depthwise-blocked filter layout. With filter dims
// (dim0, dim1, H, W): width = W * ceil(dim0 / 4), height = dim1 * H.
DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t dim0 = tensor_dim[0];
  const size_t dim1 = tensor_dim[1];
  const size_t h = tensor_dim[2];
  const size_t w = tensor_dim[3];
  const size_t image_width = w * ((dim0 + 3) / 4);
  const size_t image_height = dim1 * h;
  std::vector<DDim::value_type> shape;
  shape.push_back(static_cast<DDim::value_type>(image_width));
  shape.push_back(static_cast<DDim::value_type>(image_height));
  return DDim(shape);
}
// Packs a depthwise filter tensor. Identical loop structure to the default
// converter, but the roles of the first two tensor axes are swapped:
// N = dims[1], C = dims[0], so the filter's leading axis is channel-folded
// 4-per-pixel. Padded lanes (c >= C) are zero-filled.
void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
                                          const DDim &tensor_dim) {
  // Right-align the tensor shape into a 4-element array, padding with 1s.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[1];  // note the axis swap relative to the default layout
  C = new_dims[0];
  H = new_dims[2];
  W = new_dims[3];
  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);

  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;

  size_t width = in_image_dim[0];
  size_t w_block = width / W;  // number of 4-channel blocks

  float *p = tensor;
  size_t i0 = 0;  // pixel index of the first pixel of batch n's row band
  for (size_t n = 0; n < N; n++) {
    // Iterate padded channel count (w_block * 4 >= C) so pad lanes get zeroed.
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;  // half index: 4 lanes per pixel
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            image[i2] = 0.0;  // zero-fill padded channel lane
            i2 += 4;
          }
        }
        i1 += width;  // next image row
      }
    }
    i0 += width * H;  // next band of H rows
  }
}
// Inverse of the depthwise packing: same traversal as the default
// converter's ImageToNCHW, with the first two tensor axes swapped
// (N = dims[1], C = dims[0]) to match DWBlock::NCHWToImage.
void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float *p = tensor;
  size_t N = tensor_dim[1];  // axis swap mirrors NCHWToImage above
  size_t C = tensor_dim[0];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t i0 = 0;  // pixel index of the first pixel of batch n's row band
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {  // only real channels, no pad lanes
      size_t i1 = i0 + (c / 4) * W;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;  // half index: 4 lanes per pixel
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;  // next image row
      }
    }
    i0 += width * H;  // next band of H rows
  }
}
// Same image geometry as the default layout (width = W * ceil(C / 4),
// height = H * N), but additionally records per-block metadata for the
// WidthOfOneBlock / HeightOfOneBlock / GetCBlock accessors.
DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim &tensor_dim) {
  // Right-align the tensor shape into NCHW, padding with 1s.
  size_t nchw[4] = {1, 1, 1, 1};
  const size_t rank = tensor_dim.size();
  for (size_t axis = 0; axis < rank; ++axis) {
    nchw[4 - rank + axis] = tensor_dim[axis];
  }
  const size_t batch = nchw[0];
  const size_t channel = nchw[1];
  const size_t h = nchw[2];
  const size_t w = nchw[3];
  const size_t image_width = w * ((channel + 3) / 4);
  const size_t image_height = h * batch;
  width_of_one_block_ = w;
  height_of_one_block_ = h;
  c_block_ = image_width / w;  // ceil(channel / 4)
  std::vector<DDim::value_type> shape;
  shape.push_back(static_cast<DDim::value_type>(image_width));
  shape.push_back(static_cast<DDim::value_type>(image_height));
  return DDim(shape);
}
void
CLImageConverterNormal
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
CLImageConverterDefault
default_converter
;
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
}
// Unpacking likewise reuses the default converter's layout unchanged.
void CLImageConverterNormal::ImageToNCHW(half_t *image, float *tensor,
                                         const DDim &image_dim,
                                         const DDim &tensor_dim) {
  CLImageConverterDefault delegate;
  delegate.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}
// Image shape for Winograd-transformed weights: one pixel per 4 input
// channels, and 16 rows per output channel.
DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
    const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t out_channels = tensor_dim[0];
  const size_t in_channels = tensor_dim[1];
  const size_t image_width = (in_channels + 3) / 4;
  // N * (wino_blk_size + 2) * (wino_blk_size + 2)
  const size_t image_height = out_channels * 16;
  std::vector<DDim::value_type> shape;
  shape.push_back(static_cast<DDim::value_type>(image_width));
  shape.push_back(static_cast<DDim::value_type>(image_height));
  return DDim(shape);
}
// Intentionally a no-op: host-side packing of Winograd-transformed weights
// is not implemented in this converter yet.
void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor, half_t *image,
                                                  const DDim &tensor_dim) {}
// Intentionally a no-op: the Winograd weight image is never read back into
// an NCHW tensor by this converter.
void CLImageConverterWinoTransWeight::ImageToNCHW(half_t *image, float *tensor,
                                                  const DDim &image_dim,
                                                  const DDim &tensor_dim) {}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace
paddle
{
namespace
lite
{
// Abstract strategy that maps an NCHW host tensor to/from a 2D OpenCL image
// of half4 pixels. Concrete subclasses define the packing layout.
class CLImageConverterBase {
 public:
  virtual ~CLImageConverterBase() {}

  // Packs `nchw` host floats into `image` (half values, 4 lanes per pixel).
  virtual void NCHWToImage(float *nchw, half_t *image,
                           const DDim &tensor_dim) = 0;
  // Unpacks `image` back into `nchw`; `image_dim` is the 2D {width, height}.
  virtual void ImageToNCHW(half_t *image, float *nchw, const DDim &image_dim,
                           const DDim &tensor_dim) = 0;
  // Returns the {width, height} of the image required to hold `tensor_dim`.
  virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;
};
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
nchw
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
/*
* width of original tensor
* */
inline
size_t
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
/*
* height of original tensor
* */
inline
size_t
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
int
GetCBlock
()
const
{
return
c_block_
;
}
private:
int
c_block_
;
int
width_of_one_block_
;
int
height_of_one_block_
;
};
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
/*
* width of original tensor
* */
inline
size_t
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
/*
* height of original tensor
* */
inline
size_t
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
int
GetCBlock
()
const
{
return
c_block_
;
}
private:
int
c_block_
;
int
width_of_one_block_
;
int
height_of_one_block_
;
};
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Applies the activation selected at compile time via -D macros:
//   PRELU — parametric ReLU with per-lane alpha;
//   RELU  — max(x, 0);
//   neither — identity (pass-through).
inline half4 activation(half4 in
#ifdef PRELU
                        ,
                        half4 prelu_alpha
#endif
                        ) {
  // fix: `output` was returned uninitialized (undefined behavior) when
  // neither PRELU nor RELU was defined; default to pass-through instead.
  half4 output = in;
#ifdef PRELU
  output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif

#ifdef RELU
  output = fmax(in, (half4)(0.0f));
#endif
  return output;
}
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
0 → 100644
浏览文件 @
a95dcea5
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
// Element-wise addition of two half4-packed images: one work-item per pixel,
// out(x, y) = input(x, y) + bias(x, y).
// NOTE(review): `input`/`bias` are declared __global image2d_t; image kernel
// arguments are conventionally __read_only — confirm this compiles cleanly
// on all target drivers.
__kernel void elementwise_add(__global image2d_t input,
                              __global image2d_t bias,
                              __write_only image2d_t outputImage) {
  int x = get_global_id(0);
  int y = get_global_id(1);
  // NOTE(review): integer coordinates are used with read_imageh, which per
  // the OpenCL spec requires unnormalized coordinates — confirm
  // CLK_NORMALIZED_COORDS_TRUE behaves as intended on the target devices.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
  int2 coords;
  coords.x = x;
  coords.y = y;
  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords);
  half4 output = in + biase;
  write_imageh(outputImage, coords, output);
}
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
0 → 100644
浏览文件 @
a95dcea5
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
MIN_VALUE
-FLT_MAX
// Max pooling over a half4-packed image tensor.
// Image layout: x = channel_block * in_width + w, y = n * in_height + h.
// Work-item mapping: gid0 = channel block, gid1 = output column,
// gid2 = n * out_height + out_h.
__kernel void pool_max(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  // NOTE(review): integer coordinates are used with read_imageh below, which
  // per the OpenCL spec requires unnormalized coordinates — confirm
  // CLK_NORMALIZED_COORDS_TRUE behaves as intended on the target devices.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  // Clip the pooling window to the input: the end is computed from the
  // unclamped start, then the start is clamped to 0, so padding never
  // contributes and the window is shrunk (not shifted).
  int start_h = out_h * stride_h - pad_top;
  int end_h = min(start_h + ksize_h, in_height);
  start_h = max(start_h, 0);
  int start_w = out_w * stride_w - pad_left;
  int end_w = min(start_w + ksize_w, in_width);
  start_w = max(start_w, 0);

  const int pos_in_x = out_c * in_width;   // x offset of this channel block
  const int pos_in_y = out_n * in_height;  // y offset of this batch
  half4 max_value = (half4)(MIN_VALUE);
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      half4 tmp =
          read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      max_value = max(max_value, tmp);
    }
  }

  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}
// Average pooling over a half4-packed image tensor; divides by the number
// of pixels actually visited (exclusive-of-padding counting).
// Work-item mapping matches pool_max above.
__kernel void pool_avg(__private const int in_height,
                       __private const int in_width,
                       __private const int out_height,
                       __private const int out_width,
                       __private const int pad_top,
                       __private const int pad_left,
                       __private const int stride_h,
                       __private const int stride_w,
                       __private const int ksize_h,
                       __private const int ksize_w,
                       __read_only image2d_t input,
                       __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  // NOTE(review): integer coordinates with a normalized-coords sampler —
  // same concern as pool_max; confirm on target devices.
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  // NOTE(review): unlike pool_max, the start is clamped BEFORE the end is
  // computed, so a window clipped by top/left padding is shifted into the
  // input rather than shrunk — confirm this asymmetry is intentional.
  int start_h = max(out_h * stride_h - pad_top, 0);
  int end_h = min(start_h + ksize_h, in_height);
  int start_w = max(out_w * stride_w - pad_left, 0);
  int end_w = min(start_w + ksize_w, in_width);

  const int pos_in_x = out_c * in_width;   // x offset of this channel block
  const int pos_in_y = out_n * in_height;  // y offset of this batch
  half4 sum = (half4)(0.0f);
  int num = 0;  // pixels visited; assumed > 0 for any valid pooling config
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      num++;
    }
  }
  half4 avg = sum / num;
  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
paddle/fluid/lite/opencl/cl_test.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
// On-device directory from which the OpenCL kernel source files are loaded.
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace
paddle
{
namespace
lite
{
// Smoke-tests CLEngine: initialization, accessor calls, and building the
// elementwise_add program from the kernel directory.
TEST(cl_test, engine_test) {
  auto *engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  // Touch the lazily-created handles so their construction paths run.
  engine->platform();
  engine->device();
  engine->command_queue();
  auto &context = engine->context();
  auto program = engine->CreateProgram(
      context, engine->cl_path() + "/cl_kernel/" + "elementwise_add_kernel.cl");
  auto event = engine->CreateEvent(context);
  CHECK(engine->BuildProgram(program.get()));
}
// Smoke-tests CLContext kernel lookup, including requesting the same kernel
// twice (exercises any caching in GetKernel).
TEST(cl_test, context_test) {
  auto *engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  CLContext context;
  context.GetKernel("pool_max", "pool_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
}
// End-to-end run of the elementwise_add kernel through CLHelper: builds two
// 1024x512 input images (all 1s and all 2s), enqueues the kernel over the
// image grid, and logs the profiled execution time and the output image.
// Results are only logged, not asserted.
TEST(cl_test, kernel_test) {
  auto *engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  std::unique_ptr<CLContext> context(new CLContext);
  // std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
  std::unique_ptr<CLHelper> helper(new CLHelper);
  helper->set_context(context.get());
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  helper->AddKernel("pool_max", "pool_kernel.cl");
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  // Index 2 is the second registration of elementwise_add.
  auto kernel = helper->KernelAt(2);

  // Input image: 1024x512 tensor filled with 1.0f.
  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = 1.f;
  }
  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage in_image;
  in_image.set_tensor_data(in_data.get(), in_dim);
  in_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << in_image;

  // Bias image: same shape, filled with 2.0f.
  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = 2.f;
  }
  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage bias_image;
  bias_image.set_tensor_data(bias_data.get(), bias_dim);
  bias_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << bias_image;

  // Output image is allocated empty on the device.
  CLImage out_image;
  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
  LOG(INFO) << out_image;

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  // auto global_work_size = helper->DefaultWorkSize(out_image);
  // One work-item per image pixel.
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  cl::Event event;
  status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event);
  CL_CHECK_ERRORS(status);
  // Device-side timing from the profiling queue (nanosecond timestamps).
  double start_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  double stop_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
  double elapsed_micros = (stop_nanos - start_nanos) / 1000.0;
  LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us.";
  LOG(INFO) << out_image;
}
// Drives the high-level elementwise_add() host wrapper on random 1024x512
// inputs and prints a sparse sample of the output. Output values are only
// printed for manual inspection, not asserted.
TEST(cl_test, elementwise_add_test) {
  std::default_random_engine engine;
  std::uniform_real_distribution<float> dist(-5, 5);

  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = dist(engine);
  }

  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = dist(engine);
  }

  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> out(new float[1024 * 512]);

  bool status = InitOpenCLEngine(FLAGS_cl_path);
  CHECK(status) << "Fail to initialize OpenCL engine.";
  CLContext context;
  elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
                  out.get(), out_dim);

  // Print 20 evenly spaced samples of the result for eyeballing.
  int stride = 1024 * 512 / 20;
  for (int i = 0; i < 1024 * 512; i += stride) {
    std::cout << out[i] << " ";
  }
  std::cout << std::endl;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_tool.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
// Maps an OpenCL status code to its symbolic constant name for diagnostics.
// Returns a pointer to a static string literal; unknown codes yield
// "UNKNOWN ERROR CODE".
const char *opencl_error_to_str(cl_int error) {
// Expands to one `case` returning the stringized constant name.
#define CASE_CL_CONSTANT(NAME) \
  case NAME:                   \
    return #NAME;
  // Suppose that no combinations are possible.
  switch (error) {
    CASE_CL_CONSTANT(CL_SUCCESS)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
    CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
    CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
    CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
    CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
    CASE_CL_CONSTANT(CL_MAP_FAILURE)
    CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
    CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
    CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE)
    CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
    CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
    CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
    CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
    CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
    CASE_CL_CONSTANT(CL_INVALID_BINARY)
    CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL)
    CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
    CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
    CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
    CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
    CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_EVENT)
    CASE_CL_CONSTANT(CL_INVALID_OPERATION)
    CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
    default:
      return "UNKNOWN ERROR CODE";
  }
#undef CASE_CL_CONSTANT
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_tool.h
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace
paddle
{
namespace
lite
{
// Returns the symbolic name of an OpenCL status code (defined in cl_tool.cc).
const char *opencl_error_to_str(cl_int error);

// Prints a diagnostic (code name, file, line) when ERR != CL_SUCCESS.
// NOTE(review): despite the "Exiting." message text, this macro only prints
// and then continues — confirm whether callers expect it to terminate.
#define CL_CHECK_ERRORS(ERR)                                         \
  if (ERR != CL_SUCCESS) {                                           \
    printf(                                                          \
        "OpenCL error with code %s happened in file %s at line %d. " \
        "Exiting.\n",                                                \
        opencl_error_to_str(ERR), __FILE__, __LINE__);               \
  }
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_wrapper.cc
0 → 100644
浏览文件 @
a95dcea5
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <dlfcn.h>
#include <glog/logging.h>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
/**
* Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h
*/
#if CL_HPP_TARGET_OPENCL_VERSION < 200
#define CL_API_SUFFIX__VERSION_2_0
#endif
namespace
paddle
{
namespace
lite
{
// Lazily dlopen()s the platform's OpenCL runtime (vendor libOpenCL.so or a
// Mali GLES blob) and resolves every entry point with dlsym, so the binary
// has no link-time dependency on an OpenCL driver.  A symbol that cannot be
// resolved stays nullptr; the extern wrappers defined later in this file
// check for that and report CL_INVALID_PLATFORM instead of crashing.
class OpenCLLibrary final {
 private:
  // Construction triggers Load(); obtain the singleton via Get().
  OpenCLLibrary();
  OpenCLLibrary(const OpenCLLibrary &) = delete;
  OpenCLLibrary &operator=(const OpenCLLibrary &) = delete;
  // Walks the candidate library paths and keeps the first dlopen that works.
  bool Load();
  // Tries one path; returns the dlopen handle, or nullptr on failure.
  void *LoadFromPath(const std::string &path);

 public:
  // Process-wide singleton; the driver is loaded on the first call.
  static OpenCLLibrary *Get();

  // Function-pointer signatures for every OpenCL entry point we resolve.
  using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *,
                                          cl_uint *);
  using clGetPlatformInfoFunc = cl_int (*)(cl_platform_id, cl_platform_info,
                                           size_t, void *, size_t *);
  using clBuildProgramFunc = cl_int (*)(cl_program, cl_uint,
                                        const cl_device_id *, const char *,
                                        void (*pfn_notify)(cl_program, void *),
                                        void *);
  using clEnqueueNDRangeKernelFunc = cl_int (*)(cl_command_queue, cl_kernel,
                                                cl_uint, const size_t *,
                                                const size_t *, const size_t *,
                                                cl_uint, const cl_event *,
                                                cl_event *);
  using clSetKernelArgFunc = cl_int (*)(cl_kernel, cl_uint, size_t,
                                        const void *);
  using clRetainMemObjectFunc = cl_int (*)(cl_mem);
  using clReleaseMemObjectFunc = cl_int (*)(cl_mem);
  using clEnqueueUnmapMemObjectFunc = cl_int (*)(cl_command_queue, cl_mem,
                                                 void *, cl_uint,
                                                 const cl_event *, cl_event *);
  using clRetainCommandQueueFunc = cl_int (*)(cl_command_queue command_queue);
  using clCreateContextFunc = cl_context (*)(
      const cl_context_properties *, cl_uint, const cl_device_id *,
      void(CL_CALLBACK *)(  // NOLINT(readability/casting)
          const char *, const void *, size_t, void *),
      void *, cl_int *);
  using clCreateContextFromTypeFunc = cl_context (*)(
      const cl_context_properties *, cl_device_type,
      void(CL_CALLBACK *)(  // NOLINT(readability/casting)
          const char *, const void *, size_t, void *),
      void *, cl_int *);
  using clReleaseContextFunc = cl_int (*)(cl_context);
  using clWaitForEventsFunc = cl_int (*)(cl_uint, const cl_event *);
  using clReleaseEventFunc = cl_int (*)(cl_event);
  using clEnqueueWriteBufferFunc = cl_int (*)(cl_command_queue, cl_mem,
                                              cl_bool, size_t, size_t,
                                              const void *, cl_uint,
                                              const cl_event *, cl_event *);
  using clEnqueueReadBufferFunc = cl_int (*)(cl_command_queue, cl_mem, cl_bool,
                                             size_t, size_t, void *, cl_uint,
                                             const cl_event *, cl_event *);
  using clEnqueueReadImageFunc = cl_int (*)(cl_command_queue, cl_mem, cl_bool,
                                            const size_t *, const size_t *,
                                            size_t, size_t, void *, cl_uint,
                                            const cl_event *, cl_event *);
  using clGetProgramBuildInfoFunc = cl_int (*)(cl_program, cl_device_id,
                                               cl_program_build_info, size_t,
                                               void *, size_t *);
  using clRetainProgramFunc = cl_int (*)(cl_program program);
  using clEnqueueMapBufferFunc = void *(*)(cl_command_queue, cl_mem, cl_bool,
                                           cl_map_flags, size_t, size_t,
                                           cl_uint, const cl_event *,
                                           cl_event *, cl_int *);
  using clEnqueueMapImageFunc = void *(*)(cl_command_queue, cl_mem, cl_bool,
                                          cl_map_flags, const size_t *,
                                          const size_t *, size_t *, size_t *,
                                          cl_uint, const cl_event *,
                                          cl_event *, cl_int *);
  using clCreateCommandQueueFunc = cl_command_queue(CL_API_CALL *)(  // NOLINT
      cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
  using clCreateCommandQueueWithPropertiesFunc = cl_command_queue (*)(
      cl_context, cl_device_id, const cl_queue_properties *, cl_int *);
  using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue);
  using clCreateProgramWithBinaryFunc = cl_program (*)(
      cl_context, cl_uint, const cl_device_id *, const size_t *,
      const unsigned char **, cl_int *, cl_int *);
  using clRetainContextFunc = cl_int (*)(cl_context context);
  using clGetContextInfoFunc = cl_int (*)(cl_context, cl_context_info, size_t,
                                          void *, size_t *);
  using clReleaseProgramFunc = cl_int (*)(cl_program program);
  using clFlushFunc = cl_int (*)(cl_command_queue command_queue);
  using clFinishFunc = cl_int (*)(cl_command_queue command_queue);
  using clGetProgramInfoFunc = cl_int (*)(cl_program, cl_program_info, size_t,
                                          void *, size_t *);
  using clCreateKernelFunc = cl_kernel (*)(cl_program, const char *, cl_int *);
  using clRetainKernelFunc = cl_int (*)(cl_kernel kernel);
  using clCreateBufferFunc = cl_mem (*)(cl_context, cl_mem_flags, size_t,
                                        void *, cl_int *);
  using clCreateImage2DFunc = cl_mem(CL_API_CALL *)(  // NOLINT
      cl_context, cl_mem_flags, const cl_image_format *, size_t, size_t,
      size_t, void *, cl_int *);
  using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags,
                                       const cl_image_format *,
                                       const cl_image_desc *, void *,
                                       cl_int *);
  using clCreateUserEventFunc = cl_event (*)(cl_context, cl_int *);
  using clCreateProgramWithSourceFunc = cl_program (*)(cl_context, cl_uint,
                                                       const char **,
                                                       const size_t *,
                                                       cl_int *);
  using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel);
  using clGetDeviceInfoFunc = cl_int (*)(cl_device_id, cl_device_info, size_t,
                                         void *, size_t *);
  using clGetDeviceIDsFunc = cl_int (*)(cl_platform_id, cl_device_type,
                                        cl_uint, cl_device_id *, cl_uint *);
  using clRetainDeviceFunc = cl_int (*)(cl_device_id);
  using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
  using clRetainEventFunc = cl_int (*)(cl_event);
  using clGetKernelWorkGroupInfoFunc = cl_int (*)(cl_kernel, cl_device_id,
                                                  cl_kernel_work_group_info,
                                                  size_t, void *, size_t *);
  using clGetEventInfoFunc = cl_int (*)(cl_event event,
                                        cl_event_info param_name,
                                        size_t param_value_size,
                                        void *param_value,
                                        size_t *param_value_size_ret);
  using clGetEventProfilingInfoFunc = cl_int (*)(cl_event event,
                                                 cl_profiling_info param_name,
                                                 size_t param_value_size,
                                                 void *param_value,
                                                 size_t *param_value_size_ret);
  using clGetImageInfoFunc = cl_int (*)(cl_mem, cl_image_info, size_t, void *,
                                        size_t *);

// One nullable member per entry point; filled in by LoadFromPath().
#define PADDLE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
  PADDLE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetPlatformInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clBuildProgram);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueNDRangeKernel);
  PADDLE_CL_DEFINE_FUNC_PTR(clSetKernelArg);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseKernel);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateProgramWithSource);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateBuffer);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateImage);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateImage2D);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateUserEvent);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainKernel);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateKernel);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetProgramInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clFlush);
  PADDLE_CL_DEFINE_FUNC_PTR(clFinish);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseProgram);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainContext);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetContextInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateProgramWithBinary);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateCommandQueue);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseCommandQueue);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueMapBuffer);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueMapImage);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainProgram);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetProgramBuildInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueReadBuffer);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueReadImage);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueWriteBuffer);
  PADDLE_CL_DEFINE_FUNC_PTR(clWaitForEvents);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseEvent);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateContext);
  PADDLE_CL_DEFINE_FUNC_PTR(clCreateContextFromType);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseContext);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainCommandQueue);
  PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueUnmapMemObject);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainMemObject);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseMemObject);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetDeviceInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetDeviceIDs);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainDevice);
  PADDLE_CL_DEFINE_FUNC_PTR(clReleaseDevice);
  PADDLE_CL_DEFINE_FUNC_PTR(clRetainEvent);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetKernelWorkGroupInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetEventInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
  PADDLE_CL_DEFINE_FUNC_PTR(clGetImageInfo);
#undef PADDLE_CL_DEFINE_FUNC_PTR

 private:
  // dlopen handle of the loaded driver; nullptr until Load() succeeds.
  // Intentionally never dlclose()d (see the constructor comment).
  void *handle_ = nullptr;
};
OpenCLLibrary *OpenCLLibrary::Get() {
  // Meyers singleton: the instance (and thus the driver dlopen) is created
  // on first use, with thread-safe initialization guaranteed by C++11.
  static OpenCLLibrary instance;
  return &instance;
}
// Eagerly resolves the OpenCL runtime so every function pointer is settled
// before the first wrapper call.
OpenCLLibrary::OpenCLLibrary() {
  this->Load();
  // Deliberately no dlclose(): unloading could invalidate OpenCL symbols
  // that static destructors may still touch at shutdown.  Without dlclose
  // the library is reclaimed by the OS at process exit, and since dlopen is
  // reference-counted the library is not loaded repeatedly even if dlopen
  // is called many times.
}
bool
OpenCLLibrary
::
Load
()
{
if
(
handle_
!=
nullptr
)
{
return
true
;
}
// Add customized OpenCL search path here
const
std
::
vector
<
std
::
string
>
paths
=
{
"libOpenCL.so"
,
#if defined(__aarch64__)
// Qualcomm Adreno with Android
"/system/vendor/lib64/libOpenCL.so"
,
"/system/lib64/libOpenCL.so"
,
// Mali with Android
"/system/vendor/lib64/egl/libGLES_mali.so"
,
"/system/lib64/egl/libGLES_mali.so"
,
// Typical Linux board
"/usr/lib/aarch64-linux-gnu/libOpenCL.so"
,
#else
// Qualcomm Adreno with Android
"/system/vendor/lib/libOpenCL.so"
,
"/system/lib/libOpenCL.so"
,
// Mali with Android
"/system/vendor/lib/egl/libGLES_mali.so"
,
"/system/lib/egl/libGLES_mali.so"
,
// Typical Linux board
"/usr/lib/arm-linux-gnueabihf/libOpenCL.so"
,
#endif
};
for
(
const
auto
&
path
:
paths
)
{
VLOG
(
3
)
<<
"Loading OpenCL from "
<<
path
;
void
*
handle
=
LoadFromPath
(
path
);
if
(
handle
!=
nullptr
)
{
handle_
=
handle
;
break
;
}
}
if
(
handle_
==
nullptr
)
{
LOG
(
ERROR
)
<<
"Failed to load OpenCL library, "
"please make sure there exists OpenCL library on your device, "
"and your APP have right to access the library."
;
return
false
;
}
return
true
;
}
// dlopen()s one candidate library and resolves every OpenCL entry point into
// the corresponding member pointer.  Returns the dlopen handle on success
// (even if some individual symbols were missing), or nullptr if the library
// itself could not be opened.
void *OpenCLLibrary::LoadFromPath(const std::string &path) {
  // RTLD_LOCAL keeps the driver's symbols out of the global namespace so
  // they cannot shadow the wrapper functions defined in this file.
  void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr) {
    VLOG(3) << "Failed to load OpenCL library from path " << path
            << " error code: " << dlerror();
    return nullptr;
  }
// Resolves one symbol into the member pointer of the same name.  Note the
// `continue` inside do { } while (false): it jumps to the (false) loop
// condition, i.e. it just skips the rest of this macro expansion, leaving
// the pointer nullptr when the driver lacks that symbol.
#define PADDLE_CL_ASSIGN_FROM_DLSYM(func)                        \
  do {                                                           \
    void *ptr = dlsym(handle, #func);                            \
    if (ptr == nullptr) {                                        \
      VLOG(1) << "Failed to load " << #func << " from " << path; \
      continue;                                                  \
    }                                                            \
    func = reinterpret_cast<func##Func>(ptr);                    \
    VLOG(3) << "Loaded " << #func << " from " << path;           \
  } while (false)
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetPlatformIDs);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetPlatformInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clBuildProgram);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clSetKernelArg);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseKernel);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithSource);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateBuffer);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateImage);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateImage2D);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateUserEvent);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainKernel);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateKernel);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetProgramInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clFlush);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clFinish);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseProgram);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainContext);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetContextInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithBinary);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateCommandQueue);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseCommandQueue);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapBuffer);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapImage);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainProgram);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetProgramBuildInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueReadBuffer);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueReadImage);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clWaitForEvents);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseEvent);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateContext);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateContextFromType);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseContext);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainCommandQueue);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainMemObject);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseMemObject);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetDeviceInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetDeviceIDs);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainDevice);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseDevice);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainEvent);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetKernelWorkGroupInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetEventInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo);
  PADDLE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo);
#undef PADDLE_CL_ASSIGN_FROM_DLSYM
  return handle;
}
}
// namespace lite
}
// namespace paddle
// Forwards to the dynamically resolved clCreateUserEvent; reports
// CL_INVALID_PLATFORM via *errcode_ret when no OpenCL runtime is loaded.
CL_API_ENTRY cl_event clCreateUserEvent(cl_context context,
                                        cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  const auto create_user_event =
      paddle::lite::OpenCLLibrary::Get()->clCreateUserEvent;
  if (create_user_event == nullptr) {
    if (errcode_ret != nullptr) {
      *errcode_ret = CL_INVALID_PLATFORM;
    }
    return nullptr;
  }
  return create_user_event(context, errcode_ret);
}
// Platform APIs
// Each wrapper in this file forwards to the symbol resolved by
// OpenCLLibrary; when the symbol is unavailable it returns
// CL_INVALID_PLATFORM, signalling "no OpenCL runtime on this device".
CL_API_ENTRY cl_int clGetPlatformIDs(cl_uint num_entries,
                                     cl_platform_id *platforms,
                                     cl_uint *num_platforms)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetPlatformIDs;
  if (func != nullptr) {
    return func(num_entries, platforms, num_platforms);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clGetPlatformInfo(cl_platform_id platform,
                                      cl_platform_info param_name,
                                      size_t param_value_size,
                                      void *param_value,
                                      size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetPlatformInfo;
  if (func != nullptr) {
    return func(platform, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Device APIs: forwarders to the dynamically resolved driver symbols.
CL_API_ENTRY cl_int clGetDeviceIDs(cl_platform_id platform,
                                   cl_device_type device_type,
                                   cl_uint num_entries, cl_device_id *devices,
                                   cl_uint *num_devices)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetDeviceIDs;
  if (func != nullptr) {
    return func(platform, device_type, num_entries, devices, num_devices);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clGetDeviceInfo(cl_device_id device,
                                    cl_device_info param_name,
                                    size_t param_value_size, void *param_value,
                                    size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetDeviceInfo;
  if (func != nullptr) {
    return func(device, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clRetainDevice(cl_device_id device)
    CL_API_SUFFIX__VERSION_1_2 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainDevice;
  if (func != nullptr) {
    return func(device);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseDevice(cl_device_id device)
    CL_API_SUFFIX__VERSION_1_2 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseDevice;
  if (func != nullptr) {
    return func(device);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Context APIs: forwarders to the dynamically resolved driver symbols.
// Object-creating wrappers report failure by returning nullptr and storing
// CL_INVALID_PLATFORM in *errcode_ret (when provided).
CL_API_ENTRY cl_context clCreateContext(
    const cl_context_properties *properties, cl_uint num_devices,
    const cl_device_id *devices,
    void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
    void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateContext;
  if (func != nullptr) {
    return func(properties, num_devices, devices, pfn_notify, user_data,
                errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_context clCreateContextFromType(
    const cl_context_properties *properties, cl_device_type device_type,
    void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
    void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateContextFromType;
  if (func != nullptr) {
    return func(properties, device_type, pfn_notify, user_data, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_int clRetainContext(cl_context context)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainContext;
  if (func != nullptr) {
    return func(context);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseContext(cl_context context)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseContext;
  if (func != nullptr) {
    return func(context);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clGetContextInfo(cl_context context,
                                     cl_context_info param_name,
                                     size_t param_value_size,
                                     void *param_value,
                                     size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetContextInfo;
  if (func != nullptr) {
    return func(context, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Program Object APIs: forwarders to the dynamically resolved driver symbols.
CL_API_ENTRY cl_program clCreateProgramWithSource(cl_context context,
                                                  cl_uint count,
                                                  const char **strings,
                                                  const size_t *lengths,
                                                  cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateProgramWithSource;
  if (func != nullptr) {
    return func(context, count, strings, lengths, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_program clCreateProgramWithBinary(
    cl_context context, cl_uint num_devices, const cl_device_id *device_list,
    const size_t *lengths, const unsigned char **binaries,
    cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateProgramWithBinary;
  if (func != nullptr) {
    return func(context, num_devices, device_list, lengths, binaries,
                binary_status, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_int clGetProgramInfo(cl_program program,
                                     cl_program_info param_name,
                                     size_t param_value_size,
                                     void *param_value,
                                     size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetProgramInfo;
  if (func != nullptr) {
    return func(program, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program program,
                                          cl_device_id device,
                                          cl_program_build_info param_name,
                                          size_t param_value_size,
                                          void *param_value,
                                          size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetProgramBuildInfo;
  if (func != nullptr) {
    return func(program, device, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clRetainProgram(cl_program program)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainProgram;
  if (func != nullptr) {
    return func(program);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseProgram(cl_program program)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseProgram;
  if (func != nullptr) {
    return func(program);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clBuildProgram(
    cl_program program, cl_uint num_devices, const cl_device_id *device_list,
    const char *options,
    void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
    void *user_data) CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clBuildProgram;
  if (func != nullptr) {
    return func(program, num_devices, device_list, options, pfn_notify,
                user_data);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Kernel Object APIs: forwarders to the dynamically resolved driver symbols.
CL_API_ENTRY cl_kernel clCreateKernel(cl_program program,
                                      const char *kernel_name,
                                      cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateKernel;
  if (func != nullptr) {
    return func(program, kernel_name, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_int clRetainKernel(cl_kernel kernel)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainKernel;
  if (func != nullptr) {
    return func(kernel);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseKernel(cl_kernel kernel)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseKernel;
  if (func != nullptr) {
    return func(kernel);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index,
                                   size_t arg_size, const void *arg_value)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clSetKernelArg;
  if (func != nullptr) {
    return func(kernel, arg_index, arg_size, arg_value);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Memory Object APIs: forwarders to the dynamically resolved driver symbols.
CL_API_ENTRY cl_mem clCreateBuffer(cl_context context, cl_mem_flags flags,
                                   size_t size, void *host_ptr,
                                   cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateBuffer;
  if (func != nullptr) {
    return func(context, flags, size, host_ptr, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_mem clCreateImage(cl_context context, cl_mem_flags flags,
                                  const cl_image_format *image_format,
                                  const cl_image_desc *image_desc,
                                  void *host_ptr, cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_2 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateImage;
  if (func != nullptr) {
    return func(context, flags, image_format, image_desc, host_ptr,
                errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_int clRetainMemObject(cl_mem memobj)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainMemObject;
  if (func != nullptr) {
    return func(memobj);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseMemObject(cl_mem memobj)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseMemObject;
  if (func != nullptr) {
    return func(memobj);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clGetImageInfo(cl_mem image, cl_image_info param_name,
                                   size_t param_value_size, void *param_value,
                                   size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetImageInfo;
  if (func != nullptr) {
    return func(image, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Command Queue APIs
// Forwards to the driver's clCreateCommandQueueWithProperties when present.
// When absent (e.g. OpenCL 1.x-only drivers such as MediaTek MT6771's),
// falls back to the deprecated clCreateCommandQueue.
//
// Bug fix vs the previous version: the OpenCL 2.0 spec explicitly allows
// `properties` to be NULL or an empty zero-terminated list (meaning
// "default properties"), but the old fallback dereferenced properties[0]
// unconditionally — crashing on NULL — and treated an empty list as fatal.
// Both cases now map to a default-property 1.x queue.
CL_API_ENTRY cl_command_queue clCreateCommandQueueWithProperties(
    cl_context context, cl_device_id device,
    const cl_queue_properties *properties,
    cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_0 {
  auto func =
      paddle::lite::OpenCLLibrary::Get()->clCreateCommandQueueWithProperties;
  if (func != nullptr) {
    return func(context, device, properties, errcode_ret);
  }
  // Fix MediaTek MT6771 OpenCL driver breakage
  VLOG(3) << "Fallback to clCreateCommandQueue";
#pragma GCC diagnostic push  // disable warning both for clang and gcc
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  // NULL or an empty property list means "default properties" per the spec.
  if (properties == nullptr || properties[0] == 0) {
    return clCreateCommandQueue(context, device, 0, errcode_ret);
  }
  if (properties[0] == CL_QUEUE_PROPERTIES) {
    // When calling with OpenCL-CLHPP, the 2nd param is provided by caller.
    return clCreateCommandQueue(context, device, properties[1], errcode_ret);
  }
#pragma GCC diagnostic pop
  LOG(FATAL) << "Unknown calling parameters, check the code here";
  if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
  return nullptr;
}
// Command-queue retain/release: forwarders to the resolved driver symbols.
CL_API_ENTRY cl_int clRetainCommandQueue(cl_command_queue command_queue)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainCommandQueue;
  if (func != nullptr) {
    return func(command_queue);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseCommandQueue(cl_command_queue command_queue)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseCommandQueue;
  if (func != nullptr) {
    return func(command_queue);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Enqueued Commands APIs: forwarders to the resolved driver symbols.
CL_API_ENTRY cl_int clEnqueueReadBuffer(cl_command_queue command_queue,
                                        cl_mem buffer, cl_bool blocking_read,
                                        size_t offset, size_t size, void *ptr,
                                        cl_uint num_events_in_wait_list,
                                        const cl_event *event_wait_list,
                                        cl_event *event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueReadBuffer;
  if (func != nullptr) {
    return func(command_queue, buffer, blocking_read, offset, size, ptr,
                num_events_in_wait_list, event_wait_list, event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clEnqueueReadImage(cl_command_queue command_queue,
                                       cl_mem image, cl_bool blocking_read,
                                       const size_t *origin,
                                       const size_t *region, size_t row_pitch,
                                       size_t slice_pitch, void *ptr,
                                       cl_uint num_events_in_wait_list,
                                       const cl_event *event_wait_list,
                                       cl_event *event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueReadImage;
  if (func != nullptr) {
    return func(command_queue, image, blocking_read, origin, region,
                row_pitch, slice_pitch, ptr, num_events_in_wait_list,
                event_wait_list, event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clEnqueueWriteBuffer(cl_command_queue command_queue,
                                         cl_mem buffer, cl_bool blocking_write,
                                         size_t offset, size_t size,
                                         const void *ptr,
                                         cl_uint num_events_in_wait_list,
                                         const cl_event *event_wait_list,
                                         cl_event *event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueWriteBuffer;
  if (func != nullptr) {
    return func(command_queue, buffer, blocking_write, offset, size, ptr,
                num_events_in_wait_list, event_wait_list, event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY void *clEnqueueMapBuffer(cl_command_queue command_queue,
                                      cl_mem buffer, cl_bool blocking_map,
                                      cl_map_flags map_flags, size_t offset,
                                      size_t size,
                                      cl_uint num_events_in_wait_list,
                                      const cl_event *event_wait_list,
                                      cl_event *event, cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueMapBuffer;
  if (func != nullptr) {
    return func(command_queue, buffer, blocking_map, map_flags, offset, size,
                num_events_in_wait_list, event_wait_list, event, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY void *clEnqueueMapImage(cl_command_queue command_queue,
                                     cl_mem image, cl_bool blocking_map,
                                     cl_map_flags map_flags,
                                     const size_t *origin,
                                     const size_t *region,
                                     size_t *image_row_pitch,
                                     size_t *image_slice_pitch,
                                     cl_uint num_events_in_wait_list,
                                     const cl_event *event_wait_list,
                                     cl_event *event, cl_int *errcode_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueMapImage;
  if (func != nullptr) {
    return func(command_queue, image, blocking_map, map_flags, origin, region,
                image_row_pitch, image_slice_pitch, num_events_in_wait_list,
                event_wait_list, event, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}

CL_API_ENTRY cl_int clEnqueueUnmapMemObject(cl_command_queue command_queue,
                                            cl_mem memobj, void *mapped_ptr,
                                            cl_uint num_events_in_wait_list,
                                            const cl_event *event_wait_list,
                                            cl_event *event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueUnmapMemObject;
  if (func != nullptr) {
    return func(command_queue, memobj, mapped_ptr, num_events_in_wait_list,
                event_wait_list, event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Kernel execution queries and dispatch: forwarders to resolved symbols.
CL_API_ENTRY cl_int clGetKernelWorkGroupInfo(
    cl_kernel kernel, cl_device_id device,
    cl_kernel_work_group_info param_name, size_t param_value_size,
    void *param_value, size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetKernelWorkGroupInfo;
  if (func != nullptr) {
    return func(kernel, device, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clEnqueueNDRangeKernel(
    cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
    const size_t *global_work_offset, const size_t *global_work_size,
    const size_t *local_work_size, cl_uint num_events_in_wait_list,
    const cl_event *event_wait_list, cl_event *event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueNDRangeKernel;
  if (func != nullptr) {
    return func(command_queue, kernel, work_dim, global_work_offset,
                global_work_size, local_work_size, num_events_in_wait_list,
                event_wait_list, event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Event Object APIs: forwarders to the resolved driver symbols.
CL_API_ENTRY cl_int clWaitForEvents(cl_uint num_events,
                                    const cl_event *event_list)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clWaitForEvents;
  if (func != nullptr) {
    return func(num_events, event_list);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clRetainEvent;
  if (func != nullptr) {
    return func(event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clReleaseEvent(cl_event event)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseEvent;
  if (func != nullptr) {
    return func(event);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

// Event API
CL_API_ENTRY cl_int clGetEventInfo(cl_event event, cl_event_info param_name,
                                   size_t param_value_size, void *param_value,
                                   size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetEventInfo;
  if (func != nullptr) {
    return func(event, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

// Profiling APIs
CL_API_ENTRY cl_int clGetEventProfilingInfo(cl_event event,
                                            cl_profiling_info param_name,
                                            size_t param_value_size,
                                            void *param_value,
                                            size_t *param_value_size_ret)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clGetEventProfilingInfo;
  if (func != nullptr) {
    return func(event, param_name, param_value_size, param_value,
                param_value_size_ret);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Flush and Finish APIs: forwarders to the resolved driver symbols.
CL_API_ENTRY cl_int clFlush(cl_command_queue command_queue)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clFlush;
  if (func != nullptr) {
    return func(command_queue);
  } else {
    return CL_INVALID_PLATFORM;
  }
}

CL_API_ENTRY cl_int clFinish(cl_command_queue command_queue)
    CL_API_SUFFIX__VERSION_1_0 {
  auto func = paddle::lite::OpenCLLibrary::Get()->clFinish;
  if (func != nullptr) {
    return func(command_queue);
  } else {
    return CL_INVALID_PLATFORM;
  }
}
// Deprecated OpenCL 1.1 APIs
// Still exported because vendor libraries (and OpenCL-CLHPP fallbacks) may
// call it on pre-1.2 drivers.
CL_API_ENTRY /* CL_EXT_PREFIX__VERSION_1_1_DEPRECATED */ cl_mem
clCreateImage2D(cl_context context, cl_mem_flags flags,
                const cl_image_format *image_format, size_t image_width,
                size_t image_height, size_t image_row_pitch, void *host_ptr,
                cl_int *errcode_ret)
/* CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED */ {
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateImage2D;
  if (func != nullptr) {
    return func(context, flags, image_format, image_width, image_height,
                image_row_pitch, host_ptr, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}
// Deprecated OpenCL 2.0 APIs
// Kept exported: used directly by 1.x-era callers and as the fallback when
// the driver lacks clCreateCommandQueueWithProperties.
CL_API_ENTRY /*CL_EXT_PREFIX__VERSION_1_2_DEPRECATED*/ cl_command_queue
clCreateCommandQueue(cl_context context, cl_device_id device,
                     cl_command_queue_properties properties,
                     cl_int *errcode_ret)
/* CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED */ {  // NOLINT
  auto func = paddle::lite::OpenCLLibrary::Get()->clCreateCommandQueue;
  if (func != nullptr) {
    return func(context, device, properties, errcode_ret);
  } else {
    if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
    return nullptr;
  }
}
paddle/fluid/lite/operators/use_ops.h
浏览文件 @
a95dcea5
...
...
@@ -13,9 +13,10 @@
// limitations under the License.
#pragma once
/*
* ATTENTION this header file can only include in .cc file.
*/
// ATTENTION: this header must only be included in a .cc file.
#include "paddle/fluid/lite/core/op_registry.h"
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
fc
);
...
...
paddle/fluid/lite/tools/build.sh
浏览文件 @
a95dcea5
...
...
@@ -25,6 +25,23 @@ function cmake_x86 {
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
}
function cmake_opencl {
    # Configure an OpenCL-enabled ARM build.
    # $1: ARM_TARGET_OS in "android", "armlinux"
    # $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a", "armeabi-v7a-hf"
    # Quote the positional parameters so empty or unusual values cannot be
    # word-split into separate cmake arguments.
    cmake .. \
        -DLITE_WITH_OPENCL=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_LITE=ON \
        -DLITE_WITH_CUDA=OFF \
        -DLITE_WITH_X86=OFF \
        -DLITE_WITH_ARM=ON \
        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
        -DWITH_TESTING=ON \
        -DARM_TARGET_OS="$1" \
        -DARM_TARGET_ARCH_ABI="$2"
}
# This method is only called in CI.
function
cmake_x86_for_CI
{
prepare_for_codegen
# fake an empty __generated_code__.cc to pass cmake.
...
...
@@ -85,8 +102,8 @@ function build_test_server {
# test_arm_android <some_test_name> <adb_port_number>
function
test_arm_android
{
test_name
=
$1
port
=
$2
local
test_name
=
$1
local
port
=
$2
if
[[
"
${
test_name
}
x"
==
"x"
]]
;
then
echo
"test_name can not be empty"
exit
1
...
...
@@ -99,12 +116,18 @@ function test_arm_android {
echo
"test name:
${
test_name
}
"
adb_work_dir
=
"/data/local/tmp"
skip_list
=(
"test_model_parser_lite"
"test_mobilenetv1_lite"
"test_mobilenetv2_lite"
"test_resnet50_lite"
"test_inceptionv4_lite"
)
skip_list
=(
"test_model_parser_lite"
"test_mobilenetv1_lite"
"test_mobilenetv2_lite"
"test_resnet50_lite"
"test_inceptionv4_lite"
"test_light_api_lite"
"test_apis_lite"
)
for
skip_name
in
${
skip_list
[@]
}
;
do
[[
$skip_name
=
~
(
^|[[:space:]]
)
$test_name
(
$|
[[
:space:]]
)
]]
&&
echo
"skip
$test_name
"
&&
return
done
testpath
=
$(
find ./paddle/fluid
-name
${
test_name
}
)
local
testpath
=
$(
find ./paddle/fluid
-name
${
test_name
}
)
# if [[ "$test_name" == "test_light_api" ]]; then
# local model_path=$(find . -name "lite_naive_model")
# arm_push_necessary_file $port $model_path $adb_work_dir
# fi
adb
-s
emulator-
${
port
}
push
${
testpath
}
${
adb_work_dir
}
adb
-s
emulator-
${
port
}
shell
chmod
+x
"
${
adb_work_dir
}
/
${
test_name
}
"
adb
-s
emulator-
${
port
}
shell
"./
${
adb_work_dir
}
/
${
test_name
}
"
...
...
@@ -204,6 +227,7 @@ function test_arm {
abi
=
$2
lang
=
$3
port
=
$4
if
[[
${
os
}
==
"armlinux"
]]
;
then
# TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf
echo
"Skip test arm linux yet. armlinux must in another docker"
...
...
@@ -214,13 +238,14 @@ function test_arm {
echo
"android do not need armv7hf"
return
0
fi
# TODO(yuanshuai): enable armv7 on android
if
[[
${
abi
}
==
"armv7"
]]
;
then
echo
"skip android v7 test yet"
return
0
fi
echo
"test file:
${
TESTS_FILE
}
"
for
_test
in
$(
cat
$TESTS_FILE
)
;
do
test_arm_android
$_test
$port
...
...
@@ -235,13 +260,21 @@ function prepare_emulator {
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
# start android armv8 and armv7 emulators first
echo
n | avdmanager create avd
-f
-n
paddle-armv8
-k
"system-images;android-24;google_apis;arm64-v8a"
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv8
-noaudio
-no-window
-gpu
off
-
verbose
-
port
${
port_armv8
}
&
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv8
-noaudio
-no-window
-gpu
off
-port
${
port_armv8
}
&
sleep
1m
echo
n | avdmanager create avd
-f
-n
paddle-armv7
-k
"system-images;android-24;google_apis;armeabi-v7a"
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv7
-noaudio
-no-window
-gpu
off
-
verbose
-
port
${
port_armv7
}
&
echo
-ne
'\n'
|
${
ANDROID_HOME
}
/emulator/emulator
-avd
paddle-armv7
-noaudio
-no-window
-gpu
off
-port
${
port_armv7
}
&
sleep
1m
}
function arm_push_necessary_file {
    # Push a single local file to the emulator's work directory.
    # $1: adb emulator port; $2: local file path; $3: device work dir.
    local port=$1
    local testpath=$2
    local adb_work_dir=$3

    # Quote all expansions so paths containing spaces survive word splitting.
    adb -s "emulator-${port}" push "${testpath}" "${adb_work_dir}"
}
# We split the ARM unit tests into several sub-tasks to run them in parallel and reduce the overall CI time.
# sub-task1
...
...
@@ -286,20 +319,22 @@ function build_test_arm_subtask_armlinux {
prepare_emulator
$port_armv8
$port_armv7
cur
=
$PWD
# job 5
build_arm
"armlinux"
"armv8"
test_arm
"armlinux"
"armv8"
cd
-
build_arm
"armlinux"
"armv8"
"gcc"
$port_armv8
test_arm
"armlinux"
"armv8"
"gcc"
$port_armv8
cd
$cur
# job 6
build_arm
"armlinux"
"armv7"
test_arm
"armlinux"
"armv7"
cd
-
build_arm
"armlinux"
"armv7"
"gcc"
$port_armv8
test_arm
"armlinux"
"armv7"
"gcc"
$port_armv8
cd
$cur
# job 7
build_arm
"armlinux"
"armv7hf"
test_arm
"armlinux"
"armv7hf"
cd
-
build_arm
"armlinux"
"armv7hf"
"gcc"
$port_armv8
test_arm
"armlinux"
"armv7hf"
"gcc"
$port_armv8
cd
$cur
adb devices |
grep
emulator |
cut
-f1
|
while
read
line
;
do
adb
-s
$line
emu
kill
;
done
echo
"Done"
...
...
@@ -333,6 +368,22 @@ function build_test_arm_subtask_model {
echo
"Done"
}
# This test loads a model, optimizes it, and checks the prediction result of both the CXX and light APIs.
function test_arm_predict_apis {
    # Load a model on the emulator, optimize it with the CXX API and check
    # the prediction result of both the CXX and light APIs.
    # $1: adb emulator port; $2: device workspace dir; $3: local model dir.
    local port=$1
    local workspace=$2
    local naive_model_path=$3
    local api_test_path
    api_test_path=$(find . -name "test_apis_lite")

    # The test binary expects the model at ./lite_naive_model relative to
    # its working directory.
    adb -s "emulator-${port}" push "${naive_model_path}" "${workspace}"
    adb -s "emulator-${port}" push "${api_test_path}" "${workspace}"
    # adb push does not guarantee the executable bit is preserved.
    adb -s "emulator-${port}" shell chmod +x "${workspace}/test_apis_lite"
    # Run from inside the workspace so the relative model paths resolve;
    # test cxx_api first to store the optimized model.
    adb -s "emulator-${port}" shell "cd ${workspace} && ./test_apis_lite --model_dir ./lite_naive_model --optimized_model ./lite_naive_model_opt"
}
# Build the code and run lite arm tests. This is executed in the CI system.
function
build_test_arm
{
########################################################################
...
...
@@ -404,6 +455,10 @@ function main {
cmake_x86
shift
;;
cmake_opencl
)
cmake_opencl
$ARM_OS
$ARM_ABI
shift
;;
cmake_cuda
)
cmake_cuda
shift
...
...
paddle/fluid/lite/utils/io.h
浏览文件 @
a95dcea5
...
...
@@ -18,11 +18,12 @@
#include <sys/stat.h>
#include <cerrno>
#include <fstream>
#include <string>
#include "paddle/fluid/lite/utils/cp_logging.h"
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
lite
{
static
bool
IsFileExists
(
const
std
::
string
&
path
)
{
static
bool
IsFileExists
(
const
std
::
string
&
path
)
{
std
::
ifstream
file
(
path
);
bool
res
=
file
.
is_open
();
if
(
res
)
{
...
...
@@ -31,5 +32,15 @@ static bool IsFileExists(const std::string &path) {
return
res
;
}
// Creates `path` and any missing parent directories (like `mkdir -p`).
// ARM mobile toolchains cannot shell out via system(), so on ARM every
// path component is created directly with POSIX mkdir(2).
static void MkDirRecur(const std::string &path) {
#ifndef LITE_WITH_ARM
  CHECK_EQ(system(string_format("mkdir -p %s", path.c_str()).c_str()), 0)
      << "Can't mkdir " << path;
#else   // On ARM
  // Walk the path component by component so intermediate directories are
  // created too; an already-existing directory (EEXIST) is not an error,
  // matching `mkdir -p` semantics.
  for (size_t pos = 0;;) {
    const size_t slash = path.find('/', pos);
    const std::string prefix =
        slash == std::string::npos ? path : path.substr(0, slash);
    // Skip the empty prefix produced by the leading '/' of absolute paths.
    if (!prefix.empty() && mkdir(prefix.c_str(), S_IRWXU) == -1) {
      CHECK_EQ(errno, EEXIST) << "Can't mkdir " << prefix;
    }
    if (slash == std::string::npos) break;
    pos = slash + 1;
  }
#endif
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/utils/string.h
浏览文件 @
a95dcea5
...
...
@@ -74,5 +74,15 @@ static std::string Repr(const std::vector<std::string>& v) {
return
"{"
+
Join
(
tmp
,
","
)
+
"}"
;
}
// Splits `s` on every occurrence of `delim`, mirroring std::getline
// semantics: an empty input yields no pieces, and a trailing delimiter
// does not produce a trailing empty piece.
static std::vector<std::string> Split(const std::string& s, char delim) {
  std::vector<std::string> pieces;
  if (s.empty()) return pieces;
  size_t begin = 0;
  for (;;) {
    const size_t pos = s.find(delim, begin);
    if (pos == std::string::npos) {
      pieces.push_back(s.substr(begin));
      break;
    }
    pieces.push_back(s.substr(begin, pos - begin));
    begin = pos + 1;
    if (begin == s.size()) break;  // trailing delimiter: getline stops here
  }
  return pieces;
}
}
// namespace lite
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录