Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
bdf5e1fd
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bdf5e1fd
编写于
6月 20, 2019
作者:
Z
Zhen Wang
提交者:
ZhenWang
6月 21, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add opencl support for paddle-lite
上级
7cf536f0
变更
26
显示空白变更内容
内联
并排
Showing
26 changed file
with
2297 addition
and
0 deletion
+2297
-0
CMakeLists.txt
CMakeLists.txt
+7
-0
cmake/configure.cmake
cmake/configure.cmake
+4
-0
cmake/external/opencl-clhpp.cmake
cmake/external/opencl-clhpp.cmake
+39
-0
cmake/external/opencl-headers.cmake
cmake/external/opencl-headers.cmake
+33
-0
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+1
-0
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+19
-0
paddle/fluid/lite/opencl/cl2_header.h
paddle/fluid/lite/opencl/cl2_header.h
+21
-0
paddle/fluid/lite/opencl/cl_context.cc
paddle/fluid/lite/opencl/cl_context.cc
+73
-0
paddle/fluid/lite/opencl/cl_context.h
paddle/fluid/lite/opencl/cl_context.h
+43
-0
paddle/fluid/lite/opencl/cl_engine.cc
paddle/fluid/lite/opencl/cl_engine.cc
+172
-0
paddle/fluid/lite/opencl/cl_engine.h
paddle/fluid/lite/opencl/cl_engine.h
+96
-0
paddle/fluid/lite/opencl/cl_half.cc
paddle/fluid/lite/opencl/cl_half.cc
+518
-0
paddle/fluid/lite/opencl/cl_half.h
paddle/fluid/lite/opencl/cl_half.h
+32
-0
paddle/fluid/lite/opencl/cl_helper.cc
paddle/fluid/lite/opencl/cl_helper.cc
+76
-0
paddle/fluid/lite/opencl/cl_helper.h
paddle/fluid/lite/opencl/cl_helper.h
+50
-0
paddle/fluid/lite/opencl/cl_image.cc
paddle/fluid/lite/opencl/cl_image.cc
+162
-0
paddle/fluid/lite/opencl/cl_image.h
paddle/fluid/lite/opencl/cl_image.h
+118
-0
paddle/fluid/lite/opencl/cl_image_converter.cc
paddle/fluid/lite/opencl/cl_image_converter.cc
+450
-0
paddle/fluid/lite/opencl/cl_image_converter.h
paddle/fluid/lite/opencl/cl_image_converter.h
+115
-0
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
+35
-0
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
+34
-0
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
+26
-0
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+47
-0
paddle/fluid/lite/opencl/cl_tool.cc
paddle/fluid/lite/opencl/cl_tool.cc
+84
-0
paddle/fluid/lite/opencl/cl_tool.h
paddle/fluid/lite/opencl/cl_tool.h
+32
-0
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+10
-0
未找到文件。
CMakeLists.txt
浏览文件 @
bdf5e1fd
...
...
@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
option
(
LITE_WITH_CUDA
"Enable CUDA in lite mode"
OFF
)
option
(
LITE_WITH_X86
"Enable X86 in lite mode"
ON
)
option
(
LITE_WITH_ARM
"Enable ARM in lite mode"
OFF
)
option
(
LITE_WITH_CL
"Enable OpenCL support in lite"
OFF
)
option
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
"Enable light-weight framework"
OFF
)
option
(
LITE_WITH_PROFILE
"Enable profile mode in lite framework"
OFF
)
...
...
@@ -166,6 +167,12 @@ endif()
include_directories
(
"
${
PADDLE_SOURCE_DIR
}
"
)
# for opencl
if
(
LITE_WITH_CL
)
include
(
external/opencl-headers
)
include
(
external/opencl-clhpp
)
endif
()
# for mobile
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
message
(
STATUS
"Building the mobile framework"
)
...
...
cmake/configure.cmake
浏览文件 @
bdf5e1fd
...
...
@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
add_definitions
(
"-DLITE_WITH_ARM"
)
endif
()
if
(
LITE_WITH_CL
)
add_definitions
(
"-DLITE_WITH_CL"
)
endif
()
if
(
LITE_WITH_PROFILE
)
add_definitions
(
"-DLITE_WITH_PROFILE"
)
endif
()
...
...
cmake/external/opencl-clhpp.cmake
0 → 100644
浏览文件 @
bdf5e1fd
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE
(
ExternalProject
)
SET
(
OPENCL_CLHPP_SRCS_DIR
${
THIRD_PARTY_PATH
}
/opencl-clhpp
)
SET
(
OPENCL_CLHPP_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/opencl-clhpp
)
SET
(
OPENCL_CLHPP_INCLUDE_DIR
"
${
OPENCL_CLHPP_INSTALL_DIR
}
"
CACHE PATH
"opencl-clhpp include directory."
FORCE
)
INCLUDE_DIRECTORIES
(
${
OPENCL_CLHPP_INCLUDE_DIR
}
)
ExternalProject_Add
(
opencl_clhpp
GIT_REPOSITORY
"https://github.com/KhronosGroup/OpenCL-CLHPP.git"
GIT_TAG
"v2.0.10"
PREFIX
"
${
OPENCL_CLHPP_SRCS_DIR
}
"
CMAKE_ARGS -DBUILD_DOCS=OFF
-DBUILD_EXAMPLES=OFF
-DBUILD_TESTS=OFF
-DCMAKE_INSTALL_PREFIX=
${
OPENCL_CLHPP_INSTALL_DIR
}
${
OPTIONAL_ARGS
}
${
EXTERNAL_OPTIONAL_ARGS
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
GTEST_INSTALL_DIR
}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=
${
THIRD_PARTY_BUILD_TYPE
}
)
ADD_DEPENDENCIES
(
opencl_clhpp opencl_headers
)
cmake/external/opencl-headers.cmake
0 → 100644
浏览文件 @
bdf5e1fd
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE
(
ExternalProject
)
SET
(
OPENCL_HEADERS_SRCS_DIR
${
THIRD_PARTY_PATH
}
/opencl-headers
)
SET
(
OPENCL_HEADERS_INCLUDE_DIR
"
${
OPENCL_HEADERS_SRCS_DIR
}
/src/opencl_headers/opencl20"
CACHE PATH
"opencl-headers include directory."
FORCE
)
INCLUDE_DIRECTORIES
(
${
OPENCL_HEADERS_INCLUDE_DIR
}
)
ExternalProject_Add
(
opencl_headers
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/KhronosGroup/OpenCL-Headers.git"
GIT_TAG
"c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
PREFIX
${
OPENCL_HEADERS_SRCS_DIR
}
DOWNLOAD_NAME
"OpenCL-Headers"
CONFIGURE_COMMAND
""
BUILD_COMMAND
""
INSTALL_COMMAND
""
TEST_COMMAND
""
)
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
bdf5e1fd
...
...
@@ -182,6 +182,7 @@ add_subdirectory(x86)
add_subdirectory
(
arm
)
add_subdirectory
(
host
)
add_subdirectory
(
cuda
)
add_subdirectory
(
opencl
)
add_subdirectory
(
model_parser
)
add_subdirectory
(
utils
)
add_subdirectory
(
api
)
...
...
paddle/fluid/lite/opencl/CMakeLists.txt
0 → 100644
浏览文件 @
bdf5e1fd
if
(
NOT LITE_WITH_CL
)
return
()
endif
()
find_library
(
opencl-lib
NAMES OpenCL
)
message
(
STATUS
"The OpenCL library path :
${
opencl-lib
}
"
)
add_compile_options
(
-fno-strict-aliasing
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
cc_library
(
cl_half SRCS cl_half.cc
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter
)
cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context
)
target_link_libraries
(
test_cl_runtime
${
opencl-lib
}
)
paddle/fluid/lite/opencl/cl2_header.h
0 → 100644
浏览文件 @
bdf5e1fd
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#include <CL/cl2.hpp>
paddle/fluid/lite/opencl/cl_context.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
cl
::
CommandQueue
&
CLContext
::
GetCommandQueue
()
{
return
CLEngine
::
Global
()
->
command_queue
();
}
cl
::
Context
&
CLContext
::
GetContext
()
{
return
CLEngine
::
Global
()
->
context
();
}
cl
::
Program
&
CLContext
::
GetProgram
(
const
std
::
string
&
file_name
,
const
std
::
string
&
options
)
{
std
::
string
program_key
=
file_name
;
if
(
!
options
.
empty
())
{
program_key
+=
options
;
}
auto
it
=
programs_
.
find
(
program_key
);
if
(
it
!=
programs_
.
end
())
{
VLOG
(
3
)
<<
" --- program -> "
<<
program_key
<<
" has been built --- "
;
return
*
(
it
->
second
);
}
auto
program
=
CLEngine
::
Global
()
->
CreateProgram
(
GetContext
(),
CLEngine
::
Global
()
->
cl_path
()
+
"/cl_kernel/"
+
file_name
);
VLOG
(
3
)
<<
" --- begin build program -> "
<<
program_key
<<
" --- "
;
CLEngine
::
Global
()
->
BuildProgram
(
program
.
get
(),
options
);
VLOG
(
3
)
<<
" --- end build program -> "
<<
program_key
<<
" --- "
;
programs_
[
program_key
]
=
std
::
move
(
program
);
return
*
(
programs_
[
program_key
]);
}
std
::
unique_ptr
<
cl
::
Kernel
>
CLContext
::
GetKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
)
{
cl_int
status
{
CL_SUCCESS
};
VLOG
(
3
)
<<
" --- to get program "
<<
file_name
<<
" --- "
;
auto
program
=
GetProgram
(
file_name
,
options
);
VLOG
(
3
)
<<
" --- end get program --- "
;
VLOG
(
3
)
<<
" --- to create kernel: "
<<
kernel_name
<<
" --- "
;
std
::
unique_ptr
<
cl
::
Kernel
>
kernel
(
new
cl
::
Kernel
(
program
,
kernel_name
.
c_str
(),
&
status
));
CL_CHECK_ERRORS
(
status
);
VLOG
(
3
)
<<
" --- end create kernel --- "
;
return
std
::
move
(
kernel
);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_context.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace
paddle
{
namespace
lite
{
class
CLContext
{
public:
cl
::
CommandQueue
&
GetCommandQueue
();
cl
::
Context
&
GetContext
();
cl
::
Program
&
GetProgram
(
const
std
::
string
&
file_name
,
const
std
::
string
&
options
);
std
::
unique_ptr
<
cl
::
Kernel
>
GetKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
);
private:
std
::
unordered_map
<
std
::
string
,
std
::
unique_ptr
<
cl
::
Program
>>
programs_
;
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_engine.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
lite
{
CLEngine
*
CLEngine
::
Global
()
{
static
CLEngine
cl_engine_
;
cl_engine_
.
Init
();
return
&
cl_engine_
;
}
CLEngine
::~
CLEngine
()
{
if
(
command_queue_
!=
nullptr
)
{
command_queue_
->
finish
();
}
// For controlling the destruction order:
command_queue_
.
reset
();
context_
.
reset
();
device_
.
reset
();
platform_
.
reset
();
}
bool
CLEngine
::
Init
()
{
if
(
initialized_
)
{
return
true
;
}
bool
is_platform_init
=
InitializePlatform
();
bool
is_device_init
=
InitializeDevice
();
is_init_success_
=
is_platform_init
&&
is_device_init
;
initialized_
=
true
;
return
initialized_
;
}
cl
::
Platform
&
CLEngine
::
platform
()
{
CHECK
(
platform_
!=
nullptr
)
<<
"platform_ is not initialized!"
;
return
*
platform_
;
}
cl
::
Context
&
CLEngine
::
context
()
{
if
(
context_
==
nullptr
)
{
context_
=
CreateContext
();
}
return
*
context_
;
}
cl
::
Device
&
CLEngine
::
device
()
{
CHECK
(
device_
!=
nullptr
)
<<
"device_ is not initialized!"
;
return
*
device_
;
}
cl
::
CommandQueue
&
CLEngine
::
command_queue
()
{
if
(
command_queue_
==
nullptr
)
{
command_queue_
=
CreateCommandQueue
(
context
());
}
return
*
command_queue_
;
}
std
::
unique_ptr
<
cl
::
Program
>
CLEngine
::
CreateProgram
(
const
cl
::
Context
&
context
,
std
::
string
file_name
)
{
std
::
ifstream
file
{
file_name
,
std
::
ios
::
binary
|
std
::
ios
::
ate
};
CHECK
(
file
.
is_open
())
<<
"Can't open file from "
<<
file_name
;
auto
size
=
file
.
tellg
();
CHECK
(
size
>
0
)
<<
"size is too small."
;
std
::
string
content
(
size
,
'\0'
);
file
.
seekg
(
0
);
file
.
read
(
&
content
[
0
],
size
);
cl
::
Program
::
Sources
sources
;
sources
.
push_back
(
content
);
auto
prog
=
std
::
unique_ptr
<
cl
::
Program
>
(
new
cl
::
Program
(
context
,
sources
,
&
status_
));
LOG
(
INFO
)
<<
"OpenCL kernel file name: "
<<
file_name
;
LOG
(
INFO
)
<<
"Program source size: "
<<
content
.
size
();
CL_CHECK_ERRORS
(
status_
);
return
std
::
move
(
prog
);
}
std
::
unique_ptr
<
cl
::
UserEvent
>
CLEngine
::
CreateEvent
(
const
cl
::
Context
&
context
)
{
auto
event
=
std
::
unique_ptr
<
cl
::
UserEvent
>
(
new
cl
::
UserEvent
(
context
,
&
status_
));
CL_CHECK_ERRORS
(
status_
);
return
std
::
move
(
event
);
}
bool
CLEngine
::
BuildProgram
(
cl
::
Program
*
program
,
const
std
::
string
&
options
)
{
std
::
string
build_option
=
options
+
" -cl-fast-relaxed-math -I "
+
CLEngine
::
Global
()
->
cl_path
()
+
"/cl_kernel"
;
status_
=
program
->
build
({
*
device_
},
build_option
.
c_str
());
CL_CHECK_ERRORS
(
status_
);
if
(
status_
!=
CL_SUCCESS
)
{
if
(
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_STATUS
>
(
device
())
==
CL_BUILD_ERROR
)
{
std
::
string
log
=
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_LOG
>
(
device
());
LOG
(
INFO
)
<<
"Program build error: "
<<
log
;
}
return
false
;
}
return
true
;
}
bool
CLEngine
::
InitializePlatform
()
{
std
::
vector
<
cl
::
Platform
>
all_platforms
;
status_
=
cl
::
Platform
::
get
(
&
all_platforms
);
CL_CHECK_ERRORS
(
status_
);
if
(
all_platforms
.
empty
())
{
LOG
(
ERROR
)
<<
"No OpenCL platform found!"
;
return
false
;
}
platform_
=
std
::
make_shared
<
cl
::
Platform
>
();
*
platform_
=
all_platforms
[
0
];
return
true
;
}
bool
CLEngine
::
InitializeDevice
()
{
std
::
vector
<
cl
::
Device
>
all_devices
;
status_
=
platform_
->
getDevices
(
CL_DEVICE_TYPE_DEFAULT
,
&
all_devices
);
CL_CHECK_ERRORS
(
status_
);
if
(
all_devices
.
empty
())
{
LOG
(
ERROR
)
<<
"No OpenCL device found!"
;
return
false
;
}
device_
=
std
::
make_shared
<
cl
::
Device
>
();
*
device_
=
all_devices
[
0
];
auto
device_name
=
device_
->
getInfo
<
CL_DEVICE_NAME
>
();
LOG
(
INFO
)
<<
"Using device: "
<<
device_name
;
auto
image_support
=
device_
->
getInfo
<
CL_DEVICE_IMAGE_SUPPORT
>
();
if
(
image_support
)
{
LOG
(
INFO
)
<<
"The chosen device supports image processing."
;
}
else
{
LOG
(
ERROR
)
<<
"The chosen device doesn't support image processing!"
;
return
false
;
}
auto
ext_data
=
device_
->
getInfo
<
CL_DEVICE_EXTENSIONS
>
();
if
(
ext_data
.
find
(
"cl_khr_fp16"
)
!=
std
::
string
::
npos
)
{
LOG
(
INFO
)
<<
"The chosen device supports the half data type."
;
}
else
{
LOG
(
INFO
)
<<
"The chosen device doesn't support the half data type!"
;
LOG
(
INFO
)
<<
"The extensions supported by this device: "
<<
ext_data
;
// LOG(ERROR) << "The chosen platform doesn't support the half data type!";
// return false;
}
auto
max_units
=
device_
->
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
LOG
(
INFO
)
<<
"The chosen device has "
<<
max_units
<<
" compute units."
;
auto
local_mem
=
device_
->
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
();
LOG
(
INFO
)
<<
"The local memory size of the chosen device is "
<<
static_cast
<
float
>
(
local_mem
)
/
1024
<<
" KB."
;
return
true
;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_engine.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
class
CLEngine
{
public:
static
CLEngine
*
Global
();
bool
Init
();
cl
::
Platform
&
platform
();
cl
::
Context
&
context
();
cl
::
Device
&
device
();
cl
::
CommandQueue
&
command_queue
();
std
::
unique_ptr
<
cl
::
Program
>
CreateProgram
(
const
cl
::
Context
&
context
,
std
::
string
file_name
);
std
::
unique_ptr
<
cl
::
UserEvent
>
CreateEvent
(
const
cl
::
Context
&
context
);
bool
BuildProgram
(
cl
::
Program
*
program
,
const
std
::
string
&
options
=
""
);
bool
IsInitSuccess
()
{
return
is_init_success_
;
}
std
::
string
cl_path
()
{
return
cl_path_
;
}
void
set_cl_path
(
std
::
string
cl_path
)
{
cl_path_
=
cl_path
;
}
private:
CLEngine
()
=
default
;
~
CLEngine
();
bool
InitializePlatform
();
bool
InitializeDevice
();
std
::
shared_ptr
<
cl
::
Context
>
CreateContext
()
{
auto
context
=
std
::
make_shared
<
cl
::
Context
>
(
std
::
vector
<
cl
::
Device
>
{
device
()},
nullptr
,
nullptr
,
nullptr
,
&
status_
);
CL_CHECK_ERRORS
(
status_
);
return
context
;
}
std
::
shared_ptr
<
cl
::
CommandQueue
>
CreateCommandQueue
(
const
cl
::
Context
&
context
)
{
auto
queue
=
std
::
make_shared
<
cl
::
CommandQueue
>
(
context
,
device
(),
0
,
&
status_
);
CL_CHECK_ERRORS
(
status_
);
return
queue
;
}
std
::
string
cl_path_
;
std
::
shared_ptr
<
cl
::
Platform
>
platform_
{
nullptr
};
std
::
shared_ptr
<
cl
::
Context
>
context_
{
nullptr
};
std
::
shared_ptr
<
cl
::
Device
>
device_
{
nullptr
};
std
::
shared_ptr
<
cl
::
CommandQueue
>
command_queue_
{
nullptr
};
cl_int
status_
{
CL_SUCCESS
};
bool
initialized_
{
false
};
bool
is_init_success_
{
false
};
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_half.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace
paddle
{
namespace
lite
{
static
const
uint32_t
mantissatable
[
2048
]
=
{
0x00000000
,
0x33800000
,
0x34000000
,
0x34400000
,
0x34800000
,
0x34a00000
,
0x34c00000
,
0x34e00000
,
0x35000000
,
0x35100000
,
0x35200000
,
0x35300000
,
0x35400000
,
0x35500000
,
0x35600000
,
0x35700000
,
0x35800000
,
0x35880000
,
0x35900000
,
0x35980000
,
0x35a00000
,
0x35a80000
,
0x35b00000
,
0x35b80000
,
0x35c00000
,
0x35c80000
,
0x35d00000
,
0x35d80000
,
0x35e00000
,
0x35e80000
,
0x35f00000
,
0x35f80000
,
0x36000000
,
0x36040000
,
0x36080000
,
0x360c0000
,
0x36100000
,
0x36140000
,
0x36180000
,
0x361c0000
,
0x36200000
,
0x36240000
,
0x36280000
,
0x362c0000
,
0x36300000
,
0x36340000
,
0x36380000
,
0x363c0000
,
0x36400000
,
0x36440000
,
0x36480000
,
0x364c0000
,
0x36500000
,
0x36540000
,
0x36580000
,
0x365c0000
,
0x36600000
,
0x36640000
,
0x36680000
,
0x366c0000
,
0x36700000
,
0x36740000
,
0x36780000
,
0x367c0000
,
0x36800000
,
0x36820000
,
0x36840000
,
0x36860000
,
0x36880000
,
0x368a0000
,
0x368c0000
,
0x368e0000
,
0x36900000
,
0x36920000
,
0x36940000
,
0x36960000
,
0x36980000
,
0x369a0000
,
0x369c0000
,
0x369e0000
,
0x36a00000
,
0x36a20000
,
0x36a40000
,
0x36a60000
,
0x36a80000
,
0x36aa0000
,
0x36ac0000
,
0x36ae0000
,
0x36b00000
,
0x36b20000
,
0x36b40000
,
0x36b60000
,
0x36b80000
,
0x36ba0000
,
0x36bc0000
,
0x36be0000
,
0x36c00000
,
0x36c20000
,
0x36c40000
,
0x36c60000
,
0x36c80000
,
0x36ca0000
,
0x36cc0000
,
0x36ce0000
,
0x36d00000
,
0x36d20000
,
0x36d40000
,
0x36d60000
,
0x36d80000
,
0x36da0000
,
0x36dc0000
,
0x36de0000
,
0x36e00000
,
0x36e20000
,
0x36e40000
,
0x36e60000
,
0x36e80000
,
0x36ea0000
,
0x36ec0000
,
0x36ee0000
,
0x36f00000
,
0x36f20000
,
0x36f40000
,
0x36f60000
,
0x36f80000
,
0x36fa0000
,
0x36fc0000
,
0x36fe0000
,
0x37000000
,
0x37010000
,
0x37020000
,
0x37030000
,
0x37040000
,
0x37050000
,
0x37060000
,
0x37070000
,
0x37080000
,
0x37090000
,
0x370a0000
,
0x370b0000
,
0x370c0000
,
0x370d0000
,
0x370e0000
,
0x370f0000
,
0x37100000
,
0x37110000
,
0x37120000
,
0x37130000
,
0x37140000
,
0x37150000
,
0x37160000
,
0x37170000
,
0x37180000
,
0x37190000
,
0x371a0000
,
0x371b0000
,
0x371c0000
,
0x371d0000
,
0x371e0000
,
0x371f0000
,
0x37200000
,
0x37210000
,
0x37220000
,
0x37230000
,
0x37240000
,
0x37250000
,
0x37260000
,
0x37270000
,
0x37280000
,
0x37290000
,
0x372a0000
,
0x372b0000
,
0x372c0000
,
0x372d0000
,
0x372e0000
,
0x372f0000
,
0x37300000
,
0x37310000
,
0x37320000
,
0x37330000
,
0x37340000
,
0x37350000
,
0x37360000
,
0x37370000
,
0x37380000
,
0x37390000
,
0x373a0000
,
0x373b0000
,
0x373c0000
,
0x373d0000
,
0x373e0000
,
0x373f0000
,
0x37400000
,
0x37410000
,
0x37420000
,
0x37430000
,
0x37440000
,
0x37450000
,
0x37460000
,
0x37470000
,
0x37480000
,
0x37490000
,
0x374a0000
,
0x374b0000
,
0x374c0000
,
0x374d0000
,
0x374e0000
,
0x374f0000
,
0x37500000
,
0x37510000
,
0x37520000
,
0x37530000
,
0x37540000
,
0x37550000
,
0x37560000
,
0x37570000
,
0x37580000
,
0x37590000
,
0x375a0000
,
0x375b0000
,
0x375c0000
,
0x375d0000
,
0x375e0000
,
0x375f0000
,
0x37600000
,
0x37610000
,
0x37620000
,
0x37630000
,
0x37640000
,
0x37650000
,
0x37660000
,
0x37670000
,
0x37680000
,
0x37690000
,
0x376a0000
,
0x376b0000
,
0x376c0000
,
0x376d0000
,
0x376e0000
,
0x376f0000
,
0x37700000
,
0x37710000
,
0x37720000
,
0x37730000
,
0x37740000
,
0x37750000
,
0x37760000
,
0x37770000
,
0x37780000
,
0x37790000
,
0x377a0000
,
0x377b0000
,
0x377c0000
,
0x377d0000
,
0x377e0000
,
0x377f0000
,
0x37800000
,
0x37808000
,
0x37810000
,
0x37818000
,
0x37820000
,
0x37828000
,
0x37830000
,
0x37838000
,
0x37840000
,
0x37848000
,
0x37850000
,
0x37858000
,
0x37860000
,
0x37868000
,
0x37870000
,
0x37878000
,
0x37880000
,
0x37888000
,
0x37890000
,
0x37898000
,
0x378a0000
,
0x378a8000
,
0x378b0000
,
0x378b8000
,
0x378c0000
,
0x378c8000
,
0x378d0000
,
0x378d8000
,
0x378e0000
,
0x378e8000
,
0x378f0000
,
0x378f8000
,
0x37900000
,
0x37908000
,
0x37910000
,
0x37918000
,
0x37920000
,
0x37928000
,
0x37930000
,
0x37938000
,
0x37940000
,
0x37948000
,
0x37950000
,
0x37958000
,
0x37960000
,
0x37968000
,
0x37970000
,
0x37978000
,
0x37980000
,
0x37988000
,
0x37990000
,
0x37998000
,
0x379a0000
,
0x379a8000
,
0x379b0000
,
0x379b8000
,
0x379c0000
,
0x379c8000
,
0x379d0000
,
0x379d8000
,
0x379e0000
,
0x379e8000
,
0x379f0000
,
0x379f8000
,
0x37a00000
,
0x37a08000
,
0x37a10000
,
0x37a18000
,
0x37a20000
,
0x37a28000
,
0x37a30000
,
0x37a38000
,
0x37a40000
,
0x37a48000
,
0x37a50000
,
0x37a58000
,
0x37a60000
,
0x37a68000
,
0x37a70000
,
0x37a78000
,
0x37a80000
,
0x37a88000
,
0x37a90000
,
0x37a98000
,
0x37aa0000
,
0x37aa8000
,
0x37ab0000
,
0x37ab8000
,
0x37ac0000
,
0x37ac8000
,
0x37ad0000
,
0x37ad8000
,
0x37ae0000
,
0x37ae8000
,
0x37af0000
,
0x37af8000
,
0x37b00000
,
0x37b08000
,
0x37b10000
,
0x37b18000
,
0x37b20000
,
0x37b28000
,
0x37b30000
,
0x37b38000
,
0x37b40000
,
0x37b48000
,
0x37b50000
,
0x37b58000
,
0x37b60000
,
0x37b68000
,
0x37b70000
,
0x37b78000
,
0x37b80000
,
0x37b88000
,
0x37b90000
,
0x37b98000
,
0x37ba0000
,
0x37ba8000
,
0x37bb0000
,
0x37bb8000
,
0x37bc0000
,
0x37bc8000
,
0x37bd0000
,
0x37bd8000
,
0x37be0000
,
0x37be8000
,
0x37bf0000
,
0x37bf8000
,
0x37c00000
,
0x37c08000
,
0x37c10000
,
0x37c18000
,
0x37c20000
,
0x37c28000
,
0x37c30000
,
0x37c38000
,
0x37c40000
,
0x37c48000
,
0x37c50000
,
0x37c58000
,
0x37c60000
,
0x37c68000
,
0x37c70000
,
0x37c78000
,
0x37c80000
,
0x37c88000
,
0x37c90000
,
0x37c98000
,
0x37ca0000
,
0x37ca8000
,
0x37cb0000
,
0x37cb8000
,
0x37cc0000
,
0x37cc8000
,
0x37cd0000
,
0x37cd8000
,
0x37ce0000
,
0x37ce8000
,
0x37cf0000
,
0x37cf8000
,
0x37d00000
,
0x37d08000
,
0x37d10000
,
0x37d18000
,
0x37d20000
,
0x37d28000
,
0x37d30000
,
0x37d38000
,
0x37d40000
,
0x37d48000
,
0x37d50000
,
0x37d58000
,
0x37d60000
,
0x37d68000
,
0x37d70000
,
0x37d78000
,
0x37d80000
,
0x37d88000
,
0x37d90000
,
0x37d98000
,
0x37da0000
,
0x37da8000
,
0x37db0000
,
0x37db8000
,
0x37dc0000
,
0x37dc8000
,
0x37dd0000
,
0x37dd8000
,
0x37de0000
,
0x37de8000
,
0x37df0000
,
0x37df8000
,
0x37e00000
,
0x37e08000
,
0x37e10000
,
0x37e18000
,
0x37e20000
,
0x37e28000
,
0x37e30000
,
0x37e38000
,
0x37e40000
,
0x37e48000
,
0x37e50000
,
0x37e58000
,
0x37e60000
,
0x37e68000
,
0x37e70000
,
0x37e78000
,
0x37e80000
,
0x37e88000
,
0x37e90000
,
0x37e98000
,
0x37ea0000
,
0x37ea8000
,
0x37eb0000
,
0x37eb8000
,
0x37ec0000
,
0x37ec8000
,
0x37ed0000
,
0x37ed8000
,
0x37ee0000
,
0x37ee8000
,
0x37ef0000
,
0x37ef8000
,
0x37f00000
,
0x37f08000
,
0x37f10000
,
0x37f18000
,
0x37f20000
,
0x37f28000
,
0x37f30000
,
0x37f38000
,
0x37f40000
,
0x37f48000
,
0x37f50000
,
0x37f58000
,
0x37f60000
,
0x37f68000
,
0x37f70000
,
0x37f78000
,
0x37f80000
,
0x37f88000
,
0x37f90000
,
0x37f98000
,
0x37fa0000
,
0x37fa8000
,
0x37fb0000
,
0x37fb8000
,
0x37fc0000
,
0x37fc8000
,
0x37fd0000
,
0x37fd8000
,
0x37fe0000
,
0x37fe8000
,
0x37ff0000
,
0x37ff8000
,
0x38000000
,
0x38004000
,
0x38008000
,
0x3800c000
,
0x38010000
,
0x38014000
,
0x38018000
,
0x3801c000
,
0x38020000
,
0x38024000
,
0x38028000
,
0x3802c000
,
0x38030000
,
0x38034000
,
0x38038000
,
0x3803c000
,
0x38040000
,
0x38044000
,
0x38048000
,
0x3804c000
,
0x38050000
,
0x38054000
,
0x38058000
,
0x3805c000
,
0x38060000
,
0x38064000
,
0x38068000
,
0x3806c000
,
0x38070000
,
0x38074000
,
0x38078000
,
0x3807c000
,
0x38080000
,
0x38084000
,
0x38088000
,
0x3808c000
,
0x38090000
,
0x38094000
,
0x38098000
,
0x3809c000
,
0x380a0000
,
0x380a4000
,
0x380a8000
,
0x380ac000
,
0x380b0000
,
0x380b4000
,
0x380b8000
,
0x380bc000
,
0x380c0000
,
0x380c4000
,
0x380c8000
,
0x380cc000
,
0x380d0000
,
0x380d4000
,
0x380d8000
,
0x380dc000
,
0x380e0000
,
0x380e4000
,
0x380e8000
,
0x380ec000
,
0x380f0000
,
0x380f4000
,
0x380f8000
,
0x380fc000
,
0x38100000
,
0x38104000
,
0x38108000
,
0x3810c000
,
0x38110000
,
0x38114000
,
0x38118000
,
0x3811c000
,
0x38120000
,
0x38124000
,
0x38128000
,
0x3812c000
,
0x38130000
,
0x38134000
,
0x38138000
,
0x3813c000
,
0x38140000
,
0x38144000
,
0x38148000
,
0x3814c000
,
0x38150000
,
0x38154000
,
0x38158000
,
0x3815c000
,
0x38160000
,
0x38164000
,
0x38168000
,
0x3816c000
,
0x38170000
,
0x38174000
,
0x38178000
,
0x3817c000
,
0x38180000
,
0x38184000
,
0x38188000
,
0x3818c000
,
0x38190000
,
0x38194000
,
0x38198000
,
0x3819c000
,
0x381a0000
,
0x381a4000
,
0x381a8000
,
0x381ac000
,
0x381b0000
,
0x381b4000
,
0x381b8000
,
0x381bc000
,
0x381c0000
,
0x381c4000
,
0x381c8000
,
0x381cc000
,
0x381d0000
,
0x381d4000
,
0x381d8000
,
0x381dc000
,
0x381e0000
,
0x381e4000
,
0x381e8000
,
0x381ec000
,
0x381f0000
,
0x381f4000
,
0x381f8000
,
0x381fc000
,
0x38200000
,
0x38204000
,
0x38208000
,
0x3820c000
,
0x38210000
,
0x38214000
,
0x38218000
,
0x3821c000
,
0x38220000
,
0x38224000
,
0x38228000
,
0x3822c000
,
0x38230000
,
0x38234000
,
0x38238000
,
0x3823c000
,
0x38240000
,
0x38244000
,
0x38248000
,
0x3824c000
,
0x38250000
,
0x38254000
,
0x38258000
,
0x3825c000
,
0x38260000
,
0x38264000
,
0x38268000
,
0x3826c000
,
0x38270000
,
0x38274000
,
0x38278000
,
0x3827c000
,
0x38280000
,
0x38284000
,
0x38288000
,
0x3828c000
,
0x38290000
,
0x38294000
,
0x38298000
,
0x3829c000
,
0x382a0000
,
0x382a4000
,
0x382a8000
,
0x382ac000
,
0x382b0000
,
0x382b4000
,
0x382b8000
,
0x382bc000
,
0x382c0000
,
0x382c4000
,
0x382c8000
,
0x382cc000
,
0x382d0000
,
0x382d4000
,
0x382d8000
,
0x382dc000
,
0x382e0000
,
0x382e4000
,
0x382e8000
,
0x382ec000
,
0x382f0000
,
0x382f4000
,
0x382f8000
,
0x382fc000
,
0x38300000
,
0x38304000
,
0x38308000
,
0x3830c000
,
0x38310000
,
0x38314000
,
0x38318000
,
0x3831c000
,
0x38320000
,
0x38324000
,
0x38328000
,
0x3832c000
,
0x38330000
,
0x38334000
,
0x38338000
,
0x3833c000
,
0x38340000
,
0x38344000
,
0x38348000
,
0x3834c000
,
0x38350000
,
0x38354000
,
0x38358000
,
0x3835c000
,
0x38360000
,
0x38364000
,
0x38368000
,
0x3836c000
,
0x38370000
,
0x38374000
,
0x38378000
,
0x3837c000
,
0x38380000
,
0x38384000
,
0x38388000
,
0x3838c000
,
0x38390000
,
0x38394000
,
0x38398000
,
0x3839c000
,
0x383a0000
,
0x383a4000
,
0x383a8000
,
0x383ac000
,
0x383b0000
,
0x383b4000
,
0x383b8000
,
0x383bc000
,
0x383c0000
,
0x383c4000
,
0x383c8000
,
0x383cc000
,
0x383d0000
,
0x383d4000
,
0x383d8000
,
0x383dc000
,
0x383e0000
,
0x383e4000
,
0x383e8000
,
0x383ec000
,
0x383f0000
,
0x383f4000
,
0x383f8000
,
0x383fc000
,
0x38400000
,
0x38404000
,
0x38408000
,
0x3840c000
,
0x38410000
,
0x38414000
,
0x38418000
,
0x3841c000
,
0x38420000
,
0x38424000
,
0x38428000
,
0x3842c000
,
0x38430000
,
0x38434000
,
0x38438000
,
0x3843c000
,
0x38440000
,
0x38444000
,
0x38448000
,
0x3844c000
,
0x38450000
,
0x38454000
,
0x38458000
,
0x3845c000
,
0x38460000
,
0x38464000
,
0x38468000
,
0x3846c000
,
0x38470000
,
0x38474000
,
0x38478000
,
0x3847c000
,
0x38480000
,
0x38484000
,
0x38488000
,
0x3848c000
,
0x38490000
,
0x38494000
,
0x38498000
,
0x3849c000
,
0x384a0000
,
0x384a4000
,
0x384a8000
,
0x384ac000
,
0x384b0000
,
0x384b4000
,
0x384b8000
,
0x384bc000
,
0x384c0000
,
0x384c4000
,
0x384c8000
,
0x384cc000
,
0x384d0000
,
0x384d4000
,
0x384d8000
,
0x384dc000
,
0x384e0000
,
0x384e4000
,
0x384e8000
,
0x384ec000
,
0x384f0000
,
0x384f4000
,
0x384f8000
,
0x384fc000
,
0x38500000
,
0x38504000
,
0x38508000
,
0x3850c000
,
0x38510000
,
0x38514000
,
0x38518000
,
0x3851c000
,
0x38520000
,
0x38524000
,
0x38528000
,
0x3852c000
,
0x38530000
,
0x38534000
,
0x38538000
,
0x3853c000
,
0x38540000
,
0x38544000
,
0x38548000
,
0x3854c000
,
0x38550000
,
0x38554000
,
0x38558000
,
0x3855c000
,
0x38560000
,
0x38564000
,
0x38568000
,
0x3856c000
,
0x38570000
,
0x38574000
,
0x38578000
,
0x3857c000
,
0x38580000
,
0x38584000
,
0x38588000
,
0x3858c000
,
0x38590000
,
0x38594000
,
0x38598000
,
0x3859c000
,
0x385a0000
,
0x385a4000
,
0x385a8000
,
0x385ac000
,
0x385b0000
,
0x385b4000
,
0x385b8000
,
0x385bc000
,
0x385c0000
,
0x385c4000
,
0x385c8000
,
0x385cc000
,
0x385d0000
,
0x385d4000
,
0x385d8000
,
0x385dc000
,
0x385e0000
,
0x385e4000
,
0x385e8000
,
0x385ec000
,
0x385f0000
,
0x385f4000
,
0x385f8000
,
0x385fc000
,
0x38600000
,
0x38604000
,
0x38608000
,
0x3860c000
,
0x38610000
,
0x38614000
,
0x38618000
,
0x3861c000
,
0x38620000
,
0x38624000
,
0x38628000
,
0x3862c000
,
0x38630000
,
0x38634000
,
0x38638000
,
0x3863c000
,
0x38640000
,
0x38644000
,
0x38648000
,
0x3864c000
,
0x38650000
,
0x38654000
,
0x38658000
,
0x3865c000
,
0x38660000
,
0x38664000
,
0x38668000
,
0x3866c000
,
0x38670000
,
0x38674000
,
0x38678000
,
0x3867c000
,
0x38680000
,
0x38684000
,
0x38688000
,
0x3868c000
,
0x38690000
,
0x38694000
,
0x38698000
,
0x3869c000
,
0x386a0000
,
0x386a4000
,
0x386a8000
,
0x386ac000
,
0x386b0000
,
0x386b4000
,
0x386b8000
,
0x386bc000
,
0x386c0000
,
0x386c4000
,
0x386c8000
,
0x386cc000
,
0x386d0000
,
0x386d4000
,
0x386d8000
,
0x386dc000
,
0x386e0000
,
0x386e4000
,
0x386e8000
,
0x386ec000
,
0x386f0000
,
0x386f4000
,
0x386f8000
,
0x386fc000
,
0x38700000
,
0x38704000
,
0x38708000
,
0x3870c000
,
0x38710000
,
0x38714000
,
0x38718000
,
0x3871c000
,
0x38720000
,
0x38724000
,
0x38728000
,
0x3872c000
,
0x38730000
,
0x38734000
,
0x38738000
,
0x3873c000
,
0x38740000
,
0x38744000
,
0x38748000
,
0x3874c000
,
0x38750000
,
0x38754000
,
0x38758000
,
0x3875c000
,
0x38760000
,
0x38764000
,
0x38768000
,
0x3876c000
,
0x38770000
,
0x38774000
,
0x38778000
,
0x3877c000
,
0x38780000
,
0x38784000
,
0x38788000
,
0x3878c000
,
0x38790000
,
0x38794000
,
0x38798000
,
0x3879c000
,
0x387a0000
,
0x387a4000
,
0x387a8000
,
0x387ac000
,
0x387b0000
,
0x387b4000
,
0x387b8000
,
0x387bc000
,
0x387c0000
,
0x387c4000
,
0x387c8000
,
0x387cc000
,
0x387d0000
,
0x387d4000
,
0x387d8000
,
0x387dc000
,
0x387e0000
,
0x387e4000
,
0x387e8000
,
0x387ec000
,
0x387f0000
,
0x387f4000
,
0x387f8000
,
0x387fc000
,
0x38000000
,
0x38002000
,
0x38004000
,
0x38006000
,
0x38008000
,
0x3800a000
,
0x3800c000
,
0x3800e000
,
0x38010000
,
0x38012000
,
0x38014000
,
0x38016000
,
0x38018000
,
0x3801a000
,
0x3801c000
,
0x3801e000
,
0x38020000
,
0x38022000
,
0x38024000
,
0x38026000
,
0x38028000
,
0x3802a000
,
0x3802c000
,
0x3802e000
,
0x38030000
,
0x38032000
,
0x38034000
,
0x38036000
,
0x38038000
,
0x3803a000
,
0x3803c000
,
0x3803e000
,
0x38040000
,
0x38042000
,
0x38044000
,
0x38046000
,
0x38048000
,
0x3804a000
,
0x3804c000
,
0x3804e000
,
0x38050000
,
0x38052000
,
0x38054000
,
0x38056000
,
0x38058000
,
0x3805a000
,
0x3805c000
,
0x3805e000
,
0x38060000
,
0x38062000
,
0x38064000
,
0x38066000
,
0x38068000
,
0x3806a000
,
0x3806c000
,
0x3806e000
,
0x38070000
,
0x38072000
,
0x38074000
,
0x38076000
,
0x38078000
,
0x3807a000
,
0x3807c000
,
0x3807e000
,
0x38080000
,
0x38082000
,
0x38084000
,
0x38086000
,
0x38088000
,
0x3808a000
,
0x3808c000
,
0x3808e000
,
0x38090000
,
0x38092000
,
0x38094000
,
0x38096000
,
0x38098000
,
0x3809a000
,
0x3809c000
,
0x3809e000
,
0x380a0000
,
0x380a2000
,
0x380a4000
,
0x380a6000
,
0x380a8000
,
0x380aa000
,
0x380ac000
,
0x380ae000
,
0x380b0000
,
0x380b2000
,
0x380b4000
,
0x380b6000
,
0x380b8000
,
0x380ba000
,
0x380bc000
,
0x380be000
,
0x380c0000
,
0x380c2000
,
0x380c4000
,
0x380c6000
,
0x380c8000
,
0x380ca000
,
0x380cc000
,
0x380ce000
,
0x380d0000
,
0x380d2000
,
0x380d4000
,
0x380d6000
,
0x380d8000
,
0x380da000
,
0x380dc000
,
0x380de000
,
0x380e0000
,
0x380e2000
,
0x380e4000
,
0x380e6000
,
0x380e8000
,
0x380ea000
,
0x380ec000
,
0x380ee000
,
0x380f0000
,
0x380f2000
,
0x380f4000
,
0x380f6000
,
0x380f8000
,
0x380fa000
,
0x380fc000
,
0x380fe000
,
0x38100000
,
0x38102000
,
0x38104000
,
0x38106000
,
0x38108000
,
0x3810a000
,
0x3810c000
,
0x3810e000
,
0x38110000
,
0x38112000
,
0x38114000
,
0x38116000
,
0x38118000
,
0x3811a000
,
0x3811c000
,
0x3811e000
,
0x38120000
,
0x38122000
,
0x38124000
,
0x38126000
,
0x38128000
,
0x3812a000
,
0x3812c000
,
0x3812e000
,
0x38130000
,
0x38132000
,
0x38134000
,
0x38136000
,
0x38138000
,
0x3813a000
,
0x3813c000
,
0x3813e000
,
0x38140000
,
0x38142000
,
0x38144000
,
0x38146000
,
0x38148000
,
0x3814a000
,
0x3814c000
,
0x3814e000
,
0x38150000
,
0x38152000
,
0x38154000
,
0x38156000
,
0x38158000
,
0x3815a000
,
0x3815c000
,
0x3815e000
,
0x38160000
,
0x38162000
,
0x38164000
,
0x38166000
,
0x38168000
,
0x3816a000
,
0x3816c000
,
0x3816e000
,
0x38170000
,
0x38172000
,
0x38174000
,
0x38176000
,
0x38178000
,
0x3817a000
,
0x3817c000
,
0x3817e000
,
0x38180000
,
0x38182000
,
0x38184000
,
0x38186000
,
0x38188000
,
0x3818a000
,
0x3818c000
,
0x3818e000
,
0x38190000
,
0x38192000
,
0x38194000
,
0x38196000
,
0x38198000
,
0x3819a000
,
0x3819c000
,
0x3819e000
,
0x381a0000
,
0x381a2000
,
0x381a4000
,
0x381a6000
,
0x381a8000
,
0x381aa000
,
0x381ac000
,
0x381ae000
,
0x381b0000
,
0x381b2000
,
0x381b4000
,
0x381b6000
,
0x381b8000
,
0x381ba000
,
0x381bc000
,
0x381be000
,
0x381c0000
,
0x381c2000
,
0x381c4000
,
0x381c6000
,
0x381c8000
,
0x381ca000
,
0x381cc000
,
0x381ce000
,
0x381d0000
,
0x381d2000
,
0x381d4000
,
0x381d6000
,
0x381d8000
,
0x381da000
,
0x381dc000
,
0x381de000
,
0x381e0000
,
0x381e2000
,
0x381e4000
,
0x381e6000
,
0x381e8000
,
0x381ea000
,
0x381ec000
,
0x381ee000
,
0x381f0000
,
0x381f2000
,
0x381f4000
,
0x381f6000
,
0x381f8000
,
0x381fa000
,
0x381fc000
,
0x381fe000
,
0x38200000
,
0x38202000
,
0x38204000
,
0x38206000
,
0x38208000
,
0x3820a000
,
0x3820c000
,
0x3820e000
,
0x38210000
,
0x38212000
,
0x38214000
,
0x38216000
,
0x38218000
,
0x3821a000
,
0x3821c000
,
0x3821e000
,
0x38220000
,
0x38222000
,
0x38224000
,
0x38226000
,
0x38228000
,
0x3822a000
,
0x3822c000
,
0x3822e000
,
0x38230000
,
0x38232000
,
0x38234000
,
0x38236000
,
0x38238000
,
0x3823a000
,
0x3823c000
,
0x3823e000
,
0x38240000
,
0x38242000
,
0x38244000
,
0x38246000
,
0x38248000
,
0x3824a000
,
0x3824c000
,
0x3824e000
,
0x38250000
,
0x38252000
,
0x38254000
,
0x38256000
,
0x38258000
,
0x3825a000
,
0x3825c000
,
0x3825e000
,
0x38260000
,
0x38262000
,
0x38264000
,
0x38266000
,
0x38268000
,
0x3826a000
,
0x3826c000
,
0x3826e000
,
0x38270000
,
0x38272000
,
0x38274000
,
0x38276000
,
0x38278000
,
0x3827a000
,
0x3827c000
,
0x3827e000
,
0x38280000
,
0x38282000
,
0x38284000
,
0x38286000
,
0x38288000
,
0x3828a000
,
0x3828c000
,
0x3828e000
,
0x38290000
,
0x38292000
,
0x38294000
,
0x38296000
,
0x38298000
,
0x3829a000
,
0x3829c000
,
0x3829e000
,
0x382a0000
,
0x382a2000
,
0x382a4000
,
0x382a6000
,
0x382a8000
,
0x382aa000
,
0x382ac000
,
0x382ae000
,
0x382b0000
,
0x382b2000
,
0x382b4000
,
0x382b6000
,
0x382b8000
,
0x382ba000
,
0x382bc000
,
0x382be000
,
0x382c0000
,
0x382c2000
,
0x382c4000
,
0x382c6000
,
0x382c8000
,
0x382ca000
,
0x382cc000
,
0x382ce000
,
0x382d0000
,
0x382d2000
,
0x382d4000
,
0x382d6000
,
0x382d8000
,
0x382da000
,
0x382dc000
,
0x382de000
,
0x382e0000
,
0x382e2000
,
0x382e4000
,
0x382e6000
,
0x382e8000
,
0x382ea000
,
0x382ec000
,
0x382ee000
,
0x382f0000
,
0x382f2000
,
0x382f4000
,
0x382f6000
,
0x382f8000
,
0x382fa000
,
0x382fc000
,
0x382fe000
,
0x38300000
,
0x38302000
,
0x38304000
,
0x38306000
,
0x38308000
,
0x3830a000
,
0x3830c000
,
0x3830e000
,
0x38310000
,
0x38312000
,
0x38314000
,
0x38316000
,
0x38318000
,
0x3831a000
,
0x3831c000
,
0x3831e000
,
0x38320000
,
0x38322000
,
0x38324000
,
0x38326000
,
0x38328000
,
0x3832a000
,
0x3832c000
,
0x3832e000
,
0x38330000
,
0x38332000
,
0x38334000
,
0x38336000
,
0x38338000
,
0x3833a000
,
0x3833c000
,
0x3833e000
,
0x38340000
,
0x38342000
,
0x38344000
,
0x38346000
,
0x38348000
,
0x3834a000
,
0x3834c000
,
0x3834e000
,
0x38350000
,
0x38352000
,
0x38354000
,
0x38356000
,
0x38358000
,
0x3835a000
,
0x3835c000
,
0x3835e000
,
0x38360000
,
0x38362000
,
0x38364000
,
0x38366000
,
0x38368000
,
0x3836a000
,
0x3836c000
,
0x3836e000
,
0x38370000
,
0x38372000
,
0x38374000
,
0x38376000
,
0x38378000
,
0x3837a000
,
0x3837c000
,
0x3837e000
,
0x38380000
,
0x38382000
,
0x38384000
,
0x38386000
,
0x38388000
,
0x3838a000
,
0x3838c000
,
0x3838e000
,
0x38390000
,
0x38392000
,
0x38394000
,
0x38396000
,
0x38398000
,
0x3839a000
,
0x3839c000
,
0x3839e000
,
0x383a0000
,
0x383a2000
,
0x383a4000
,
0x383a6000
,
0x383a8000
,
0x383aa000
,
0x383ac000
,
0x383ae000
,
0x383b0000
,
0x383b2000
,
0x383b4000
,
0x383b6000
,
0x383b8000
,
0x383ba000
,
0x383bc000
,
0x383be000
,
0x383c0000
,
0x383c2000
,
0x383c4000
,
0x383c6000
,
0x383c8000
,
0x383ca000
,
0x383cc000
,
0x383ce000
,
0x383d0000
,
0x383d2000
,
0x383d4000
,
0x383d6000
,
0x383d8000
,
0x383da000
,
0x383dc000
,
0x383de000
,
0x383e0000
,
0x383e2000
,
0x383e4000
,
0x383e6000
,
0x383e8000
,
0x383ea000
,
0x383ec000
,
0x383ee000
,
0x383f0000
,
0x383f2000
,
0x383f4000
,
0x383f6000
,
0x383f8000
,
0x383fa000
,
0x383fc000
,
0x383fe000
,
0x38400000
,
0x38402000
,
0x38404000
,
0x38406000
,
0x38408000
,
0x3840a000
,
0x3840c000
,
0x3840e000
,
0x38410000
,
0x38412000
,
0x38414000
,
0x38416000
,
0x38418000
,
0x3841a000
,
0x3841c000
,
0x3841e000
,
0x38420000
,
0x38422000
,
0x38424000
,
0x38426000
,
0x38428000
,
0x3842a000
,
0x3842c000
,
0x3842e000
,
0x38430000
,
0x38432000
,
0x38434000
,
0x38436000
,
0x38438000
,
0x3843a000
,
0x3843c000
,
0x3843e000
,
0x38440000
,
0x38442000
,
0x38444000
,
0x38446000
,
0x38448000
,
0x3844a000
,
0x3844c000
,
0x3844e000
,
0x38450000
,
0x38452000
,
0x38454000
,
0x38456000
,
0x38458000
,
0x3845a000
,
0x3845c000
,
0x3845e000
,
0x38460000
,
0x38462000
,
0x38464000
,
0x38466000
,
0x38468000
,
0x3846a000
,
0x3846c000
,
0x3846e000
,
0x38470000
,
0x38472000
,
0x38474000
,
0x38476000
,
0x38478000
,
0x3847a000
,
0x3847c000
,
0x3847e000
,
0x38480000
,
0x38482000
,
0x38484000
,
0x38486000
,
0x38488000
,
0x3848a000
,
0x3848c000
,
0x3848e000
,
0x38490000
,
0x38492000
,
0x38494000
,
0x38496000
,
0x38498000
,
0x3849a000
,
0x3849c000
,
0x3849e000
,
0x384a0000
,
0x384a2000
,
0x384a4000
,
0x384a6000
,
0x384a8000
,
0x384aa000
,
0x384ac000
,
0x384ae000
,
0x384b0000
,
0x384b2000
,
0x384b4000
,
0x384b6000
,
0x384b8000
,
0x384ba000
,
0x384bc000
,
0x384be000
,
0x384c0000
,
0x384c2000
,
0x384c4000
,
0x384c6000
,
0x384c8000
,
0x384ca000
,
0x384cc000
,
0x384ce000
,
0x384d0000
,
0x384d2000
,
0x384d4000
,
0x384d6000
,
0x384d8000
,
0x384da000
,
0x384dc000
,
0x384de000
,
0x384e0000
,
0x384e2000
,
0x384e4000
,
0x384e6000
,
0x384e8000
,
0x384ea000
,
0x384ec000
,
0x384ee000
,
0x384f0000
,
0x384f2000
,
0x384f4000
,
0x384f6000
,
0x384f8000
,
0x384fa000
,
0x384fc000
,
0x384fe000
,
0x38500000
,
0x38502000
,
0x38504000
,
0x38506000
,
0x38508000
,
0x3850a000
,
0x3850c000
,
0x3850e000
,
0x38510000
,
0x38512000
,
0x38514000
,
0x38516000
,
0x38518000
,
0x3851a000
,
0x3851c000
,
0x3851e000
,
0x38520000
,
0x38522000
,
0x38524000
,
0x38526000
,
0x38528000
,
0x3852a000
,
0x3852c000
,
0x3852e000
,
0x38530000
,
0x38532000
,
0x38534000
,
0x38536000
,
0x38538000
,
0x3853a000
,
0x3853c000
,
0x3853e000
,
0x38540000
,
0x38542000
,
0x38544000
,
0x38546000
,
0x38548000
,
0x3854a000
,
0x3854c000
,
0x3854e000
,
0x38550000
,
0x38552000
,
0x38554000
,
0x38556000
,
0x38558000
,
0x3855a000
,
0x3855c000
,
0x3855e000
,
0x38560000
,
0x38562000
,
0x38564000
,
0x38566000
,
0x38568000
,
0x3856a000
,
0x3856c000
,
0x3856e000
,
0x38570000
,
0x38572000
,
0x38574000
,
0x38576000
,
0x38578000
,
0x3857a000
,
0x3857c000
,
0x3857e000
,
0x38580000
,
0x38582000
,
0x38584000
,
0x38586000
,
0x38588000
,
0x3858a000
,
0x3858c000
,
0x3858e000
,
0x38590000
,
0x38592000
,
0x38594000
,
0x38596000
,
0x38598000
,
0x3859a000
,
0x3859c000
,
0x3859e000
,
0x385a0000
,
0x385a2000
,
0x385a4000
,
0x385a6000
,
0x385a8000
,
0x385aa000
,
0x385ac000
,
0x385ae000
,
0x385b0000
,
0x385b2000
,
0x385b4000
,
0x385b6000
,
0x385b8000
,
0x385ba000
,
0x385bc000
,
0x385be000
,
0x385c0000
,
0x385c2000
,
0x385c4000
,
0x385c6000
,
0x385c8000
,
0x385ca000
,
0x385cc000
,
0x385ce000
,
0x385d0000
,
0x385d2000
,
0x385d4000
,
0x385d6000
,
0x385d8000
,
0x385da000
,
0x385dc000
,
0x385de000
,
0x385e0000
,
0x385e2000
,
0x385e4000
,
0x385e6000
,
0x385e8000
,
0x385ea000
,
0x385ec000
,
0x385ee000
,
0x385f0000
,
0x385f2000
,
0x385f4000
,
0x385f6000
,
0x385f8000
,
0x385fa000
,
0x385fc000
,
0x385fe000
,
0x38600000
,
0x38602000
,
0x38604000
,
0x38606000
,
0x38608000
,
0x3860a000
,
0x3860c000
,
0x3860e000
,
0x38610000
,
0x38612000
,
0x38614000
,
0x38616000
,
0x38618000
,
0x3861a000
,
0x3861c000
,
0x3861e000
,
0x38620000
,
0x38622000
,
0x38624000
,
0x38626000
,
0x38628000
,
0x3862a000
,
0x3862c000
,
0x3862e000
,
0x38630000
,
0x38632000
,
0x38634000
,
0x38636000
,
0x38638000
,
0x3863a000
,
0x3863c000
,
0x3863e000
,
0x38640000
,
0x38642000
,
0x38644000
,
0x38646000
,
0x38648000
,
0x3864a000
,
0x3864c000
,
0x3864e000
,
0x38650000
,
0x38652000
,
0x38654000
,
0x38656000
,
0x38658000
,
0x3865a000
,
0x3865c000
,
0x3865e000
,
0x38660000
,
0x38662000
,
0x38664000
,
0x38666000
,
0x38668000
,
0x3866a000
,
0x3866c000
,
0x3866e000
,
0x38670000
,
0x38672000
,
0x38674000
,
0x38676000
,
0x38678000
,
0x3867a000
,
0x3867c000
,
0x3867e000
,
0x38680000
,
0x38682000
,
0x38684000
,
0x38686000
,
0x38688000
,
0x3868a000
,
0x3868c000
,
0x3868e000
,
0x38690000
,
0x38692000
,
0x38694000
,
0x38696000
,
0x38698000
,
0x3869a000
,
0x3869c000
,
0x3869e000
,
0x386a0000
,
0x386a2000
,
0x386a4000
,
0x386a6000
,
0x386a8000
,
0x386aa000
,
0x386ac000
,
0x386ae000
,
0x386b0000
,
0x386b2000
,
0x386b4000
,
0x386b6000
,
0x386b8000
,
0x386ba000
,
0x386bc000
,
0x386be000
,
0x386c0000
,
0x386c2000
,
0x386c4000
,
0x386c6000
,
0x386c8000
,
0x386ca000
,
0x386cc000
,
0x386ce000
,
0x386d0000
,
0x386d2000
,
0x386d4000
,
0x386d6000
,
0x386d8000
,
0x386da000
,
0x386dc000
,
0x386de000
,
0x386e0000
,
0x386e2000
,
0x386e4000
,
0x386e6000
,
0x386e8000
,
0x386ea000
,
0x386ec000
,
0x386ee000
,
0x386f0000
,
0x386f2000
,
0x386f4000
,
0x386f6000
,
0x386f8000
,
0x386fa000
,
0x386fc000
,
0x386fe000
,
0x38700000
,
0x38702000
,
0x38704000
,
0x38706000
,
0x38708000
,
0x3870a000
,
0x3870c000
,
0x3870e000
,
0x38710000
,
0x38712000
,
0x38714000
,
0x38716000
,
0x38718000
,
0x3871a000
,
0x3871c000
,
0x3871e000
,
0x38720000
,
0x38722000
,
0x38724000
,
0x38726000
,
0x38728000
,
0x3872a000
,
0x3872c000
,
0x3872e000
,
0x38730000
,
0x38732000
,
0x38734000
,
0x38736000
,
0x38738000
,
0x3873a000
,
0x3873c000
,
0x3873e000
,
0x38740000
,
0x38742000
,
0x38744000
,
0x38746000
,
0x38748000
,
0x3874a000
,
0x3874c000
,
0x3874e000
,
0x38750000
,
0x38752000
,
0x38754000
,
0x38756000
,
0x38758000
,
0x3875a000
,
0x3875c000
,
0x3875e000
,
0x38760000
,
0x38762000
,
0x38764000
,
0x38766000
,
0x38768000
,
0x3876a000
,
0x3876c000
,
0x3876e000
,
0x38770000
,
0x38772000
,
0x38774000
,
0x38776000
,
0x38778000
,
0x3877a000
,
0x3877c000
,
0x3877e000
,
0x38780000
,
0x38782000
,
0x38784000
,
0x38786000
,
0x38788000
,
0x3878a000
,
0x3878c000
,
0x3878e000
,
0x38790000
,
0x38792000
,
0x38794000
,
0x38796000
,
0x38798000
,
0x3879a000
,
0x3879c000
,
0x3879e000
,
0x387a0000
,
0x387a2000
,
0x387a4000
,
0x387a6000
,
0x387a8000
,
0x387aa000
,
0x387ac000
,
0x387ae000
,
0x387b0000
,
0x387b2000
,
0x387b4000
,
0x387b6000
,
0x387b8000
,
0x387ba000
,
0x387bc000
,
0x387be000
,
0x387c0000
,
0x387c2000
,
0x387c4000
,
0x387c6000
,
0x387c8000
,
0x387ca000
,
0x387cc000
,
0x387ce000
,
0x387d0000
,
0x387d2000
,
0x387d4000
,
0x387d6000
,
0x387d8000
,
0x387da000
,
0x387dc000
,
0x387de000
,
0x387e0000
,
0x387e2000
,
0x387e4000
,
0x387e6000
,
0x387e8000
,
0x387ea000
,
0x387ec000
,
0x387ee000
,
0x387f0000
,
0x387f2000
,
0x387f4000
,
0x387f6000
,
0x387f8000
,
0x387fa000
,
0x387fc000
,
0x387fe000
};
static
const
uint16_t
offsettable
[
64
]
=
{
0x0000
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0000
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
,
0x0400
};
static
const
uint32_t
exponenttable
[
64
]
=
{
0x00000000
,
0x00800000
,
0x01000000
,
0x01800000
,
0x02000000
,
0x02800000
,
0x03000000
,
0x03800000
,
0x04000000
,
0x04800000
,
0x05000000
,
0x05800000
,
0x06000000
,
0x06800000
,
0x07000000
,
0x07800000
,
0x08000000
,
0x08800000
,
0x09000000
,
0x09800000
,
0x0a000000
,
0x0a800000
,
0x0b000000
,
0x0b800000
,
0x0c000000
,
0x0c800000
,
0x0d000000
,
0x0d800000
,
0x0e000000
,
0x0e800000
,
0x0f000000
,
0x47800000
,
0x80000000
,
0x80800000
,
0x81000000
,
0x81800000
,
0x82000000
,
0x82800000
,
0x83000000
,
0x83800000
,
0x84000000
,
0x84800000
,
0x85000000
,
0x85800000
,
0x86000000
,
0x86800000
,
0x87000000
,
0x87800000
,
0x88000000
,
0x88800000
,
0x89000000
,
0x89800000
,
0x8a000000
,
0x8a800000
,
0x8b000000
,
0x8b800000
,
0x8c000000
,
0x8c800000
,
0x8d000000
,
0x8d800000
,
0x8e000000
,
0x8e800000
,
0x8f000000
,
0xc7800000
};
static
const
uint16_t
basetable
[
512
]
=
{
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0000
,
0x0001
,
0x0002
,
0x0004
,
0x0008
,
0x0010
,
0x0020
,
0x0040
,
0x0080
,
0x0100
,
0x0200
,
0x0400
,
0x0800
,
0x0c00
,
0x1000
,
0x1400
,
0x1800
,
0x1c00
,
0x2000
,
0x2400
,
0x2800
,
0x2c00
,
0x3000
,
0x3400
,
0x3800
,
0x3c00
,
0x4000
,
0x4400
,
0x4800
,
0x4c00
,
0x5000
,
0x5400
,
0x5800
,
0x5c00
,
0x6000
,
0x6400
,
0x6800
,
0x6c00
,
0x7000
,
0x7400
,
0x7800
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x7c00
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8000
,
0x8001
,
0x8002
,
0x8004
,
0x8008
,
0x8010
,
0x8020
,
0x8040
,
0x8080
,
0x8100
,
0x8200
,
0x8400
,
0x8800
,
0x8c00
,
0x9000
,
0x9400
,
0x9800
,
0x9c00
,
0xa000
,
0xa400
,
0xa800
,
0xac00
,
0xb000
,
0xb400
,
0xb800
,
0xbc00
,
0xc000
,
0xc400
,
0xc800
,
0xcc00
,
0xd000
,
0xd400
,
0xd800
,
0xdc00
,
0xe000
,
0xe400
,
0xe800
,
0xec00
,
0xf000
,
0xf400
,
0xf800
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
,
0xfc00
};
static
const
uint8_t
shifttable
[
512
]
=
{
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x17
,
0x16
,
0x15
,
0x14
,
0x13
,
0x12
,
0x11
,
0x10
,
0x0f
,
0x0e
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x0d
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x17
,
0x16
,
0x15
,
0x14
,
0x13
,
0x12
,
0x11
,
0x10
,
0x0f
,
0x0e
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x0d
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x18
,
0x0d
};
half_t
Float2Half
(
float
f
)
{
uint32_t
v
=
*
reinterpret_cast
<
uint32_t
*>
(
&
f
);
return
basetable
[(
v
>>
23
)
&
0x1ff
]
+
((
v
&
0x007fffff
)
>>
shifttable
[(
v
>>
23
)
&
0x1ff
]);
}
float
Half2Float
(
half_t
h
)
{
uint32_t
v
=
mantissatable
[
offsettable
[
h
>>
10
]
+
(
h
&
0x3ff
)]
+
exponenttable
[
h
>>
10
];
return
*
reinterpret_cast
<
float
*>
(
&
v
);
}
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
h_array
[
i
]
=
Float2Half
(
f_array
[
i
]);
}
}
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
)
{
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
f_array
[
i
]
=
Half2Float
(
h_array
[
i
]);
}
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_half.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace
paddle
{
namespace
lite
{
typedef
uint16_t
half_t
;
half_t
Float2Half
(
float
f
);
float
Half2Float
(
half_t
h
);
void
FloatArray2HalfArray
(
float
*
f_array
,
half_t
*
h_array
,
int
count
);
void
HalfArray2FloatArray
(
half_t
*
h_array
,
float
*
f_array
,
int
count
);
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_helper.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace
paddle
{
namespace
lite
{
void
CLHelper
::
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
)
{
VLOG
(
3
)
<<
" --- begin to add kernel ---"
;
auto
kernel
=
context_
->
GetKernel
(
kernel_name
,
file_name
,
options
);
kernels
.
emplace_back
(
std
::
move
(
kernel
));
VLOG
(
3
)
<<
" --- end to add kernel --- "
;
}
cl
::
Kernel
&
CLHelper
::
KernelAt
(
const
int
index
)
{
VLOG
(
3
)
<<
" --- kernel count: "
<<
kernels
.
size
()
<<
" --- "
;
return
*
(
kernels
[
index
]);
}
cl
::
CommandQueue
&
CLHelper
::
OpenCLCommandQueue
()
{
return
context_
->
GetCommandQueue
();
}
cl
::
Context
&
CLHelper
::
OpenCLContext
()
{
return
context_
->
GetContext
();
}
std
::
vector
<
size_t
>
CLHelper
::
DefaultWorkSize
(
const
CLImage
&
image
)
{
// n c h w
auto
image_dim
=
image
.
tensor_dims
();
if
(
image_dim
.
size
()
==
4
)
{
auto
n
=
image_dim
[
0
];
auto
h
=
image_dim
[
2
];
auto
w
=
image_dim
[
3
];
auto
image_width
=
image
.
ImageWidth
();
auto
work_size_0
=
image_width
/
w
;
auto
work_size_1
=
w
;
auto
work_size_2
=
n
*
h
;
return
{
static_cast
<
size_t
>
(
work_size_0
),
static_cast
<
size_t
>
(
work_size_1
),
static_cast
<
size_t
>
(
work_size_2
)};
}
else
if
(
image_dim
.
size
()
==
2
)
{
return
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
static_cast
<
size_t
>
(
image
.
ImageHeight
())};
}
else
if
(
image_dim
.
size
()
==
1
)
{
return
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
image
.
ImageWidth
()),
static_cast
<
size_t
>
(
1
)};
}
else
if
(
image_dim
.
size
()
==
3
)
{
auto
c
=
image_dim
[
0
];
auto
h
=
image_dim
[
1
];
auto
w
=
image_dim
[
2
];
return
{
static_cast
<
size_t
>
((
c
+
3
)
/
4
),
static_cast
<
size_t
>
(
w
),
static_cast
<
size_t
>
(
h
)};
}
else
{
LOG
(
FATAL
)
<<
"Not support this dimension, need to be implemented!"
;
return
{};
}
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_helper.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
namespace
paddle
{
namespace
lite
{
class
CLHelper
{
public:
CLHelper
()
=
default
;
explicit
CLHelper
(
CLContext
*
context
)
:
context_
(
context
)
{}
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
=
""
);
cl
::
Kernel
&
KernelAt
(
const
int
index
);
cl
::
CommandQueue
&
OpenCLCommandQueue
();
cl
::
Context
&
OpenCLContext
();
std
::
vector
<
size_t
>
DefaultWorkSize
(
const
CLImage
&
image
);
private:
CLContext
*
context_
;
std
::
vector
<
std
::
unique_ptr
<
cl
::
Kernel
>>
kernels
;
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
CLImage
&
cl_image
)
{
int
width
=
cl_image
.
image_dims_
[
0
];
int
height
=
cl_image
.
image_dims_
[
1
];
half_t
*
image_data
=
new
half_t
[
height
*
width
*
4
];
cl
::
Image2D
&
image
=
cl_image
.
cl_image
();
const
std
::
array
<
size_t
,
3
>
origin
{
0
,
0
,
0
};
const
std
::
array
<
size_t
,
3
>
region
{
static_cast
<
size_t
>
(
width
),
static_cast
<
size_t
>
(
height
),
1
};
cl_int
err
=
CLEngine
::
Global
()
->
command_queue
().
enqueueReadImage
(
image
,
CL_TRUE
,
origin
,
region
,
0
,
0
,
image_data
,
nullptr
,
nullptr
);
CL_CHECK_ERRORS
(
err
);
float
*
tensor_data
=
new
float
[
cl_image
.
numel
()];
auto
*
converter
=
cl_image
.
image_converter
();
converter
->
ImageToNCHW
(
image_data
,
tensor_data
,
cl_image
.
image_dims_
,
cl_image
.
tensor_dims_
);
int
stride
=
cl_image
.
numel
()
/
20
;
stride
=
stride
>
0
?
stride
:
1
;
os
<<
" dims: "
<<
cl_image
.
tensor_dims_
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
cl_image
.
numel
();
i
+=
stride
)
{
os
<<
tensor_data
[
i
]
<<
" "
;
}
delete
[]
tensor_data
;
delete
[]
image_data
;
return
os
;
}
void
CLImage
::
SetTensorData
(
float
*
tensor_data
,
const
DDim
&
dim
)
{
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
auto
numel
=
dim
.
product
();
#else
auto
numel
=
dim
.
production
();
#endif
tensor_data_
.
reset
(
new
float
[
numel
]);
memcpy
(
tensor_data_
.
get
(),
tensor_data
,
numel
*
sizeof
(
float
));
tensor_dims_
=
dim
;
}
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
image_converter_
.
reset
(
new
CLImageConverterFolder
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitNormalCLImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
image_converter_
.
reset
(
new
CLImageConverterNormal
);
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitNImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
CHECK
(
tensor_dims_
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
image_converter_
.
reset
(
new
CLImageConverterNWBlock
());
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitDWImage
(
const
cl
::
Context
&
context
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
CHECK
(
tensor_dims_
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
image_converter_
.
reset
(
new
CLImageConverterDWBlock
());
InitCLImage
(
context
,
image_converter_
.
get
());
}
void
CLImage
::
InitEmptyImage
(
const
cl
::
Context
&
context
,
const
DDim
&
dim
)
{
CHECK
(
tensor_data_
==
nullptr
)
<<
" Empty image tensor data shouldn't have value"
;
tensor_dims_
=
dim
;
image_converter_
.
reset
(
new
CLImageConverterNormal
());
VLOG
(
3
)
<<
" to get image dims "
;
image_dims_
=
image_converter_
->
InitImageDimInfoWith
(
tensor_dims_
);
VLOG
(
3
)
<<
" end get image dims "
<<
image_dims_
;
InitCLImage
(
context
,
image_dims_
[
0
],
image_dims_
[
1
],
nullptr
);
cl_event_
=
CLEngine
::
Global
()
->
CreateEvent
(
context
);
initialized_
=
true
;
VLOG
(
3
)
<<
" end init cl image "
;
}
void
CLImage
::
InitEmptyWithImageDim
(
const
cl
::
Context
&
context
,
const
DDim
&
image_dims
)
{
VLOG
(
3
)
<<
" to get image dims "
;
image_dims_
=
image_dims
;
VLOG
(
3
)
<<
" end get image dims "
<<
image_dims_
;
InitCLImage
(
context
,
image_dims_
[
0
],
image_dims_
[
1
],
nullptr
);
cl_event_
=
CLEngine
::
Global
()
->
CreateEvent
(
context
);
initialized_
=
true
;
VLOG
(
3
)
<<
" end init cl image"
;
}
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
,
CLImageConverterBase
*
converter
)
{
CHECK
(
tensor_data_
!=
nullptr
)
<<
" Please call SetTensorData first!"
;
VLOG
(
3
)
<<
" begin init cl image "
;
image_dims_
=
converter
->
InitImageDimInfoWith
(
tensor_dims_
);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
half_t
*
image_data
=
new
half_t
[
image_dims_
.
product
()
*
4
];
#else
half_t
*
image_data
=
new
half_t
[
image_dims_
.
production
()
*
4
];
#endif
VLOG
(
3
)
<<
" convert to image "
;
converter
->
NCHWToImage
(
tensor_data_
.
get
(),
image_data
,
tensor_dims_
);
VLOG
(
3
)
<<
" end convert to image "
;
InitCLImage
(
context
,
image_dims_
[
0
],
image_dims_
[
1
],
image_data
);
delete
[]
image_data
;
tensor_data_
=
nullptr
;
cl_event_
=
CLEngine
::
Global
()
->
CreateEvent
(
context
);
initialized_
=
true
;
VLOG
(
3
)
<<
" end init cl image "
;
}
void
CLImage
::
InitCLImage
(
const
cl
::
Context
&
context
,
int
width
,
int
height
,
void
*
data
)
{
cl
::
ImageFormat
img_format
(
CL_RGBA
,
CL_HALF_FLOAT
);
cl_int
err
;
cl_image_
.
reset
(
new
cl
::
Image2D
(
context
,
CL_MEM_READ_WRITE
|
(
data
?
CL_MEM_COPY_HOST_PTR
:
0
),
img_format
,
width
,
height
,
0
,
data
,
&
err
));
CL_CHECK_ERRORS
(
err
);
CHECK
(
err
==
CL_SUCCESS
)
<<
" Create image 2d error."
;
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
namespace
paddle
{
namespace
lite
{
class
CLImage
{
// For debug
friend
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
CLImage
&
image
);
public:
CLImage
()
=
default
;
/*
* Will not hold input tensor data, memcpy in this method.
* */
void
SetTensorData
(
float
*
tensor_data
,
const
DDim
&
dim
);
bool
IsInit
()
{
return
initialized_
;
}
/*
* Need call SetTensorData first.
* Folder when one dim or two dim.
* */
void
InitCLImage
(
const
cl
::
Context
&
context
);
void
InitNormalCLImage
(
const
cl
::
Context
&
context
);
void
InitNImage
(
const
cl
::
Context
&
context
);
void
InitDWImage
(
const
cl
::
Context
&
context
);
void
InitEmptyImage
(
const
cl
::
Context
&
context
,
const
DDim
&
dim
);
void
InitEmptyWithImageDim
(
const
cl
::
Context
&
context
,
const
DDim
&
image_dims
);
cl
::
Image2D
&
cl_image
()
const
{
return
*
cl_image_
;
}
const
DDim
&
image_dims
()
const
{
return
image_dims_
;
}
inline
size_t
ImageWidth
()
const
{
return
image_dims_
[
0
];
}
inline
size_t
ImageHeight
()
const
{
return
image_dims_
[
1
];
}
const
DDim
&
tensor_dims
()
const
{
return
tensor_dims_
;
}
/*
* Resize original tensor dim.
* */
inline
CLImage
&
Resize
(
const
DDim
&
dims
)
{
tensor_dims_
=
dims
;
return
*
this
;
}
template
<
typename
T
>
T
*
data
()
const
{
CHECK
(
!
initialized_
)
<<
"CL image has initialized, tensor data has been "
"deleted, can't use tensor data!"
;
return
reinterpret_cast
<
T
*>
(
tensor_data_
);
}
/*
* Numel of tensor dim
* */
inline
int64_t
numel
()
const
{
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
return
tensor_dims_
.
product
();
#else
return
tensor_dims_
.
production
();
#endif
}
/*
* Original tensor dim
* */
cl
::
UserEvent
&
cl_event
()
const
{
return
*
cl_event_
;
}
CLImageConverterBase
*
image_converter
()
const
{
return
image_converter_
.
get
();
}
private:
void
InitCLImage
(
const
cl
::
Context
&
context
,
CLImageConverterBase
*
converter
);
void
InitCLImage
(
const
cl
::
Context
&
context
,
int
width
,
int
height
,
void
*
data
);
bool
initialized_
=
false
;
std
::
unique_ptr
<
cl
::
Image2D
>
cl_image_
{
nullptr
};
std
::
unique_ptr
<
cl
::
UserEvent
>
cl_event_
{
nullptr
};
DDim
tensor_dims_
;
DDim
image_dims_
;
std
::
unique_ptr
<
float
>
tensor_data_
{
nullptr
};
std
::
unique_ptr
<
CLImageConverterBase
>
image_converter_
{
nullptr
};
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>
namespace
paddle
{
namespace
lite
{
DDim
CLImageConverterDefault
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
void
CLImageConverterDefault
::
NCHWToImage
(
float
*
nchw
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
DDim
in_image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
VLOG
(
3
)
<<
" tensor dim: "
<<
tensor_dim
;
VLOG
(
3
)
<<
" image dim: "
<<
in_image_dim
;
size_t
width
=
in_image_dim
[
0
];
size_t
w_block
=
width
/
W
;
float
*
p
=
nchw
;
size_t
i0
=
0
;
for
(
size_t
n
=
0
;
n
<
N
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
w_block
*
4
;
c
++
)
{
size_t
i1
=
i0
+
(
c
/
4
)
*
W
;
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
if
(
c
<
C
)
{
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image
[
i2
]
=
Float2Half
(
*
p
);
i2
+=
4
;
p
++
;
}
else
{
image
[
i2
]
=
0.0
;
i2
+=
4
;
}
}
i1
+=
width
;
}
}
i0
+=
width
*
H
;
}
}
void
CLImageConverterDefault
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
size_t
width
=
image_dim
[
0
];
float
*
p
=
tensor
;
size_t
i0
=
0
;
for
(
size_t
n
=
0
;
n
<
N
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
C
;
c
++
)
{
size_t
i1
=
i0
+
(
c
/
4
)
*
W
;
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
*
p
=
Half2Float
(
image
[
i2
]);
i2
+=
4
;
p
++
;
}
i1
+=
width
;
}
}
i0
+=
width
*
H
;
}
}
DDim
CLImageConverterFolder
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
if
(
tensor_dim
.
size
()
<=
2
)
{
size_t
tdim
[
2
]
=
{
1
,
1
};
if
(
tensor_dim
.
size
()
==
1
)
{
tdim
[
1
]
=
tensor_dim
[
0
];
}
else
{
tdim
[
0
]
=
tensor_dim
[
0
];
tdim
[
1
]
=
tensor_dim
[
1
];
}
size_t
width
=
(
tdim
[
1
]
+
3
)
/
4
;
size_t
height
=
tdim
[
0
];
width_of_one_block_
=
width
;
height_of_one_block_
=
height
;
c_block_
=
1
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
else
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
c_block_
=
width
/
W
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
}
void
CLImageConverterFolder
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
if
(
tensor_dim
.
size
()
>
2
)
{
CLImageConverterDefault
default_converter
;
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
}
else
{
size_t
tdim
[
2
]
=
{
1
,
1
};
if
(
tensor_dim
.
size
()
==
1
)
{
tdim
[
1
]
=
tensor_dim
[
0
];
}
else
{
tdim
[
0
]
=
tensor_dim
[
0
];
tdim
[
1
]
=
tensor_dim
[
1
];
}
DDim
image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
size_t
width
=
image_dim
[
0
];
for
(
size_t
h
=
0
;
h
<
tdim
[
0
];
h
++
)
{
for
(
size_t
w
=
0
;
w
<
tdim
[
1
];
w
++
)
{
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]
=
Float2Half
(
tensor
[
h
*
tdim
[
1
]
+
w
]);
}
}
}
}
void
CLImageConverterFolder
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
if
(
tensor_dim
.
size
()
>
2
)
{
CLImageConverterDefault
default_converter
;
default_converter
.
ImageToNCHW
(
image
,
tensor
,
image_dim
,
tensor_dim
);
}
else
{
size_t
width
=
image_dim
[
0
];
size_t
H
=
1
,
W
=
1
;
if
(
tensor_dim
.
size
()
==
2
)
{
H
=
tensor_dim
[
0
];
W
=
tensor_dim
[
1
];
}
else
if
(
tensor_dim
.
size
()
==
1
)
{
W
=
tensor_dim
[
0
];
}
float
*
p
=
tensor
;
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
p
[
h
*
W
+
w
]
=
Half2Float
(
image
[(
h
*
width
+
w
/
4
)
*
4
+
(
w
%
4
)]);
}
}
}
}
DDim
CLImageConverterNWBlock
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
size_t
N
,
C
,
H
,
W
;
N
=
tensor_dim
[
0
];
C
=
tensor_dim
[
1
];
H
=
tensor_dim
[
2
];
W
=
tensor_dim
[
3
];
size_t
width
=
W
*
((
N
+
3
)
/
4
);
size_t
height
=
C
*
H
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
void
CLImageConverterNWBlock
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
auto
image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
float
*
p
=
tensor
;
size_t
N
=
tensor_dim
[
0
];
size_t
C
=
tensor_dim
[
1
];
size_t
H
=
tensor_dim
[
2
];
size_t
W
=
tensor_dim
[
3
];
size_t
width
=
image_dim
[
0
];
size_t
height
=
image_dim
[
1
];
size_t
block
=
image_dim
[
0
]
/
tensor_dim
[
3
];
for
(
size_t
n
=
0
;
n
<
block
*
4
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
C
;
c
++
)
{
for
(
size_t
h
=
0
;
h
<
H
;
++
h
)
{
for
(
size_t
w
=
0
;
w
<
W
;
++
w
)
{
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
w
*
4
+
n
%
4
;
if
(
n
<
N
)
{
image
[
index
]
=
Float2Half
(
*
p
);
p
++
;
}
else
{
image
[
index
]
=
0.0
;
}
if
(
index
>=
(
width
*
height
*
4
))
{
LOG
(
INFO
)
<<
" index out of range "
;
}
}
}
}
}
VLOG
(
3
)
<<
" init done"
;
}
void
CLImageConverterNWBlock
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
float
*
p
=
tensor
;
size_t
N
=
tensor_dim
[
0
];
size_t
C
=
tensor_dim
[
1
];
size_t
H
=
tensor_dim
[
2
];
size_t
W
=
tensor_dim
[
3
];
size_t
width
=
image_dim
[
0
];
size_t
height
=
image_dim
[
1
];
for
(
size_t
n
=
0
;
n
<
N
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
C
;
c
++
)
{
for
(
size_t
h
=
0
;
h
<
H
;
++
h
)
{
for
(
size_t
w
=
0
;
w
<
W
;
++
w
)
{
size_t
index
=
4
*
c
*
(
width
*
H
)
+
4
*
h
*
width
+
4
*
W
*
(
n
/
4
)
+
w
*
4
+
n
%
4
;
*
p
=
Half2Float
(
image
[
index
]);
p
++
;
if
(
index
>=
(
width
*
height
*
4
))
{
LOG
(
INFO
)
<<
" index out of range "
;
}
}
}
}
}
VLOG
(
3
)
<<
" init done"
;
}
DDim
CLImageConverterDWBlock
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
size_t
N
,
C
,
H
,
W
;
N
=
tensor_dim
[
0
];
C
=
tensor_dim
[
1
];
H
=
tensor_dim
[
2
];
W
=
tensor_dim
[
3
];
size_t
width
=
W
*
((
N
+
3
)
/
4
);
size_t
height
=
C
*
H
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
void
CLImageConverterDWBlock
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
1
];
C
=
new_dims
[
0
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
DDim
in_image_dim
=
InitImageDimInfoWith
(
tensor_dim
);
VLOG
(
3
)
<<
" tensor dim: "
<<
tensor_dim
;
VLOG
(
3
)
<<
" image dim: "
<<
in_image_dim
;
size_t
width
=
in_image_dim
[
0
];
size_t
w_block
=
width
/
W
;
float
*
p
=
tensor
;
size_t
i0
=
0
;
for
(
size_t
n
=
0
;
n
<
N
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
w_block
*
4
;
c
++
)
{
size_t
i1
=
i0
+
(
c
/
4
)
*
W
;
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
if
(
c
<
C
)
{
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image
[
i2
]
=
Float2Half
(
*
p
);
i2
+=
4
;
p
++
;
}
else
{
image
[
i2
]
=
0.0
;
i2
+=
4
;
}
}
i1
+=
width
;
}
}
i0
+=
width
*
H
;
}
}
void
CLImageConverterDWBlock
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
float
*
p
=
tensor
;
size_t
N
=
tensor_dim
[
1
];
size_t
C
=
tensor_dim
[
0
];
size_t
H
=
tensor_dim
[
2
];
size_t
W
=
tensor_dim
[
3
];
size_t
width
=
image_dim
[
0
];
size_t
i0
=
0
;
for
(
size_t
n
=
0
;
n
<
N
;
n
++
)
{
for
(
size_t
c
=
0
;
c
<
C
;
c
++
)
{
size_t
i1
=
i0
+
(
c
/
4
)
*
W
;
for
(
size_t
h
=
0
;
h
<
H
;
h
++
)
{
size_t
i2
=
(
i1
<<
2
)
+
c
%
4
;
for
(
size_t
w
=
0
;
w
<
W
;
w
++
)
{
*
p
=
Half2Float
(
image
[
i2
]);
i2
+=
4
;
p
++
;
}
i1
+=
width
;
}
}
i0
+=
width
*
H
;
}
}
DDim
CLImageConverterNormal
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
size_t
new_dims
[]
=
{
1
,
1
,
1
,
1
};
for
(
size_t
j
=
0
;
j
<
tensor_dim
.
size
();
++
j
)
{
new_dims
[
4
-
tensor_dim
.
size
()
+
j
]
=
tensor_dim
[
j
];
}
size_t
N
,
C
,
H
,
W
;
N
=
new_dims
[
0
];
C
=
new_dims
[
1
];
H
=
new_dims
[
2
];
W
=
new_dims
[
3
];
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
c_block_
=
width
/
W
;
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
void
CLImageConverterNormal
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
<=
4
&&
tensor_dim
.
size
()
>
0
)
<<
" Tensor dim is not support!"
;
CLImageConverterDefault
default_converter
;
default_converter
.
NCHWToImage
(
tensor
,
image
,
tensor_dim
);
}
void
CLImageConverterNormal
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{
CLImageConverterDefault
default_converter
;
default_converter
.
ImageToNCHW
(
image
,
tensor
,
image_dim
,
tensor_dim
);
}
DDim
CLImageConverterWinoTransWeight
::
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
{
CHECK
(
tensor_dim
.
size
()
==
4
)
<<
" Tensor dim is not 4."
;
size_t
N
,
C
;
N
=
tensor_dim
[
0
];
C
=
tensor_dim
[
1
];
size_t
width
=
(
C
+
3
)
/
4
;
size_t
height
=
N
*
16
;
// N * (wino_blk_size + 2) * (wino_blk_size + 2)
return
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
static_cast
<
DDim
::
value_type
>
(
width
),
static_cast
<
DDim
::
value_type
>
(
height
)}));
}
void
CLImageConverterWinoTransWeight
::
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
{}
void
CLImageConverterWinoTransWeight
::
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
{}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_image_converter.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace
paddle
{
namespace
lite
{
class
CLImageConverterBase
{
public:
virtual
~
CLImageConverterBase
()
{}
virtual
void
NCHWToImage
(
float
*
nchw
,
half_t
*
image
,
const
DDim
&
tensor_dim
)
=
0
;
virtual
void
ImageToNCHW
(
half_t
*
image
,
float
*
nchw
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
)
=
0
;
virtual
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
)
=
0
;
};
class
CLImageConverterDefault
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
nchw
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterFolder
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
/*
* width of original tensor
* */
inline
size_t
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
/*
* height of original tensor
* */
inline
size_t
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
int
GetCBlock
()
const
{
return
c_block_
;
}
private:
int
c_block_
;
int
width_of_one_block_
;
int
height_of_one_block_
;
};
class
CLImageConverterNormal
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
/*
* width of original tensor
* */
inline
size_t
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
/*
* height of original tensor
* */
inline
size_t
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
int
GetCBlock
()
const
{
return
c_block_
;
}
private:
int
c_block_
;
int
width_of_one_block_
;
int
height_of_one_block_
;
};
class
CLImageConverterNWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterDWBlock
:
public
CLImageConverterBase
{
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
class
CLImageConverterWinoTransWeight
:
public
CLImageConverterBase
{
public:
DDim
InitImageDimInfoWith
(
const
DDim
&
tensor_dim
);
void
NCHWToImage
(
float
*
tensor
,
half_t
*
image
,
const
DDim
&
tensor_dim
);
void
ImageToNCHW
(
half_t
*
image
,
float
*
tensor
,
const
DDim
&
image_dim
,
const
DDim
&
tensor_dim
);
};
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
0 → 100644
浏览文件 @
bdf5e1fd
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
batchnorm
(
__private
const
int
out_width,
__read_only
image2d_t
input,
__read_only
image2d_t
new_scale_image,
__read_only
image2d_t
new_bias_image,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
float4
new_scale
=
read_imagef
(
new_scale_image,
sampler,
(
int2
)(
out_c,
0
))
;
float4
new_bias
=
read_imagef
(
new_bias_image,
sampler,
(
int2
)(
out_c,
0
))
;
int
pos_x
=
mad24
(
out_c,
out_width,
out_w
)
;
float4
in
=
read_imagef
(
input,
sampler,
(
int2
)(
pos_x,
out_nh
))
;
float4
out
=
mad
(
in,
new_scale,
new_bias
)
;
write_imagef
(
output,
(
int2
)(
pos_x,
out_nh
)
,
out
)
;
}
paddle/fluid/lite/opencl/cl_kernel/cl_common.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline
half4
activation
(
half4
in
#ifdef PRELU
,
half4
prelu_alpha
#endif
)
{
half4
output
;
#ifdef PRELU
output
=
select
(
prelu_alpha
*
in
,
in
,
in
>=
(
half4
)
0
.
0
);
#endif
#ifdef RELU
output
=
fmax
(
in
,
(
half4
)(
0
.
0
f
));
#endif
return
output
;
}
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
0 → 100644
浏览文件 @
bdf5e1fd
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
elementwise_add
(
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__write_only
image2d_t
outputImage
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int2
coords
;
coords.x
=
x
;
coords.y
=
y
;
float4
in
=
read_imagef
(
input,
sampler,
coords
)
;
float4
b
=
read_imagef
(
bias,
sampler,
coords
)
;
float4
output
=
in
+
b
;
write_imagef
(
outputImage,
coords,
output
)
;
}
paddle/fluid/lite/opencl/cl_test.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
namespace
paddle
{
namespace
lite
{
TEST
(
cl_test
,
engine_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
"/work/Develop/Paddle/paddle/fluid/lite/opencl"
);
engine
->
platform
();
engine
->
device
();
engine
->
command_queue
();
auto
&
context
=
engine
->
context
();
auto
program
=
engine
->
CreateProgram
(
context
,
engine
->
cl_path
()
+
"/cl_kernel/"
+
"elementwise_add_kernel.cl"
);
auto
event
=
engine
->
CreateEvent
(
context
);
CHECK
(
engine
->
BuildProgram
(
program
.
get
()));
}
TEST
(
cl_test
,
context_test
)
{
auto
*
engine
=
CLEngine
::
Global
();
CHECK
(
engine
->
IsInitSuccess
());
engine
->
set_cl_path
(
"/work/Develop/Paddle/paddle/fluid/lite/opencl"
);
CLContext
context
;
context
.
GetKernel
(
"batchnorm"
,
"batchnorm_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
context
.
GetKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
,
""
);
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_tool.cc
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace
paddle
{
namespace
lite
{
const
char
*
opencl_error_to_str
(
cl_int
error
)
{
#define CASE_CL_CONSTANT(NAME) \
case NAME: \
return #NAME;
// Suppose that no combinations are possible.
switch
(
error
)
{
CASE_CL_CONSTANT
(
CL_SUCCESS
)
CASE_CL_CONSTANT
(
CL_DEVICE_NOT_FOUND
)
CASE_CL_CONSTANT
(
CL_DEVICE_NOT_AVAILABLE
)
CASE_CL_CONSTANT
(
CL_COMPILER_NOT_AVAILABLE
)
CASE_CL_CONSTANT
(
CL_MEM_OBJECT_ALLOCATION_FAILURE
)
CASE_CL_CONSTANT
(
CL_OUT_OF_RESOURCES
)
CASE_CL_CONSTANT
(
CL_OUT_OF_HOST_MEMORY
)
CASE_CL_CONSTANT
(
CL_PROFILING_INFO_NOT_AVAILABLE
)
CASE_CL_CONSTANT
(
CL_MEM_COPY_OVERLAP
)
CASE_CL_CONSTANT
(
CL_IMAGE_FORMAT_MISMATCH
)
CASE_CL_CONSTANT
(
CL_IMAGE_FORMAT_NOT_SUPPORTED
)
CASE_CL_CONSTANT
(
CL_BUILD_PROGRAM_FAILURE
)
CASE_CL_CONSTANT
(
CL_MAP_FAILURE
)
CASE_CL_CONSTANT
(
CL_MISALIGNED_SUB_BUFFER_OFFSET
)
CASE_CL_CONSTANT
(
CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST
)
CASE_CL_CONSTANT
(
CL_INVALID_VALUE
)
CASE_CL_CONSTANT
(
CL_INVALID_DEVICE_TYPE
)
CASE_CL_CONSTANT
(
CL_INVALID_PLATFORM
)
CASE_CL_CONSTANT
(
CL_INVALID_DEVICE
)
CASE_CL_CONSTANT
(
CL_INVALID_CONTEXT
)
CASE_CL_CONSTANT
(
CL_INVALID_QUEUE_PROPERTIES
)
CASE_CL_CONSTANT
(
CL_INVALID_COMMAND_QUEUE
)
CASE_CL_CONSTANT
(
CL_INVALID_HOST_PTR
)
CASE_CL_CONSTANT
(
CL_INVALID_MEM_OBJECT
)
CASE_CL_CONSTANT
(
CL_INVALID_IMAGE_FORMAT_DESCRIPTOR
)
CASE_CL_CONSTANT
(
CL_INVALID_IMAGE_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_SAMPLER
)
CASE_CL_CONSTANT
(
CL_INVALID_BINARY
)
CASE_CL_CONSTANT
(
CL_INVALID_BUILD_OPTIONS
)
CASE_CL_CONSTANT
(
CL_INVALID_PROGRAM
)
CASE_CL_CONSTANT
(
CL_INVALID_PROGRAM_EXECUTABLE
)
CASE_CL_CONSTANT
(
CL_INVALID_KERNEL_NAME
)
CASE_CL_CONSTANT
(
CL_INVALID_KERNEL_DEFINITION
)
CASE_CL_CONSTANT
(
CL_INVALID_KERNEL
)
CASE_CL_CONSTANT
(
CL_INVALID_ARG_INDEX
)
CASE_CL_CONSTANT
(
CL_INVALID_ARG_VALUE
)
CASE_CL_CONSTANT
(
CL_INVALID_ARG_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_KERNEL_ARGS
)
CASE_CL_CONSTANT
(
CL_INVALID_WORK_DIMENSION
)
CASE_CL_CONSTANT
(
CL_INVALID_WORK_GROUP_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_WORK_ITEM_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_GLOBAL_OFFSET
)
CASE_CL_CONSTANT
(
CL_INVALID_EVENT_WAIT_LIST
)
CASE_CL_CONSTANT
(
CL_INVALID_EVENT
)
CASE_CL_CONSTANT
(
CL_INVALID_OPERATION
)
CASE_CL_CONSTANT
(
CL_INVALID_GL_OBJECT
)
CASE_CL_CONSTANT
(
CL_INVALID_BUFFER_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_MIP_LEVEL
)
CASE_CL_CONSTANT
(
CL_INVALID_GLOBAL_WORK_SIZE
)
CASE_CL_CONSTANT
(
CL_INVALID_PROPERTY
)
default:
return
"UNKNOWN ERROR CODE"
;
}
#undef CASE_CL_CONSTANT
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/opencl/cl_tool.h
0 → 100644
浏览文件 @
bdf5e1fd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <CL/cl.h>
namespace
paddle
{
namespace
lite
{
const
char
*
opencl_error_to_str
(
cl_int
error
);
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__); \
}
}
// namespace lite
}
// namespace paddle
paddle/fluid/lite/tools/build.sh
浏览文件 @
bdf5e1fd
...
...
@@ -25,6 +25,12 @@ function cmake_x86 {
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
}
function
cmake_cl
{
prepare_for_codegen
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
-DLITE_WITH_CL
=
ON
${
common_flags
}
}
# This method is only called in CI.
function
cmake_x86_for_CI
{
prepare_for_codegen
# fake an empty __generated_code__.cc to pass cmake.
...
...
@@ -422,6 +428,10 @@ function main {
cmake_x86
shift
;;
cmake_cl
)
cmake_cl
shift
;;
cmake_cuda
)
cmake_cuda
shift
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录